yuto0o committed on
Commit 8f4b227 · 1 Parent(s): e1d787e

Put it back to normal

Files changed (10)
  1. .gitattributes copy +35 -0
  2. Dockerfile +13 -22
  3. README_2.md +2 -14
  4. config/settings.py +2 -2
  5. ml_api/apps.py +15 -0
  6. ml_api/model_loader.py +27 -22
  7. ml_api/views.py +63 -14
  8. pyproject.toml +5 -7
  9. requirements.txt +109 -36
  10. uv.lock +0 -0
.gitattributes copy ADDED
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
Dockerfile CHANGED
@@ -3,41 +3,32 @@ FROM ghcr.io/astral-sh/uv:0.9.2-python3.12-bookworm-slim
 
 WORKDIR /code
 
-# --- [Fix] Install build tools (gcc/g++) ---
-# 1. Install the tools needed to build llama.cpp
-RUN apt-get update && apt-get install -y --no-install-recommends \
-    build-essential \
-    cmake \
-    gcc \
-    g++ \
-    && rm -rf /var/lib/apt/lists/*
-# ----------------------------------------
-
-# Copy requirements.txt
-COPY requirements.txt .
-
 # Create the user (uid=1000)
 RUN useradd -m -u 1000 user
 
-
-# Install llama-cpp-python
-# CMAKE_ARGS="-DGGML_NATIVE=ON" optimizes the build for the host CPU and speeds it up
-
-RUN CMAKE_ARGS="-DGGML_NATIVE=ON" uv pip install --system --no-cache -r requirements.txt
-
-# The other libraries
+# Copy requirements.txt and install dependencies
+COPY requirements.txt .
 RUN uv pip install --system --no-cache -r requirements.txt
 
-# Cache directory
+# Cache directory settings
 ENV HF_HOME=/code/cache
+# Create the cache folder and give user ownership of it
 RUN mkdir -p /code/cache && chown -R user:user /code/cache
+
+# --- [Key fix] ---
+# Change ownership of the /code directory itself to user.
+# Without this, user cannot create new files (db.sqlite3) inside this folder.
 RUN chown -R user:user /code
+# --------------------
 
-# Switch user
+# Switch to the non-root user from here on
 USER user
+
 ENV HOME=/home/user \
     PATH=/home/user/.local/bin:$PATH
 
+# Copy the project code owned by user
 COPY --chown=user . /code
 
+# Startup command (migrate -> runserver)
 CMD ["sh", "-c", "python manage.py migrate && python manage.py runserver 0.0.0.0:7860"]
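The comments above hinge on file permissions: the runtime user (uid 1000) must be able to create db.sqlite3 under /code and write downloaded weights under HF_HOME. A minimal sanity-check sketch, not part of the commit, that could be run inside the container to confirm this; the script name is hypothetical and the paths are taken from the Dockerfile above.

```
# check_permissions.py (hypothetical helper, not in this commit)
# Confirms the two writes the Dockerfile comments describe:
# creating db.sqlite3 under /code and caching models under HF_HOME.
import os
import tempfile


def can_write(path: str) -> bool:
    """Return True if the current user can create a file inside `path`."""
    try:
        with tempfile.NamedTemporaryFile(dir=path):
            return True
    except OSError:
        return False


if __name__ == "__main__":
    hf_home = os.environ.get("HF_HOME", "/code/cache")
    print("uid:", os.getuid())
    print("/code writable:", can_write("/code"))
    print(hf_home, "writable:", can_write(hf_home))
```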
README_2.md CHANGED
@@ -67,26 +67,14 @@ docker run -p 7860:7860 my-django-bot
 
 ```
 
-When you've only rewritten views etc.: it seems you don't need to re-download every time
+## When you've only rewritten views etc.: it seems you don't need to re-download every time
 
 ```
 docker run -p 7860:7860 \
-  -v ~/.cache/huggingface:/code/cache \
+  -v ~/.cache/huggingface:/root/.cache/huggingface \
   my-django-bot
 ```
 
-When it is running under Docker:
-
-```
-Open http://localhost:7860/api/chat/ and enter
-{"text": "こんにちは!元気ですか?"}
-# write something like this and click POST, and a reply comes back
-
-
-You can also type the following in a terminal
-curl -X POST http://localhost:7860/api/chat/ -H "Content-Type: application/json" -d '{"text": "こんにちは!元気ですか?"}'
-```
-
 ## Freeing up disk space
 
 ```
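The mount above works because the Hugging Face libraries resolve their cache directory from HF_HOME, falling back to ~/.cache/huggingface. A small sketch, not part of the commit and with a hypothetical file name, that prints where cached model files will land so the volume mount can be pointed at the matching path:

```
# where_is_the_cache.py (hypothetical helper, not in this commit)
# Prints the directory transformers / huggingface_hub will use for
# downloaded model files; mounting this path as a volume is what lets
# the container reuse weights instead of re-downloading them.
import os

hf_home = os.environ.get("HF_HOME", os.path.expanduser("~/.cache/huggingface"))
print("HF_HOME:", hf_home)
print("Hub cache:", os.path.join(hf_home, "hub"))
```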
config/settings.py CHANGED
@@ -25,8 +25,8 @@ SECRET_KEY = os.environ.get("SECRET_KEY", "dev-key")
 
 
 # SECURITY WARNING: don't run with debug turned on in production!
-DEBUG = True
-# "True"
+DEBUG = False
+"True"
 ALLOWED_HOSTS = ["huggingface.co", ".hf.space", "localhost", "127.0.0.1"]
 
 
ml_api/apps.py CHANGED
@@ -1,3 +1,18 @@
+# class MlApiConfig(AppConfig):
+#     default_auto_field = "django.db.models.BigAutoField"
+#     name = "ml_api"
+#     tokenizer = None
+#     model = None
+#     def ready(self):
+#         if MlApiConfig.model is None:
+#             model_name = "rinna/japanese-gpt-neox-small"
+#             print("Loading Model... (Download starts on first run)")
+#             # Load the tokenizer and the model
+#             MlApiConfig.tokenizer = AutoTokenizer.from_pretrained(
+#                 model_name, use_fast=False
+#             )
+#             MlApiConfig.model = AutoModelForCausalLM.from_pretrained(model_name)
+#             print("Model Loaded!")
 from django.apps import AppConfig
 
 
ml_api/model_loader.py CHANGED
@@ -1,31 +1,36 @@
-from huggingface_hub import hf_hub_download
-from llama_cpp import Llama
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
 
-# Global variable
-_llm = None
+# Kept as global variables
+_model = None
+_tokenizer = None
 
 
 def get_model():
-    global _llm
+    global _model, _tokenizer
 
-    if _llm is None:
-        print("Loading Qwen2.5-7B-Instruct (GGUF)...")
+    if _model is None:
+        print("Loading Qwen2.5-3B-Instruct... (Lazy Loading)")
+        model_name = "Qwen/Qwen2.5-3B-Instruct"
+        # model_name = "Qwen/Qwen2.5-3B-Instruct"  # slow
 
-        # 1. Download the GGUF file from Hugging Face (it gets cached)
-        model_path = hf_hub_download(
-            repo_id="bartowski/Qwen2.5-7B-Instruct-GGUF",
-            filename="Qwen2.5-7B-Instruct-Q4_K_M.gguf",
-        )
+        _tokenizer = AutoTokenizer.from_pretrained(model_name)
 
-        # 2. Initialize the llama.cpp engine
-        # n_ctx: context length (how much conversation it remembers); 4096-8192 is safe on the free tier
-        # n_threads: the Spaces free tier has 2 vCPUs, so set this to 2
-        _llm = Llama(
-            model_path=model_path,
-            n_ctx=4096,
-            n_threads=2,
-            verbose=True,  # show logs
+        # --- Fix: make the dtype decision logic safer ---
+        dtype = torch.float32  # default to float32 (about 12 GB; should fit in 16 GB of RAM)
+
+        if torch.cuda.is_available():
+            dtype = torch.bfloat16
+        # check that torch.cpu actually has is_bf16_supported before calling it
+        elif hasattr(torch.cpu, "is_bf16_supported") and torch.cpu.is_bf16_supported():
+            dtype = torch.bfloat16
+        # -----------------------------------------------
+
+        _model = AutoModelForCausalLM.from_pretrained(
+            model_name,
+            torch_dtype=dtype,
+            trust_remote_code=True,
         )
-        print("Model Loaded!")
+        print(f"Model Loaded! (dtype: {dtype})")
 
-    return _llm
+    return _model, _tokenizer
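Since get_model() now lazy-loads the Transformers model and tokenizer and caches them in module-level globals, callers can invoke it repeatedly and only pay the load cost once. A standalone usage sketch, not part of the commit; it assumes it is run from the project root with the same dependencies installed, and the script name is hypothetical:

```
# smoke_test.py (hypothetical, not in this commit)
# Calls get_model() twice: the first call triggers the download/load,
# the second returns the cached objects immediately.
import torch
from ml_api.model_loader import get_model

model, tokenizer = get_model()   # loads Qwen2.5-3B-Instruct on first call
model2, _ = get_model()          # reuses the cached instance
assert model is model2

prompt = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Hello!"}],
    tokenize=False,
    add_generation_prompt=True,
)
inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
with torch.no_grad():
    out = model.generate(**inputs, max_new_tokens=32)
# print only the newly generated tokens, not the prompt
print(tokenizer.decode(out[0][inputs.input_ids.shape[1]:], skip_special_tokens=True))
```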
ml_api/views.py CHANGED
@@ -1,3 +1,4 @@
+import torch
 from rest_framework.response import Response
 from rest_framework.views import APIView
 
@@ -8,29 +9,77 @@ class ChatView(APIView):
     def post(self, request):
         user_input = request.data.get("text", "")
 
-        # Get the model (a llama_cpp.Llama instance)
-        llm = get_model()
+        # Call it here (the load only runs on the first request)
+        model, tokenizer = get_model()
 
-        # 1. Build the conversation history
+        # 1. Build the conversation format
         messages = [
             {
                 "role": "system",
-                "content": "あなたは親切なAI「qwen」です。日本語で簡潔に答えてください。",
+                "content": "あなたは親切でフレンドリーなAIアシスタントです。「qwen」と呼ばれています。自然な日本語で簡潔に返事をしてください。",
             },
             {"role": "user", "content": user_input},
         ]
 
-        # 2. Run inference (using create_chat_completion)
-        # This does tokenize -> generate -> decode in a single call
-        output = llm.create_chat_completion(
-            messages=messages,
-            max_tokens=256,  # maximum number of tokens to generate
-            temperature=0.7,  # creativity
-            top_p=0.9,
+        # 2. Convert to a prompt
+        text = tokenizer.apply_chat_template(
+            messages, tokenize=False, add_generation_prompt=True
         )
 
-        # 3. Extract the response
-        # It comes back in the same format as the OpenAI API
-        response_text = output["choices"][0]["message"]["content"]
+        inputs = tokenizer([text], return_tensors="pt").to(model.device)
+
+        # 3. Generate
+        with torch.no_grad():
+            generated_ids = model.generate(
+                **inputs,
+                max_new_tokens=128,
+                do_sample=True,
+                temperature=0.7,
+                top_p=0.9,
+            )
+
+        # 4. Decode
+        generated_ids = [
+            output_ids[len(input_ids) :]
+            for input_ids, output_ids in zip(inputs.input_ids, generated_ids)
+        ]
+        response_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[
+            0
+        ]
 
         return Response({"result": response_text})
+
+
+# class ChatView(APIView):
+#     def post(self, request):
+#         input_text = request.data.get("text", "")
+
+#         # Simple prompt engineering:
+#         # a format that makes the model treat this as a conversation
+#         prompt = f"ユーザー: {input_text}\nシステム: "
+
+#         app_config = apps.get_app_config("ml_api")
+#         tokenizer = app_config.tokenizer
+#         model = app_config.model
+
+#         # Tokenize
+#         inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False)
+
+#         # Generate
+#         with torch.no_grad():
+#             output_ids = model.generate(
+#                 inputs["input_ids"],
+#                 max_new_tokens=50,  # length of the reply
+#                 do_sample=True,
+#                 temperature=0.7,  # creativity (higher = more random)
+#                 pad_token_id=tokenizer.pad_token_id,
+#                 eos_token_id=tokenizer.eos_token_id,
+#             )
+
+#         # Decode
+#         output = tokenizer.decode(output_ids.tolist()[0])
+
+#         # Strip the prompt part and keep only the reply
+#         response_text = output.split("システム: ")[-1].strip()
+
+#         return Response({"result": response_text})
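With this view in place, the endpoint accepts a JSON body with a "text" field and returns {"result": ...}. A minimal client sketch, not part of the commit: it uses requests (already pulled in transitively per the new requirements.txt) and the local URL from the README's docker run example; the file name is hypothetical.

```
# chat_client.py (hypothetical, not in this commit)
# Posts a message to the ChatView endpoint and prints the reply.
import requests

resp = requests.post(
    "http://localhost:7860/api/chat/",
    json={"text": "こんにちは!元気ですか?"},
    timeout=300,  # the first request may trigger the model download/load
)
resp.raise_for_status()
print(resp.json()["result"])
```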
pyproject.toml CHANGED
@@ -5,13 +5,11 @@ description = "Add your description here"
 readme = "README.md"
 requires-python = ">=3.12"
 dependencies = [
+    "accelerate>=1.12.0",
     "django>=6.0",
     "djangorestframework>=3.16.1",
-    "huggingface-hub>=1.2.1",
-    "llama-cpp-python>=0.3.16",
-]
-
-[dependency-groups]
-dev = [
-    "ty>=0.0.1a32",
+    "protobuf>=6.33.2",
+    "sentencepiece>=0.2.1",
+    "torch>=2.9.1",
+    "transformers>=4.57.3",
 ]
requirements.txt CHANGED
@@ -1,17 +1,13 @@
 # This file was autogenerated by uv via the following command:
 #    uv pip compile pyproject.toml -o requirements.txt
-anyio==4.12.0
-    # via httpx
+accelerate==1.12.0
+    # via django-ai-chat (pyproject.toml)
 asgiref==3.11.0
     # via django
 certifi==2025.11.12
-    # via
-    #   httpcore
-    #   httpx
-click==8.3.1
-    # via typer-slim
-diskcache==5.6.3
-    # via llama-cpp-python
+    # via requests
+charset-normalizer==3.4.4
+    # via requests
 django==6.0
     # via
     #   django-ai-chat (pyproject.toml)
@@ -19,46 +15,123 @@ django==6.0
 djangorestframework==3.16.1
     # via django-ai-chat (pyproject.toml)
 filelock==3.20.0
-    # via huggingface-hub
+    # via
+    #   huggingface-hub
+    #   torch
+    #   transformers
 fsspec==2025.12.0
-    # via huggingface-hub
-h11==0.16.0
-    # via httpcore
+    # via
+    #   huggingface-hub
+    #   torch
 hf-xet==1.2.0
     # via huggingface-hub
-httpcore==1.0.9
-    # via httpx
-httpx==0.28.1
-    # via huggingface-hub
-huggingface-hub==1.2.1
-    # via django-ai-chat (pyproject.toml)
-idna==3.11
+huggingface-hub==0.36.0
     # via
-    #   anyio
-    #   httpx
+    #   accelerate
+    #   tokenizers
+    #   transformers
+idna==3.11
+    # via requests
 jinja2==3.1.6
-    # via llama-cpp-python
-llama-cpp-python==0.3.16
-    # via django-ai-chat (pyproject.toml)
+    # via torch
 markupsafe==3.0.3
     # via jinja2
+mpmath==1.3.0
+    # via sympy
+networkx==3.6
+    # via torch
 numpy==2.3.5
-    # via llama-cpp-python
+    # via
+    #   accelerate
+    #   transformers
+nvidia-cublas-cu12==12.8.4.1
+    # via
+    #   nvidia-cudnn-cu12
+    #   nvidia-cusolver-cu12
+    #   torch
+nvidia-cuda-cupti-cu12==12.8.90
+    # via torch
+nvidia-cuda-nvrtc-cu12==12.8.93
+    # via torch
+nvidia-cuda-runtime-cu12==12.8.90
+    # via torch
+nvidia-cudnn-cu12==9.10.2.21
+    # via torch
+nvidia-cufft-cu12==11.3.3.83
+    # via torch
+nvidia-cufile-cu12==1.13.1.3
+    # via torch
+nvidia-curand-cu12==10.3.9.90
+    # via torch
+nvidia-cusolver-cu12==11.7.3.90
+    # via torch
+nvidia-cusparse-cu12==12.5.8.93
+    # via
+    #   nvidia-cusolver-cu12
+    #   torch
+nvidia-cusparselt-cu12==0.7.1
+    # via torch
+nvidia-nccl-cu12==2.27.5
+    # via torch
+nvidia-nvjitlink-cu12==12.8.93
+    # via
+    #   nvidia-cufft-cu12
+    #   nvidia-cusolver-cu12
+    #   nvidia-cusparse-cu12
+    #   torch
+nvidia-nvshmem-cu12==3.3.20
+    # via torch
+nvidia-nvtx-cu12==12.8.90
+    # via torch
 packaging==25.0
-    # via huggingface-hub
+    # via
+    #   accelerate
+    #   huggingface-hub
+    #   transformers
+protobuf==6.33.2
+    # via django-ai-chat (pyproject.toml)
+psutil==7.1.3
+    # via accelerate
 pyyaml==6.0.3
-    # via huggingface-hub
-shellingham==1.5.4
-    # via huggingface-hub
+    # via
+    #   accelerate
+    #   huggingface-hub
+    #   transformers
+regex==2025.11.3
+    # via transformers
+requests==2.32.5
+    # via
+    #   huggingface-hub
+    #   transformers
+safetensors==0.7.0
+    # via
+    #   accelerate
+    #   transformers
+sentencepiece==0.2.1
+    # via django-ai-chat (pyproject.toml)
+setuptools==80.9.0
+    # via torch
 sqlparse==0.5.4
     # via django
+sympy==1.14.0
+    # via torch
+tokenizers==0.22.1
+    # via transformers
+torch==2.9.1
+    # via
+    #   django-ai-chat (pyproject.toml)
+    #   accelerate
 tqdm==4.67.1
-    # via huggingface-hub
-typer-slim==0.20.0
-    # via huggingface-hub
+    # via
+    #   huggingface-hub
+    #   transformers
+transformers==4.57.3
+    # via django-ai-chat (pyproject.toml)
+triton==3.5.1
+    # via torch
 typing-extensions==4.15.0
     # via
-    #   anyio
     #   huggingface-hub
-    #   llama-cpp-python
-    #   typer-slim
+    #   torch
+urllib3==2.6.0
+    # via requests
uv.lock CHANGED
The diff for this file is too large to render. See raw diff