yuto0o committed on
Commit 8f4b227 · 1 Parent(s): e1d787e

Put it back to normal

Files changed (10)
  1. .gitattributes copy +35 -0
  2. Dockerfile +13 -22
  3. README_2.md +2 -14
  4. config/settings.py +2 -2
  5. ml_api/apps.py +15 -0
  6. ml_api/model_loader.py +27 -22
  7. ml_api/views.py +63 -14
  8. pyproject.toml +5 -7
  9. requirements.txt +109 -36
  10. uv.lock +0 -0
.gitattributes copy ADDED
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
Dockerfile CHANGED
@@ -3,41 +3,32 @@ FROM ghcr.io/astral-sh/uv:0.9.2-python3.12-bookworm-slim
 
 WORKDIR /code
 
-# --- [Fix] Install build tools (gcc/g++) ---
-# 1. Install the tools needed to build llama.cpp
-RUN apt-get update && apt-get install -y --no-install-recommends \
-    build-essential \
-    cmake \
-    gcc \
-    g++ \
-    && rm -rf /var/lib/apt/lists/*
-# ----------------------------------------
-
-# Copy requirements.txt
-COPY requirements.txt .
-
 # Create the user (uid=1000)
 RUN useradd -m -u 1000 user
 
-
-# Install llama-cpp-python
-# CMAKE_ARGS="-DGGML_NATIVE=ON" optimizes the build for the host CPU and speeds it up
-
-RUN CMAKE_ARGS="-DGGML_NATIVE=ON" uv pip install --system --no-cache -r requirements.txt
-
-# The other libraries
+# Copy requirements.txt and install dependencies
+COPY requirements.txt .
 RUN uv pip install --system --no-cache -r requirements.txt
 
-# Cache directory
+# Cache directory settings
 ENV HF_HOME=/code/cache
+# Create the cache folder and give user ownership of it
 RUN mkdir -p /code/cache && chown -R user:user /code/cache
+
+# --- [Key fix] ---
+# Change ownership of the /code directory itself to user.
+# Without this, user cannot create new files (db.sqlite3) inside this folder.
 RUN chown -R user:user /code
+# --------------------
 
-# Switch user
+# Switch to the non-root user from here on
 USER user
+
 ENV HOME=/home/user \
     PATH=/home/user/.local/bin:$PATH
 
+# Copy the project code owned by user
 COPY --chown=user . /code
 
+# Startup command (migrate -> runserver)
 CMD ["sh", "-c", "python manage.py migrate && python manage.py runserver 0.0.0.0:7860"]
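The comments above hinge on file permissions: the runtime user (uid 1000) must be able to create db.sqlite3 under /code and write downloaded weights under HF_HOME. A minimal sanity-check sketch, not part of the commit, that could be run inside the container to confirm this; the script name is hypothetical and the paths are taken from the Dockerfile above.

```
# check_permissions.py (hypothetical helper, not in this commit)
# Confirms the two writes the Dockerfile comments describe:
# creating db.sqlite3 under /code and caching models under HF_HOME.
import os
import tempfile


def can_write(path: str) -> bool:
    """Return True if the current user can create a file inside `path`."""
    try:
        with tempfile.NamedTemporaryFile(dir=path):
            return True
    except OSError:
        return False


if __name__ == "__main__":
    hf_home = os.environ.get("HF_HOME", "/code/cache")
    print("uid:", os.getuid())
    print("/code writable:", can_write("/code"))
    print(hf_home, "writable:", can_write(hf_home))
```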
README_2.md CHANGED
@@ -67,26 +67,14 @@ docker run -p 7860:7860 my-django-bot
 
 ```
 
-When you've only rewritten views etc.: it seems you don't need to re-download every time
+## When you've only rewritten views etc.: it seems you don't need to re-download every time
 
 ```
 docker run -p 7860:7860 \
-  -v ~/.cache/huggingface:/code/cache \
+  -v ~/.cache/huggingface:/root/.cache/huggingface \
   my-django-bot
 ```
 
-When it is running under Docker:
-
-```
-Open http://localhost:7860/api/chat/ and enter
-{"text": "こんにちは!元気ですか?"}
-# write something like this and click POST, and a reply comes back
-
-
-You can also type the following in a terminal
-curl -X POST http://localhost:7860/api/chat/ -H "Content-Type: application/json" -d '{"text": "こんにちは!元気ですか?"}'
-```
-
 ## Freeing up disk space
 
 ```
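The mount above works because the Hugging Face libraries resolve their cache directory from HF_HOME, falling back to ~/.cache/huggingface. A small sketch, not part of the commit and with a hypothetical file name, that prints where cached model files will land so the volume mount can be pointed at the matching path:

```
# where_is_the_cache.py (hypothetical helper, not in this commit)
# Prints the directory transformers / huggingface_hub will use for
# downloaded model files; mounting this path as a volume is what lets
# the container reuse weights instead of re-downloading them.
import os

hf_home = os.environ.get("HF_HOME", os.path.expanduser("~/.cache/huggingface"))
print("HF_HOME:", hf_home)
print("Hub cache:", os.path.join(hf_home, "hub"))
```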
config/settings.py CHANGED
@@ -25,8 +25,8 @@ SECRET_KEY = os.environ.get("SECRET_KEY", "dev-key")
 
 
 # SECURITY WARNING: don't run with debug turned on in production!
-DEBUG = True
-# "True"
+DEBUG = False
+"True"
 ALLOWED_HOSTS = ["huggingface.co", ".hf.space", "localhost", "127.0.0.1"]
 
 
ml_api/apps.py CHANGED
@@ -1,3 +1,18 @@
+# class MlApiConfig(AppConfig):
+#     default_auto_field = "django.db.models.BigAutoField"
+#     name = "ml_api"
+#     tokenizer = None
+#     model = None
+#     def ready(self):
+#         if MlApiConfig.model is None:
+#             model_name = "rinna/japanese-gpt-neox-small"
+#             print("Loading Model... (Download starts on first run)")
+#             # Load the tokenizer and the model
+#             MlApiConfig.tokenizer = AutoTokenizer.from_pretrained(
+#                 model_name, use_fast=False
+#             )
+#             MlApiConfig.model = AutoModelForCausalLM.from_pretrained(model_name)
+#             print("Model Loaded!")
 from django.apps import AppConfig
 
 
ml_api/model_loader.py CHANGED
@@ -1,31 +1,36 @@
-from huggingface_hub import hf_hub_download
-from llama_cpp import Llama
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
 
-# Global variable
-_llm = None
+# Kept as global variables
+_model = None
+_tokenizer = None
 
 
 def get_model():
-    global _llm
+    global _model, _tokenizer
 
-    if _llm is None:
-        print("Loading Qwen2.5-7B-Instruct (GGUF)...")
+    if _model is None:
+        print("Loading Qwen2.5-3B-Instruct... (Lazy Loading)")
+        model_name = "Qwen/Qwen2.5-3B-Instruct"
+        # model_name = "Qwen/Qwen2.5-3B-Instruct"  # slow
 
-        # 1. Download the GGUF file from Hugging Face (it gets cached)
-        model_path = hf_hub_download(
-            repo_id="bartowski/Qwen2.5-7B-Instruct-GGUF",
-            filename="Qwen2.5-7B-Instruct-Q4_K_M.gguf",
-        )
+        _tokenizer = AutoTokenizer.from_pretrained(model_name)
 
-        # 2. Initialize the llama.cpp engine
-        # n_ctx: context length (how much conversation it remembers); 4096-8192 is safe on the free tier
-        # n_threads: the Spaces free tier has 2 vCPUs, so set this to 2
-        _llm = Llama(
-            model_path=model_path,
-            n_ctx=4096,
-            n_threads=2,
-            verbose=True,  # show logs
+        # --- Fix: make the dtype decision logic safer ---
+        dtype = torch.float32  # default to float32 (about 12 GB; should fit in 16 GB of RAM)
+
+        if torch.cuda.is_available():
+            dtype = torch.bfloat16
+        # check that torch.cpu actually has is_bf16_supported before calling it
+        elif hasattr(torch.cpu, "is_bf16_supported") and torch.cpu.is_bf16_supported():
+            dtype = torch.bfloat16
+        # -----------------------------------------------
+
+        _model = AutoModelForCausalLM.from_pretrained(
+            model_name,
+            torch_dtype=dtype,
+            trust_remote_code=True,
         )
-        print("Model Loaded!")
+        print(f"Model Loaded! (dtype: {dtype})")
 
-    return _llm
+    return _model, _tokenizer
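Since get_model() now lazy-loads the Transformers model and tokenizer and caches them in module-level globals, callers can invoke it repeatedly and only pay the load cost once. A standalone usage sketch, not part of the commit; it assumes it is run from the project root with the same dependencies installed, and the script name is hypothetical:

```
# smoke_test.py (hypothetical, not in this commit)
# Calls get_model() twice: the first call triggers the download/load,
# the second returns the cached objects immediately.
import torch
from ml_api.model_loader import get_model

model, tokenizer = get_model()   # loads Qwen2.5-3B-Instruct on first call
model2, _ = get_model()          # reuses the cached instance
assert model is model2

prompt = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Hello!"}],
    tokenize=False,
    add_generation_prompt=True,
)
inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
with torch.no_grad():
    out = model.generate(**inputs, max_new_tokens=32)
# print only the newly generated tokens, not the prompt
print(tokenizer.decode(out[0][inputs.input_ids.shape[1]:], skip_special_tokens=True))
```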
ml_api/views.py CHANGED
@@ -1,3 +1,4 @@
+import torch
 from rest_framework.response import Response
 from rest_framework.views import APIView
 
@@ -8,29 +9,77 @@ class ChatView(APIView):
     def post(self, request):
         user_input = request.data.get("text", "")
 
-        # Get the model (a llama_cpp.Llama instance)
-        llm = get_model()
+        # Call it here (the load only runs on the first request)
+        model, tokenizer = get_model()
 
-        # 1. Build the conversation history
+        # 1. Build the conversation format
         messages = [
             {
                 "role": "system",
-                "content": "あなたは親切なAI「qwen」です。日本語で簡潔に答えてください。",
+                "content": "あなたは親切でフレンドリーなAIアシスタントです。「qwen」と呼ばれています。自然な日本語で簡潔に返事をしてください。",
             },
             {"role": "user", "content": user_input},
         ]
 
-        # 2. Run inference (using create_chat_completion)
-        # This does tokenize -> generate -> decode in a single call
-        output = llm.create_chat_completion(
-            messages=messages,
-            max_tokens=256,  # maximum number of tokens to generate
-            temperature=0.7,  # creativity
-            top_p=0.9,
+        # 2. Convert to a prompt
+        text = tokenizer.apply_chat_template(
+            messages, tokenize=False, add_generation_prompt=True
         )
 
-        # 3. Extract the response
-        # It comes back in the same format as the OpenAI API
-        response_text = output["choices"][0]["message"]["content"]
+        inputs = tokenizer([text], return_tensors="pt").to(model.device)
+
+        # 3. Generate
+        with torch.no_grad():
+            generated_ids = model.generate(
+                **inputs,
+                max_new_tokens=128,
+                do_sample=True,
+                temperature=0.7,
+                top_p=0.9,
+            )
+
+        # 4. Decode
+        generated_ids = [
+            output_ids[len(input_ids) :]
+            for input_ids, output_ids in zip(inputs.input_ids, generated_ids)
+        ]
+        response_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[
+            0
+        ]
 
         return Response({"result": response_text})
+
+
+# class ChatView(APIView):
+#     def post(self, request):
+#         input_text = request.data.get("text", "")
+
+#         # Simple prompt engineering:
+#         # a format that makes the model treat this as a conversation
+#         prompt = f"ユーザー: {input_text}\nシステム: "
+
+#         app_config = apps.get_app_config("ml_api")
+#         tokenizer = app_config.tokenizer
+#         model = app_config.model
+
+#         # Tokenize
+#         inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False)
+
+#         # Generate
+#         with torch.no_grad():
+#             output_ids = model.generate(
+#                 inputs["input_ids"],
+#                 max_new_tokens=50,  # length of the reply
+#                 do_sample=True,
+#                 temperature=0.7,  # creativity (higher = more random)
+#                 pad_token_id=tokenizer.pad_token_id,
+#                 eos_token_id=tokenizer.eos_token_id,
+#             )
+
+#         # Decode
+#         output = tokenizer.decode(output_ids.tolist()[0])
+
+#         # Strip the prompt part and keep only the reply
+#         response_text = output.split("システム: ")[-1].strip()
+
+#         return Response({"result": response_text})
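With this view in place, the endpoint accepts a JSON body with a "text" field and returns {"result": ...}. A minimal client sketch, not part of the commit: it uses requests (already pulled in transitively per the new requirements.txt) and the local URL from the README's docker run example; the file name is hypothetical.

```
# chat_client.py (hypothetical, not in this commit)
# Posts a message to the ChatView endpoint and prints the reply.
import requests

resp = requests.post(
    "http://localhost:7860/api/chat/",
    json={"text": "こんにちは!元気ですか?"},
    timeout=300,  # the first request may trigger the model download/load
)
resp.raise_for_status()
print(resp.json()["result"])
```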
pyproject.toml CHANGED
@@ -5,13 +5,11 @@ description = "Add your description here"
 readme = "README.md"
 requires-python = ">=3.12"
 dependencies = [
+    "accelerate>=1.12.0",
     "django>=6.0",
     "djangorestframework>=3.16.1",
-    "huggingface-hub>=1.2.1",
-    "llama-cpp-python>=0.3.16",
-]
-
-[dependency-groups]
-dev = [
-    "ty>=0.0.1a32",
+    "protobuf>=6.33.2",
+    "sentencepiece>=0.2.1",
+    "torch>=2.9.1",
+    "transformers>=4.57.3",
 ]
requirements.txt CHANGED
@@ -1,17 +1,13 @@
 # This file was autogenerated by uv via the following command:
 #    uv pip compile pyproject.toml -o requirements.txt
-anyio==4.12.0
-    # via httpx
+accelerate==1.12.0
+    # via django-ai-chat (pyproject.toml)
 asgiref==3.11.0
     # via django
 certifi==2025.11.12
-    # via
-    #   httpcore
-    #   httpx
-click==8.3.1
-    # via typer-slim
-diskcache==5.6.3
-    # via llama-cpp-python
+    # via requests
+charset-normalizer==3.4.4
+    # via requests
 django==6.0
     # via
     #   django-ai-chat (pyproject.toml)
@@ -19,46 +15,123 @@ django==6.0
 djangorestframework==3.16.1
     # via django-ai-chat (pyproject.toml)
 filelock==3.20.0
-    # via huggingface-hub
+    # via
+    #   huggingface-hub
+    #   torch
+    #   transformers
 fsspec==2025.12.0
-    # via huggingface-hub
-h11==0.16.0
-    # via httpcore
+    # via
+    #   huggingface-hub
+    #   torch
 hf-xet==1.2.0
     # via huggingface-hub
-httpcore==1.0.9
-    # via httpx
-httpx==0.28.1
-    # via huggingface-hub
-huggingface-hub==1.2.1
-    # via django-ai-chat (pyproject.toml)
-idna==3.11
+huggingface-hub==0.36.0
     # via
-    #   anyio
-    #   httpx
+    #   accelerate
+    #   tokenizers
+    #   transformers
+idna==3.11
+    # via requests
 jinja2==3.1.6
-    # via llama-cpp-python
-llama-cpp-python==0.3.16
-    # via django-ai-chat (pyproject.toml)
+    # via torch
 markupsafe==3.0.3
     # via jinja2
+mpmath==1.3.0
+    # via sympy
+networkx==3.6
+    # via torch
 numpy==2.3.5
-    # via llama-cpp-python
+    # via
+    #   accelerate
+    #   transformers
+nvidia-cublas-cu12==12.8.4.1
+    # via
+    #   nvidia-cudnn-cu12
+    #   nvidia-cusolver-cu12
+    #   torch
+nvidia-cuda-cupti-cu12==12.8.90
+    # via torch
+nvidia-cuda-nvrtc-cu12==12.8.93
+    # via torch
+nvidia-cuda-runtime-cu12==12.8.90
+    # via torch
+nvidia-cudnn-cu12==9.10.2.21
+    # via torch
+nvidia-cufft-cu12==11.3.3.83
+    # via torch
+nvidia-cufile-cu12==1.13.1.3
+    # via torch
+nvidia-curand-cu12==10.3.9.90
+    # via torch
+nvidia-cusolver-cu12==11.7.3.90
+    # via torch
+nvidia-cusparse-cu12==12.5.8.93
+    # via
+    #   nvidia-cusolver-cu12
+    #   torch
+nvidia-cusparselt-cu12==0.7.1
+    # via torch
+nvidia-nccl-cu12==2.27.5
+    # via torch
+nvidia-nvjitlink-cu12==12.8.93
+    # via
+    #   nvidia-cufft-cu12
+    #   nvidia-cusolver-cu12
+    #   nvidia-cusparse-cu12
+    #   torch
+nvidia-nvshmem-cu12==3.3.20
+    # via torch
+nvidia-nvtx-cu12==12.8.90
+    # via torch
 packaging==25.0
-    # via huggingface-hub
+    # via
+    #   accelerate
+    #   huggingface-hub
+    #   transformers
+protobuf==6.33.2
+    # via django-ai-chat (pyproject.toml)
+psutil==7.1.3
+    # via accelerate
 pyyaml==6.0.3
-    # via huggingface-hub
-shellingham==1.5.4
-    # via huggingface-hub
+    # via
+    #   accelerate
+    #   huggingface-hub
+    #   transformers
+regex==2025.11.3
+    # via transformers
+requests==2.32.5
+    # via
+    #   huggingface-hub
+    #   transformers
+safetensors==0.7.0
+    # via
+    #   accelerate
+    #   transformers
+sentencepiece==0.2.1
+    # via django-ai-chat (pyproject.toml)
+setuptools==80.9.0
+    # via torch
 sqlparse==0.5.4
     # via django
+sympy==1.14.0
+    # via torch
+tokenizers==0.22.1
+    # via transformers
+torch==2.9.1
+    # via
+    #   django-ai-chat (pyproject.toml)
+    #   accelerate
 tqdm==4.67.1
-    # via huggingface-hub
-typer-slim==0.20.0
-    # via huggingface-hub
+    # via
+    #   huggingface-hub
+    #   transformers
+transformers==4.57.3
+    # via django-ai-chat (pyproject.toml)
+triton==3.5.1
+    # via torch
 typing-extensions==4.15.0
     # via
-    #   anyio
     #   huggingface-hub
-    #   llama-cpp-python
-    #   typer-slim
+    #   torch
+urllib3==2.6.0
+    # via requests
uv.lock CHANGED
The diff for this file is too large to render. See raw diff