import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Kept as module-level globals so the model is loaded only once (lazy loading)
_model = None
_tokenizer = None


def get_model():
    global _model, _tokenizer
    if _model is None:
        print("Loading Qwen2.5-3B-Instruct... (Lazy Loading)")
        model_name = "Qwen/Qwen2.5-3B-Instruct"
        # model_name = "Qwen/Qwen2.5-1.5B-Instruct"  # faster

        _tokenizer = AutoTokenizer.from_pretrained(model_name)

        # --- Fix: make the dtype selection logic safe ---
        dtype = torch.float32  # default to float32 (roughly 12 GB, should fit in 16 GB of RAM)
        if torch.cuda.is_available():
            dtype = torch.bfloat16
        # Only call torch.cpu.is_bf16_supported if it actually exists in this PyTorch build
        elif hasattr(torch.cpu, "is_bf16_supported") and torch.cpu.is_bf16_supported():
            dtype = torch.bfloat16
        # ------------------------------------------------

        _model = AutoModelForCausalLM.from_pretrained(
            model_name,
            dtype=dtype,  # older transformers versions expect torch_dtype= instead
            trust_remote_code=True,
        )
        print(f"Model Loaded! (dtype: {dtype})")
    return _model, _tokenizer
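
# --- Minimal usage sketch (assumption: illustrative caller code, not part of the original) ---
# Shows the intended call pattern: the first get_model() call loads the weights,
# later calls reuse the cached _model/_tokenizer. The prompt text and generation
# parameters below are placeholders.
if __name__ == "__main__":
    model, tokenizer = get_model()

    messages = [{"role": "user", "content": "Hello! Please introduce yourself briefly."}]
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)

    with torch.no_grad():
        output_ids = model.generate(input_ids, max_new_tokens=64)

    # Decode only the newly generated tokens, not the prompt
    print(tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True))

    # A second call returns the already-loaded objects without reloading
    model_again, _ = get_model()
    assert model_again is model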