{
  "architectures": [
    "DreamLLMForCausalMLM"
  ],
  "attention_bias": false,
  "bos_token_id": 0,
  "eos_token_id": 1,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 11008,
  "log_attentions": false,
  "log_hidden_states": false,
  "loss_scale_schedule": "none",
  "loss_weight_lm": 1.0,
  "loss_weight_vm": 10.0,
  "max_position_embeddings": 2048,
  "max_sequence_length": 2048,
  "model_type": "dreamllm",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 32,
  "pad_token_id": -1,
  "plugins_init_kwargs": {
    "clip_vision_embedding": {
      "_target_": "omni.models.dreamllm.modeling_plugins.CLIPVisionEmbedding",
      "clip_vision_model_name_or_path": "openai/clip-vit-large-patch14",
      "embed_hidden_size": 4096,
      "freeze_clip_vision_model": true,
      "freeze_embedding_layers": true,
      "freeze_projector": false,
      "local_files_only": false,
      "pretrained_model_name_or_path": "none",
      "projector_depth": 1,
      "projector_type": "linear",
      "select_layer": -2,
      "use_additional_post_layernorm": false
    },
    "dream_embedding": {
      "_target_": "omni.models.dreamllm.modeling_plugins.DreamEmbedding",
      "embed_hidden_size": 4096,
      "freeze_dream_queries": false,
      "num_dream_queries": 64,
      "pretrained_model_name_or_path": "none"
    },
    "stable_diffusion_head": {
      "_target_": "omni.models.dreamllm.modeling_plugins.StableDiffusionHead",
      "diffusion_name_or_path": "stabilityai/stable-diffusion-2-1-base",
      "embed_hidden_size": 4096,
      "freeze_projector": false,
      "freeze_unet": true,
      "freeze_vae": true,
      "local_files_only": false,
      "pretrained_model_name_or_path": "none",
      "projector_depth": 1,
      "projector_type": "linear"
    }
  },
  "plugins_type": {
    "clip_vision_embedding": "embedding",
    "dream_embedding": "embedding",
    "stable_diffusion_head": "head"
  },
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
  "rope_theta": 10000.0,
  "special_tokens2ids_dict": {
    "</s>": 2,
    "<s>": 1,
    "<unk>": 0,
    "[PAD]": 32000,
    "additional_special_tokens": {
      "<dream>": 32007,
      "<dream_end>": 32002,
      "<dream_start>": 32003,
      "<im_end>": 32005,
      "<im_patch>": 32004,
      "<im_start>": 32001,
      "<image>": 32006
    }
  },
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.35.2",
  "use_cache": true,
  "vocab_size": 32008
}