DeepSpeedEngine( (module): Qwen2_5_VLForConditionalGeneration( (visual): Qwen2_5_VisionTransformerPretrainedModel( (patch_embed): Qwen2_5_VisionPatchEmbed( (proj): Conv3d(3, 1280, kernel_size=(2, 14, 14), stride=(2, 14, 14), bias=False) ) (rotary_pos_emb): Qwen2_5_VisionRotaryEmbedding() (blocks): ModuleList( (0-15): 16 x Qwen2_5_VLVisionBlock( (norm1): Qwen2RMSNorm((0,), eps=1e-06) (norm2): Qwen2RMSNorm((0,), eps=1e-06) (attn): Qwen2_5_VLVisionSdpaAttention( (qkv): Linear(in_features=1280, out_features=3840, bias=True) (proj): Linear(in_features=1280, out_features=1280, bias=True) ) (mlp): Qwen2_5_VLMLP( (gate_proj): Linear(in_features=1280, out_features=3420, bias=True) (up_proj): Linear(in_features=1280, out_features=3420, bias=True) (down_proj): Linear(in_features=3420, out_features=1280, bias=True) (act_fn): SiLU() ) ) ) (merger): Qwen2_5_VLPatchMerger( (ln_q): Qwen2RMSNorm((0,), eps=1e-06) (mlp): Sequential( (0): Linear(in_features=5120, out_features=5120, bias=True) (1): GELU(approximate='none') (2): Linear(in_features=5120, out_features=2048, bias=True) ) ) ) (model): Qwen2_5_VLModel( (embed_tokens): Embedding(151936, 2048) (layers): ModuleList( (0-7): 8 x Qwen2_5_VLDecoderLayer( (self_attn): Qwen2_5_VLSdpaAttention( (q_proj): Linear(in_features=2048, out_features=2048, bias=True) (k_proj): Linear(in_features=2048, out_features=256, bias=True) (v_proj): Linear(in_features=2048, out_features=256, bias=True) (o_proj): Linear(in_features=2048, out_features=2048, bias=False) (rotary_emb): Qwen2_5_VLRotaryEmbedding() ) (mlp): Qwen2MLP( (gate_proj): Linear(in_features=2048, out_features=11008, bias=False) (up_proj): Linear(in_features=2048, out_features=11008, bias=False) (down_proj): Linear(in_features=11008, out_features=2048, bias=False) (act_fn): SiLU() ) (input_layernorm): Qwen2RMSNorm((0,), eps=1e-06) (post_attention_layernorm): Qwen2RMSNorm((0,), eps=1e-06) ) ) (norm): Qwen2RMSNorm((0,), eps=1e-06) (rotary_emb): Qwen2_5_VLRotaryEmbedding() ) (lm_head): Linear(in_features=2048, out_features=151936, bias=False) ) )