|
- ===============Pytorch weight Begin===================
- patch_embed.proj.weight torch.Size([128, 3, 4, 4]) torch.float32
- patch_embed.proj.bias torch.Size([128]) torch.float32
- patch_embed.norm.weight torch.Size([128]) torch.float32
- patch_embed.norm.bias torch.Size([128]) torch.float32
- layers.0.blocks.0.norm1.weight torch.Size([128]) torch.float32
- layers.0.blocks.0.norm1.bias torch.Size([128]) torch.float32
- layers.0.blocks.0.attn.logit_scale torch.Size([4, 1, 1]) torch.float32
- layers.0.blocks.0.attn.q_bias torch.Size([128]) torch.float32
- layers.0.blocks.0.attn.v_bias torch.Size([128]) torch.float32
- layers.0.blocks.0.attn.relative_coords_table torch.Size([1, 15, 15, 2]) torch.float32
- layers.0.blocks.0.attn.relative_position_index torch.Size([64, 64]) torch.int64
- layers.0.blocks.0.attn.qkv.weight torch.Size([384, 128]) torch.float32
- layers.0.blocks.0.attn.proj.weight torch.Size([128, 128]) torch.float32
- layers.0.blocks.0.attn.proj.bias torch.Size([128]) torch.float32
- layers.0.blocks.0.norm2.weight torch.Size([128]) torch.float32
- layers.0.blocks.0.norm2.bias torch.Size([128]) torch.float32
- layers.0.blocks.0.mlp.fc1.weight torch.Size([512, 128]) torch.float32
- layers.0.blocks.0.mlp.fc1.bias torch.Size([512]) torch.float32
- layers.0.blocks.0.mlp.fc2.weight torch.Size([128, 512]) torch.float32
- layers.0.blocks.0.mlp.fc2.bias torch.Size([128]) torch.float32
- layers.0.blocks.1.attn_mask torch.Size([64, 64, 64]) torch.float32
- layers.0.blocks.1.norm1.weight torch.Size([128]) torch.float32
- layers.0.blocks.1.norm1.bias torch.Size([128]) torch.float32
- layers.0.blocks.1.attn.logit_scale torch.Size([4, 1, 1]) torch.float32
- layers.0.blocks.1.attn.q_bias torch.Size([128]) torch.float32
- layers.0.blocks.1.attn.v_bias torch.Size([128]) torch.float32
- layers.0.blocks.1.attn.relative_coords_table torch.Size([1, 15, 15, 2]) torch.float32
- layers.0.blocks.1.attn.relative_position_index torch.Size([64, 64]) torch.int64
- layers.0.blocks.1.attn.qkv.weight torch.Size([384, 128]) torch.float32
- layers.0.blocks.1.attn.proj.weight torch.Size([128, 128]) torch.float32
- layers.0.blocks.1.attn.proj.bias torch.Size([128]) torch.float32
- layers.0.blocks.1.norm2.weight torch.Size([128]) torch.float32
- layers.0.blocks.1.norm2.bias torch.Size([128]) torch.float32
- layers.0.blocks.1.mlp.fc1.weight torch.Size([512, 128]) torch.float32
- layers.0.blocks.1.mlp.fc1.bias torch.Size([512]) torch.float32
- layers.0.blocks.1.mlp.fc2.weight torch.Size([128, 512]) torch.float32
- layers.0.blocks.1.mlp.fc2.bias torch.Size([128]) torch.float32
- layers.0.downsample.reduction.weight torch.Size([256, 512]) torch.float32
- layers.0.downsample.norm.weight torch.Size([256]) torch.float32
- layers.0.downsample.norm.bias torch.Size([256]) torch.float32
- layers.1.blocks.0.norm1.weight torch.Size([256]) torch.float32
- layers.1.blocks.0.norm1.bias torch.Size([256]) torch.float32
- layers.1.blocks.0.attn.logit_scale torch.Size([8, 1, 1]) torch.float32
- layers.1.blocks.0.attn.q_bias torch.Size([256]) torch.float32
- layers.1.blocks.0.attn.v_bias torch.Size([256]) torch.float32
- layers.1.blocks.0.attn.relative_coords_table torch.Size([1, 15, 15, 2]) torch.float32
- layers.1.blocks.0.attn.relative_position_index torch.Size([64, 64]) torch.int64
- layers.1.blocks.0.attn.qkv.weight torch.Size([768, 256]) torch.float32
- layers.1.blocks.0.attn.proj.weight torch.Size([256, 256]) torch.float32
- layers.1.blocks.0.attn.proj.bias torch.Size([256]) torch.float32
- layers.1.blocks.0.norm2.weight torch.Size([256]) torch.float32
- layers.1.blocks.0.norm2.bias torch.Size([256]) torch.float32
- layers.1.blocks.0.mlp.fc1.weight torch.Size([1024, 256]) torch.float32
- layers.1.blocks.0.mlp.fc1.bias torch.Size([1024]) torch.float32
- layers.1.blocks.0.mlp.fc2.weight torch.Size([256, 1024]) torch.float32
- layers.1.blocks.0.mlp.fc2.bias torch.Size([256]) torch.float32
- layers.1.blocks.1.attn_mask torch.Size([16, 64, 64]) torch.float32
- layers.1.blocks.1.norm1.weight torch.Size([256]) torch.float32
- layers.1.blocks.1.norm1.bias torch.Size([256]) torch.float32
- layers.1.blocks.1.attn.logit_scale torch.Size([8, 1, 1]) torch.float32
- layers.1.blocks.1.attn.q_bias torch.Size([256]) torch.float32
- layers.1.blocks.1.attn.v_bias torch.Size([256]) torch.float32
- layers.1.blocks.1.attn.relative_coords_table torch.Size([1, 15, 15, 2]) torch.float32
- layers.1.blocks.1.attn.relative_position_index torch.Size([64, 64]) torch.int64
- layers.1.blocks.1.attn.qkv.weight torch.Size([768, 256]) torch.float32
- layers.1.blocks.1.attn.proj.weight torch.Size([256, 256]) torch.float32
- layers.1.blocks.1.attn.proj.bias torch.Size([256]) torch.float32
- layers.1.blocks.1.norm2.weight torch.Size([256]) torch.float32
- layers.1.blocks.1.norm2.bias torch.Size([256]) torch.float32
- layers.1.blocks.1.mlp.fc1.weight torch.Size([1024, 256]) torch.float32
- layers.1.blocks.1.mlp.fc1.bias torch.Size([1024]) torch.float32
- layers.1.blocks.1.mlp.fc2.weight torch.Size([256, 1024]) torch.float32
- layers.1.blocks.1.mlp.fc2.bias torch.Size([256]) torch.float32
- layers.1.downsample.reduction.weight torch.Size([512, 1024]) torch.float32
- layers.1.downsample.norm.weight torch.Size([512]) torch.float32
- layers.1.downsample.norm.bias torch.Size([512]) torch.float32
- layers.2.blocks.0.norm1.weight torch.Size([512]) torch.float32
- layers.2.blocks.0.norm1.bias torch.Size([512]) torch.float32
- layers.2.blocks.0.attn.logit_scale torch.Size([16, 1, 1]) torch.float32
- layers.2.blocks.0.attn.q_bias torch.Size([512]) torch.float32
- layers.2.blocks.0.attn.v_bias torch.Size([512]) torch.float32
- layers.2.blocks.0.attn.relative_coords_table torch.Size([1, 15, 15, 2]) torch.float32
- layers.2.blocks.0.attn.relative_position_index torch.Size([64, 64]) torch.int64
- layers.2.blocks.0.attn.qkv.weight torch.Size([1536, 512]) torch.float32
- layers.2.blocks.0.attn.proj.weight torch.Size([512, 512]) torch.float32
- layers.2.blocks.0.attn.proj.bias torch.Size([512]) torch.float32
- layers.2.blocks.0.norm2.weight torch.Size([512]) torch.float32
- layers.2.blocks.0.norm2.bias torch.Size([512]) torch.float32
- layers.2.blocks.0.mlp.fc1.weight torch.Size([2048, 512]) torch.float32
- layers.2.blocks.0.mlp.fc1.bias torch.Size([2048]) torch.float32
- layers.2.blocks.0.mlp.fc2.weight torch.Size([512, 2048]) torch.float32
- layers.2.blocks.0.mlp.fc2.bias torch.Size([512]) torch.float32
- layers.2.blocks.1.attn_mask torch.Size([4, 64, 64]) torch.float32
- layers.2.blocks.1.norm1.weight torch.Size([512]) torch.float32
- layers.2.blocks.1.norm1.bias torch.Size([512]) torch.float32
- layers.2.blocks.1.attn.logit_scale torch.Size([16, 1, 1]) torch.float32
- layers.2.blocks.1.attn.q_bias torch.Size([512]) torch.float32
- layers.2.blocks.1.attn.v_bias torch.Size([512]) torch.float32
- layers.2.blocks.1.attn.relative_coords_table torch.Size([1, 15, 15, 2]) torch.float32
- layers.2.blocks.1.attn.relative_position_index torch.Size([64, 64]) torch.int64
- layers.2.blocks.1.attn.qkv.weight torch.Size([1536, 512]) torch.float32
- layers.2.blocks.1.attn.proj.weight torch.Size([512, 512]) torch.float32
- layers.2.blocks.1.attn.proj.bias torch.Size([512]) torch.float32
- layers.2.blocks.1.norm2.weight torch.Size([512]) torch.float32
- layers.2.blocks.1.norm2.bias torch.Size([512]) torch.float32
- layers.2.blocks.1.mlp.fc1.weight torch.Size([2048, 512]) torch.float32
- layers.2.blocks.1.mlp.fc1.bias torch.Size([2048]) torch.float32
- layers.2.blocks.1.mlp.fc2.weight torch.Size([512, 2048]) torch.float32
- layers.2.blocks.1.mlp.fc2.bias torch.Size([512]) torch.float32
- layers.2.blocks.2.norm1.weight torch.Size([512]) torch.float32
- layers.2.blocks.2.norm1.bias torch.Size([512]) torch.float32
- layers.2.blocks.2.attn.logit_scale torch.Size([16, 1, 1]) torch.float32
- layers.2.blocks.2.attn.q_bias torch.Size([512]) torch.float32
- layers.2.blocks.2.attn.v_bias torch.Size([512]) torch.float32
- layers.2.blocks.2.attn.relative_coords_table torch.Size([1, 15, 15, 2]) torch.float32
- layers.2.blocks.2.attn.relative_position_index torch.Size([64, 64]) torch.int64
- layers.2.blocks.2.attn.qkv.weight torch.Size([1536, 512]) torch.float32
- layers.2.blocks.2.attn.proj.weight torch.Size([512, 512]) torch.float32
- layers.2.blocks.2.attn.proj.bias torch.Size([512]) torch.float32
- layers.2.blocks.2.norm2.weight torch.Size([512]) torch.float32
- layers.2.blocks.2.norm2.bias torch.Size([512]) torch.float32
- layers.2.blocks.2.mlp.fc1.weight torch.Size([2048, 512]) torch.float32
- layers.2.blocks.2.mlp.fc1.bias torch.Size([2048]) torch.float32
- layers.2.blocks.2.mlp.fc2.weight torch.Size([512, 2048]) torch.float32
- layers.2.blocks.2.mlp.fc2.bias torch.Size([512]) torch.float32
- layers.2.blocks.3.attn_mask torch.Size([4, 64, 64]) torch.float32
- layers.2.blocks.3.norm1.weight torch.Size([512]) torch.float32
- layers.2.blocks.3.norm1.bias torch.Size([512]) torch.float32
- layers.2.blocks.3.attn.logit_scale torch.Size([16, 1, 1]) torch.float32
- layers.2.blocks.3.attn.q_bias torch.Size([512]) torch.float32
- layers.2.blocks.3.attn.v_bias torch.Size([512]) torch.float32
- layers.2.blocks.3.attn.relative_coords_table torch.Size([1, 15, 15, 2]) torch.float32
- layers.2.blocks.3.attn.relative_position_index torch.Size([64, 64]) torch.int64
- layers.2.blocks.3.attn.qkv.weight torch.Size([1536, 512]) torch.float32
- layers.2.blocks.3.attn.proj.weight torch.Size([512, 512]) torch.float32
- layers.2.blocks.3.attn.proj.bias torch.Size([512]) torch.float32
- layers.2.blocks.3.norm2.weight torch.Size([512]) torch.float32
- layers.2.blocks.3.norm2.bias torch.Size([512]) torch.float32
- layers.2.blocks.3.mlp.fc1.weight torch.Size([2048, 512]) torch.float32
- layers.2.blocks.3.mlp.fc1.bias torch.Size([2048]) torch.float32
- layers.2.blocks.3.mlp.fc2.weight torch.Size([512, 2048]) torch.float32
- layers.2.blocks.3.mlp.fc2.bias torch.Size([512]) torch.float32
- layers.2.blocks.4.norm1.weight torch.Size([512]) torch.float32
- layers.2.blocks.4.norm1.bias torch.Size([512]) torch.float32
- layers.2.blocks.4.attn.logit_scale torch.Size([16, 1, 1]) torch.float32
- layers.2.blocks.4.attn.q_bias torch.Size([512]) torch.float32
- layers.2.blocks.4.attn.v_bias torch.Size([512]) torch.float32
- layers.2.blocks.4.attn.relative_coords_table torch.Size([1, 15, 15, 2]) torch.float32
- layers.2.blocks.4.attn.relative_position_index torch.Size([64, 64]) torch.int64
- layers.2.blocks.4.attn.qkv.weight torch.Size([1536, 512]) torch.float32
- layers.2.blocks.4.attn.proj.weight torch.Size([512, 512]) torch.float32
- layers.2.blocks.4.attn.proj.bias torch.Size([512]) torch.float32
- layers.2.blocks.4.norm2.weight torch.Size([512]) torch.float32
- layers.2.blocks.4.norm2.bias torch.Size([512]) torch.float32
- layers.2.blocks.4.mlp.fc1.weight torch.Size([2048, 512]) torch.float32
- layers.2.blocks.4.mlp.fc1.bias torch.Size([2048]) torch.float32
- layers.2.blocks.4.mlp.fc2.weight torch.Size([512, 2048]) torch.float32
- layers.2.blocks.4.mlp.fc2.bias torch.Size([512]) torch.float32
- layers.2.blocks.5.attn_mask torch.Size([4, 64, 64]) torch.float32
- layers.2.blocks.5.norm1.weight torch.Size([512]) torch.float32
- layers.2.blocks.5.norm1.bias torch.Size([512]) torch.float32
- layers.2.blocks.5.attn.logit_scale torch.Size([16, 1, 1]) torch.float32
- layers.2.blocks.5.attn.q_bias torch.Size([512]) torch.float32
- layers.2.blocks.5.attn.v_bias torch.Size([512]) torch.float32
- layers.2.blocks.5.attn.relative_coords_table torch.Size([1, 15, 15, 2]) torch.float32
- layers.2.blocks.5.attn.relative_position_index torch.Size([64, 64]) torch.int64
- layers.2.blocks.5.attn.qkv.weight torch.Size([1536, 512]) torch.float32
- layers.2.blocks.5.attn.proj.weight torch.Size([512, 512]) torch.float32
- layers.2.blocks.5.attn.proj.bias torch.Size([512]) torch.float32
- layers.2.blocks.5.norm2.weight torch.Size([512]) torch.float32
- layers.2.blocks.5.norm2.bias torch.Size([512]) torch.float32
- layers.2.blocks.5.mlp.fc1.weight torch.Size([2048, 512]) torch.float32
- layers.2.blocks.5.mlp.fc1.bias torch.Size([2048]) torch.float32
- layers.2.blocks.5.mlp.fc2.weight torch.Size([512, 2048]) torch.float32
- layers.2.blocks.5.mlp.fc2.bias torch.Size([512]) torch.float32
- layers.2.blocks.6.norm1.weight torch.Size([512]) torch.float32
- layers.2.blocks.6.norm1.bias torch.Size([512]) torch.float32
- layers.2.blocks.6.attn.logit_scale torch.Size([16, 1, 1]) torch.float32
- layers.2.blocks.6.attn.q_bias torch.Size([512]) torch.float32
- layers.2.blocks.6.attn.v_bias torch.Size([512]) torch.float32
- layers.2.blocks.6.attn.relative_coords_table torch.Size([1, 15, 15, 2]) torch.float32
- layers.2.blocks.6.attn.relative_position_index torch.Size([64, 64]) torch.int64
- layers.2.blocks.6.attn.qkv.weight torch.Size([1536, 512]) torch.float32
- layers.2.blocks.6.attn.proj.weight torch.Size([512, 512]) torch.float32
- layers.2.blocks.6.attn.proj.bias torch.Size([512]) torch.float32
- layers.2.blocks.6.norm2.weight torch.Size([512]) torch.float32
- layers.2.blocks.6.norm2.bias torch.Size([512]) torch.float32
- layers.2.blocks.6.mlp.fc1.weight torch.Size([2048, 512]) torch.float32
- layers.2.blocks.6.mlp.fc1.bias torch.Size([2048]) torch.float32
- layers.2.blocks.6.mlp.fc2.weight torch.Size([512, 2048]) torch.float32
- layers.2.blocks.6.mlp.fc2.bias torch.Size([512]) torch.float32
- layers.2.blocks.7.attn_mask torch.Size([4, 64, 64]) torch.float32
- layers.2.blocks.7.norm1.weight torch.Size([512]) torch.float32
- layers.2.blocks.7.norm1.bias torch.Size([512]) torch.float32
- layers.2.blocks.7.attn.logit_scale torch.Size([16, 1, 1]) torch.float32
- layers.2.blocks.7.attn.q_bias torch.Size([512]) torch.float32
- layers.2.blocks.7.attn.v_bias torch.Size([512]) torch.float32
- layers.2.blocks.7.attn.relative_coords_table torch.Size([1, 15, 15, 2]) torch.float32
- layers.2.blocks.7.attn.relative_position_index torch.Size([64, 64]) torch.int64
- layers.2.blocks.7.attn.qkv.weight torch.Size([1536, 512]) torch.float32
- layers.2.blocks.7.attn.proj.weight torch.Size([512, 512]) torch.float32
- layers.2.blocks.7.attn.proj.bias torch.Size([512]) torch.float32
- layers.2.blocks.7.norm2.weight torch.Size([512]) torch.float32
- layers.2.blocks.7.norm2.bias torch.Size([512]) torch.float32
- layers.2.blocks.7.mlp.fc1.weight torch.Size([2048, 512]) torch.float32
- layers.2.blocks.7.mlp.fc1.bias torch.Size([2048]) torch.float32
- layers.2.blocks.7.mlp.fc2.weight torch.Size([512, 2048]) torch.float32
- layers.2.blocks.7.mlp.fc2.bias torch.Size([512]) torch.float32
- layers.2.blocks.8.norm1.weight torch.Size([512]) torch.float32
- layers.2.blocks.8.norm1.bias torch.Size([512]) torch.float32
- layers.2.blocks.8.attn.logit_scale torch.Size([16, 1, 1]) torch.float32
- layers.2.blocks.8.attn.q_bias torch.Size([512]) torch.float32
- layers.2.blocks.8.attn.v_bias torch.Size([512]) torch.float32
- layers.2.blocks.8.attn.relative_coords_table torch.Size([1, 15, 15, 2]) torch.float32
- layers.2.blocks.8.attn.relative_position_index torch.Size([64, 64]) torch.int64
- layers.2.blocks.8.attn.qkv.weight torch.Size([1536, 512]) torch.float32
- layers.2.blocks.8.attn.proj.weight torch.Size([512, 512]) torch.float32
- layers.2.blocks.8.attn.proj.bias torch.Size([512]) torch.float32
- layers.2.blocks.8.norm2.weight torch.Size([512]) torch.float32
- layers.2.blocks.8.norm2.bias torch.Size([512]) torch.float32
- layers.2.blocks.8.mlp.fc1.weight torch.Size([2048, 512]) torch.float32
- layers.2.blocks.8.mlp.fc1.bias torch.Size([2048]) torch.float32
- layers.2.blocks.8.mlp.fc2.weight torch.Size([512, 2048]) torch.float32
- layers.2.blocks.8.mlp.fc2.bias torch.Size([512]) torch.float32
- layers.2.blocks.9.attn_mask torch.Size([4, 64, 64]) torch.float32
- layers.2.blocks.9.norm1.weight torch.Size([512]) torch.float32
- layers.2.blocks.9.norm1.bias torch.Size([512]) torch.float32
- layers.2.blocks.9.attn.logit_scale torch.Size([16, 1, 1]) torch.float32
- layers.2.blocks.9.attn.q_bias torch.Size([512]) torch.float32
- layers.2.blocks.9.attn.v_bias torch.Size([512]) torch.float32
- layers.2.blocks.9.attn.relative_coords_table torch.Size([1, 15, 15, 2]) torch.float32
- layers.2.blocks.9.attn.relative_position_index torch.Size([64, 64]) torch.int64
- layers.2.blocks.9.attn.qkv.weight torch.Size([1536, 512]) torch.float32
- layers.2.blocks.9.attn.proj.weight torch.Size([512, 512]) torch.float32
- layers.2.blocks.9.attn.proj.bias torch.Size([512]) torch.float32
- layers.2.blocks.9.norm2.weight torch.Size([512]) torch.float32
- layers.2.blocks.9.norm2.bias torch.Size([512]) torch.float32
- layers.2.blocks.9.mlp.fc1.weight torch.Size([2048, 512]) torch.float32
- layers.2.blocks.9.mlp.fc1.bias torch.Size([2048]) torch.float32
- layers.2.blocks.9.mlp.fc2.weight torch.Size([512, 2048]) torch.float32
- layers.2.blocks.9.mlp.fc2.bias torch.Size([512]) torch.float32
- layers.2.blocks.10.norm1.weight torch.Size([512]) torch.float32
- layers.2.blocks.10.norm1.bias torch.Size([512]) torch.float32
- layers.2.blocks.10.attn.logit_scale torch.Size([16, 1, 1]) torch.float32
- layers.2.blocks.10.attn.q_bias torch.Size([512]) torch.float32
- layers.2.blocks.10.attn.v_bias torch.Size([512]) torch.float32
- layers.2.blocks.10.attn.relative_coords_table torch.Size([1, 15, 15, 2]) torch.float32
- layers.2.blocks.10.attn.relative_position_index torch.Size([64, 64]) torch.int64
- layers.2.blocks.10.attn.qkv.weight torch.Size([1536, 512]) torch.float32
- layers.2.blocks.10.attn.proj.weight torch.Size([512, 512]) torch.float32
- layers.2.blocks.10.attn.proj.bias torch.Size([512]) torch.float32
- layers.2.blocks.10.norm2.weight torch.Size([512]) torch.float32
- layers.2.blocks.10.norm2.bias torch.Size([512]) torch.float32
- layers.2.blocks.10.mlp.fc1.weight torch.Size([2048, 512]) torch.float32
- layers.2.blocks.10.mlp.fc1.bias torch.Size([2048]) torch.float32
- layers.2.blocks.10.mlp.fc2.weight torch.Size([512, 2048]) torch.float32
- layers.2.blocks.10.mlp.fc2.bias torch.Size([512]) torch.float32
- layers.2.blocks.11.attn_mask torch.Size([4, 64, 64]) torch.float32
- layers.2.blocks.11.norm1.weight torch.Size([512]) torch.float32
- layers.2.blocks.11.norm1.bias torch.Size([512]) torch.float32
- layers.2.blocks.11.attn.logit_scale torch.Size([16, 1, 1]) torch.float32
- layers.2.blocks.11.attn.q_bias torch.Size([512]) torch.float32
- layers.2.blocks.11.attn.v_bias torch.Size([512]) torch.float32
- layers.2.blocks.11.attn.relative_coords_table torch.Size([1, 15, 15, 2]) torch.float32
- layers.2.blocks.11.attn.relative_position_index torch.Size([64, 64]) torch.int64
- layers.2.blocks.11.attn.qkv.weight torch.Size([1536, 512]) torch.float32
- layers.2.blocks.11.attn.proj.weight torch.Size([512, 512]) torch.float32
- layers.2.blocks.11.attn.proj.bias torch.Size([512]) torch.float32
- layers.2.blocks.11.norm2.weight torch.Size([512]) torch.float32
- layers.2.blocks.11.norm2.bias torch.Size([512]) torch.float32
- layers.2.blocks.11.mlp.fc1.weight torch.Size([2048, 512]) torch.float32
- layers.2.blocks.11.mlp.fc1.bias torch.Size([2048]) torch.float32
- layers.2.blocks.11.mlp.fc2.weight torch.Size([512, 2048]) torch.float32
- layers.2.blocks.11.mlp.fc2.bias torch.Size([512]) torch.float32
- layers.2.blocks.12.norm1.weight torch.Size([512]) torch.float32
- layers.2.blocks.12.norm1.bias torch.Size([512]) torch.float32
- layers.2.blocks.12.attn.logit_scale torch.Size([16, 1, 1]) torch.float32
- layers.2.blocks.12.attn.q_bias torch.Size([512]) torch.float32
- layers.2.blocks.12.attn.v_bias torch.Size([512]) torch.float32
- layers.2.blocks.12.attn.relative_coords_table torch.Size([1, 15, 15, 2]) torch.float32
- layers.2.blocks.12.attn.relative_position_index torch.Size([64, 64]) torch.int64
- layers.2.blocks.12.attn.qkv.weight torch.Size([1536, 512]) torch.float32
- layers.2.blocks.12.attn.proj.weight torch.Size([512, 512]) torch.float32
- layers.2.blocks.12.attn.proj.bias torch.Size([512]) torch.float32
- layers.2.blocks.12.norm2.weight torch.Size([512]) torch.float32
- layers.2.blocks.12.norm2.bias torch.Size([512]) torch.float32
- layers.2.blocks.12.mlp.fc1.weight torch.Size([2048, 512]) torch.float32
- layers.2.blocks.12.mlp.fc1.bias torch.Size([2048]) torch.float32
- layers.2.blocks.12.mlp.fc2.weight torch.Size([512, 2048]) torch.float32
- layers.2.blocks.12.mlp.fc2.bias torch.Size([512]) torch.float32
- layers.2.blocks.13.attn_mask torch.Size([4, 64, 64]) torch.float32
- layers.2.blocks.13.norm1.weight torch.Size([512]) torch.float32
- layers.2.blocks.13.norm1.bias torch.Size([512]) torch.float32
- layers.2.blocks.13.attn.logit_scale torch.Size([16, 1, 1]) torch.float32
- layers.2.blocks.13.attn.q_bias torch.Size([512]) torch.float32
- layers.2.blocks.13.attn.v_bias torch.Size([512]) torch.float32
- layers.2.blocks.13.attn.relative_coords_table torch.Size([1, 15, 15, 2]) torch.float32
- layers.2.blocks.13.attn.relative_position_index torch.Size([64, 64]) torch.int64
- layers.2.blocks.13.attn.qkv.weight torch.Size([1536, 512]) torch.float32
- layers.2.blocks.13.attn.proj.weight torch.Size([512, 512]) torch.float32
- layers.2.blocks.13.attn.proj.bias torch.Size([512]) torch.float32
- layers.2.blocks.13.norm2.weight torch.Size([512]) torch.float32
- layers.2.blocks.13.norm2.bias torch.Size([512]) torch.float32
- layers.2.blocks.13.mlp.fc1.weight torch.Size([2048, 512]) torch.float32
- layers.2.blocks.13.mlp.fc1.bias torch.Size([2048]) torch.float32
- layers.2.blocks.13.mlp.fc2.weight torch.Size([512, 2048]) torch.float32
- layers.2.blocks.13.mlp.fc2.bias torch.Size([512]) torch.float32
- layers.2.blocks.14.norm1.weight torch.Size([512]) torch.float32
- layers.2.blocks.14.norm1.bias torch.Size([512]) torch.float32
- layers.2.blocks.14.attn.logit_scale torch.Size([16, 1, 1]) torch.float32
- layers.2.blocks.14.attn.q_bias torch.Size([512]) torch.float32
- layers.2.blocks.14.attn.v_bias torch.Size([512]) torch.float32
- layers.2.blocks.14.attn.relative_coords_table torch.Size([1, 15, 15, 2]) torch.float32
- layers.2.blocks.14.attn.relative_position_index torch.Size([64, 64]) torch.int64
- layers.2.blocks.14.attn.qkv.weight torch.Size([1536, 512]) torch.float32
- layers.2.blocks.14.attn.proj.weight torch.Size([512, 512]) torch.float32
- layers.2.blocks.14.attn.proj.bias torch.Size([512]) torch.float32
- layers.2.blocks.14.norm2.weight torch.Size([512]) torch.float32
- layers.2.blocks.14.norm2.bias torch.Size([512]) torch.float32
- layers.2.blocks.14.mlp.fc1.weight torch.Size([2048, 512]) torch.float32
- layers.2.blocks.14.mlp.fc1.bias torch.Size([2048]) torch.float32
- layers.2.blocks.14.mlp.fc2.weight torch.Size([512, 2048]) torch.float32
- layers.2.blocks.14.mlp.fc2.bias torch.Size([512]) torch.float32
- layers.2.blocks.15.attn_mask torch.Size([4, 64, 64]) torch.float32
- layers.2.blocks.15.norm1.weight torch.Size([512]) torch.float32
- layers.2.blocks.15.norm1.bias torch.Size([512]) torch.float32
- layers.2.blocks.15.attn.logit_scale torch.Size([16, 1, 1]) torch.float32
- layers.2.blocks.15.attn.q_bias torch.Size([512]) torch.float32
- layers.2.blocks.15.attn.v_bias torch.Size([512]) torch.float32
- layers.2.blocks.15.attn.relative_coords_table torch.Size([1, 15, 15, 2]) torch.float32
- layers.2.blocks.15.attn.relative_position_index torch.Size([64, 64]) torch.int64
- layers.2.blocks.15.attn.qkv.weight torch.Size([1536, 512]) torch.float32
- layers.2.blocks.15.attn.proj.weight torch.Size([512, 512]) torch.float32
- layers.2.blocks.15.attn.proj.bias torch.Size([512]) torch.float32
- layers.2.blocks.15.norm2.weight torch.Size([512]) torch.float32
- layers.2.blocks.15.norm2.bias torch.Size([512]) torch.float32
- layers.2.blocks.15.mlp.fc1.weight torch.Size([2048, 512]) torch.float32
- layers.2.blocks.15.mlp.fc1.bias torch.Size([2048]) torch.float32
- layers.2.blocks.15.mlp.fc2.weight torch.Size([512, 2048]) torch.float32
- layers.2.blocks.15.mlp.fc2.bias torch.Size([512]) torch.float32
- layers.2.blocks.16.norm1.weight torch.Size([512]) torch.float32
- layers.2.blocks.16.norm1.bias torch.Size([512]) torch.float32
- layers.2.blocks.16.attn.logit_scale torch.Size([16, 1, 1]) torch.float32
- layers.2.blocks.16.attn.q_bias torch.Size([512]) torch.float32
- layers.2.blocks.16.attn.v_bias torch.Size([512]) torch.float32
- layers.2.blocks.16.attn.relative_coords_table torch.Size([1, 15, 15, 2]) torch.float32
- layers.2.blocks.16.attn.relative_position_index torch.Size([64, 64]) torch.int64
- layers.2.blocks.16.attn.qkv.weight torch.Size([1536, 512]) torch.float32
- layers.2.blocks.16.attn.proj.weight torch.Size([512, 512]) torch.float32
- layers.2.blocks.16.attn.proj.bias torch.Size([512]) torch.float32
- layers.2.blocks.16.norm2.weight torch.Size([512]) torch.float32
- layers.2.blocks.16.norm2.bias torch.Size([512]) torch.float32
- layers.2.blocks.16.mlp.fc1.weight torch.Size([2048, 512]) torch.float32
- layers.2.blocks.16.mlp.fc1.bias torch.Size([2048]) torch.float32
- layers.2.blocks.16.mlp.fc2.weight torch.Size([512, 2048]) torch.float32
- layers.2.blocks.16.mlp.fc2.bias torch.Size([512]) torch.float32
- layers.2.blocks.17.attn_mask torch.Size([4, 64, 64]) torch.float32
- layers.2.blocks.17.norm1.weight torch.Size([512]) torch.float32
- layers.2.blocks.17.norm1.bias torch.Size([512]) torch.float32
- layers.2.blocks.17.attn.logit_scale torch.Size([16, 1, 1]) torch.float32
- layers.2.blocks.17.attn.q_bias torch.Size([512]) torch.float32
- layers.2.blocks.17.attn.v_bias torch.Size([512]) torch.float32
- layers.2.blocks.17.attn.relative_coords_table torch.Size([1, 15, 15, 2]) torch.float32
- layers.2.blocks.17.attn.relative_position_index torch.Size([64, 64]) torch.int64
- layers.2.blocks.17.attn.qkv.weight torch.Size([1536, 512]) torch.float32
- layers.2.blocks.17.attn.proj.weight torch.Size([512, 512]) torch.float32
- layers.2.blocks.17.attn.proj.bias torch.Size([512]) torch.float32
- layers.2.blocks.17.norm2.weight torch.Size([512]) torch.float32
- layers.2.blocks.17.norm2.bias torch.Size([512]) torch.float32
- layers.2.blocks.17.mlp.fc1.weight torch.Size([2048, 512]) torch.float32
- layers.2.blocks.17.mlp.fc1.bias torch.Size([2048]) torch.float32
- layers.2.blocks.17.mlp.fc2.weight torch.Size([512, 2048]) torch.float32
- layers.2.blocks.17.mlp.fc2.bias torch.Size([512]) torch.float32
- layers.2.downsample.reduction.weight torch.Size([1024, 2048]) torch.float32
- layers.2.downsample.norm.weight torch.Size([1024]) torch.float32
- layers.2.downsample.norm.bias torch.Size([1024]) torch.float32
- layers.3.blocks.0.norm1.weight torch.Size([1024]) torch.float32
- layers.3.blocks.0.norm1.bias torch.Size([1024]) torch.float32
- layers.3.blocks.0.attn.logit_scale torch.Size([32, 1, 1]) torch.float32
- layers.3.blocks.0.attn.q_bias torch.Size([1024]) torch.float32
- layers.3.blocks.0.attn.v_bias torch.Size([1024]) torch.float32
- layers.3.blocks.0.attn.relative_coords_table torch.Size([1, 15, 15, 2]) torch.float32
- layers.3.blocks.0.attn.relative_position_index torch.Size([64, 64]) torch.int64
- layers.3.blocks.0.attn.qkv.weight torch.Size([3072, 1024]) torch.float32
- layers.3.blocks.0.attn.proj.weight torch.Size([1024, 1024]) torch.float32
- layers.3.blocks.0.attn.proj.bias torch.Size([1024]) torch.float32
- layers.3.blocks.0.norm2.weight torch.Size([1024]) torch.float32
- layers.3.blocks.0.norm2.bias torch.Size([1024]) torch.float32
- layers.3.blocks.0.mlp.fc1.weight torch.Size([4096, 1024]) torch.float32
- layers.3.blocks.0.mlp.fc1.bias torch.Size([4096]) torch.float32
- layers.3.blocks.0.mlp.fc2.weight torch.Size([1024, 4096]) torch.float32
- layers.3.blocks.0.mlp.fc2.bias torch.Size([1024]) torch.float32
- layers.3.blocks.1.norm1.weight torch.Size([1024]) torch.float32
- layers.3.blocks.1.norm1.bias torch.Size([1024]) torch.float32
- layers.3.blocks.1.attn.logit_scale torch.Size([32, 1, 1]) torch.float32
- layers.3.blocks.1.attn.q_bias torch.Size([1024]) torch.float32
- layers.3.blocks.1.attn.v_bias torch.Size([1024]) torch.float32
- layers.3.blocks.1.attn.relative_coords_table torch.Size([1, 15, 15, 2]) torch.float32
- layers.3.blocks.1.attn.relative_position_index torch.Size([64, 64]) torch.int64
- layers.3.blocks.1.attn.qkv.weight torch.Size([3072, 1024]) torch.float32
- layers.3.blocks.1.attn.proj.weight torch.Size([1024, 1024]) torch.float32
- layers.3.blocks.1.attn.proj.bias torch.Size([1024]) torch.float32
- layers.3.blocks.1.norm2.weight torch.Size([1024]) torch.float32
- layers.3.blocks.1.norm2.bias torch.Size([1024]) torch.float32
- layers.3.blocks.1.mlp.fc1.weight torch.Size([4096, 1024]) torch.float32
- layers.3.blocks.1.mlp.fc1.bias torch.Size([4096]) torch.float32
- layers.3.blocks.1.mlp.fc2.weight torch.Size([1024, 4096]) torch.float32
- layers.3.blocks.1.mlp.fc2.bias torch.Size([1024]) torch.float32
- norm.weight torch.Size([1024]) torch.float32
- norm.bias torch.Size([1024]) torch.float32
- head.weight torch.Size([1000, 1024]) torch.float32
- head.bias torch.Size([1000]) torch.float32
- layers.0.blocks.0.attn.cpb_mlp.0.weight torch.Size([512, 2]) torch.float32
- layers.0.blocks.0.attn.cpb_mlp.0.bias torch.Size([512]) torch.float32
- layers.0.blocks.0.attn.cpb_mlp.2.weight torch.Size([4, 512]) torch.float32
- layers.0.blocks.1.attn.cpb_mlp.0.weight torch.Size([512, 2]) torch.float32
- layers.0.blocks.1.attn.cpb_mlp.0.bias torch.Size([512]) torch.float32
- layers.0.blocks.1.attn.cpb_mlp.2.weight torch.Size([4, 512]) torch.float32
- layers.1.blocks.0.attn.cpb_mlp.0.weight torch.Size([512, 2]) torch.float32
- layers.1.blocks.0.attn.cpb_mlp.0.bias torch.Size([512]) torch.float32
- layers.1.blocks.0.attn.cpb_mlp.2.weight torch.Size([8, 512]) torch.float32
- layers.1.blocks.1.attn.cpb_mlp.0.weight torch.Size([512, 2]) torch.float32
- layers.1.blocks.1.attn.cpb_mlp.0.bias torch.Size([512]) torch.float32
- layers.1.blocks.1.attn.cpb_mlp.2.weight torch.Size([8, 512]) torch.float32
- layers.2.blocks.0.attn.cpb_mlp.0.weight torch.Size([512, 2]) torch.float32
- layers.2.blocks.0.attn.cpb_mlp.0.bias torch.Size([512]) torch.float32
- layers.2.blocks.0.attn.cpb_mlp.2.weight torch.Size([16, 512]) torch.float32
- layers.2.blocks.1.attn.cpb_mlp.0.weight torch.Size([512, 2]) torch.float32
- layers.2.blocks.1.attn.cpb_mlp.0.bias torch.Size([512]) torch.float32
- layers.2.blocks.1.attn.cpb_mlp.2.weight torch.Size([16, 512]) torch.float32
- layers.2.blocks.2.attn.cpb_mlp.0.weight torch.Size([512, 2]) torch.float32
- layers.2.blocks.2.attn.cpb_mlp.0.bias torch.Size([512]) torch.float32
- layers.2.blocks.2.attn.cpb_mlp.2.weight torch.Size([16, 512]) torch.float32
- layers.2.blocks.3.attn.cpb_mlp.0.weight torch.Size([512, 2]) torch.float32
- layers.2.blocks.3.attn.cpb_mlp.0.bias torch.Size([512]) torch.float32
- layers.2.blocks.3.attn.cpb_mlp.2.weight torch.Size([16, 512]) torch.float32
- layers.2.blocks.4.attn.cpb_mlp.0.weight torch.Size([512, 2]) torch.float32
- layers.2.blocks.4.attn.cpb_mlp.0.bias torch.Size([512]) torch.float32
- layers.2.blocks.4.attn.cpb_mlp.2.weight torch.Size([16, 512]) torch.float32
- layers.2.blocks.5.attn.cpb_mlp.0.weight torch.Size([512, 2]) torch.float32
- layers.2.blocks.5.attn.cpb_mlp.0.bias torch.Size([512]) torch.float32
- layers.2.blocks.5.attn.cpb_mlp.2.weight torch.Size([16, 512]) torch.float32
- layers.2.blocks.6.attn.cpb_mlp.0.weight torch.Size([512, 2]) torch.float32
- layers.2.blocks.6.attn.cpb_mlp.0.bias torch.Size([512]) torch.float32
- layers.2.blocks.6.attn.cpb_mlp.2.weight torch.Size([16, 512]) torch.float32
- layers.2.blocks.7.attn.cpb_mlp.0.weight torch.Size([512, 2]) torch.float32
- layers.2.blocks.7.attn.cpb_mlp.0.bias torch.Size([512]) torch.float32
- layers.2.blocks.7.attn.cpb_mlp.2.weight torch.Size([16, 512]) torch.float32
- layers.2.blocks.8.attn.cpb_mlp.0.weight torch.Size([512, 2]) torch.float32
- layers.2.blocks.8.attn.cpb_mlp.0.bias torch.Size([512]) torch.float32
- layers.2.blocks.8.attn.cpb_mlp.2.weight torch.Size([16, 512]) torch.float32
- layers.2.blocks.9.attn.cpb_mlp.0.weight torch.Size([512, 2]) torch.float32
- layers.2.blocks.9.attn.cpb_mlp.0.bias torch.Size([512]) torch.float32
- layers.2.blocks.9.attn.cpb_mlp.2.weight torch.Size([16, 512]) torch.float32
- layers.2.blocks.10.attn.cpb_mlp.0.weight torch.Size([512, 2]) torch.float32
- layers.2.blocks.10.attn.cpb_mlp.0.bias torch.Size([512]) torch.float32
- layers.2.blocks.10.attn.cpb_mlp.2.weight torch.Size([16, 512]) torch.float32
- layers.2.blocks.11.attn.cpb_mlp.0.weight torch.Size([512, 2]) torch.float32
- layers.2.blocks.11.attn.cpb_mlp.0.bias torch.Size([512]) torch.float32
- layers.2.blocks.11.attn.cpb_mlp.2.weight torch.Size([16, 512]) torch.float32
- layers.2.blocks.12.attn.cpb_mlp.0.weight torch.Size([512, 2]) torch.float32
- layers.2.blocks.12.attn.cpb_mlp.0.bias torch.Size([512]) torch.float32
- layers.2.blocks.12.attn.cpb_mlp.2.weight torch.Size([16, 512]) torch.float32
- layers.2.blocks.13.attn.cpb_mlp.0.weight torch.Size([512, 2]) torch.float32
- layers.2.blocks.13.attn.cpb_mlp.0.bias torch.Size([512]) torch.float32
- layers.2.blocks.13.attn.cpb_mlp.2.weight torch.Size([16, 512]) torch.float32
- layers.2.blocks.14.attn.cpb_mlp.0.weight torch.Size([512, 2]) torch.float32
- layers.2.blocks.14.attn.cpb_mlp.0.bias torch.Size([512]) torch.float32
- layers.2.blocks.14.attn.cpb_mlp.2.weight torch.Size([16, 512]) torch.float32
- layers.2.blocks.15.attn.cpb_mlp.0.weight torch.Size([512, 2]) torch.float32
- layers.2.blocks.15.attn.cpb_mlp.0.bias torch.Size([512]) torch.float32
- layers.2.blocks.15.attn.cpb_mlp.2.weight torch.Size([16, 512]) torch.float32
- layers.2.blocks.16.attn.cpb_mlp.0.weight torch.Size([512, 2]) torch.float32
- layers.2.blocks.16.attn.cpb_mlp.0.bias torch.Size([512]) torch.float32
- layers.2.blocks.16.attn.cpb_mlp.2.weight torch.Size([16, 512]) torch.float32
- layers.2.blocks.17.attn.cpb_mlp.0.weight torch.Size([512, 2]) torch.float32
- layers.2.blocks.17.attn.cpb_mlp.0.bias torch.Size([512]) torch.float32
- layers.2.blocks.17.attn.cpb_mlp.2.weight torch.Size([16, 512]) torch.float32
- layers.3.blocks.0.attn.cpb_mlp.0.weight torch.Size([512, 2]) torch.float32
- layers.3.blocks.0.attn.cpb_mlp.0.bias torch.Size([512]) torch.float32
- layers.3.blocks.0.attn.cpb_mlp.2.weight torch.Size([32, 512]) torch.float32
- layers.3.blocks.1.attn.cpb_mlp.0.weight torch.Size([512, 2]) torch.float32
- layers.3.blocks.1.attn.cpb_mlp.0.bias torch.Size([512]) torch.float32
- layers.3.blocks.1.attn.cpb_mlp.2.weight torch.Size([32, 512]) torch.float32
- ===============Pytorch weight End===================
|