@@ -556,7 +556,7 @@ def __init__(
556
556
self .patch_drop = nn .Identity ()
557
557
self .norm_pre = norm_layer (embed_dim ) if pre_norm else nn .Identity ()
558
558
559
- dpr = [x .item () for x in torch .linspace (0 , drop_path_rate , depth , device = 'cpu' )] # stochastic depth decay rule
559
+ dpr = [x .item () for x in torch .linspace (0 , drop_path_rate , depth )] # stochastic depth decay rule
560
560
self .blocks = nn .Sequential (* [
561
561
block_fn (
562
562
dim = embed_dim ,
@@ -1158,22 +1158,12 @@ def _convert_aimv2(
1158
1158
k = k .replace ('preprocessor.pos_embed' , 'pos_embed' )
1159
1159
k = k .replace ('trunk.' , '' )
1160
1160
k = k .replace ('post_trunk_norm.' , 'norm.' )
1161
-
1162
- # packed ver, FIXME to delete
1163
- # if 'mlp.fc1' in k:
1164
- # if k in out_dict:
1165
- # v = torch.cat([v, out_dict[k]], dim=0)
1166
- # elif 'mlp.fc3' in k:
1167
- # k = k.replace('mlp.fc3', 'mlp.fc1')
1168
- # if k in out_dict:
1169
- # v = torch.cat([out_dict[k], v], dim=0)
1170
1161
k = k .replace ('mlp.fc1' , 'mlp.fc1_g' )
1171
1162
k = k .replace ('mlp.fc3' , 'mlp.fc1_x' )
1172
-
1173
1163
out_dict [k ] = v
1174
-
1175
1164
return out_dict
1176
1165
1166
+
1177
1167
def checkpoint_filter_fn (
1178
1168
state_dict : Dict [str , torch .Tensor ],
1179
1169
model : VisionTransformer ,
@@ -1688,8 +1678,7 @@ def _cfg(url: str = '', **kwargs) -> Dict[str, Any]:
1688
1678
license = 'apple-ascl' ,
1689
1679
mean = OPENAI_CLIP_MEAN , std = OPENAI_CLIP_STD , crop_pct = 1.0 , num_classes = 512 ),
1690
1680
'vit_large_patch14_clip_224.dfn2b_s39b' : _cfg (
1691
- #hf_hub_id='timm/',
1692
- hf_hub_id = 'apple/DFN2B-CLIP-ViT-L-14-39B' , hf_hub_filename = 'open_clip_pytorch_model.bin' ,
1681
+ hf_hub_id = 'timm/' ,
1693
1682
license = 'apple-ascl' ,
1694
1683
mean = OPENAI_CLIP_MEAN , std = OPENAI_CLIP_STD , crop_pct = 1.0 , num_classes = 768 ),
1695
1684
'vit_large_patch14_clip_224.dfn2b' : _cfg (
@@ -2177,59 +2166,59 @@ def _cfg(url: str = '', **kwargs) -> Dict[str, Any]:
2177
2166
),
2178
2167
2179
2168
'aimv2_large_patch14_224.apple_pt' : _cfg (
2180
- hf_hub_id = 'apple/aimv2-large-patch14-224 ' ,
2169
+ hf_hub_id = 'timm/ ' ,
2181
2170
mean = OPENAI_CLIP_MEAN , std = OPENAI_CLIP_STD , license = 'apple-ascl' ,
2182
2171
crop_pct = 1.0 , num_classes = 0 ),
2183
2172
'aimv2_large_patch14_224.apple_pt_dist' : _cfg (
2184
- hf_hub_id = 'apple/aimv2-large-patch14-224-distilled ' ,
2173
+ hf_hub_id = 'timm/ ' ,
2185
2174
mean = OPENAI_CLIP_MEAN , std = OPENAI_CLIP_STD , license = 'apple-ascl' ,
2186
2175
crop_pct = 1.0 , num_classes = 0 ),
2187
2176
'aimv2_huge_patch14_224.apple_pt' : _cfg (
2188
- hf_hub_id = 'apple/aimv2-huge-patch14-224 ' ,
2177
+ hf_hub_id = 'timm/ ' ,
2189
2178
mean = OPENAI_CLIP_MEAN , std = OPENAI_CLIP_STD , license = 'apple-ascl' ,
2190
2179
crop_pct = 1.0 , num_classes = 0 ),
2191
2180
'aimv2_1b_patch14_224.apple_pt' : _cfg (
2192
- hf_hub_id = 'apple/aimv2-1b-patch14-224 ' ,
2181
+ hf_hub_id = 'timm/ ' ,
2193
2182
mean = OPENAI_CLIP_MEAN , std = OPENAI_CLIP_STD , license = 'apple-ascl' ,
2194
2183
crop_pct = 1.0 , num_classes = 0 ),
2195
2184
'aimv2_3b_patch14_224.apple_pt' : _cfg (
2196
- hf_hub_id = 'apple/aimv2-3b-patch14-224 ' ,
2185
+ hf_hub_id = 'timm/ ' ,
2197
2186
mean = OPENAI_CLIP_MEAN , std = OPENAI_CLIP_STD , license = 'apple-ascl' ,
2198
2187
crop_pct = 1.0 , num_classes = 0 ),
2199
2188
'aimv2_large_patch14_336.apple_pt' : _cfg (
2200
- hf_hub_id = 'apple/aimv2-large-patch14-336 ' ,
2189
+ hf_hub_id = 'timm/ ' ,
2201
2190
mean = OPENAI_CLIP_MEAN , std = OPENAI_CLIP_STD , license = 'apple-ascl' ,
2202
2191
input_size = (3 , 336 , 336 ), crop_pct = 1.0 , num_classes = 0 ),
2203
2192
'aimv2_large_patch14_336.apple_pt_dist' : _cfg (
2204
- hf_hub_id = 'apple/aimv2-large-patch14-336-distilled ' ,
2193
+ hf_hub_id = 'timm/ ' ,
2205
2194
mean = OPENAI_CLIP_MEAN , std = OPENAI_CLIP_STD , license = 'apple-ascl' ,
2206
2195
input_size = (3 , 336 , 336 ), crop_pct = 1.0 , num_classes = 0 ),
2207
2196
'aimv2_huge_patch14_336.apple_pt' : _cfg (
2208
- hf_hub_id = 'apple/aimv2-huge-patch14-336 ' ,
2197
+ hf_hub_id = 'timm/ ' ,
2209
2198
mean = OPENAI_CLIP_MEAN , std = OPENAI_CLIP_STD , license = 'apple-ascl' ,
2210
2199
input_size = (3 , 336 , 336 ), crop_pct = 1.0 , num_classes = 0 ),
2211
2200
'aimv2_1b_patch14_336.apple_pt' : _cfg (
2212
- hf_hub_id = 'apple/aimv2-1b-patch14-336 ' ,
2201
+ hf_hub_id = 'timm/ ' ,
2213
2202
mean = OPENAI_CLIP_MEAN , std = OPENAI_CLIP_STD , license = 'apple-ascl' ,
2214
2203
input_size = (3 , 336 , 336 ), crop_pct = 1.0 , num_classes = 0 ),
2215
2204
'aimv2_3b_patch14_336.apple_pt' : _cfg (
2216
- hf_hub_id = 'apple/aimv2-3b-patch14-336 ' ,
2205
+ hf_hub_id = 'timm/ ' ,
2217
2206
mean = OPENAI_CLIP_MEAN , std = OPENAI_CLIP_STD , license = 'apple-ascl' ,
2218
2207
input_size = (3 , 336 , 336 ), crop_pct = 1.0 , num_classes = 0 ),
2219
2208
'aimv2_large_patch14_448.apple_pt' : _cfg (
2220
- hf_hub_id = 'apple/aimv2-large-patch14-448 ' ,
2209
+ hf_hub_id = 'timm/ ' ,
2221
2210
mean = OPENAI_CLIP_MEAN , std = OPENAI_CLIP_STD , license = 'apple-ascl' ,
2222
2211
input_size = (3 , 448 , 448 ), crop_pct = 1.0 , num_classes = 0 ),
2223
2212
'aimv2_huge_patch14_448.apple_pt' : _cfg (
2224
- hf_hub_id = 'apple/aimv2-huge-patch14-448 ' ,
2213
+ hf_hub_id = 'timm/ ' ,
2225
2214
mean = OPENAI_CLIP_MEAN , std = OPENAI_CLIP_STD , license = 'apple-ascl' ,
2226
2215
input_size = (3 , 448 , 448 ), crop_pct = 1.0 , num_classes = 0 ),
2227
2216
'aimv2_1b_patch14_448.apple_pt' : _cfg (
2228
- hf_hub_id = 'apple/aimv2-1b-patch14-448 ' ,
2217
+ hf_hub_id = 'timm/ ' ,
2229
2218
mean = OPENAI_CLIP_MEAN , std = OPENAI_CLIP_STD , license = 'apple-ascl' ,
2230
2219
input_size = (3 , 448 , 448 ), crop_pct = 1.0 , num_classes = 0 ),
2231
2220
'aimv2_3b_patch14_448.apple_pt' : _cfg (
2232
- hf_hub_id = 'apple/aimv2-3b-patch14-448 ' ,
2221
+ hf_hub_id = 'timm/ ' ,
2233
2222
mean = OPENAI_CLIP_MEAN , std = OPENAI_CLIP_STD , license = 'apple-ascl' ,
2234
2223
input_size = (3 , 448 , 448 ), crop_pct = 1.0 , num_classes = 0 ),
2235
2224
0 commit comments