Skip to content

Commit 256e1ea

Browse files
merveenoyan, pcuenca, Vaibhavs10
authored
Update tasks with new models and apps (#1229)
--------- Co-authored-by: Pedro Cuenca <[email protected]> Co-authored-by: vb <[email protected]>
1 parent b5230f9 commit 256e1ea

File tree

10 files changed

+44
-16
lines changed

10 files changed

+44
-16
lines changed

packages/tasks/src/tasks/depth-estimation/data.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,7 @@ const taskData: TaskDataCustom = {
4141
},
4242
{
4343
description: "A robust depth estimation model.",
44-
id: "apple/DepthPro",
44+
id: "apple/DepthPro-hf",
4545
},
4646
],
4747
spaces: [

packages/tasks/src/tasks/image-text-to-text/data.ts

Lines changed: 10 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,7 @@ const taskData: TaskDataCustom = {
4848
},
4949
{
5050
description: "A screenshot understanding model used to control computers.",
51-
id: "showlab/ShowUI-2B",
51+
id: "microsoft/OmniParser-v2.0",
5252
},
5353
{
5454
description: "Cutting-edge vision language model.",
@@ -63,12 +63,16 @@ const taskData: TaskDataCustom = {
6363
id: "Qwen/Qwen2.5-VL-7B-Instruct",
6464
},
6565
{
66-
description: "Image-text-to-text model with reasoning capabilities.",
67-
id: "Qwen/QVQ-72B-Preview",
66+
description: "Image-text-to-text model with agentic capabilities.",
67+
id: "microsoft/Magma-8B",
6868
},
6969
{
7070
description: "Strong image-text-to-text model focused on documents.",
71-
id: "stepfun-ai/GOT-OCR2_0",
71+
id: "allenai/olmOCR-7B-0225-preview",
72+
},
73+
{
74+
description: "Small yet strong image-text-to-text model.",
75+
id: "ibm-granite/granite-vision-3.2-2b",
7276
},
7377
],
7478
spaces: [
@@ -85,8 +89,8 @@ const taskData: TaskDataCustom = {
8589
id: "akhaliq/Molmo-7B-D-0924",
8690
},
8791
{
88-
description: "An image-text-to-text application focused on documents.",
89-
id: "stepfun-ai/GOT_official_online_demo",
92+
description: "Powerful vision language assistant that can understand multiple images.",
93+
id: "HuggingFaceTB/SmolVLM2",
9094
},
9195
{
9296
description: "An application for chatting with an image-text-to-text model.",

packages/tasks/src/tasks/keypoint-detection/data.ts

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,10 @@ const taskData: TaskDataCustom = {
2727
description: "A robust keypoint detection model.",
2828
id: "magic-leap-community/superpoint",
2929
},
30+
{
31+
description: "A robust keypoint matching model.",
32+
id: "magic-leap-community/superglue_outdoor",
33+
},
3034
{
3135
description: "Strong keypoint detection model used to detect human pose.",
3236
id: "facebook/sapiens-pose-1b",

packages/tasks/src/tasks/object-detection/data.ts

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -47,12 +47,12 @@ const taskData: TaskDataCustom = {
4747
id: "facebook/detr-resnet-50",
4848
},
4949
{
50-
description: "Real-time and accurate object detection model.",
51-
id: "jameslahm/yolov10x",
50+
description: "Accurate object detection model.",
51+
id: "IDEA-Research/dab-detr-resnet-50",
5252
},
5353
{
54-
description: "Fast and accurate object detection model trained on COCO and Object365 datasets.",
55-
id: "PekingU/rtdetr_r18vd_coco_o365",
54+
description: "Fast and accurate object detection model.",
55+
id: "PekingU/rtdetr_v2_r50vd",
5656
},
5757
{
5858
description: "Object detection model for low-lying objects.",
@@ -70,7 +70,7 @@ const taskData: TaskDataCustom = {
7070
},
7171
{
7272
description: "A cutting-edge object detection application.",
73-
id: "Ultralytics/YOLO11",
73+
id: "sunsmarterjieleaf/yolov12",
7474
},
7575
{
7676
description: "An object tracking, segmentation and inpainting application.",

packages/tasks/src/tasks/text-generation/data.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -76,7 +76,7 @@ const taskData: TaskDataCustom = {
7676
},
7777
{
7878
description: "A very powerful model with reasoning capabilities.",
79-
id: "PowerInfer/SmallThinker-3B-Preview",
79+
id: "simplescaling/s1.1-32B",
8080
},
8181
{
8282
description: "Strong conversational model that supports very long instructions.",

packages/tasks/src/tasks/text-to-speech/data.ts

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -76,6 +76,10 @@ const taskData: TaskDataCustom = {
7676
description: "An application that synthesizes emotional speech for diverse speaker prompts.",
7777
id: "parler-tts/parler-tts-expresso",
7878
},
79+
{
80+
description: "An application that generates podcast episodes.",
81+
id: "ngxson/kokoro-podcast-generator",
82+
},
7983
],
8084
summary:
8185
"Text-to-Speech (TTS) is the task of generating natural sounding speech given text input. TTS models can be extended to have a single model that generates speech for multiple speakers and multiple languages.",

packages/tasks/src/tasks/text-to-video/data.ts

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -78,6 +78,10 @@ const taskData: TaskDataCustom = {
7878
description: "A text-to-video model focusing on physics-aware applications like robotics.",
7979
id: "nvidia/Cosmos-1.0-Diffusion-7B-Text2World",
8080
},
81+
{
82+
description: "A robust model for video generation.",
83+
id: "Wan-AI/Wan2.1-T2V-1.3B",
84+
},
8185
],
8286
spaces: [
8387
{
@@ -86,7 +90,7 @@ const taskData: TaskDataCustom = {
8690
},
8791
{
8892
description: "Consistent video generation application.",
89-
id: "TIGER-Lab/T2V-Turbo-V2",
93+
id: "Wan-AI/Wan2.1",
9094
},
9195
{
9296
description: "A cutting edge video generation application.",

packages/tasks/src/tasks/video-text-to-text/data.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,10 @@ const taskData: TaskDataCustom = {
4646
description: "Strong video-text-to-text model with reasoning capabilities.",
4747
id: "GoodiesHere/Apollo-LMMs-Apollo-7B-t32",
4848
},
49+
{
50+
description: "Strong video-text-to-text model.",
51+
id: "HuggingFaceTB/SmolVLM2-2.2B-Instruct",
52+
},
4953
],
5054
spaces: [
5155
{
@@ -56,6 +60,10 @@ const taskData: TaskDataCustom = {
5660
description: "A leaderboard for various video-text-to-text models.",
5761
id: "opencompass/openvlm_video_leaderboard",
5862
},
63+
{
64+
description: "An application to generate highlights from a video.",
65+
id: "HuggingFaceTB/SmolVLM2-HighlightGenerator",
66+
},
5967
],
6068
summary:
6169
"Video-text-to-text models take in a video and a text prompt and output text. These models are also called video-language models.",

packages/tasks/src/tasks/zero-shot-classification/data.ts

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -60,6 +60,10 @@ const taskData: TaskDataCustom = {
6060
description: "Cutting-edge zero-shot multilingual text classification model.",
6161
id: "MoritzLaurer/ModernBERT-large-zeroshot-v2.0",
6262
},
63+
{
64+
description: "Zero-shot text classification model that can be used for topic and sentiment classification.",
65+
id: "knowledgator/gliclass-modern-base-v2.0-init",
66+
},
6367
],
6468
spaces: [],
6569
summary:

packages/tasks/src/tasks/zero-shot-image-classification/data.ts

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -53,11 +53,11 @@ const taskData: TaskDataCustom = {
5353
},
5454
{
5555
description: "Strong zero-shot image classification model.",
56-
id: "google/siglip-so400m-patch14-224",
56+
id: "google/siglip2-base-patch16-224",
5757
},
5858
{
5959
description: "Robust zero-shot image classification model.",
60-
id: "microsoft/LLM2CLIP-EVA02-L-14-336",
60+
id: "intfloat/mmE5-mllama-11b-instruct",
6161
},
6262
{
6363
description: "Powerful zero-shot image classification model supporting 94 languages.",

0 commit comments

Comments
 (0)