Skip to content

Sam2

Sam2EmbeddingRequest

Bases: Sam2InferenceRequest

SAM2 embedding request.

Attributes:

Name Type Description
image Optional[InferenceRequestImage]

The image to be embedded.

image_id Optional[str]

The ID of the image to be embedded used to cache the embedding.

format Optional[str]

The format of the response. Must be one of json or binary.

Source code in inference/core/entities/requests/sam2.py
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
class Sam2EmbeddingRequest(Sam2InferenceRequest):
    """SAM2 embedding request.

    Extends Sam2InferenceRequest with the image to embed and an optional
    identifier for that image.

    Attributes:
        image (Optional[inference.core.entities.requests.inference.InferenceRequestImage]): The image to be embedded.
        image_id (Optional[str]): The ID of the image to be embedded used to cache the embedding.
    """

    # NOTE(review): this docstring previously listed a `format` attribute, but
    # no such field is declared in this class body — confirm whether it is
    # inherited from a base class or should be (re)declared here.

    # Optional; defaults to None when the request carries no image.
    image: Optional[InferenceRequestImage] = Field(
        default=None,
        description="The image to be embedded",
    )
    # Optional; defaults to None.
    image_id: Optional[str] = Field(
        default=None,
        examples=["image_id"],
        description="The ID of the image to be embedded used to cache the embedding.",
    )

Sam2InferenceRequest

Bases: BaseRequest

SAM2 inference request.

Attributes:

Name Type Description
api_key Optional[str]

Roboflow API Key.

sam2_version_id Optional[str]

The version ID of SAM2 to be used for this request.

Source code in inference/core/entities/requests/sam2.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
class Sam2InferenceRequest(BaseRequest):
    """SAM2 inference request.

    Attributes:
        api_key (Optional[str]): Roboflow API Key.
        sam2_version_id (Optional[str]): The version ID of SAM2 to be used for this request.
        model_id (Optional[str]): Model identifier. When not supplied it is
            derived from ``sam2_version_id`` as ``sam2/<sam2_version_id>``; it
            stays ``None`` only if ``sam2_version_id`` is ``None`` as well.
    """

    sam2_version_id: Optional[str] = Field(
        default=SAM2_VERSION_ID,
        examples=["hiera_large"],
        description="The version ID of SAM to be used for this request. Must be one of hiera_tiny, hiera_small, hiera_large, hiera_b_plus",
    )

    model_id: Optional[str] = Field(None)

    # TODO[pydantic]: We couldn't refactor the `validator`, please replace it by `field_validator` manually.
    # Check https://docs.pydantic.dev/dev-v2/migration/#changes-to-validators for more information.
    @validator("model_id", always=True)
    def validate_model_id(cls, value, values):
        """Fill in ``model_id`` from ``sam2_version_id`` when it is not given.

        Args:
            value: The ``model_id`` supplied by the caller, or ``None``.
            values: Fields validated before ``model_id`` (``sam2_version_id``
                is declared earlier, so it is present when it validated).

        Returns:
            The explicit ``model_id`` if one was supplied, otherwise
            ``"sam2/<sam2_version_id>"``, or ``None`` when no version ID is
            available.
        """
        if value is not None:
            return value
        # The field is named `sam2_version_id`; the previous lookup used
        # "sam_version_id", which never exists, so model_id was always None.
        version_id = values.get("sam2_version_id")
        if version_id is None:
            return None
        return f"sam2/{version_id}"

Sam2SegmentationRequest

Bases: Sam2InferenceRequest

SAM2 segmentation request.

Attributes:

Name Type Description
format Optional[str]

The format of the response.

image InferenceRequestImage

The image to be segmented.

image_id Optional[str]

The ID of the image to be segmented used to retrieve cached embeddings.

point_coords Optional[List[List[float]]]

The coordinates of the interactive points used during decoding.

point_labels Optional[List[float]]

The labels of the interactive points used during decoding.

Source code in inference/core/entities/requests/sam2.py
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
class Sam2SegmentationRequest(Sam2InferenceRequest):
    """SAM2 segmentation request.

    Attributes:
        format (Optional[str]): The format of the response. One of 'json', 'rle', or 'binary'.
        image (InferenceRequestImage): The image to be segmented.
        image_id (Optional[str]): The ID of the image to be segmented used to retrieve cached embeddings.
        prompts (Sam2PromptSet): Prompts for the masks to predict. Also accepts a
            flat list of prompts or a single prompt dict, which are coerced to a
            Sam2PromptSet by ``_coerce_prompts``.
        multimask_output (bool): If true, the model will return three masks.
        save_logits_to_cache (bool): If True, saves the low-resolution logits to the cache.
        load_logits_from_cache (bool): If True, attempts to load previously cached low-resolution logits.
    """

    format: Optional[str] = Field(
        default="json",
        examples=["json"],
        description="The format of the response. Must be one of 'json', 'rle', or 'binary'. If binary, masks are returned as binary numpy arrays. If json, masks are converted to polygons. If rle, masks are converted to RLE format.",
    )
    image: InferenceRequestImage = Field(
        description="The image to be segmented.",
    )
    image_id: Optional[str] = Field(
        default=None,
        examples=["image_id"],
        description="The ID of the image to be segmented used to retrieve cached embeddings. If an embedding is cached, it will be used instead of generating a new embedding. If no embedding is cached, a new embedding will be generated and cached.",
    )
    prompts: Sam2PromptSet = Field(
        default=Sam2PromptSet(prompts=None),
        # `examples` (plural) matches the other fields; the value is already a
        # list of example payloads, and `example` is not a declared Field kwarg.
        examples=[{"prompts": [{"points": [{"x": 100, "y": 100, "positive": True}]}]}],
        description="A list of prompts for masks to predict. Each prompt can include a bounding box and / or a set of postive or negative points. "
        "Also accepts a flat array of prompts (e.g. 'prompts': [{...}, {...}]) for convenience.",
    )
    multimask_output: bool = Field(
        default=True,
        examples=[True],
        description="If true, the model will return three masks. "
        "For ambiguous input prompts (such as a single click), this will often "
        "produce better masks than a single prediction. If only a single "
        "mask is needed, the model's predicted quality score can be used "
        "to select the best mask. For non-ambiguous prompts, such as multiple "
        "input prompts, multimask_output=False can give better results.",
    )

    # TODO[pydantic]: We couldn't refactor the `validator`, please replace it by `field_validator` manually.
    # Check https://docs.pydantic.dev/dev-v2/migration/#changes-to-validators for more information.
    @validator("prompts", pre=True, always=True)
    def _coerce_prompts(cls, value):
        """
        Accepts any of the following and coerces to Sam2PromptSet:
        - None
        - Sam2PromptSet
        - {"prompts": [...]} (nested)
        - [...] (flat list of prompts)
        - single prompt dict (wrapped to list)

        Any other input is returned unchanged so that regular field
        validation can accept or reject it.

        Raises:
            ValueError: If a flat list contains an entry that is neither a
                dict nor a Sam2Prompt instance.
        """
        if value is None:
            return Sam2PromptSet(prompts=None)
        if isinstance(value, Sam2PromptSet):
            return value
        # Nested dict with key 'prompts'
        if isinstance(value, dict):
            if "prompts" in value:
                return Sam2PromptSet(**value)
            # Single prompt dict – wrap and parse
            try:
                return Sam2PromptSet(prompts=[Sam2Prompt(**value)])
            except Exception:
                # Deliberate best-effort: if the dict is not a valid single
                # prompt, attempt generic construction and let that raise.
                return Sam2PromptSet(**value)
        # Flat list of prompts (dicts or Sam2Prompt instances)
        if isinstance(value, list):
            prompts: List[Sam2Prompt] = []
            for item in value:
                if isinstance(item, Sam2Prompt):
                    prompts.append(item)
                elif isinstance(item, dict):
                    prompts.append(Sam2Prompt(**item))
                else:
                    raise ValueError(
                        "Invalid prompt entry; expected dict or Sam2Prompt instance"
                    )
            return Sam2PromptSet(prompts=prompts)
        # Fallback: let Pydantic try
        return value

    save_logits_to_cache: bool = Field(
        default=False,
        description="If True, saves the low-resolution logits to the cache for potential future use. "
        "This can speed up subsequent requests with similar prompts on the same image. "
        "This feature is ignored if DISABLE_SAM2_LOGITS_CACHE env variable is set True",
    )
    load_logits_from_cache: bool = Field(
        default=False,
        description="If True, attempts to load previously cached low-resolution logits for the given image and prompt set. "
        "This can significantly speed up inference when making multiple similar requests on the same image. "
        "This feature is ignored if DISABLE_SAM2_LOGITS_CACHE env variable is set True",
    )