Skip to content

Grounding dino

GroundingDINO

Bases: RoboflowCoreModel

GroundingDINO class for zero-shot object detection.

Attributes:

Name Type Description
model

The GroundingDINO model.

Source code in inference/models/grounding_dino/grounding_dino.py
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
class GroundingDINO(RoboflowCoreModel):
    """GroundingDINO class for zero-shot object detection.

    Attributes:
        model: The GroundingDINO model.
        task_type: Always ``"object-detection"``.
        class_names: The text prompts passed to the most recent ``infer``
            call (not set until ``infer`` has been called).
    """

    def __init__(
        self, *args, model_id="grounding_dino/groundingdino_swint_ogc", **kwargs
    ):
        """Initializes the GroundingDINO model.

        Args:
            *args: Variable length argument list, forwarded to the base class.
            model_id: Model identifier. It is forwarded to the base class and
                also names the sub-directory of ``MODEL_CACHE_DIR`` that holds
                the model config and checkpoint.
            **kwargs: Arbitrary keyword arguments, forwarded to the base class.
        """

        # NOTE(review): the base class presumably fetches the files listed by
        # get_infer_bucket_file_list() into the cache directory -- confirm.
        super().__init__(*args, model_id=model_id, **kwargs)

        cache_dir = os.path.join(MODEL_CACHE_DIR, model_id)
        config_path = os.path.join(cache_dir, "GroundingDINO_SwinT_OGC.py")

        # exist_ok avoids the check-then-create race when several workers
        # initialise the model at the same time.
        os.makedirs(cache_dir, exist_ok=True)

        if not os.path.exists(config_path):
            url = "https://raw.githubusercontent.com/roboflow/GroundingDINO/main/groundingdino/config/GroundingDINO_SwinT_OGC.py"
            # Download under a temporary name and rename afterwards so that an
            # interrupted transfer never leaves a truncated config file at the
            # final path (which would be treated as valid on the next start).
            tmp_path = f"{config_path}.{os.getpid()}.tmp"
            try:
                urllib.request.urlretrieve(url, tmp_path)
                os.replace(tmp_path, config_path)
            finally:
                if os.path.exists(tmp_path):
                    os.remove(tmp_path)

        self.model = Model(
            model_config_path=config_path,
            model_checkpoint_path=os.path.join(
                cache_dir, "groundingdino_swint_ogc.pth"
            ),
            device="cuda" if torch.cuda.is_available() else "cpu",
        )
        self.task_type = "object-detection"

    def preproc_image(self, image: Any):
        """Preprocesses an image.

        Args:
            image: The image to preprocess, in any form accepted by
                ``load_image_bgr``.

        Returns:
            The value returned by ``load_image_bgr(image)``; ``infer`` reads
            its ``shape`` as ``(height, width, ...)``.
        """
        np_image = load_image_bgr(image)
        return np_image

    def infer_from_request(
        self,
        request: GroundingDINOInferenceRequest,
    ) -> ObjectDetectionInferenceResponse:
        """
        Perform inference based on the details provided in the request, and return the associated responses.

        The request's fields (``request.dict()``) are passed to ``infer`` as
        keyword arguments.
        """
        result = self.infer(**request.dict())
        return result

    def infer(
        self,
        image: InferenceRequestImage,
        text: List[str] = None,
        class_filter: list = None,
        box_threshold=0.5,
        text_threshold=0.5,
        class_agnostic_nms=CLASS_AGNOSTIC_NMS,
        **kwargs
    ):
        """
        Run inference on a provided image.

        Args:
            image: The image to run detection on, in any form accepted by
                ``preproc_image``.
            text (List[str]): The class prompts to detect; a detection's class
                id is used as an index into this list.
            class_filter (Optional[List[str]]): If provided and non-empty, only
                predictions whose class name is in this list are returned.
            box_threshold: Forwarded to ``model.predict_with_classes``.
            text_threshold: Forwarded to ``model.predict_with_classes``.
            class_agnostic_nms: When truthy, NMS is applied across all classes
                rather than per class.
            **kwargs: Accepted and ignored, so that a full request dict can be
                splatted into this method.

        Returns:
            ObjectDetectionInferenceResponse: The predictions, the image
            dimensions and the elapsed time.
        """
        t1 = perf_counter()
        image = self.preproc_image(image)
        img_dims = image.shape

        detections = self.model.predict_with_classes(
            image=image,
            classes=text,
            box_threshold=box_threshold,
            text_threshold=text_threshold,
        )

        # Kept as an attribute for backward compatibility with code that reads
        # it after inference.
        self.class_names = text

        if class_agnostic_nms:
            detections = detections.with_nms(class_agnostic=True)
        else:
            detections = detections.with_nms()

        xywh_bboxes = [xyxy_to_xywh(detection) for detection in detections.xyxy]

        t2 = perf_counter() - t1

        predictions = []
        for i, xywh in enumerate(xywh_bboxes):
            raw_class_id = detections.class_id[i]
            # A detection without a class id cannot be mapped to a prompt;
            # skip it instead of failing on int(None).
            if raw_class_id is None:
                continue
            class_id = int(raw_class_id)
            class_name = text[class_id]
            # The class id comes from detections.class_id; the xyxy row only
            # holds the four box coordinates.
            if class_filter and class_name not in class_filter:
                continue
            predictions.append(
                ObjectDetectionPrediction(
                    **{
                        "x": xywh[0],
                        "y": xywh[1],
                        "width": xywh[2],
                        "height": xywh[3],
                        "confidence": detections.confidence[i],
                        "class": class_name,
                        "class_id": class_id,
                    }
                )
            )

        responses = ObjectDetectionInferenceResponse(
            predictions=predictions,
            image=InferenceResponseImage(width=img_dims[1], height=img_dims[0]),
            time=t2,
        )
        return responses

    def get_infer_bucket_file_list(self) -> list:
        """Get the list of required files for inference.

        Returns:
            list: The file names required for inference; here the single
            checkpoint ``"groundingdino_swint_ogc.pth"``.
        """
        return ["groundingdino_swint_ogc.pth"]

__init__(*args, model_id='grounding_dino/groundingdino_swint_ogc', **kwargs)

Initializes the GroundingDINO model.

Parameters:

Name Type Description Default
*args

Variable length argument list.

()
**kwargs

Arbitrary keyword arguments.

{}
Source code in inference/models/grounding_dino/grounding_dino.py
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
def __init__(
    self, *args, model_id="grounding_dino/groundingdino_swint_ogc", **kwargs
):
    """Initializes the GroundingDINO model.

    Args:
        *args: Variable length argument list, forwarded to the base class.
        model_id: Model identifier. It is forwarded to the base class and
            also names the sub-directory of ``MODEL_CACHE_DIR`` that holds
            the model config and checkpoint.
        **kwargs: Arbitrary keyword arguments, forwarded to the base class.
    """

    super().__init__(*args, model_id=model_id, **kwargs)

    cache_dir = os.path.join(MODEL_CACHE_DIR, model_id)
    config_path = os.path.join(cache_dir, "GroundingDINO_SwinT_OGC.py")

    # exist_ok avoids the check-then-create race when several workers
    # initialise the model at the same time.
    os.makedirs(cache_dir, exist_ok=True)

    if not os.path.exists(config_path):
        url = "https://raw.githubusercontent.com/roboflow/GroundingDINO/main/groundingdino/config/GroundingDINO_SwinT_OGC.py"
        # Download under a temporary name and rename afterwards so that an
        # interrupted transfer never leaves a truncated config file at the
        # final path (which would be treated as valid on the next start).
        tmp_path = f"{config_path}.{os.getpid()}.tmp"
        try:
            urllib.request.urlretrieve(url, tmp_path)
            os.replace(tmp_path, config_path)
        finally:
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

    self.model = Model(
        model_config_path=config_path,
        model_checkpoint_path=os.path.join(
            cache_dir, "groundingdino_swint_ogc.pth"
        ),
        device="cuda" if torch.cuda.is_available() else "cpu",
    )
    self.task_type = "object-detection"

get_infer_bucket_file_list()

Get the list of required files for inference.

Returns:

Name Type Description
list list

A list of required files for inference, e.g., ["model.pt"].

Source code in inference/models/grounding_dino/grounding_dino.py
150
151
152
153
154
155
156
def get_infer_bucket_file_list(self) -> list:
    """Report which files must be present to run inference.

    Returns:
        list: The required file names; for this model that is the single
        checkpoint ``"groundingdino_swint_ogc.pth"``.
    """
    required_files = ["groundingdino_swint_ogc.pth"]
    return required_files

infer(image, text=None, class_filter=None, box_threshold=0.5, text_threshold=0.5, class_agnostic_nms=CLASS_AGNOSTIC_NMS, **kwargs)

Run inference on a provided image. The image can be a BGR numpy array, filepath, InferenceRequestImage, PIL Image, byte-string, etc.

Parameters:

Name Type Description Default
image InferenceRequestImage

The image to run inference on.

required
class_filter Optional[List[str]]

A list of class names to filter, if provided.

None

Returns:

Name Type Description
ObjectDetectionInferenceResponse

The inference response.

Source code in inference/models/grounding_dino/grounding_dino.py
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
def infer(
    self,
    image: InferenceRequestImage,
    text: List[str] = None,
    class_filter: list = None,
    box_threshold=0.5,
    text_threshold=0.5,
    class_agnostic_nms=CLASS_AGNOSTIC_NMS,
    **kwargs
):
    """
    Run inference on a provided image.

    Args:
        image: The image to run detection on, in any form accepted by
            ``preproc_image``.
        text (List[str]): The class prompts to detect; a detection's class
            id is used as an index into this list.
        class_filter (Optional[List[str]]): If provided and non-empty, only
            predictions whose class name is in this list are returned.
        box_threshold: Forwarded to ``model.predict_with_classes``.
        text_threshold: Forwarded to ``model.predict_with_classes``.
        class_agnostic_nms: When truthy, NMS is applied across all classes
            rather than per class.
        **kwargs: Accepted and ignored, so that a full request dict can be
            splatted into this method.

    Returns:
        ObjectDetectionInferenceResponse: The predictions, the image
        dimensions and the elapsed time.
    """
    t1 = perf_counter()
    image = self.preproc_image(image)
    img_dims = image.shape

    detections = self.model.predict_with_classes(
        image=image,
        classes=text,
        box_threshold=box_threshold,
        text_threshold=text_threshold,
    )

    # Kept as an attribute for backward compatibility with code that reads
    # it after inference.
    self.class_names = text

    if class_agnostic_nms:
        detections = detections.with_nms(class_agnostic=True)
    else:
        detections = detections.with_nms()

    xywh_bboxes = [xyxy_to_xywh(detection) for detection in detections.xyxy]

    t2 = perf_counter() - t1

    predictions = []
    for i, xywh in enumerate(xywh_bboxes):
        raw_class_id = detections.class_id[i]
        # A detection without a class id cannot be mapped to a prompt;
        # skip it instead of failing on int(None).
        if raw_class_id is None:
            continue
        class_id = int(raw_class_id)
        class_name = text[class_id]
        # The class id comes from detections.class_id; the xyxy row only
        # holds the four box coordinates.
        if class_filter and class_name not in class_filter:
            continue
        predictions.append(
            ObjectDetectionPrediction(
                **{
                    "x": xywh[0],
                    "y": xywh[1],
                    "width": xywh[2],
                    "height": xywh[3],
                    "confidence": detections.confidence[i],
                    "class": class_name,
                    "class_id": class_id,
                }
            )
        )

    responses = ObjectDetectionInferenceResponse(
        predictions=predictions,
        image=InferenceResponseImage(width=img_dims[1], height=img_dims[0]),
        time=t2,
    )
    return responses

infer_from_request(request)

Perform inference based on the details provided in the request, and return the associated responses.

Source code in inference/models/grounding_dino/grounding_dino.py
74
75
76
77
78
79
80
81
82
def infer_from_request(
    self,
    request: GroundingDINOInferenceRequest,
) -> ObjectDetectionInferenceResponse:
    """
    Run inference for a request object and return the resulting response.

    The request is converted to a dict with ``request.dict()`` and its
    entries are passed to ``self.infer`` as keyword arguments.
    """
    request_fields = request.dict()
    return self.infer(**request_fields)

preproc_image(image)

Preprocesses an image.

Parameters:

Name Type Description Default
image InferenceRequestImage

The image to preprocess.

required

Returns:

Type Description

np.array: The preprocessed image.

Source code in inference/models/grounding_dino/grounding_dino.py
62
63
64
65
66
67
68
69
70
71
72
def preproc_image(self, image: Any):
    """Load an image for inference.

    Args:
        image: The image to preprocess, in any form accepted by
            ``load_image_bgr``.

    Returns:
        The value returned by ``load_image_bgr(image)``, unchanged.
    """
    return load_image_bgr(image)