Skip to content

Easy ocr inference models

InferenceModelsEasyOCRAdapter

Bases: Model

Roboflow EasyOCR model implementation.

This class is responsible for handling the EasyOCR model, including loading the model, preprocessing the input, and performing inference.

Source code in inference/models/easy_ocr/easy_ocr_inference_models.py
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
class InferenceModelsEasyOCRAdapter(Model):
    """Roboflow EasyOCR model implementation.

    This class is responsible for handling the EasyOCR model, including
    loading the model, preprocessing the input, and performing inference.
    """

    def __init__(
        self, model_id: str = "easy_ocr/english_g2", api_key: str = None, **kwargs
    ):
        super().__init__()

        self.metrics = {"num_inferences": 0, "avg_inference_time": 0.0}

        self.api_key = api_key if api_key else API_KEY

        self.task_type = "ocr"

        extra_weights_provider_headers = get_extra_weights_provider_headers()

        self._model: EasyOCRTorch = AutoModel.from_pretrained(
            model_id_or_path=model_id,
            api_key=self.api_key,
            allow_untrusted_packages=ALLOW_INFERENCE_MODELS_UNTRUSTED_PACKAGES,
            allow_direct_local_storage_loading=ALLOW_INFERENCE_MODELS_DIRECTLY_ACCESS_LOCAL_PACKAGES,
            extra_weights_provider_headers=extra_weights_provider_headers,
            **kwargs,
        )

    def predict(self, image_in: np.ndarray, **kwargs) -> Tuple[str, Detections]:
        parsed_texts, parsed_structures = self._model.infer(images=image_in, **kwargs)
        parsed_text = parsed_texts[0]
        parsed_structure = parsed_structures[0]
        return parsed_text, parsed_structure

    def postprocess(
        self,
        predictions: Tuple[np.ndarray, ...],
        preprocess_return_metadata: PreprocessReturnMetadata,
        **kwargs,
    ) -> Any:
        return predictions, preprocess_return_metadata

    def preprocess(
        self, image: Any, **kwargs
    ) -> Tuple[np.ndarray, InferenceResponseImage]:
        image = load_image_bgr(image)
        return image, InferenceResponseImage(
            width=image.shape[1], height=image.shape[0]
        )

    def infer_from_request(
        self, request: EasyOCRInferenceRequest
    ) -> Union[OCRInferenceResponse, List]:
        if type(request.image) is list:
            response = []
            request_copy = copy.copy(request)
            for image in request.image:
                request_copy.image = image
                response.append(self.single_request(request=request_copy))
            return response
        return self.single_request(request)

    def single_request(self, request: EasyOCRInferenceRequest) -> OCRInferenceResponse:
        t1 = perf_counter()
        kwargs = request.dict()
        kwargs["confidence"] = 0.0
        prediction_result, image_metadata = self.infer(**kwargs)
        predictions_for_image = []
        for instance_id in range(prediction_result[1].xyxy.shape[0]):
            x_min, y_min, x_max, y_max = prediction_result[1].xyxy[instance_id].tolist()
            width = x_max - x_min
            height = y_max - y_min
            center_x = (x_min + x_max) / 2
            center_y = (y_min + y_max) / 2
            predictions_for_image.append(
                ObjectDetectionPrediction(
                    # Passing args as a dictionary here since one of the args is 'class' (a protected term in Python)
                    **{
                        "x": center_x,
                        "y": center_y,
                        "width": width,
                        "height": height,
                        "confidence": 1.0,  # confidence is not returned by the model
                        "class": prediction_result[1].bboxes_metadata[instance_id][
                            "text"
                        ],
                        "class_id": 0,  # you can only prompt for one object at once
                        "detection_id": str(uuid.uuid4()),
                    }
                )
            )
        return OCRInferenceResponse(
            result=prediction_result[0],
            image=image_metadata,
            predictions=predictions_for_image,
            time=perf_counter() - t1,
        )