Skip to content

Doctr model

DocTR

Bases: RoboflowCoreModel

Source code in inference/models/doctr/doctr_model.py
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
class DocTR(RoboflowCoreModel):
    """OCR model that combines a DocTR detection model and a DocTR
    recognition model behind a single ``ocr_predictor``.
    """

    def __init__(self, *args, model_id: str = "doctr_rec/crnn_vgg16_bn", **kwargs):
        """Initializes the DocTR model.

        Args:
            *args: Variable length argument list (not read by this constructor).
            model_id: Identifier stored unchanged as ``self.endpoint``.
            **kwargs: Arbitrary keyword arguments; only ``api_key`` is read.
        """
        # NOTE(review): the base-class constructor is not called here; the
        # attributes below are assigned by hand instead.
        self.api_key = kwargs.get("api_key")
        self.dataset_id = "doctr"
        self.version_id = "default"
        self.endpoint = model_id
        # NOTE(review): the lowered value is not read again in this
        # constructor -- confirm whether ``self.endpoint`` was meant to use it.
        model_id = model_id.lower()

        # Set before the sub-models and the predictor are created.
        os.environ["DOCTR_CACHE_DIR"] = os.path.join(MODEL_CACHE_DIR, "doctr_rec")

        self.det_model = DocTRDet(api_key=kwargs.get("api_key"))
        self.rec_model = DocTRRec(api_key=kwargs.get("api_key"))

        os.makedirs(f"{MODEL_CACHE_DIR}/doctr_rec/models/", exist_ok=True)
        os.makedirs(f"{MODEL_CACHE_DIR}/doctr_det/models/", exist_ok=True)

        # Copy each cached ``model.pt`` to a hash-suffixed file name
        # (presumably the names doctr's pretrained loader looks up -- confirm).
        shutil.copyfile(
            f"{MODEL_CACHE_DIR}/doctr_det/db_resnet50/model.pt",
            f"{MODEL_CACHE_DIR}/doctr_det/models/db_resnet50-ac60cadc.pt",
        )
        shutil.copyfile(
            f"{MODEL_CACHE_DIR}/doctr_rec/crnn_vgg16_bn/model.pt",
            f"{MODEL_CACHE_DIR}/doctr_rec/models/crnn_vgg16_bn-9762b0b0.pt",
        )

        self.model = ocr_predictor(
            det_arch=self.det_model.version_id,
            reco_arch=self.rec_model.version_id,
            pretrained=True,
        )
        self.task_type = "ocr"

    def clear_cache(self, delete_from_disk: bool = True) -> None:
        """Clear the cache of both the detection and the recognition model.

        Args:
            delete_from_disk: Forwarded unchanged to each sub-model's
                ``clear_cache``.
        """
        self.det_model.clear_cache(delete_from_disk=delete_from_disk)
        self.rec_model.clear_cache(delete_from_disk=delete_from_disk)

    def preprocess_image(self, image: Image.Image) -> Image.Image:
        """
        DocTR pre-processes images as part of its inference pipeline.

        Thus, no preprocessing is required here: the argument is ignored and
        ``None`` is returned, despite the return annotation.
        """
        pass

    def infer_from_request(
        self, request: DoctrOCRInferenceRequest
    ) -> OCRInferenceResponse:
        """Run ``infer`` for a request object and time the call.

        Args:
            request: The inference request; every field of ``request.dict()``
                is passed to ``infer`` as a keyword argument.

        Returns:
            OCRInferenceResponse: Holds the recognized text in ``result`` and
            the elapsed ``perf_counter()`` difference in ``time``.
        """
        t1 = perf_counter()
        result = self.infer(**request.dict())
        return OCRInferenceResponse(
            result=result,
            time=perf_counter() - t1,
        )

    def infer(self, image: Any, **kwargs):
        """
        Run OCR on a provided image and return the recognized text.

        Args:
            image: Anything ``load_image`` accepts: a BGR numpy array,
                filepath, InferenceRequestImage, PIL Image, byte-string, etc.
            **kwargs: Ignored. Accepted so ``infer_from_request`` can pass
                every request field.

        Returns:
            str: The words of every line of the first page, joined with
            single spaces.
        """
        img = load_image(image)

        # A NamedTemporaryFile cannot portably be reopened by name while it
        # is still open (this fails on Windows), so the image is written to
        # a file inside a temporary directory instead.
        with tempfile.TemporaryDirectory() as tmp_dir:
            image_path = os.path.join(tmp_dir, "image.jpg")

            # Only the first element of what load_image returns is used.
            # NOTE(review): the accepted input is described as BGR while
            # Image.fromarray treats 3-channel arrays as RGB -- confirm the
            # channel order.
            Image.fromarray(img[0]).save(image_path)

            doc = DocumentFile.from_images([image_path])

            # Keep the model call inside the block so the file still exists
            # while the document is consumed.
            blocks = self.model(doc).export()["pages"][0]["blocks"]

        line_texts = [
            " ".join([word["value"] for word in line["words"]])
            for block in blocks
            for line in block["lines"]
        ]

        return " ".join(line_texts)

    def get_infer_bucket_file_list(self) -> list:
        """Get the list of required files for inference.

        Returns:
            list: A list of required files for inference, e.g., ["model.pt"].
        """
        return ["model.pt"]

__init__(*args, model_id='doctr_rec/crnn_vgg16_bn', **kwargs)

Initializes the DocTR model.

Parameters:

Name Type Description Default
*args

Variable length argument list.

()
**kwargs

Arbitrary keyword arguments.

{}
Source code in inference/models/doctr/doctr_model.py
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
def __init__(self, *args, model_id: str = "doctr_rec/crnn_vgg16_bn", **kwargs):
    """Set up the detection and recognition sub-models and the OCR predictor.

    Args:
        *args: Variable length argument list (not read by this constructor).
        model_id: Identifier stored unchanged as ``self.endpoint``.
        **kwargs: Arbitrary keyword arguments; only ``api_key`` is read.
    """
    api_key = kwargs.get("api_key")

    self.api_key = api_key
    self.dataset_id = "doctr"
    self.version_id = "default"
    self.endpoint = model_id
    # NOTE(review): the lowered value is not read again in this constructor.
    model_id = model_id.lower()

    # Set before the sub-models and the predictor are created.
    os.environ["DOCTR_CACHE_DIR"] = os.path.join(MODEL_CACHE_DIR, "doctr_rec")

    self.det_model = DocTRDet(api_key=api_key)
    self.rec_model = DocTRRec(api_key=api_key)

    for models_dir in (
        f"{MODEL_CACHE_DIR}/doctr_rec/models/",
        f"{MODEL_CACHE_DIR}/doctr_det/models/",
    ):
        os.makedirs(models_dir, exist_ok=True)

    # (cached weight file, hash-suffixed copy) pairs; the target names are
    # presumably what doctr's pretrained loader looks up -- confirm.
    weight_copies = (
        (
            f"{MODEL_CACHE_DIR}/doctr_det/db_resnet50/model.pt",
            f"{MODEL_CACHE_DIR}/doctr_det/models/db_resnet50-ac60cadc.pt",
        ),
        (
            f"{MODEL_CACHE_DIR}/doctr_rec/crnn_vgg16_bn/model.pt",
            f"{MODEL_CACHE_DIR}/doctr_rec/models/crnn_vgg16_bn-9762b0b0.pt",
        ),
    )
    for cached_weights, expected_path in weight_copies:
        shutil.copyfile(cached_weights, expected_path)

    predictor = ocr_predictor(
        det_arch=self.det_model.version_id,
        reco_arch=self.rec_model.version_id,
        pretrained=True,
    )
    self.model = predictor
    self.task_type = "ocr"

get_infer_bucket_file_list()

Get the list of required files for inference.

Returns:

Name Type Description
list list

A list of required files for inference, e.g., ["model.pt"].

Source code in inference/models/doctr/doctr_model.py
116
117
118
119
120
121
122
def get_infer_bucket_file_list(self) -> list:
    """Return the names of the files needed for inference.

    Returns:
        list: A newly built one-element list, ``["model.pt"]``.
    """
    required_files = ["model.pt"]
    return required_files

infer(image, **kwargs)

Run inference on a provided image. The `image` argument can be a BGR numpy array, filepath, InferenceRequestImage, PIL Image, byte-string, etc.

Parameters:

Name Type Description Default
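image Any

The image to run OCR on (BGR numpy array, filepath, InferenceRequestImage, PIL Image, byte-string, etc.). Additional keyword arguments are accepted and ignored.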

required

Returns:

Name Type Description
str

The recognized text: the words of every detected line on the first page, joined with single spaces.

Source code in inference/models/doctr/doctr_model.py
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
def infer(self, image: Any, **kwargs):
    """
    Run OCR on a provided image and return the recognized text.

    Args:
        image: Anything ``load_image`` accepts: a BGR numpy array, filepath,
            InferenceRequestImage, PIL Image, byte-string, etc.
        **kwargs: Ignored. Accepted so callers can pass extra request fields.

    Returns:
        str: The words of every line of the first page, joined with single
        spaces.
    """
    img = load_image(image)

    # A NamedTemporaryFile cannot portably be reopened by name while it is
    # still open (this fails on Windows), so the image is written to a file
    # inside a temporary directory instead.
    with tempfile.TemporaryDirectory() as tmp_dir:
        image_path = os.path.join(tmp_dir, "image.jpg")

        # Only the first element of what load_image returns is used.
        # NOTE(review): the accepted input is described as BGR while
        # Image.fromarray treats 3-channel arrays as RGB -- confirm the
        # channel order.
        Image.fromarray(img[0]).save(image_path)

        doc = DocumentFile.from_images([image_path])

        # Keep the model call inside the block so the file still exists
        # while the document is consumed.
        blocks = self.model(doc).export()["pages"][0]["blocks"]

    line_texts = [
        " ".join([word["value"] for word in line["words"]])
        for block in blocks
        for line in block["lines"]
    ]

    return " ".join(line_texts)

preprocess_image(image)

DocTR pre-processes images as part of its inference pipeline.

Thus, no preprocessing is required here.

Source code in inference/models/doctr/doctr_model.py
63
64
65
66
67
68
69
def preprocess_image(self, image: Image.Image) -> Image.Image:
    """
    No-op: DocTR pre-processes images as part of its inference pipeline.

    The argument is ignored and ``None`` is returned, despite the return
    annotation.
    """
    return None

DocTRDet

Bases: RoboflowCoreModel

DocTR class for document Optical Character Recognition (OCR).

Attributes:

Name Type Description
doctr

The DocTR model.

ort_session

ONNX runtime inference session.

Source code in inference/models/doctr/doctr_model.py
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
class DocTRDet(RoboflowCoreModel):
    """Core-model wrapper for the DocTR text-detection weights.

    ``DocTR`` creates one instance and passes its ``version_id`` to
    ``ocr_predictor`` as ``det_arch``.

    NOTE(review): the previous docstring listed ``doctr`` and ``ort_session``
    attributes. Neither is assigned in this class -- confirm whether the base
    class provides them.
    """

    def __init__(self, *args, model_id: str = "doctr_det/db_resnet50", **kwargs):
        """Initializes the DocTR detection model.

        Args:
            *args: Variable length argument list, forwarded to the base class.
            model_id: Model identifier forwarded to the base class.
            **kwargs: Arbitrary keyword arguments, forwarded to the base class.
        """
        # The original called self.get_infer_bucket_file_list() here and
        # discarded the result; the method only returns a list, so the call
        # did nothing and has been removed.
        super().__init__(*args, model_id=model_id, **kwargs)

    def clear_cache(self, delete_from_disk: bool = True) -> None:
        """Clear the model cache by delegating to the base class.

        Args:
            delete_from_disk: Forwarded unchanged to the base implementation.
        """
        super().clear_cache(delete_from_disk=delete_from_disk)

    def get_infer_bucket_file_list(self) -> list:
        """Get the list of required files for inference.

        Returns:
            list: A list of required files for inference, e.g., ["model.pt"].
        """
        return ["model.pt"]

__init__(*args, model_id='doctr_det/db_resnet50', **kwargs)

Initializes the DocTR model.

Parameters:

Name Type Description Default
*args

Variable length argument list.

()
**kwargs

Arbitrary keyword arguments.

{}
Source code in inference/models/doctr/doctr_model.py
157
158
159
160
161
162
163
164
165
166
167
def __init__(self, *args, model_id: str = "doctr_det/db_resnet50", **kwargs):
    """Initializes the DocTR detection model.

    Args:
        *args: Variable length argument list, forwarded to the base class.
        model_id: Model identifier forwarded to the base class.
        **kwargs: Arbitrary keyword arguments, forwarded to the base class.
    """
    # The original called self.get_infer_bucket_file_list() here and
    # discarded the result; the method only returns a list, so the call did
    # nothing and has been removed.
    super().__init__(*args, model_id=model_id, **kwargs)

get_infer_bucket_file_list()

Get the list of required files for inference.

Returns:

Name Type Description
list list

A list of required files for inference, e.g., ["model.pt"].

Source code in inference/models/doctr/doctr_model.py
172
173
174
175
176
177
178
def get_infer_bucket_file_list(self) -> list:
    """Return the names of the files needed for inference.

    Returns:
        list: A newly built one-element list, ``["model.pt"]``.
    """
    required_files = ["model.pt"]
    return required_files

DocTRRec

Bases: RoboflowCoreModel

Source code in inference/models/doctr/doctr_model.py
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
class DocTRRec(RoboflowCoreModel):
    """Core-model wrapper for the DocTR text-recognition weights.

    ``DocTR`` creates one instance and passes its ``version_id`` to
    ``ocr_predictor`` as ``reco_arch``.
    """

    def __init__(self, *args, model_id: str = "doctr_rec/crnn_vgg16_bn", **kwargs):
        """Initializes the DocTR recognition model.

        Args:
            *args: Variable length argument list, forwarded to the base class.
            model_id: Model identifier forwarded to the base class.
            **kwargs: Arbitrary keyword arguments, forwarded to the base class.
        """
        # The original called self.get_infer_bucket_file_list() here and
        # discarded the result; the method only returns a list, so the call
        # did nothing and has been removed.
        super().__init__(*args, model_id=model_id, **kwargs)

    def clear_cache(self, delete_from_disk: bool = True) -> None:
        """Clear the model cache by delegating to the base class.

        Args:
            delete_from_disk: Forwarded unchanged to the base implementation.
        """
        super().clear_cache(delete_from_disk=delete_from_disk)

    def get_infer_bucket_file_list(self) -> list:
        """Get the list of required files for inference.

        Returns:
            list: A list of required files for inference, e.g., ["model.pt"].
        """
        return ["model.pt"]

__init__(*args, model_id='doctr_rec/crnn_vgg16_bn', **kwargs)

Initializes the DocTR model.

Parameters:

Name Type Description Default
*args

Variable length argument list.

()
**kwargs

Arbitrary keyword arguments.

{}
Source code in inference/models/doctr/doctr_model.py
126
127
128
129
130
131
132
133
134
135
def __init__(self, *args, model_id: str = "doctr_rec/crnn_vgg16_bn", **kwargs):
    """Initializes the DocTR recognition model.

    Args:
        *args: Variable length argument list, forwarded to the base class.
        model_id: Model identifier forwarded to the base class.
        **kwargs: Arbitrary keyword arguments, forwarded to the base class.
    """
    # The original called self.get_infer_bucket_file_list() here and
    # discarded the result; the method only returns a list, so the call did
    # nothing and has been removed.
    super().__init__(*args, model_id=model_id, **kwargs)

get_infer_bucket_file_list()

Get the list of required files for inference.

Returns:

Name Type Description
list list

A list of required files for inference, e.g., ["model.pt"].

Source code in inference/models/doctr/doctr_model.py
140
141
142
143
144
145
146
def get_infer_bucket_file_list(self) -> list:
    """Return the names of the files needed for inference.

    Returns:
        list: A newly built one-element list, ``["model.pt"]``.
    """
    required_files = ["model.pt"]
    return required_files