Skip to content

Doctr model

DocTR

Bases: RoboflowCoreModel

Source code in inference/models/doctr/doctr_model.py
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
class DocTR(RoboflowCoreModel):
    """OCR model that chains a DocTR text detector and a DocTR text recognizer.

    The detection and recognition weights are obtained through ``DocTRDet``
    and ``DocTRRec``; the combined predictor is built with ``ocr_predictor``
    and stored on ``self.model``.
    """

    def __init__(self, *args, model_id: str = "doctr_rec/crnn_vgg16_bn", **kwargs):
        """Initializes the DocTR model.

        The base-class initializer is not called here; the identifying
        attributes are assigned directly.

        Args:
            *args: Variable length argument list (not used by this class).
            model_id: Identifier stored unchanged as ``self.endpoint``.
            **kwargs: Arbitrary keyword arguments. Only ``api_key`` is read;
                it is forwarded to the detection and recognition sub-models.
        """
        api_key = kwargs.get("api_key")

        self.api_key = api_key
        self.dataset_id = "doctr"
        self.version_id = "default"
        self.endpoint = model_id

        # NOTE(review): the cache dir points at the recognition folder only,
        # while the detection weights below are copied under doctr_det/models.
        # Confirm docTR actually picks the detection file up from there.
        os.environ["DOCTR_CACHE_DIR"] = os.path.join(MODEL_CACHE_DIR, "doctr_rec")

        self.det_model = DocTRDet(api_key=api_key)
        self.rec_model = DocTRRec(api_key=api_key)

        os.makedirs(f"{MODEL_CACHE_DIR}/doctr_rec/models/", exist_ok=True)
        os.makedirs(f"{MODEL_CACHE_DIR}/doctr_det/models/", exist_ok=True)

        # Copy each model.pt to a hash-suffixed file name. Presumably these are
        # the names the pretrained predictor looks up in its cache; verify
        # against the installed docTR version.
        shutil.copyfile(
            f"{MODEL_CACHE_DIR}/doctr_det/db_resnet50/model.pt",
            f"{MODEL_CACHE_DIR}/doctr_det/models/db_resnet50-ac60cadc.pt",
        )
        shutil.copyfile(
            f"{MODEL_CACHE_DIR}/doctr_rec/crnn_vgg16_bn/model.pt",
            f"{MODEL_CACHE_DIR}/doctr_rec/models/crnn_vgg16_bn-9762b0b0.pt",
        )

        self.model = ocr_predictor(
            det_arch=self.det_model.version_id,
            reco_arch=self.rec_model.version_id,
            pretrained=True,
        )
        self.task_type = "ocr"

    def clear_cache(self) -> None:
        """Clears the cache of both the detection and the recognition model."""
        self.det_model.clear_cache()
        self.rec_model.clear_cache()

    def preprocess_image(self, image: Image.Image) -> Image.Image:
        """
        DocTR pre-processes images as part of its inference pipeline.

        Thus, no preprocessing is required here. The method is a no-op and
        returns ``None`` regardless of the annotated return type.
        """
        pass

    def infer_from_request(
        self, request: DoctrOCRInferenceRequest
    ) -> OCRInferenceResponse:
        """Runs OCR for a request object and wraps the text in a response.

        Args:
            request (DoctrOCRInferenceRequest): The inference request; its
                fields (``request.dict()``) are passed to ``infer`` as
                keyword arguments.

        Returns:
            OCRInferenceResponse: The recognized text together with the
                elapsed time measured around the ``infer`` call.
        """
        t1 = perf_counter()
        result = self.infer(**request.dict())
        return OCRInferenceResponse(
            result=result,
            time=perf_counter() - t1,
        )

    def infer(self, image: Any, **kwargs) -> str:
        """
        Run inference on a provided image.

        Args:
            image: can be a BGR numpy array, filepath, InferenceRequestImage,
                PIL Image, byte-string, etc.
            **kwargs: Extra keyword arguments are accepted and ignored.

        Returns:
            str: The recognized text. Words of a line are joined with a
                single space, and the lines of all blocks on the first page
                are joined with a single space.
        """
        # load_image returns an indexable result; element 0 is handed to PIL.
        # NOTE(review): Image.fromarray does no channel reordering - confirm
        # the channel order the predictor expects for BGR inputs.
        pixels = load_image(image)[0]

        # Write into a private temporary directory instead of re-opening an
        # already-open NamedTemporaryFile by name, which is not possible on
        # every platform.
        with tempfile.TemporaryDirectory() as tmp_dir:
            image_path = os.path.join(tmp_dir, "image.jpg")
            Image.fromarray(pixels).save(image_path)

            doc = DocumentFile.from_images([image_path])
            exported = self.model(doc).export()

        blocks = exported["pages"][0]["blocks"]
        line_texts = [
            " ".join([word["value"] for word in line["words"]])
            for block in blocks
            for line in block["lines"]
        ]
        return " ".join(line_texts)

    def get_infer_bucket_file_list(self) -> list:
        """Get the list of required files for inference.

        Returns:
            list: A list of required files for inference, e.g., ["model.pt"].
        """
        return ["model.pt"]

__init__(*args, model_id='doctr_rec/crnn_vgg16_bn', **kwargs)

Initializes the DocTR model.

Parameters:

Name Type Description Default
*args

Variable length argument list.

()
**kwargs

Arbitrary keyword arguments.

{}
Source code in inference/models/doctr/doctr_model.py
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
def __init__(self, *args, model_id: str = "doctr_rec/crnn_vgg16_bn", **kwargs):
    """Initializes the DocTR model.

    Args:
        *args: Variable length argument list (not used here).
        model_id: Identifier stored unchanged as ``self.endpoint``.
        **kwargs: Arbitrary keyword arguments. Only ``api_key`` is read;
            it is forwarded to the detection and recognition sub-models.
    """
    api_key = kwargs.get("api_key")

    self.api_key = api_key
    self.dataset_id = "doctr"
    self.version_id = "default"
    self.endpoint = model_id

    # NOTE(review): the cache dir points at the recognition folder only,
    # while the detection weights below are copied under doctr_det/models.
    # Confirm docTR actually picks the detection file up from there.
    os.environ["DOCTR_CACHE_DIR"] = os.path.join(MODEL_CACHE_DIR, "doctr_rec")

    self.det_model = DocTRDet(api_key=api_key)
    self.rec_model = DocTRRec(api_key=api_key)

    os.makedirs(f"{MODEL_CACHE_DIR}/doctr_rec/models/", exist_ok=True)
    os.makedirs(f"{MODEL_CACHE_DIR}/doctr_det/models/", exist_ok=True)

    # Copy each model.pt to a hash-suffixed file name. Presumably these are
    # the names the pretrained predictor looks up in its cache; verify
    # against the installed docTR version.
    shutil.copyfile(
        f"{MODEL_CACHE_DIR}/doctr_det/db_resnet50/model.pt",
        f"{MODEL_CACHE_DIR}/doctr_det/models/db_resnet50-ac60cadc.pt",
    )
    shutil.copyfile(
        f"{MODEL_CACHE_DIR}/doctr_rec/crnn_vgg16_bn/model.pt",
        f"{MODEL_CACHE_DIR}/doctr_rec/models/crnn_vgg16_bn-9762b0b0.pt",
    )

    self.model = ocr_predictor(
        det_arch=self.det_model.version_id,
        reco_arch=self.rec_model.version_id,
        pretrained=True,
    )
    self.task_type = "ocr"

get_infer_bucket_file_list()

Get the list of required files for inference.

Returns:

Name Type Description
list list

A list of required files for inference, e.g., ["model.pt"].

Source code in inference/models/doctr/doctr_model.py
116
117
118
119
120
121
122
def get_infer_bucket_file_list(self) -> list:
    """Return the names of the files that must be available for inference.

    Returns:
        list: A newly created list holding the single entry "model.pt".
    """
    weights_file = "model.pt"
    return [weights_file]

infer(image, **kwargs)

Run inference on a provided image. The image can be a BGR numpy array, file path, InferenceRequestImage, PIL Image, byte-string, etc.

Parameters:

Name Type Description Default
image Any

The image to run OCR on.

required

Returns:

Name Type Description
str

The recognized text, with words and lines joined by single spaces.

Source code in inference/models/doctr/doctr_model.py
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
def infer(self, image: Any, **kwargs) -> str:
    """
    Run inference on a provided image.

    Args:
        image: can be a BGR numpy array, filepath, InferenceRequestImage,
            PIL Image, byte-string, etc.
        **kwargs: Extra keyword arguments are accepted and ignored.

    Returns:
        str: The recognized text. Words of a line are joined with a single
            space, and the lines of all blocks on the first page are joined
            with a single space.
    """
    # load_image returns an indexable result; element 0 is handed to PIL.
    # NOTE(review): Image.fromarray does no channel reordering - confirm
    # the channel order the predictor expects for BGR inputs.
    pixels = load_image(image)[0]

    # Write into a private temporary directory instead of re-opening an
    # already-open NamedTemporaryFile by name, which is not possible on
    # every platform.
    with tempfile.TemporaryDirectory() as tmp_dir:
        image_path = os.path.join(tmp_dir, "image.jpg")
        Image.fromarray(pixels).save(image_path)

        doc = DocumentFile.from_images([image_path])
        exported = self.model(doc).export()

    blocks = exported["pages"][0]["blocks"]
    line_texts = [
        " ".join([word["value"] for word in line["words"]])
        for block in blocks
        for line in block["lines"]
    ]
    return " ".join(line_texts)

preprocess_image(image)

DocTR pre-processes images as part of its inference pipeline.

Thus, no preprocessing is required here.

Source code in inference/models/doctr/doctr_model.py
63
64
65
66
67
68
69
def preprocess_image(self, image: Image.Image) -> Image.Image:
    """
    Intentional no-op: DocTR pre-processes images inside its own
    inference pipeline, so nothing has to happen here.

    The argument is ignored and ``None`` is returned.
    """
    return None

DocTRDet

Bases: RoboflowCoreModel

DocTR text-detection model class, used as the detection stage of document Optical Character Recognition (OCR).

Attributes:

Name Type Description
doctr

The DocTR model.

ort_session

ONNX runtime inference session.

Source code in inference/models/doctr/doctr_model.py
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
class DocTRDet(RoboflowCoreModel):
    """DocTR text-detection model used as the detection stage of OCR.

    The default ``model_id`` is "doctr_det/db_resnet50" and the only file
    required for inference is "model.pt". All loading behaviour comes from
    ``RoboflowCoreModel``.

    NOTE(review): earlier documentation listed ``doctr`` and ``ort_session``
    attributes; neither is assigned in this class - confirm whether the base
    class provides them.
    """

    def __init__(self, *args, model_id: str = "doctr_det/db_resnet50", **kwargs):
        """Initializes the DocTR detection model.

        Args:
            *args: Variable length argument list, forwarded to the base class.
            model_id: Model identifier forwarded to the base class.
            **kwargs: Arbitrary keyword arguments, forwarded to the base class.
        """
        super().__init__(*args, model_id=model_id, **kwargs)

    def get_infer_bucket_file_list(self) -> list:
        """Get the list of required files for inference.

        Returns:
            list: A list of required files for inference, e.g., ["model.pt"].
        """
        return ["model.pt"]

__init__(*args, model_id='doctr_det/db_resnet50', **kwargs)

Initializes the DocTR model.

Parameters:

Name Type Description Default
*args

Variable length argument list.

()
**kwargs

Arbitrary keyword arguments.

{}
Source code in inference/models/doctr/doctr_model.py
156
157
158
159
160
161
162
163
164
165
166
def __init__(self, *args, model_id: str = "doctr_det/db_resnet50", **kwargs):
    """Initializes the DocTR detection model.

    Args:
        *args: Variable length argument list, forwarded to the base class.
        model_id: Model identifier forwarded to the base class.
        **kwargs: Arbitrary keyword arguments, forwarded to the base class.
    """
    super().__init__(*args, model_id=model_id, **kwargs)

get_infer_bucket_file_list()

Get the list of required files for inference.

Returns:

Name Type Description
list list

A list of required files for inference, e.g., ["model.pt"].

Source code in inference/models/doctr/doctr_model.py
168
169
170
171
172
173
174
def get_infer_bucket_file_list(self) -> list:
    """List the files this model needs in order to run inference.

    Returns:
        list: A newly created list holding the single entry "model.pt".
    """
    required_files = ["model.pt"]
    return required_files

DocTRRec

Bases: RoboflowCoreModel

Source code in inference/models/doctr/doctr_model.py
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
class DocTRRec(RoboflowCoreModel):
    """DocTR text-recognition model used as the recognition stage of OCR.

    The default ``model_id`` is "doctr_rec/crnn_vgg16_bn" and the only file
    required for inference is "model.pt". All loading behaviour comes from
    ``RoboflowCoreModel``.
    """

    def __init__(self, *args, model_id: str = "doctr_rec/crnn_vgg16_bn", **kwargs):
        """Initializes the DocTR recognition model.

        Args:
            *args: Variable length argument list, forwarded to the base class.
            model_id: Model identifier forwarded to the base class.
            **kwargs: Arbitrary keyword arguments, forwarded to the base class.
        """
        super().__init__(*args, model_id=model_id, **kwargs)

    def get_infer_bucket_file_list(self) -> list:
        """Get the list of required files for inference.

        Returns:
            list: A list of required files for inference, e.g., ["model.pt"].
        """
        return ["model.pt"]

__init__(*args, model_id='doctr_rec/crnn_vgg16_bn', **kwargs)

Initializes the DocTR model.

Parameters:

Name Type Description Default
*args

Variable length argument list.

()
**kwargs

Arbitrary keyword arguments.

{}
Source code in inference/models/doctr/doctr_model.py
126
127
128
129
130
131
132
133
134
135
136
137
def __init__(self, *args, model_id: str = "doctr_rec/crnn_vgg16_bn", **kwargs):
    """Initializes the DocTR recognition model.

    Args:
        *args: Variable length argument list, forwarded to the base class.
        model_id: Model identifier forwarded to the base class.
        **kwargs: Arbitrary keyword arguments, forwarded to the base class.
    """
    super().__init__(*args, model_id=model_id, **kwargs)

get_infer_bucket_file_list()

Get the list of required files for inference.

Returns:

Name Type Description
list list

A list of required files for inference, e.g., ["model.pt"].

Source code in inference/models/doctr/doctr_model.py
139
140
141
142
143
144
145
def get_infer_bucket_file_list(self) -> list:
    """Report which files have to be present before inference can run.

    Returns:
        list: A newly created list holding the single entry "model.pt".
    """
    file_names = ["model.pt"]
    return file_names