Roboflow defined HTTP interface for a general-purpose inference server.
This class sets up the FastAPI application and adds necessary middleware,
as well as initializes the model manager and model registry for the inference server.
classHttpInterface(BaseInterface):"""Roboflow defined HTTP interface for a general-purpose inference server. This class sets up the FastAPI application and adds necessary middleware, as well as initializes the model manager and model registry for the inference server. Attributes: app (FastAPI): The FastAPI application instance. model_manager (ModelManager): The manager for handling different models. """def__init__(self,model_manager:ModelManager,root_path:Optional[str]=None,):""" Initializes the HttpInterface with given model manager and model registry. Args: model_manager (ModelManager): The manager for handling different models. root_path (Optional[str]): The root path for the FastAPI application. Description: Deploy Roboflow trained models to nearly any compute environment! """description="Roboflow inference server"app=FastAPI(title="Roboflow Inference Server",description=description,version=__version__,terms_of_service="https://roboflow.com/terms",contact={"name":"Roboflow Inc.","url":"https://roboflow.com/contact","email":"help@roboflow.com",},license_info={"name":"Apache 2.0","url":"https://www.apache.org/licenses/LICENSE-2.0.html",},root_path=root_path,)app.mount("/static",StaticFiles(directory="./inference/landing/out/static",html=True),name="static",)app.mount("/_next/static",StaticFiles(directory="./inference/landing/out/_next/static",html=True),name="_next_static",)@app.on_event("shutdown")asyncdefon_shutdown():logger.info("Shutting down %s",description)awaitusage_collector.async_push_usage_payloads()ifENABLE_PROMETHEUS:InferenceInstrumentator(app,model_manager=model_manager,endpoint="/metrics")ifMETLO_KEY:app.add_middleware(ASGIMiddleware,host="https://app.metlo.com",api_key=METLO_KEY)ifLAMBDA:app.add_middleware(LambdaMiddleware)iflen(ALLOW_ORIGINS)>0:# Add CORS Middleware (but not for /build**, which is controlled separately)app.add_middleware(PathAwareCORSMiddleware,match_paths=r"^(?!/build).*",allow_origins=ALLOW_ORIGINS,allow_credentials=True,allow_methods=["*"],allow_headers=["*"],)# Optionally add middleware for profiling the FastAPI server and underlying inference API codeifPROFILE:app.add_middleware(CProfileMiddleware,enable=True,server_app=app,filename="/profile/output.pstats",strip_dirs=False,sort_by="cumulative",)app.add_middleware(asgi_correlation_id.CorrelationIdMiddleware)ifMETRICS_ENABLED:@app.middleware("http")asyncdefcount_errors(request:Request,call_next):"""Middleware to count errors. Args: request (Request): The incoming request. call_next (Callable): The next middleware or endpoint to call. Returns: Response: The response from the next middleware or endpoint. """response=awaitcall_next(request)ifself.model_manager.pingbackandresponse.status_code>=400:self.model_manager.num_errors+=1returnresponseifnot(LAMBDAorGCP_SERVERLESS):@app.get("/device/stats")asyncdefdevice_stats():not_configured_error_message={"error":"Device statistics endpoint is not enabled.","hint":"Mount the Docker socket and point its location when running the docker ""container to collect device stats ""(i.e. `docker run ... -v /var/run/docker.sock:/var/run/docker.sock ""-e DOCKER_SOCKET_PATH=/var/run/docker.sock ...`).",}ifnotDOCKER_SOCKET_PATH:returnJSONResponse(status_code=404,content=not_configured_error_message,)ifnotis_docker_socket_mounted(docker_socket_path=DOCKER_SOCKET_PATH):returnJSONResponse(status_code=500,content=not_configured_error_message,)container_stats=get_container_stats(docker_socket_path=DOCKER_SOCKET_PATH)returnJSONResponse(status_code=200,content=container_stats)ifDEDICATED_DEPLOYMENT_WORKSPACE_URL:cached_api_keys=dict()@app.middleware("http")asyncdefcheck_authorization(request:Request,call_next):# exclusionsskip_check=(request.methodnotin["GET","POST"]orrequest.url.pathin["/","/docs","/redoc","/info","/openapi.json",# needed for /docs and /redoc"/workflows/blocks/describe","/workflows/definition/schema",]orrequest.url.path.startswith("/static/")orrequest.url.path.startswith("/_next/"))ifskip_check:returnawaitcall_next(request)def_unauthorized_response(msg):returnJSONResponse(status_code=401,content={"status":401,"message":msg,},)# check api_keyreq_params=request.query_paramsjson_params=dict()if(request.headers.get("content-type",None)=="application/json"andint(request.headers.get("content-length",0))>0):json_params=awaitrequest.json()api_key=req_params.get("api_key",None)orjson_params.get("api_key",None)ifcached_api_keys.get(api_key,0)<time.time():try:workspace_url=(get_roboflow_workspace(api_key)ifapi_keyisnotNoneelseNone)ifworkspace_url!=DEDICATED_DEPLOYMENT_WORKSPACE_URL:return_unauthorized_response("Unauthorized api_key")cached_api_keys[api_key]=(time.time()+3600)# expired after 1 hourexceptRoboflowAPINotAuthorizedErrorase:return_unauthorized_response("Unauthorized api_key")returnawaitcall_next(request)self.app=appself.model_manager=model_managerself.stream_manager_client:Optional[StreamManagerClient]=NoneifENABLE_STREAM_API:operations_timeout=os.getenv("STREAM_MANAGER_OPERATIONS_TIMEOUT")ifoperations_timeoutisnotNone:operations_timeout=float(operations_timeout)self.stream_manager_client=StreamManagerClient.init(host=os.getenv("STREAM_MANAGER_HOST","127.0.0.1"),port=int(os.getenv("STREAM_MANAGER_PORT","7070")),operations_timeout=operations_timeout,)asyncdefprocess_inference_request(inference_request:InferenceRequest,**kwargs)->InferenceResponse:"""Processes an inference request by calling the appropriate model. Args: inference_request (InferenceRequest): The request containing model ID and other inference details. Returns: InferenceResponse: The response containing the inference results. """de_aliased_model_id=resolve_roboflow_model_alias(model_id=inference_request.model_id)self.model_manager.add_model(de_aliased_model_id,inference_request.api_key)resp=awaitself.model_manager.infer_from_request(de_aliased_model_id,inference_request,**kwargs)returnorjson_response(resp)defprocess_workflow_inference_request(workflow_request:WorkflowInferenceRequest,workflow_specification:dict,background_tasks:Optional[BackgroundTasks],profiler:WorkflowsProfiler,)->WorkflowInferenceResponse:workflow_init_parameters={"workflows_core.model_manager":model_manager,"workflows_core.api_key":workflow_request.api_key,"workflows_core.background_tasks":background_tasks,}execution_engine=ExecutionEngine.init(workflow_definition=workflow_specification,init_parameters=workflow_init_parameters,max_concurrent_steps=WORKFLOWS_MAX_CONCURRENT_STEPS,prevent_local_images_loading=True,profiler=profiler,workflow_id=workflow_request.workflow_id,)is_preview=Falseifhasattr(workflow_request,"is_preview"):is_preview=workflow_request.is_previewworkflow_results=execution_engine.run(runtime_parameters=workflow_request.inputs,serialize_results=True,_is_preview=is_preview,)withprofiler.profile_execution_phase(name="workflow_results_filtering",categories=["inference_package_operation"],):outputs=filter_out_unwanted_workflow_outputs(workflow_results=workflow_results,excluded_fields=workflow_request.excluded_fields,)profiler_trace=profiler.export_trace()response=WorkflowInferenceResponse(outputs=outputs,profiler_trace=profiler_trace,)returnorjson_response(response=response)defload_core_model(inference_request:InferenceRequest,api_key:Optional[str]=None,core_model:str=None,)->None:"""Loads a core model (e.g., "clip" or "sam") into the model manager. Args: inference_request (InferenceRequest): The request containing version and other details. api_key (Optional[str]): The API key for the request. core_model (str): The core model type, e.g., "clip" or "sam". Returns: str: The core model ID. """ifapi_key:inference_request.api_key=api_keyversion_id_field=f"{core_model}_version_id"core_model_id=(f"{core_model}/{inference_request.__getattribute__(version_id_field)}")self.model_manager.add_model(core_model_id,inference_request.api_key)returncore_model_idload_clip_model=partial(load_core_model,core_model="clip")"""Loads the CLIP model into the model manager. Args: inference_request: The request containing version and other details. api_key: The API key for the request. Returns: The CLIP model ID. """load_sam_model=partial(load_core_model,core_model="sam")"""Loads the SAM model into the model manager. Args: inference_request: The request containing version and other details. api_key: The API key for the request. Returns: The SAM model ID. """load_sam2_model=partial(load_core_model,core_model="sam2")"""Loads the SAM2 model into the model manager. Args: inference_request: The request containing version and other details. api_key: The API key for the request. Returns: The SAM2 model ID. """load_gaze_model=partial(load_core_model,core_model="gaze")"""Loads the GAZE model into the model manager. Args: inference_request: The request containing version and other details. api_key: The API key for the request. Returns: The GAZE model ID. """load_doctr_model=partial(load_core_model,core_model="doctr")"""Loads the DocTR model into the model manager. Args: inference_request: The request containing version and other details. api_key: The API key for the request. Returns: The DocTR model ID. """load_paligemma_model=partial(load_core_model,core_model="paligemma")load_grounding_dino_model=partial(load_core_model,core_model="grounding_dino")"""Loads the Grounding DINO model into the model manager. Args: inference_request: The request containing version and other details. api_key: The API key for the request. Returns: The Grounding DINO model ID. """load_yolo_world_model=partial(load_core_model,core_model="yolo_world")load_owlv2_model=partial(load_core_model,core_model="owlv2")"""Loads the YOLO World model into the model manager. Args: inference_request: The request containing version and other details. api_key: The API key for the request. Returns: The YOLO World model ID. """load_trocr_model=partial(load_core_model,core_model="trocr")"""Loads the TrOCR model into the model manager. Args: inference_request: The request containing version and other details. api_key: The API key for the request. Returns: The TrOCR model ID. """@app.get("/info",response_model=ServerVersionInfo,summary="Info",description="Get the server name and version number",)asyncdefroot():"""Endpoint to get the server name and version number. Returns: ServerVersionInfo: The server version information. """returnServerVersionInfo(name="Roboflow Inference Server",version=__version__,uuid=GLOBAL_INFERENCE_SERVER_ID,)# The current AWS Lambda authorizer only supports path parameters, therefore we can only use the legacy infer route. This case statement excludes routes which won't work for the current Lambda authorizer.ifnot(LAMBDAorGCP_SERVERLESS):@app.get("/model/registry",response_model=ModelsDescriptions,summary="Get model keys",description="Get the ID of each loaded model",)asyncdefregistry():"""Get the ID of each loaded model in the registry. Returns: ModelsDescriptions: The object containing models descriptions """logger.debug(f"Reached /model/registry")models_descriptions=self.model_manager.describe_models()returnModelsDescriptions.from_models_descriptions(models_descriptions=models_descriptions)@app.post("/model/add",response_model=ModelsDescriptions,summary="Load a model",description="Load the model with the given model ID",)@with_route_exceptionsasyncdefmodel_add(request:AddModelRequest):"""Load the model with the given model ID into the model manager. Args: request (AddModelRequest): The request containing the model ID and optional API key. Returns: ModelsDescriptions: The object containing models descriptions """logger.debug(f"Reached /model/add")de_aliased_model_id=resolve_roboflow_model_alias(model_id=request.model_id)logger.info(f"Loading model: {de_aliased_model_id}")self.model_manager.add_model(de_aliased_model_id,request.api_key)models_descriptions=self.model_manager.describe_models()returnModelsDescriptions.from_models_descriptions(models_descriptions=models_descriptions)@app.post("/model/remove",response_model=ModelsDescriptions,summary="Remove a model",description="Remove the model with the given model ID",)@with_route_exceptionsasyncdefmodel_remove(request:ClearModelRequest):"""Remove the model with the given model ID from the model manager. Args: request (ClearModelRequest): The request containing the model ID to be removed. Returns: ModelsDescriptions: The object containing models descriptions """logger.debug(f"Reached /model/remove")de_aliased_model_id=resolve_roboflow_model_alias(model_id=request.model_id)self.model_manager.remove(de_aliased_model_id)models_descriptions=self.model_manager.describe_models()returnModelsDescriptions.from_models_descriptions(models_descriptions=models_descriptions)@app.post("/model/clear",response_model=ModelsDescriptions,summary="Remove all models",description="Remove all loaded models",)@with_route_exceptionsasyncdefmodel_clear():"""Remove all loaded models from the model manager. Returns: ModelsDescriptions: The object containing models descriptions """logger.debug(f"Reached /model/clear")self.model_manager.clear()models_descriptions=self.model_manager.describe_models()returnModelsDescriptions.from_models_descriptions(models_descriptions=models_descriptions)# these NEW endpoints need authentication protectionifnotLAMBDAandnotGCP_SERVERLESS:@app.post("/infer/object_detection",response_model=Union[ObjectDetectionInferenceResponse,List[ObjectDetectionInferenceResponse],StubResponse,],summary="Object detection infer",description="Run inference with the specified object detection model",response_model_exclude_none=True,)@with_route_exceptions@usage_collector("request")asyncdefinfer_object_detection(inference_request:ObjectDetectionInferenceRequest,background_tasks:BackgroundTasks,):"""Run inference with the specified object detection model. Args: inference_request (ObjectDetectionInferenceRequest): The request containing the necessary details for object detection. background_tasks: (BackgroundTasks) pool of fastapi background tasks Returns: Union[ObjectDetectionInferenceResponse, List[ObjectDetectionInferenceResponse]]: The response containing the inference results. """logger.debug(f"Reached /infer/object_detection")returnawaitprocess_inference_request(inference_request,active_learning_eligible=True,background_tasks=background_tasks,)@app.post("/infer/instance_segmentation",response_model=Union[InstanceSegmentationInferenceResponse,StubResponse],summary="Instance segmentation infer",description="Run inference with the specified instance segmentation model",)@with_route_exceptions@usage_collector("request")asyncdefinfer_instance_segmentation(inference_request:InstanceSegmentationInferenceRequest,background_tasks:BackgroundTasks,):"""Run inference with the specified instance segmentation model. Args: inference_request (InstanceSegmentationInferenceRequest): The request containing the necessary details for instance segmentation. background_tasks: (BackgroundTasks) pool of fastapi background tasks Returns: InstanceSegmentationInferenceResponse: The response containing the inference results. """logger.debug(f"Reached /infer/instance_segmentation")returnawaitprocess_inference_request(inference_request,active_learning_eligible=True,background_tasks=background_tasks,)@app.post("/infer/classification",response_model=Union[ClassificationInferenceResponse,MultiLabelClassificationInferenceResponse,StubResponse,],summary="Classification infer",description="Run inference with the specified classification model",)@with_route_exceptions@usage_collector("request")asyncdefinfer_classification(inference_request:ClassificationInferenceRequest,background_tasks:BackgroundTasks,):"""Run inference with the specified classification model. Args: inference_request (ClassificationInferenceRequest): The request containing the necessary details for classification. background_tasks: (BackgroundTasks) pool of fastapi background tasks Returns: Union[ClassificationInferenceResponse, MultiLabelClassificationInferenceResponse]: The response containing the inference results. """logger.debug(f"Reached /infer/classification")returnawaitprocess_inference_request(inference_request,active_learning_eligible=True,background_tasks=background_tasks,)@app.post("/infer/keypoints_detection",response_model=Union[KeypointsDetectionInferenceResponse,StubResponse],summary="Keypoints detection infer",description="Run inference with the specified keypoints detection model",)@with_route_exceptions@usage_collector("request")asyncdefinfer_keypoints(inference_request:KeypointsDetectionInferenceRequest,):"""Run inference with the specified keypoints detection model. Args: inference_request (KeypointsDetectionInferenceRequest): The request containing the necessary details for keypoints detection. Returns: Union[ClassificationInferenceResponse, MultiLabelClassificationInferenceResponse]: The response containing the inference results. """logger.debug(f"Reached /infer/keypoints_detection")returnawaitprocess_inference_request(inference_request)ifLMM_ENABLED:@app.post("/infer/lmm",response_model=Union[LMMInferenceResponse,List[LMMInferenceResponse],StubResponse,],summary="Large multi-modal model infer",description="Run inference with the specified large multi-modal model",response_model_exclude_none=True,)@with_route_exceptions@usage_collector("request")asyncdefinfer_lmm(inference_request:LMMInferenceRequest,):"""Run inference with the specified object detection model. Args: inference_request (ObjectDetectionInferenceRequest): The request containing the necessary details for object detection. background_tasks: (BackgroundTasks) pool of fastapi background tasks Returns: Union[ObjectDetectionInferenceResponse, List[ObjectDetectionInferenceResponse]]: The response containing the inference results. """logger.debug(f"Reached /infer/lmm")returnawaitprocess_inference_request(inference_request)ifnotDISABLE_WORKFLOW_ENDPOINTS:@app.post("/{workspace_name}/workflows/{workflow_id}/describe_interface",response_model=DescribeInterfaceResponse,summary="Endpoint to describe interface of predefined workflow",description="Checks Roboflow API for workflow definition, once acquired - describes workflow inputs and outputs",)@with_route_exceptionsasyncdefdescribe_predefined_workflow_interface(workspace_name:str,workflow_id:str,workflow_request:PredefinedWorkflowDescribeInterfaceRequest,)->DescribeInterfaceResponse:workflow_specification=get_workflow_specification(api_key=workflow_request.api_key,workspace_id=workspace_name,workflow_id=workflow_id,use_cache=workflow_request.use_cache,)returnhandle_describe_workflows_interface(definition=workflow_specification,)@app.post("/workflows/describe_interface",response_model=DescribeInterfaceResponse,summary="Endpoint to describe interface of workflow given in request",description="Parses workflow definition and retrieves describes inputs and outputs",)@with_route_exceptionsasyncdefdescribe_workflow_interface(workflow_request:WorkflowSpecificationDescribeInterfaceRequest,)->DescribeInterfaceResponse:returnhandle_describe_workflows_interface(definition=workflow_request.specification,)@app.post("/{workspace_name}/workflows/{workflow_id}",response_model=WorkflowInferenceResponse,summary="Endpoint to run predefined workflow",description="Checks Roboflow API for workflow definition, once acquired - parses and executes injecting runtime parameters from request body",)@app.post("/infer/workflows/{workspace_name}/{workflow_id}",response_model=WorkflowInferenceResponse,summary="[LEGACY] Endpoint to run predefined workflow",description="Checks Roboflow API for workflow definition, once acquired - parses and executes injecting runtime parameters from request body. This endpoint is deprecated and will be removed end of Q2 2024",deprecated=True,)@with_route_exceptions@usage_collector("request")asyncdefinfer_from_predefined_workflow(workspace_name:str,workflow_id:str,workflow_request:PredefinedWorkflowInferenceRequest,background_tasks:BackgroundTasks,)->WorkflowInferenceResponse:# TODO: get rid of async: https://github.com/roboflow/inference/issues/569ifENABLE_WORKFLOWS_PROFILINGandworkflow_request.enable_profiling:profiler=BaseWorkflowsProfiler.init(max_runs_in_buffer=WORKFLOWS_PROFILER_BUFFER_SIZE,)else:profiler=NullWorkflowsProfiler.init()withprofiler.profile_execution_phase(name="workflow_definition_fetching",categories=["inference_package_operation"],):workflow_specification=get_workflow_specification(api_key=workflow_request.api_key,workspace_id=workspace_name,workflow_id=workflow_id,use_cache=workflow_request.use_cache,)ifnotworkflow_request.workflow_id:workflow_request.workflow_id=workflow_idifnotworkflow_specification.get("id"):logger.warning("Internal workflow ID missing in specification for '%s'",workflow_id,)returnprocess_workflow_inference_request(workflow_request=workflow_request,workflow_specification=workflow_specification,background_tasks=(background_tasksifnot(LAMBDAorGCP_SERVERLESS)elseNone),profiler=profiler,)@app.post("/workflows/run",response_model=WorkflowInferenceResponse,summary="Endpoint to run workflow specification provided in payload",description="Parses and executes workflow specification, injecting runtime parameters from request body.",)@app.post("/infer/workflows",response_model=WorkflowInferenceResponse,summary="[LEGACY] Endpoint to run workflow specification provided in payload",description="Parses and executes workflow specification, injecting runtime parameters from request body. This endpoint is deprecated and will be removed end of Q2 2024.",deprecated=True,)@with_route_exceptions@usage_collector("request")asyncdefinfer_from_workflow(workflow_request:WorkflowSpecificationInferenceRequest,background_tasks:BackgroundTasks,)->WorkflowInferenceResponse:# TODO: get rid of async: https://github.com/roboflow/inference/issues/569ifENABLE_WORKFLOWS_PROFILINGandworkflow_request.enable_profiling:profiler=BaseWorkflowsProfiler.init(max_runs_in_buffer=WORKFLOWS_PROFILER_BUFFER_SIZE,)else:profiler=NullWorkflowsProfiler.init()returnprocess_workflow_inference_request(workflow_request=workflow_request,workflow_specification=workflow_request.specification,background_tasks=(background_tasksifnot(LAMBDAorGCP_SERVERLESS)elseNone),profiler=profiler,)@app.get("/workflows/execution_engine/versions",response_model=ExecutionEngineVersions,summary="Returns available Execution Engine versions sorted from oldest to newest",description="Returns available Execution Engine versions sorted from oldest to newest",)@with_route_exceptionsasyncdefget_execution_engine_versions()->ExecutionEngineVersions:# TODO: get rid of async: https://github.com/roboflow/inference/issues/569versions=get_available_versions()returnExecutionEngineVersions(versions=versions)@app.get("/workflows/blocks/describe",response_model=WorkflowsBlocksDescription,summary="[LEGACY] Endpoint to get definition of workflows blocks that are accessible",description="Endpoint provides detailed information about workflows building blocks that are ""accessible in the inference server. This information could be used to programmatically ""build / display workflows.",deprecated=True,)@with_route_exceptionsasyncdefdescribe_workflows_blocks(request:Request,)->Union[WorkflowsBlocksDescription,Response]:result=handle_describe_workflows_blocks_request()returngzip_response_if_requested(request=request,response=result)@app.post("/workflows/blocks/describe",response_model=WorkflowsBlocksDescription,summary="[EXPERIMENTAL] Endpoint to get definition of workflows blocks that are accessible",description="Endpoint provides detailed information about workflows building blocks that are ""accessible in the inference server. This information could be used to programmatically ""build / display workflows. Additionally - in request body one can specify list of ""dynamic blocks definitions which will be transformed into blocks and used to generate ""schemas and definitions of connections",)@with_route_exceptionsasyncdefdescribe_workflows_blocks(request:Request,request_payload:Optional[DescribeBlocksRequest]=None,)->Union[WorkflowsBlocksDescription,Response]:# TODO: get rid of async: https://github.com/roboflow/inference/issues/569dynamic_blocks_definitions=Nonerequested_execution_engine_version=Noneifrequest_payloadisnotNone:dynamic_blocks_definitions=(request_payload.dynamic_blocks_definitions)requested_execution_engine_version=(request_payload.execution_engine_version)result=handle_describe_workflows_blocks_request(dynamic_blocks_definitions=dynamic_blocks_definitions,requested_execution_engine_version=requested_execution_engine_version,)returngzip_response_if_requested(request=request,response=result)@app.get("/workflows/definition/schema",response_model=WorkflowsBlocksSchemaDescription,summary="Endpoint to fetch the workflows block schema",description="Endpoint to fetch the schema of all available blocks. This information can be ""used to validate workflow definitions and suggest syntax in the JSON editor.",)@with_route_exceptionsasyncdefget_workflow_schema()->WorkflowsBlocksSchemaDescription:returnget_workflow_schema_description()@app.post("/workflows/blocks/dynamic_outputs",response_model=List[OutputDefinition],summary="[EXPERIMENTAL] Endpoint to get definition of dynamic output for workflow step",description="Endpoint to be used when step outputs can be discovered only after ""filling manifest with data.",)@with_route_exceptionsasyncdefget_dynamic_block_outputs(step_manifest:Dict[str,Any],)->List[OutputDefinition]:# TODO: get rid of async: https://github.com/roboflow/inference/issues/569# Potentially TODO: dynamic blocks do not support dynamic outputs, but if it changes# we need to provide dynamic blocks manifests heredummy_workflow_definition={"version":"1.0","inputs":[],"steps":[step_manifest],"outputs":[],}available_blocks=load_workflow_blocks()parsed_definition=parse_workflow_definition(raw_workflow_definition=dummy_workflow_definition,available_blocks=available_blocks,)parsed_manifest=parsed_definition.steps[0]returnparsed_manifest.get_actual_outputs()@app.post("/workflows/validate",response_model=WorkflowValidationStatus,summary="[EXPERIMENTAL] Endpoint to validate",description="Endpoint provides a way to check validity of JSON workflow definition.",)@with_route_exceptionsasyncdefvalidate_workflow(specification:dict,)->WorkflowValidationStatus:# TODO: get rid of async: https://github.com/roboflow/inference/issues/569step_execution_mode=StepExecutionMode(WORKFLOWS_STEP_EXECUTION_MODE)workflow_init_parameters={"workflows_core.model_manager":model_manager,"workflows_core.api_key":None,"workflows_core.background_tasks":None,"workflows_core.step_execution_mode":step_execution_mode,}_=ExecutionEngine.init(workflow_definition=specification,init_parameters=workflow_init_parameters,max_concurrent_steps=WORKFLOWS_MAX_CONCURRENT_STEPS,prevent_local_images_loading=True,)returnWorkflowValidationStatus(status="ok")ifENABLE_STREAM_API:@app.get("/inference_pipelines/list",response_model=ListPipelinesResponse,summary="[EXPERIMENTAL] List active InferencePipelines",description="[EXPERIMENTAL] Listing all active InferencePipelines processing videos",)@with_route_exceptionsasyncdeflist_pipelines(_:Request)->ListPipelinesResponse:returnawaitself.stream_manager_client.list_pipelines()@app.get("/inference_pipelines/{pipeline_id}/status",response_model=InferencePipelineStatusResponse,summary="[EXPERIMENTAL] Get status of InferencePipeline",description="[EXPERIMENTAL] Get status of InferencePipeline",)@with_route_exceptionsasyncdefget_status(pipeline_id:str)->InferencePipelineStatusResponse:returnawaitself.stream_manager_client.get_status(pipeline_id=pipeline_id)@app.post("/inference_pipelines/initialise",response_model=CommandResponse,summary="[EXPERIMENTAL] Starts new InferencePipeline",description="[EXPERIMENTAL] Starts new InferencePipeline",)@with_route_exceptionsasyncdefinitialise(request:InitialisePipelinePayload)->CommandResponse:returnawaitself.stream_manager_client.initialise_pipeline(initialisation_request=request)@app.post("/inference_pipelines/initialise_webrtc",response_model=InitializeWebRTCPipelineResponse,summary="[EXPERIMENTAL] Establishes WebRTC peer connection and starts new InferencePipeline consuming video track",description="[EXPERIMENTAL] Establishes WebRTC peer connection and starts new InferencePipeline consuming video track",)@with_route_exceptionsasyncdefinitialise_webrtc_inference_pipeline(request:InitialiseWebRTCPipelinePayload,)->CommandResponse:resp=awaitself.stream_manager_client.initialise_webrtc_pipeline(initialisation_request=request)returnresp@app.post("/inference_pipelines/{pipeline_id}/pause",response_model=CommandResponse,summary="[EXPERIMENTAL] Pauses the InferencePipeline",description="[EXPERIMENTAL] Pauses the InferencePipeline",)@with_route_exceptionsasyncdefpause(pipeline_id:str)->CommandResponse:returnawaitself.stream_manager_client.pause_pipeline(pipeline_id=pipeline_id)@app.post("/inference_pipelines/{pipeline_id}/resume",response_model=CommandResponse,summary="[EXPERIMENTAL] Resumes the InferencePipeline",description="[EXPERIMENTAL] Resumes the InferencePipeline",)@with_route_exceptionsasyncdefresume(pipeline_id:str)->CommandResponse:returnawaitself.stream_manager_client.resume_pipeline(pipeline_id=pipeline_id)@app.post("/inference_pipelines/{pipeline_id}/terminate",response_model=CommandResponse,summary="[EXPERIMENTAL] Terminates the InferencePipeline",description="[EXPERIMENTAL] Terminates the InferencePipeline",)@with_route_exceptionsasyncdefterminate(pipeline_id:str)->CommandResponse:returnawaitself.stream_manager_client.terminate_pipeline(pipeline_id=pipeline_id)@app.get("/inference_pipelines/{pipeline_id}/consume",response_model=ConsumePipelineResponse,summary="[EXPERIMENTAL] Consumes InferencePipeline result",description="[EXPERIMENTAL] Consumes InferencePipeline result",)@with_route_exceptionsasyncdefconsume(pipeline_id:str,request:Optional[ConsumeResultsPayload]=None,)->ConsumePipelineResponse:ifrequestisNone:request=ConsumeResultsPayload()returnawaitself.stream_manager_client.consume_pipeline_result(pipeline_id=pipeline_id,excluded_fields=request.excluded_fields,)# Enable preloading models at startupif((PRELOAD_MODELSorDEDICATED_DEPLOYMENT_WORKSPACE_URL)andAPI_KEYandnot(LAMBDAorGCP_SERVERLESS)):classModelInitState:"""Class to track model initialization state."""def__init__(self):self.is_ready=Falseself.lock=asyncio.Lock()# For thread-safe updatesself.initialization_errors=[]# Track errors per modelmodel_init_state=ModelInitState()asyncdefinitialize_models(state:ModelInitState):"""Perform asynchronous initialization tasks to load models."""# Limit the number of concurrent tasks to prevent resource exhaustionsemaphore=asyncio.Semaphore(2)# Adjust the limit as neededasyncdefload_model(model_id):try:asyncwithsemaphore:# Add a timeout to prevent indefinite hangingawaitasyncio.wait_for(model_add(AddModelRequest(model_id=model_id,model_type=None,api_key=API_KEY,)),timeout=300,# Timeout after 5 minutes)logger.info(f"Model {model_id} loaded successfully.")exceptasyncio.TimeoutError:error_msg=f"Timeout while loading model {model_id}"logger.error(error_msg)asyncwithstate.lock:state.initialization_errors.append((model_id,error_msg))exceptExceptionase:error_msg=f"Error loading model {model_id}: {e}"logger.error(error_msg)asyncwithstate.lock:state.initialization_errors.append((model_id,str(e)))ifPRELOAD_MODELS:# Create tasks for each model to be loadedtasks=[load_model(model_id)formodel_idinPRELOAD_MODELS]# Wait for all tasks to complete, collecting exceptionsawaitasyncio.gather(*tasks,return_exceptions=True)# Update the readiness state in a thread-safe mannerasyncwithstate.lock:state.is_ready=True@app.on_event("startup")asyncdefstartup_model_init():"""Initialize the models on startup."""asyncio.create_task(initialize_models(model_init_state))logger.info("Model initialization started in the background.")@app.get("/readiness",status_code=200)asyncdefreadiness(state:ModelInitState=Depends(lambda:model_init_state),):"""Readiness endpoint for Kubernetes readiness probe."""asyncwithstate.lock:ifstate.is_ready:return{"status":"ready"}else:returnJSONResponse(content={"status":"not ready"},status_code=503)@app.get("/healthz",status_code=200)asyncdefhealthz():"""Health endpoint for Kubernetes liveness probe."""return{"status":"healthy"}ifCORE_MODELS_ENABLED:ifCORE_MODEL_CLIP_ENABLED:@app.post("/clip/embed_image",response_model=ClipEmbeddingResponse,summary="CLIP Image Embeddings",description="Run the Open AI CLIP model to embed image data.",)@with_route_exceptions@usage_collector("request")asyncdefclip_embed_image(inference_request:ClipImageEmbeddingRequest,request:Request,api_key:Optional[str]=Query(None,description="Roboflow API Key that will be passed to the model during initialization for artifact retrieval",),):""" Embeds image data using the OpenAI CLIP model. Args: inference_request (ClipImageEmbeddingRequest): The request containing the image to be embedded. api_key (Optional[str], default None): Roboflow API Key passed to the model during initialization for artifact retrieval. request (Request, default Body()): The HTTP request. Returns: ClipEmbeddingResponse: The response containing the embedded image. """logger.debug(f"Reached /clip/embed_image")clip_model_id=load_clip_model(inference_request,api_key=api_key)response=awaitself.model_manager.infer_from_request(clip_model_id,inference_request)ifLAMBDA:actor=request.scope["aws.event"]["requestContext"]["authorizer"]["lambda"]["actor"]trackUsage(clip_model_id,actor)returnresponse@app.post("/clip/embed_text",response_model=ClipEmbeddingResponse,summary="CLIP Text Embeddings",description="Run the Open AI CLIP model to embed text data.",)@with_route_exceptions@usage_collector("request")asyncdefclip_embed_text(inference_request:ClipTextEmbeddingRequest,request:Request,api_key:Optional[str]=Query(None,description="Roboflow API Key that will be passed to the model during initialization for artifact retrieval",),):""" Embeds text data using the OpenAI CLIP model. Args: inference_request (ClipTextEmbeddingRequest): The request containing the text to be embedded. api_key (Optional[str], default None): Roboflow API Key passed to the model during initialization for artifact retrieval. request (Request, default Body()): The HTTP request. Returns: ClipEmbeddingResponse: The response containing the embedded text. """logger.debug(f"Reached /clip/embed_text")clip_model_id=load_clip_model(inference_request,api_key=api_key)response=awaitself.model_manager.infer_from_request(clip_model_id,inference_request)ifLAMBDA:actor=request.scope["aws.event"]["requestContext"]["authorizer"]["lambda"]["actor"]trackUsage(clip_model_id,actor)returnresponse@app.post("/clip/compare",response_model=ClipCompareResponse,summary="CLIP Compare",description="Run the Open AI CLIP model to compute similarity scores.",)@with_route_exceptions@usage_collector("request")asyncdefclip_compare(inference_request:ClipCompareRequest,request:Request,api_key:Optional[str]=Query(None,description="Roboflow API Key that will be passed to the model during initialization for artifact retrieval",),):""" Computes similarity scores using the OpenAI CLIP model. Args: inference_request (ClipCompareRequest): The request containing the data to be compared. api_key (Optional[str], default None): Roboflow API Key passed to the model during initialization for artifact retrieval. request (Request, default Body()): The HTTP request. Returns: ClipCompareResponse: The response containing the similarity scores. """logger.debug(f"Reached /clip/compare")clip_model_id=load_clip_model(inference_request,api_key=api_key)response=awaitself.model_manager.infer_from_request(clip_model_id,inference_request)ifLAMBDA:actor=request.scope["aws.event"]["requestContext"]["authorizer"]["lambda"]["actor"]trackUsage(clip_model_id,actor,n=2)returnresponseifCORE_MODEL_GROUNDINGDINO_ENABLED:@app.post("/grounding_dino/infer",response_model=ObjectDetectionInferenceResponse,summary="Grounding DINO inference.",description="Run the Grounding DINO zero-shot object detection model.",)@with_route_exceptions@usage_collector("request")asyncdefgrounding_dino_infer(inference_request:GroundingDINOInferenceRequest,request:Request,api_key:Optional[str]=Query(None,description="Roboflow API Key that will be passed to the model during initialization for artifact retrieval",),):""" Embeds image data using the Grounding DINO model. Args: inference_request GroundingDINOInferenceRequest): The request containing the image on which to run object detection. api_key (Optional[str], default None): Roboflow API Key passed to the model during initialization for artifact retrieval. request (Request, default Body()): The HTTP request. Returns: ObjectDetectionInferenceResponse: The object detection response. """logger.debug(f"Reached /grounding_dino/infer")grounding_dino_model_id=load_grounding_dino_model(inference_request,api_key=api_key)response=awaitself.model_manager.infer_from_request(grounding_dino_model_id,inference_request)ifLAMBDA:actor=request.scope["aws.event"]["requestContext"]["authorizer"]["lambda"]["actor"]trackUsage(grounding_dino_model_id,actor)returnresponseifCORE_MODEL_YOLO_WORLD_ENABLED:@app.post("/yolo_world/infer",response_model=ObjectDetectionInferenceResponse,summary="YOLO-World inference.",description="Run the YOLO-World zero-shot object detection model.",response_model_exclude_none=True,)@with_route_exceptions@usage_collector("request")asyncdefyolo_world_infer(inference_request:YOLOWorldInferenceRequest,request:Request,api_key:Optional[str]=Query(None,description="Roboflow API Key that will be passed to the model during initialization for artifact retrieval",),):""" Runs the YOLO-World zero-shot object detection model. Args: inference_request (YOLOWorldInferenceRequest): The request containing the image on which to run object detection. api_key (Optional[str], default None): Roboflow API Key passed to the model during initialization for artifact retrieval. request (Request, default Body()): The HTTP request. Returns: ObjectDetectionInferenceResponse: The object detection response. """logger.debug(f"Reached /yolo_world/infer. Loading model")yolo_world_model_id=load_yolo_world_model(inference_request,api_key=api_key)logger.debug("YOLOWorld model loaded. Staring the inference.")response=awaitself.model_manager.infer_from_request(yolo_world_model_id,inference_request)logger.debug("YOLOWorld prediction available.")ifLAMBDA:actor=request.scope["aws.event"]["requestContext"]["authorizer"]["lambda"]["actor"]trackUsage(yolo_world_model_id,actor)logger.debug("Usage of YOLOWorld denoted.")returnresponseifCORE_MODEL_DOCTR_ENABLED:@app.post("/doctr/ocr",response_model=OCRInferenceResponse,summary="DocTR OCR response",description="Run the DocTR OCR model to retrieve text in an image.",)@with_route_exceptions@usage_collector("request")asyncdefdoctr_retrieve_text(inference_request:DoctrOCRInferenceRequest,request:Request,api_key:Optional[str]=Query(None,description="Roboflow API Key that will be passed to the model during initialization for artifact retrieval",),):""" Embeds image data using the DocTR model. Args: inference_request (M.DoctrOCRInferenceRequest): The request containing the image from which to retrieve text. api_key (Optional[str], default None): Roboflow API Key passed to the model during initialization for artifact retrieval. request (Request, default Body()): The HTTP request. Returns: M.OCRInferenceResponse: The response containing the embedded image. """logger.debug(f"Reached /doctr/ocr")doctr_model_id=load_doctr_model(inference_request,api_key=api_key)response=awaitself.model_manager.infer_from_request(doctr_model_id,inference_request)ifLAMBDA:actor=request.scope["aws.event"]["requestContext"]["authorizer"]["lambda"]["actor"]trackUsage(doctr_model_id,actor)returnresponseifCORE_MODEL_SAM_ENABLED:@app.post("/sam/embed_image",response_model=SamEmbeddingResponse,summary="SAM Image Embeddings",description="Run the Meta AI Segmant Anything Model to embed image data.",)@with_route_exceptions@usage_collector("request")asyncdefsam_embed_image(inference_request:SamEmbeddingRequest,request:Request,api_key:Optional[str]=Query(None,description="Roboflow API Key that will be passed to the model during initialization for artifact retrieval",),):""" Embeds image data using the Meta AI Segmant Anything Model (SAM). Args: inference_request (SamEmbeddingRequest): The request containing the image to be embedded. api_key (Optional[str], default None): Roboflow API Key passed to the model during initialization for artifact retrieval. request (Request, default Body()): The HTTP request. Returns: M.SamEmbeddingResponse or Response: The response containing the embedded image. """logger.debug(f"Reached /sam/embed_image")sam_model_id=load_sam_model(inference_request,api_key=api_key)model_response=awaitself.model_manager.infer_from_request(sam_model_id,inference_request)ifLAMBDA:actor=request.scope["aws.event"]["requestContext"]["authorizer"]["lambda"]["actor"]trackUsage(sam_model_id,actor)ifinference_request.format=="binary":returnResponse(content=model_response.embeddings,headers={"Content-Type":"application/octet-stream"},)returnmodel_response@app.post("/sam/segment_image",response_model=SamSegmentationResponse,summary="SAM Image Segmentation",description="Run the Meta AI Segmant Anything Model to generate segmenations for image data.",)@with_route_exceptions@usage_collector("request")asyncdefsam_segment_image(inference_request:SamSegmentationRequest,request:Request,api_key:Optional[str]=Query(None,description="Roboflow API Key that will be passed to the model during initialization for artifact retrieval",),):""" Generates segmentations for image data using the Meta AI Segmant Anything Model (SAM). Args: inference_request (SamSegmentationRequest): The request containing the image to be segmented. api_key (Optional[str], default None): Roboflow API Key passed to the model during initialization for artifact retrieval. request (Request, default Body()): The HTTP request. Returns: M.SamSegmentationResponse or Response: The response containing the segmented image. """logger.debug(f"Reached /sam/segment_image")sam_model_id=load_sam_model(inference_request,api_key=api_key)model_response=awaitself.model_manager.infer_from_request(sam_model_id,inference_request)ifLAMBDA:actor=request.scope["aws.event"]["requestContext"]["authorizer"]["lambda"]["actor"]trackUsage(sam_model_id,actor)ifinference_request.format=="binary":returnResponse(content=model_response,headers={"Content-Type":"application/octet-stream"},)returnmodel_responseifCORE_MODEL_SAM2_ENABLED:@app.post("/sam2/embed_image",response_model=Sam2EmbeddingResponse,summary="SAM2 Image Embeddings",description="Run the Meta AI Segment Anything 2 Model to embed image data.",)@with_route_exceptions@usage_collector("request")asyncdefsam2_embed_image(inference_request:Sam2EmbeddingRequest,request:Request,api_key:Optional[str]=Query(None,description="Roboflow API Key that will be passed to the model during initialization for artifact retrieval",),):""" Embeds image data using the Meta AI Segment Anything Model (SAM). Args: inference_request (SamEmbeddingRequest): The request containing the image to be embedded. api_key (Optional[str], default None): Roboflow API Key passed to the model during initialization for artifact retrieval. request (Request, default Body()): The HTTP request. Returns: M.Sam2EmbeddingResponse or Response: The response affirming the image has been embedded """logger.debug(f"Reached /sam2/embed_image")sam2_model_id=load_sam2_model(inference_request,api_key=api_key)model_response=awaitself.model_manager.infer_from_request(sam2_model_id,inference_request)returnmodel_response@app.post("/sam2/segment_image",response_model=Sam2SegmentationResponse,summary="SAM2 Image Segmentation",description="Run the Meta AI Segment Anything 2 Model to generate segmenations for image data.",)@with_route_exceptions@usage_collector("request")asyncdefsam2_segment_image(inference_request:Sam2SegmentationRequest,request:Request,api_key:Optional[str]=Query(None,description="Roboflow API Key that will be passed to the model during initialization for artifact retrieval",),):""" Generates segmentations for image data using the Meta AI Segment Anything Model (SAM). Args: inference_request (Sam2SegmentationRequest): The request containing the image to be segmented. api_key (Optional[str], default None): Roboflow API Key passed to the model during initialization for artifact retrieval. request (Request, default Body()): The HTTP request. Returns: M.SamSegmentationResponse or Response: The response containing the segmented image. """logger.debug(f"Reached /sam2/segment_image")sam2_model_id=load_sam2_model(inference_request,api_key=api_key)model_response=awaitself.model_manager.infer_from_request(sam2_model_id,inference_request)ifinference_request.format=="binary":returnResponse(content=model_response,headers={"Content-Type":"application/octet-stream"},)returnmodel_responseifCORE_MODEL_OWLV2_ENABLED:@app.post("/owlv2/infer",response_model=ObjectDetectionInferenceResponse,summary="Owlv2 image prompting",description="Run the google owlv2 model to few-shot object detect",)@with_route_exceptions@usage_collector("request")asyncdefowlv2_infer(inference_request:OwlV2InferenceRequest,request:Request,api_key:Optional[str]=Query(None,description="Roboflow API Key that will be passed to the model during initialization for artifact retrieval",),):""" Embeds image data using the Meta AI Segmant Anything Model (SAM). Args: inference_request (SamEmbeddingRequest): The request containing the image to be embedded. api_key (Optional[str], default None): Roboflow API Key passed to the model during initialization for artifact retrieval. request (Request, default Body()): The HTTP request. Returns: M.Sam2EmbeddingResponse or Response: The response affirming the image has been embedded """logger.debug(f"Reached /owlv2/infer")owl2_model_id=load_owlv2_model(inference_request,api_key=api_key)model_response=awaitself.model_manager.infer_from_request(owl2_model_id,inference_request)returnmodel_responseifCORE_MODEL_GAZE_ENABLED:@app.post("/gaze/gaze_detection",response_model=List[GazeDetectionInferenceResponse],summary="Gaze Detection",description="Run the gaze detection model to detect gaze.",)@with_route_exceptions@usage_collector("request")asyncdefgaze_detection(inference_request:GazeDetectionInferenceRequest,request:Request,api_key:Optional[str]=Query(None,description="Roboflow API Key that will be passed to the model during initialization for artifact retrieval",),):""" Detect gaze using the gaze detection model. Args: inference_request (M.GazeDetectionRequest): The request containing the image to be detected. api_key (Optional[str], default None): Roboflow API Key passed to the model during initialization for artifact retrieval. request (Request, default Body()): The HTTP request. Returns: M.GazeDetectionResponse: The response containing all the detected faces and the corresponding gazes. """logger.debug(f"Reached /gaze/gaze_detection")gaze_model_id=load_gaze_model(inference_request,api_key=api_key)response=awaitself.model_manager.infer_from_request(gaze_model_id,inference_request)ifLAMBDA:actor=request.scope["aws.event"]["requestContext"]["authorizer"]["lambda"]["actor"]trackUsage(gaze_model_id,actor)returnresponseifCORE_MODEL_TROCR_ENABLED:@app.post("/ocr/trocr",response_model=OCRInferenceResponse,summary="TrOCR OCR response",description="Run the TrOCR model to retrieve text in an image.",)@with_route_exceptions@usage_collector("request")asyncdeftrocr_retrieve_text(inference_request:TrOCRInferenceRequest,request:Request,api_key:Optional[str]=Query(None,description="Roboflow API Key that will be passed to the model during initialization for artifact retrieval",),):""" Retrieves text from image data using the TrOCR model. Args: inference_request (TrOCRInferenceRequest): The request containing the image from which to retrieve text. api_key (Optional[str], default None): Roboflow API Key passed to the model during initialization for artifact retrieval. request (Request, default Body()): The HTTP request. Returns: OCRInferenceResponse: The response containing the retrieved text. """logger.debug(f"Reached /trocr/ocr")trocr_model_id=load_trocr_model(inference_request,api_key=api_key)response=awaitself.model_manager.infer_from_request(trocr_model_id,inference_request)ifLAMBDA:actor=request.scope["aws.event"]["requestContext"]["authorizer"]["lambda"]["actor"]trackUsage(trocr_model_id,actor)returnresponseifnot(LAMBDAorGCP_SERVERLESS):@app.get("/notebook/start",summary="Jupyter Lab Server Start",description="Starts a jupyter lab server for running development code",)@with_route_exceptionsasyncdefnotebook_start(browserless:bool=False):"""Starts a jupyter lab server for running development code. Args: inference_request (NotebookStartRequest): The request containing the necessary details for starting a jupyter lab server. background_tasks: (BackgroundTasks) pool of fastapi background tasks Returns: NotebookStartResponse: The response containing the URL of the jupyter lab server. """logger.debug(f"Reached /notebook/start")ifNOTEBOOK_ENABLED:start_notebook()ifbrowserless:return{"success":True,"message":f"Jupyter Lab server started at http://localhost:{NOTEBOOK_PORT}?token={NOTEBOOK_PASSWORD}",}else:sleep(2)returnRedirectResponse(f"http://localhost:{NOTEBOOK_PORT}/lab/tree/quickstart.ipynb?token={NOTEBOOK_PASSWORD}")else:ifbrowserless:return{"success":False,"message":"Notebook server is not enabled. Enable notebooks via the NOTEBOOK_ENABLED environment variable.",}else:returnRedirectResponse(f"/notebook-instructions.html")ifENABLE_BUILDER:frominference.core.interfaces.http.builder.routesimport(routerasbuilder_router,)# Allow CORS on only the API, but not the builder UI/iframe (where the CSRF is passed)app.add_middleware(PathAwareCORSMiddleware,match_paths=r"^/build/api.*",allow_origins=[BUILDER_ORIGIN],allow_methods=["*"],allow_headers=["*"],allow_credentials=True,)# Attach all routes from builder to the /build prefixapp.include_router(builder_router,prefix="/build",tags=["builder"])ifLEGACY_ROUTE_ENABLED:# Legacy object detection inference path for backwards compatability@app.get("/{dataset_id}/{version_id:str}",# Order matters in this response model Union. It will use the first matching model. For example, Object Detection Inference Response is a subset of Instance segmentation inference response, so instance segmentation must come first in order for the matching logic to work.response_model=Union[InstanceSegmentationInferenceResponse,KeypointsDetectionInferenceResponse,ObjectDetectionInferenceResponse,ClassificationInferenceResponse,MultiLabelClassificationInferenceResponse,StubResponse,Any,],response_model_exclude_none=True,)@app.post("/{dataset_id}/{version_id:str}",# Order matters in this response model Union. It will use the first matching model. For example, Object Detection Inference Response is a subset of Instance segmentation inference response, so instance segmentation must come first in order for the matching logic to work.response_model=Union[InstanceSegmentationInferenceResponse,KeypointsDetectionInferenceResponse,ObjectDetectionInferenceResponse,ClassificationInferenceResponse,MultiLabelClassificationInferenceResponse,StubResponse,Any,],response_model_exclude_none=True,)@with_route_exceptions@usage_collector("request")asyncdeflegacy_infer_from_request(background_tasks:BackgroundTasks,request:Request,dataset_id:str=Path(description="ID of a Roboflow dataset corresponding to the model to use for inference OR workspace ID"),version_id:str=Path(description="ID of a Roboflow dataset version corresponding to the model to use for inference OR model ID"),api_key:Optional[str]=Query(None,description="Roboflow API Key that will be passed to the model during initialization for artifact retrieval",),confidence:float=Query(0.4,description="The confidence threshold used to filter out predictions",),keypoint_confidence:float=Query(0.0,description="The confidence threshold used to filter out keypoints that are not visible based on model confidence",),format:str=Query("json",description="One of 'json' or 'image'. If 'json' prediction data is return as a JSON string. If 'image' prediction data is visualized and overlayed on the original input image.",),image:Optional[str]=Query(None,description="The publically accessible URL of an image to use for inference.",),image_type:Optional[str]=Query("base64",description="One of base64 or numpy. Note, numpy input is not supported for Roboflow Hosted Inference.",),labels:Optional[bool]=Query(False,description="If true, labels will be include in any inference visualization.",),mask_decode_mode:Optional[str]=Query("accurate",description="One of 'accurate' or 'fast'. If 'accurate' the mask will be decoded using the original image size. If 'fast' the mask will be decoded using the original mask size. 'accurate' is slower but more accurate.",),tradeoff_factor:Optional[float]=Query(0.0,description="The amount to tradeoff between 0='fast' and 1='accurate'",),max_detections:int=Query(300,description="The maximum number of detections to return. This is used to limit the number of predictions returned by the model. The model may return more predictions than this number, but only the top `max_detections` predictions will be returned.",),overlap:float=Query(0.3,description="The IoU threhsold that must be met for a box pair to be considered duplicate during NMS",),stroke:int=Query(1,description="The stroke width used when visualizing predictions"),countinference:Optional[bool]=Query(True,description="If false, does not track inference against usage.",include_in_schema=False,),service_secret:Optional[str]=Query(None,description="Shared secret used to authenticate requests to the inference server from internal services (e.g. to allow disabling inference usage tracking via the `countinference` query parameter)",include_in_schema=False,),disable_preproc_auto_orient:Optional[bool]=Query(False,description="If true, disables automatic image orientation"),disable_preproc_contrast:Optional[bool]=Query(False,description="If true, disables automatic contrast adjustment"),disable_preproc_grayscale:Optional[bool]=Query(False,description="If true, disables automatic grayscale conversion",),disable_preproc_static_crop:Optional[bool]=Query(False,description="If true, disables automatic static crop"),disable_active_learning:Optional[bool]=Query(default=False,description="If true, the predictions will be prevented from registration by Active Learning (if the functionality is enabled)",),active_learning_target_dataset:Optional[str]=Query(default=None,description="Parameter to be used when Active Learning data registration should happen against different dataset than the one pointed by model_id",),source:Optional[str]=Query("external",description="The source of the inference request",),source_info:Optional[str]=Query("external",description="The detailed source information of the inference request",),):""" Legacy inference endpoint for object detection, instance segmentation, and classification. Args: background_tasks: (BackgroundTasks) pool of fastapi background tasks dataset_id (str): ID of a Roboflow dataset corresponding to the model to use for inference OR workspace ID version_id (str): ID of a Roboflow dataset version corresponding to the model to use for inference OR model ID api_key (Optional[str], default None): Roboflow API Key passed to the model during initialization for artifact retrieval. # Other parameters described in the function signature... Returns: Union[InstanceSegmentationInferenceResponse, KeypointsDetectionInferenceRequest, ObjectDetectionInferenceResponse, ClassificationInferenceResponse, MultiLabelClassificationInferenceResponse, Any]: The response containing the inference results. """logger.debug(f"Reached legacy route /:dataset_id/:version_id with {dataset_id}/{version_id}")model_id=f"{dataset_id}/{version_id}"ifconfidence>=1:confidence/=100elifconfidence<0.01:confidence=0.01ifoverlap>=1:overlap/=100ifimageisnotNone:request_image=InferenceRequestImage(type="url",value=image)else:if"Content-Type"notinrequest.headers:raiseContentTypeMissing(f"Request must include a Content-Type header")if"multipart/form-data"inrequest.headers["Content-Type"]:form_data=awaitrequest.form()base64_image_str=awaitform_data["file"].read()base64_image_str=base64.b64encode(base64_image_str)request_image=InferenceRequestImage(type="base64",value=base64_image_str.decode("ascii"))elif("application/x-www-form-urlencoded"inrequest.headers["Content-Type"]or"application/json"inrequest.headers["Content-Type"]):data=awaitrequest.body()request_image=InferenceRequestImage(type=image_type,value=data)else:raiseContentTypeInvalid(f"Invalid Content-Type: {request.headers['Content-Type']}")ifnotcountinferenceandservice_secret!=ROBOFLOW_SERVICE_SECRET:raiseMissingServiceSecretError("Service secret is required to disable inference usage tracking")ifLAMBDA:logger.debug("request.scope: %s",request.scope)request_model_id=(request.scope["aws.event"]["requestContext"]["authorizer"]["lambda"]["model"]["endpoint"].replace("--","/").replace("rf-","").replace("nu-",""))actor=request.scope["aws.event"]["requestContext"]["authorizer"]["lambda"]["actor"]ifcountinference:trackUsage(request_model_id,actor)else:ifservice_secret!=ROBOFLOW_SERVICE_SECRET:raiseMissingServiceSecretError("Service secret is required to disable inference usage tracking")logger.info("Not counting inference for usage")else:request_model_id=model_idlogger.debug(f"State of model registry: {self.model_manager.describe_models()}")self.model_manager.add_model(request_model_id,api_key,model_id_alias=model_id)task_type=self.model_manager.get_task_type(model_id,api_key=api_key)inference_request_type=ObjectDetectionInferenceRequestargs=dict()iftask_type=="instance-segmentation":inference_request_type=InstanceSegmentationInferenceRequestargs={"mask_decode_mode":mask_decode_mode,"tradeoff_factor":tradeoff_factor,}eliftask_type=="classification":inference_request_type=ClassificationInferenceRequesteliftask_type=="keypoint-detection":inference_request_type=KeypointsDetectionInferenceRequestargs={"keypoint_confidence":keypoint_confidence}inference_request=inference_request_type(api_key=api_key,model_id=model_id,image=request_image,confidence=confidence,iou_threshold=overlap,max_detections=max_detections,visualization_labels=labels,visualization_stroke_width=stroke,visualize_predictions=(format=="image"orformat=="image_and_json"),disable_preproc_auto_orient=disable_preproc_auto_orient,disable_preproc_contrast=disable_preproc_contrast,disable_preproc_grayscale=disable_preproc_grayscale,disable_preproc_static_crop=disable_preproc_static_crop,disable_active_learning=disable_active_learning,active_learning_target_dataset=active_learning_target_dataset,source=source,source_info=source_info,usage_billable=countinference,**args,)inference_response=awaitself.model_manager.infer_from_request(inference_request.model_id,inference_request,active_learning_eligible=True,background_tasks=background_tasks,)logger.debug("Response ready.")ifformat=="image":returnResponse(content=inference_response.visualization,media_type="image/jpeg",)else:returnorjson_response(inference_response)ifnot(LAMBDAorGCP_SERVERLESS):# Legacy clear cache endpoint for backwards compatability@app.get("/clear_cache",response_model=str)asyncdeflegacy_clear_cache():""" Clears the model cache. This endpoint provides a way to clear the cache of loaded models. Returns: str: A string indicating that the cache has been cleared. """logger.debug(f"Reached /clear_cache")awaitmodel_clear()return"Cache Cleared"# Legacy add model endpoint for backwards compatability@app.get("/start/{dataset_id}/{version_id}")asyncdefmodel_add_legacy(dataset_id:str,version_id:str,api_key:str=None):""" Starts a model inference session. This endpoint initializes and starts an inference session for the specified model version. Args: dataset_id (str): ID of a Roboflow dataset corresponding to the model. version_id (str): ID of a Roboflow dataset version corresponding to the model. api_key (str, optional): Roboflow API Key for artifact retrieval. Returns: JSONResponse: A response object containing the status and a success message. """logger.debug(f"Reached /start/{dataset_id}/{version_id} with {dataset_id}/{version_id}")model_id=f"{dataset_id}/{version_id}"self.model_manager.add_model(model_id,api_key)returnJSONResponse({"status":200,"message":"inference session started from local memory.",})app.mount("/",StaticFiles(directory="./inference/landing/out",html=True),name="root",)defrun(self):uvicorn.run(self.app,host="127.0.0.1",port=8080)
def__init__(self,model_manager:ModelManager,root_path:Optional[str]=None,):""" Initializes the HttpInterface with given model manager and model registry. Args: model_manager (ModelManager): The manager for handling different models. root_path (Optional[str]): The root path for the FastAPI application. Description: Deploy Roboflow trained models to nearly any compute environment! """description="Roboflow inference server"app=FastAPI(title="Roboflow Inference Server",description=description,version=__version__,terms_of_service="https://roboflow.com/terms",contact={"name":"Roboflow Inc.","url":"https://roboflow.com/contact","email":"help@roboflow.com",},license_info={"name":"Apache 2.0","url":"https://www.apache.org/licenses/LICENSE-2.0.html",},root_path=root_path,)app.mount("/static",StaticFiles(directory="./inference/landing/out/static",html=True),name="static",)app.mount("/_next/static",StaticFiles(directory="./inference/landing/out/_next/static",html=True),name="_next_static",)@app.on_event("shutdown")asyncdefon_shutdown():logger.info("Shutting down %s",description)awaitusage_collector.async_push_usage_payloads()ifENABLE_PROMETHEUS:InferenceInstrumentator(app,model_manager=model_manager,endpoint="/metrics")ifMETLO_KEY:app.add_middleware(ASGIMiddleware,host="https://app.metlo.com",api_key=METLO_KEY)ifLAMBDA:app.add_middleware(LambdaMiddleware)iflen(ALLOW_ORIGINS)>0:# Add CORS Middleware (but not for /build**, which is controlled separately)app.add_middleware(PathAwareCORSMiddleware,match_paths=r"^(?!/build).*",allow_origins=ALLOW_ORIGINS,allow_credentials=True,allow_methods=["*"],allow_headers=["*"],)# Optionally add middleware for profiling the FastAPI server and underlying inference API codeifPROFILE:app.add_middleware(CProfileMiddleware,enable=True,server_app=app,filename="/profile/output.pstats",strip_dirs=False,sort_by="cumulative",)app.add_middleware(asgi_correlation_id.CorrelationIdMiddleware)ifMETRICS_ENABLED:@app.middleware("http")asyncdefcount_errors(request:Request,call_next):"""Middleware to count errors. Args: request (Request): The incoming request. call_next (Callable): The next middleware or endpoint to call. Returns: Response: The response from the next middleware or endpoint. """response=awaitcall_next(request)ifself.model_manager.pingbackandresponse.status_code>=400:self.model_manager.num_errors+=1returnresponseifnot(LAMBDAorGCP_SERVERLESS):@app.get("/device/stats")asyncdefdevice_stats():not_configured_error_message={"error":"Device statistics endpoint is not enabled.","hint":"Mount the Docker socket and point its location when running the docker ""container to collect device stats ""(i.e. `docker run ... -v /var/run/docker.sock:/var/run/docker.sock ""-e DOCKER_SOCKET_PATH=/var/run/docker.sock ...`).",}ifnotDOCKER_SOCKET_PATH:returnJSONResponse(status_code=404,content=not_configured_error_message,)ifnotis_docker_socket_mounted(docker_socket_path=DOCKER_SOCKET_PATH):returnJSONResponse(status_code=500,content=not_configured_error_message,)container_stats=get_container_stats(docker_socket_path=DOCKER_SOCKET_PATH)returnJSONResponse(status_code=200,content=container_stats)ifDEDICATED_DEPLOYMENT_WORKSPACE_URL:cached_api_keys=dict()@app.middleware("http")asyncdefcheck_authorization(request:Request,call_next):# exclusionsskip_check=(request.methodnotin["GET","POST"]orrequest.url.pathin["/","/docs","/redoc","/info","/openapi.json",# needed for /docs and /redoc"/workflows/blocks/describe","/workflows/definition/schema",]orrequest.url.path.startswith("/static/")orrequest.url.path.startswith("/_next/"))ifskip_check:returnawaitcall_next(request)def_unauthorized_response(msg):returnJSONResponse(status_code=401,content={"status":401,"message":msg,},)# check api_keyreq_params=request.query_paramsjson_params=dict()if(request.headers.get("content-type",None)=="application/json"andint(request.headers.get("content-length",0))>0):json_params=awaitrequest.json()api_key=req_params.get("api_key",None)orjson_params.get("api_key",None)ifcached_api_keys.get(api_key,0)<time.time():try:workspace_url=(get_roboflow_workspace(api_key)ifapi_keyisnotNoneelseNone)ifworkspace_url!=DEDICATED_DEPLOYMENT_WORKSPACE_URL:return_unauthorized_response("Unauthorized api_key")cached_api_keys[api_key]=(time.time()+3600)# expired after 1 hourexceptRoboflowAPINotAuthorizedErrorase:return_unauthorized_response("Unauthorized api_key")returnawaitcall_next(request)self.app=appself.model_manager=model_managerself.stream_manager_client:Optional[StreamManagerClient]=NoneifENABLE_STREAM_API:operations_timeout=os.getenv("STREAM_MANAGER_OPERATIONS_TIMEOUT")ifoperations_timeoutisnotNone:operations_timeout=float(operations_timeout)self.stream_manager_client=StreamManagerClient.init(host=os.getenv("STREAM_MANAGER_HOST","127.0.0.1"),port=int(os.getenv("STREAM_MANAGER_PORT","7070")),operations_timeout=operations_timeout,)asyncdefprocess_inference_request(inference_request:InferenceRequest,**kwargs)->InferenceResponse:"""Processes an inference request by calling the appropriate model. Args: inference_request (InferenceRequest): The request containing model ID and other inference details. Returns: InferenceResponse: The response containing the inference results. """de_aliased_model_id=resolve_roboflow_model_alias(model_id=inference_request.model_id)self.model_manager.add_model(de_aliased_model_id,inference_request.api_key)resp=awaitself.model_manager.infer_from_request(de_aliased_model_id,inference_request,**kwargs)returnorjson_response(resp)defprocess_workflow_inference_request(workflow_request:WorkflowInferenceRequest,workflow_specification:dict,background_tasks:Optional[BackgroundTasks],profiler:WorkflowsProfiler,)->WorkflowInferenceResponse:workflow_init_parameters={"workflows_core.model_manager":model_manager,"workflows_core.api_key":workflow_request.api_key,"workflows_core.background_tasks":background_tasks,}execution_engine=ExecutionEngine.init(workflow_definition=workflow_specification,init_parameters=workflow_init_parameters,max_concurrent_steps=WORKFLOWS_MAX_CONCURRENT_STEPS,prevent_local_images_loading=True,profiler=profiler,workflow_id=workflow_request.workflow_id,)is_preview=Falseifhasattr(workflow_request,"is_preview"):is_preview=workflow_request.is_previewworkflow_results=execution_engine.run(runtime_parameters=workflow_request.inputs,serialize_results=True,_is_preview=is_preview,)withprofiler.profile_execution_phase(name="workflow_results_filtering",categories=["inference_package_operation"],):outputs=filter_out_unwanted_workflow_outputs(workflow_results=workflow_results,excluded_fields=workflow_request.excluded_fields,)profiler_trace=profiler.export_trace()response=WorkflowInferenceResponse(outputs=outputs,profiler_trace=profiler_trace,)returnorjson_response(response=response)defload_core_model(inference_request:InferenceRequest,api_key:Optional[str]=None,core_model:str=None,)->None:"""Loads a core model (e.g., "clip" or "sam") into the model manager. Args: inference_request (InferenceRequest): The request containing version and other details. api_key (Optional[str]): The API key for the request. core_model (str): The core model type, e.g., "clip" or "sam". Returns: str: The core model ID. """ifapi_key:inference_request.api_key=api_keyversion_id_field=f"{core_model}_version_id"core_model_id=(f"{core_model}/{inference_request.__getattribute__(version_id_field)}")self.model_manager.add_model(core_model_id,inference_request.api_key)returncore_model_idload_clip_model=partial(load_core_model,core_model="clip")"""Loads the CLIP model into the model manager. Args: inference_request: The request containing version and other details. api_key: The API key for the request. Returns: The CLIP model ID. """load_sam_model=partial(load_core_model,core_model="sam")"""Loads the SAM model into the model manager. Args: inference_request: The request containing version and other details. api_key: The API key for the request. Returns: The SAM model ID. """load_sam2_model=partial(load_core_model,core_model="sam2")"""Loads the SAM2 model into the model manager. Args: inference_request: The request containing version and other details. api_key: The API key for the request. Returns: The SAM2 model ID. """load_gaze_model=partial(load_core_model,core_model="gaze")"""Loads the GAZE model into the model manager. Args: inference_request: The request containing version and other details. api_key: The API key for the request. Returns: The GAZE model ID. """load_doctr_model=partial(load_core_model,core_model="doctr")"""Loads the DocTR model into the model manager. Args: inference_request: The request containing version and other details. api_key: The API key for the request. Returns: The DocTR model ID. """load_paligemma_model=partial(load_core_model,core_model="paligemma")load_grounding_dino_model=partial(load_core_model,core_model="grounding_dino")"""Loads the Grounding DINO model into the model manager. Args: inference_request: The request containing version and other details. api_key: The API key for the request. Returns: The Grounding DINO model ID. """load_yolo_world_model=partial(load_core_model,core_model="yolo_world")load_owlv2_model=partial(load_core_model,core_model="owlv2")"""Loads the YOLO World model into the model manager. Args: inference_request: The request containing version and other details. api_key: The API key for the request. Returns: The YOLO World model ID. """load_trocr_model=partial(load_core_model,core_model="trocr")"""Loads the TrOCR model into the model manager. Args: inference_request: The request containing version and other details. api_key: The API key for the request. Returns: The TrOCR model ID. """@app.get("/info",response_model=ServerVersionInfo,summary="Info",description="Get the server name and version number",)asyncdefroot():"""Endpoint to get the server name and version number. Returns: ServerVersionInfo: The server version information. """returnServerVersionInfo(name="Roboflow Inference Server",version=__version__,uuid=GLOBAL_INFERENCE_SERVER_ID,)# The current AWS Lambda authorizer only supports path parameters, therefore we can only use the legacy infer route. This case statement excludes routes which won't work for the current Lambda authorizer.ifnot(LAMBDAorGCP_SERVERLESS):@app.get("/model/registry",response_model=ModelsDescriptions,summary="Get model keys",description="Get the ID of each loaded model",)asyncdefregistry():"""Get the ID of each loaded model in the registry. Returns: ModelsDescriptions: The object containing models descriptions """logger.debug(f"Reached /model/registry")models_descriptions=self.model_manager.describe_models()returnModelsDescriptions.from_models_descriptions(models_descriptions=models_descriptions)@app.post("/model/add",response_model=ModelsDescriptions,summary="Load a model",description="Load the model with the given model ID",)@with_route_exceptionsasyncdefmodel_add(request:AddModelRequest):"""Load the model with the given model ID into the model manager. Args: request (AddModelRequest): The request containing the model ID and optional API key. Returns: ModelsDescriptions: The object containing models descriptions """logger.debug(f"Reached /model/add")de_aliased_model_id=resolve_roboflow_model_alias(model_id=request.model_id)logger.info(f"Loading model: {de_aliased_model_id}")self.model_manager.add_model(de_aliased_model_id,request.api_key)models_descriptions=self.model_manager.describe_models()returnModelsDescriptions.from_models_descriptions(models_descriptions=models_descriptions)@app.post("/model/remove",response_model=ModelsDescriptions,summary="Remove a model",description="Remove the model with the given model ID",)@with_route_exceptionsasyncdefmodel_remove(request:ClearModelRequest):"""Remove the model with the given model ID from the model manager. Args: request (ClearModelRequest): The request containing the model ID to be removed. Returns: ModelsDescriptions: The object containing models descriptions """logger.debug(f"Reached /model/remove")de_aliased_model_id=resolve_roboflow_model_alias(model_id=request.model_id)self.model_manager.remove(de_aliased_model_id)models_descriptions=self.model_manager.describe_models()returnModelsDescriptions.from_models_descriptions(models_descriptions=models_descriptions)@app.post("/model/clear",response_model=ModelsDescriptions,summary="Remove all models",description="Remove all loaded models",)@with_route_exceptionsasyncdefmodel_clear():"""Remove all loaded models from the model manager. Returns: ModelsDescriptions: The object containing models descriptions """logger.debug(f"Reached /model/clear")self.model_manager.clear()models_descriptions=self.model_manager.describe_models()returnModelsDescriptions.from_models_descriptions(models_descriptions=models_descriptions)# these NEW endpoints need authentication protectionifnotLAMBDAandnotGCP_SERVERLESS:@app.post("/infer/object_detection",response_model=Union[ObjectDetectionInferenceResponse,List[ObjectDetectionInferenceResponse],StubResponse,],summary="Object detection infer",description="Run inference with the specified object detection model",response_model_exclude_none=True,)@with_route_exceptions@usage_collector("request")asyncdefinfer_object_detection(inference_request:ObjectDetectionInferenceRequest,background_tasks:BackgroundTasks,):"""Run inference with the specified object detection model. Args: inference_request (ObjectDetectionInferenceRequest): The request containing the necessary details for object detection. background_tasks: (BackgroundTasks) pool of fastapi background tasks Returns: Union[ObjectDetectionInferenceResponse, List[ObjectDetectionInferenceResponse]]: The response containing the inference results. """logger.debug(f"Reached /infer/object_detection")returnawaitprocess_inference_request(inference_request,active_learning_eligible=True,background_tasks=background_tasks,)@app.post("/infer/instance_segmentation",response_model=Union[InstanceSegmentationInferenceResponse,StubResponse],summary="Instance segmentation infer",description="Run inference with the specified instance segmentation model",)@with_route_exceptions@usage_collector("request")asyncdefinfer_instance_segmentation(inference_request:InstanceSegmentationInferenceRequest,background_tasks:BackgroundTasks,):"""Run inference with the specified instance segmentation model. Args: inference_request (InstanceSegmentationInferenceRequest): The request containing the necessary details for instance segmentation. background_tasks: (BackgroundTasks) pool of fastapi background tasks Returns: InstanceSegmentationInferenceResponse: The response containing the inference results. """logger.debug(f"Reached /infer/instance_segmentation")returnawaitprocess_inference_request(inference_request,active_learning_eligible=True,background_tasks=background_tasks,)@app.post("/infer/classification",response_model=Union[ClassificationInferenceResponse,MultiLabelClassificationInferenceResponse,StubResponse,],summary="Classification infer",description="Run inference with the specified classification model",)@with_route_exceptions@usage_collector("request")asyncdefinfer_classification(inference_request:ClassificationInferenceRequest,background_tasks:BackgroundTasks,):"""Run inference with the specified classification model. Args: inference_request (ClassificationInferenceRequest): The request containing the necessary details for classification. background_tasks: (BackgroundTasks) pool of fastapi background tasks Returns: Union[ClassificationInferenceResponse, MultiLabelClassificationInferenceResponse]: The response containing the inference results. """logger.debug(f"Reached /infer/classification")returnawaitprocess_inference_request(inference_request,active_learning_eligible=True,background_tasks=background_tasks,)@app.post("/infer/keypoints_detection",response_model=Union[KeypointsDetectionInferenceResponse,StubResponse],summary="Keypoints detection infer",description="Run inference with the specified keypoints detection model",)@with_route_exceptions@usage_collector("request")asyncdefinfer_keypoints(inference_request:KeypointsDetectionInferenceRequest,):"""Run inference with the specified keypoints detection model. Args: inference_request (KeypointsDetectionInferenceRequest): The request containing the necessary details for keypoints detection. Returns: Union[ClassificationInferenceResponse, MultiLabelClassificationInferenceResponse]: The response containing the inference results. """logger.debug(f"Reached /infer/keypoints_detection")returnawaitprocess_inference_request(inference_request)ifLMM_ENABLED:@app.post("/infer/lmm",response_model=Union[LMMInferenceResponse,List[LMMInferenceResponse],StubResponse,],summary="Large multi-modal model infer",description="Run inference with the specified large multi-modal model",response_model_exclude_none=True,)@with_route_exceptions@usage_collector("request")asyncdefinfer_lmm(inference_request:LMMInferenceRequest,):"""Run inference with the specified object detection model. Args: inference_request (ObjectDetectionInferenceRequest): The request containing the necessary details for object detection. background_tasks: (BackgroundTasks) pool of fastapi background tasks Returns: Union[ObjectDetectionInferenceResponse, List[ObjectDetectionInferenceResponse]]: The response containing the inference results. """logger.debug(f"Reached /infer/lmm")returnawaitprocess_inference_request(inference_request)ifnotDISABLE_WORKFLOW_ENDPOINTS:@app.post("/{workspace_name}/workflows/{workflow_id}/describe_interface",response_model=DescribeInterfaceResponse,summary="Endpoint to describe interface of predefined workflow",description="Checks Roboflow API for workflow definition, once acquired - describes workflow inputs and outputs",)@with_route_exceptionsasyncdefdescribe_predefined_workflow_interface(workspace_name:str,workflow_id:str,workflow_request:PredefinedWorkflowDescribeInterfaceRequest,)->DescribeInterfaceResponse:workflow_specification=get_workflow_specification(api_key=workflow_request.api_key,workspace_id=workspace_name,workflow_id=workflow_id,use_cache=workflow_request.use_cache,)returnhandle_describe_workflows_interface(definition=workflow_specification,)@app.post("/workflows/describe_interface",response_model=DescribeInterfaceResponse,summary="Endpoint to describe interface of workflow given in request",description="Parses workflow definition and retrieves describes inputs and outputs",)@with_route_exceptionsasyncdefdescribe_workflow_interface(workflow_request:WorkflowSpecificationDescribeInterfaceRequest,)->DescribeInterfaceResponse:returnhandle_describe_workflows_interface(definition=workflow_request.specification,)@app.post("/{workspace_name}/workflows/{workflow_id}",response_model=WorkflowInferenceResponse,summary="Endpoint to run predefined workflow",description="Checks Roboflow API for workflow definition, once acquired - parses and executes injecting runtime parameters from request body",)@app.post("/infer/workflows/{workspace_name}/{workflow_id}",response_model=WorkflowInferenceResponse,summary="[LEGACY] Endpoint to run predefined workflow",description="Checks Roboflow API for workflow definition, once acquired - parses and executes injecting runtime parameters from request body. This endpoint is deprecated and will be removed end of Q2 2024",deprecated=True,)@with_route_exceptions@usage_collector("request")asyncdefinfer_from_predefined_workflow(workspace_name:str,workflow_id:str,workflow_request:PredefinedWorkflowInferenceRequest,background_tasks:BackgroundTasks,)->WorkflowInferenceResponse:# TODO: get rid of async: https://github.com/roboflow/inference/issues/569ifENABLE_WORKFLOWS_PROFILINGandworkflow_request.enable_profiling:profiler=BaseWorkflowsProfiler.init(max_runs_in_buffer=WORKFLOWS_PROFILER_BUFFER_SIZE,)else:profiler=NullWorkflowsProfiler.init()withprofiler.profile_execution_phase(name="workflow_definition_fetching",categories=["inference_package_operation"],):workflow_specification=get_workflow_specification(api_key=workflow_request.api_key,workspace_id=workspace_name,workflow_id=workflow_id,use_cache=workflow_request.use_cache,)ifnotworkflow_request.workflow_id:workflow_request.workflow_id=workflow_idifnotworkflow_specification.get("id"):logger.warning("Internal workflow ID missing in specification for '%s'",workflow_id,)returnprocess_workflow_inference_request(workflow_request=workflow_request,workflow_specification=workflow_specification,background_tasks=(background_tasksifnot(LAMBDAorGCP_SERVERLESS)elseNone),profiler=profiler,)@app.post("/workflows/run",response_model=WorkflowInferenceResponse,summary="Endpoint to run workflow specification provided in payload",description="Parses and executes workflow specification, injecting runtime parameters from request body.",)@app.post("/infer/workflows",response_model=WorkflowInferenceResponse,summary="[LEGACY] Endpoint to run workflow specification provided in payload",description="Parses and executes workflow specification, injecting runtime parameters from request body. This endpoint is deprecated and will be removed end of Q2 2024.",deprecated=True,)@with_route_exceptions@usage_collector("request")asyncdefinfer_from_workflow(workflow_request:WorkflowSpecificationInferenceRequest,background_tasks:BackgroundTasks,)->WorkflowInferenceResponse:# TODO: get rid of async: https://github.com/roboflow/inference/issues/569ifENABLE_WORKFLOWS_PROFILINGandworkflow_request.enable_profiling:profiler=BaseWorkflowsProfiler.init(max_runs_in_buffer=WORKFLOWS_PROFILER_BUFFER_SIZE,)else:profiler=NullWorkflowsProfiler.init()returnprocess_workflow_inference_request(workflow_request=workflow_request,workflow_specification=workflow_request.specification,background_tasks=(background_tasksifnot(LAMBDAorGCP_SERVERLESS)elseNone),profiler=profiler,)@app.get("/workflows/execution_engine/versions",response_model=ExecutionEngineVersions,summary="Returns available Execution Engine versions sorted from oldest to newest",description="Returns available Execution Engine versions sorted from oldest to newest",)@with_route_exceptionsasyncdefget_execution_engine_versions()->ExecutionEngineVersions:# TODO: get rid of async: https://github.com/roboflow/inference/issues/569versions=get_available_versions()returnExecutionEngineVersions(versions=versions)@app.get("/workflows/blocks/describe",response_model=WorkflowsBlocksDescription,summary="[LEGACY] Endpoint to get definition of workflows blocks that are accessible",description="Endpoint provides detailed information about workflows building blocks that are ""accessible in the inference server. This information could be used to programmatically ""build / display workflows.",deprecated=True,)@with_route_exceptionsasyncdefdescribe_workflows_blocks(request:Request,)->Union[WorkflowsBlocksDescription,Response]:result=handle_describe_workflows_blocks_request()returngzip_response_if_requested(request=request,response=result)@app.post("/workflows/blocks/describe",response_model=WorkflowsBlocksDescription,summary="[EXPERIMENTAL] Endpoint to get definition of workflows blocks that are accessible",description="Endpoint provides detailed information about workflows building blocks that are ""accessible in the inference server. This information could be used to programmatically ""build / display workflows. Additionally - in request body one can specify list of ""dynamic blocks definitions which will be transformed into blocks and used to generate ""schemas and definitions of connections",)@with_route_exceptionsasyncdefdescribe_workflows_blocks(request:Request,request_payload:Optional[DescribeBlocksRequest]=None,)->Union[WorkflowsBlocksDescription,Response]:# TODO: get rid of async: https://github.com/roboflow/inference/issues/569dynamic_blocks_definitions=Nonerequested_execution_engine_version=Noneifrequest_payloadisnotNone:dynamic_blocks_definitions=(request_payload.dynamic_blocks_definitions)requested_execution_engine_version=(request_payload.execution_engine_version)result=handle_describe_workflows_blocks_request(dynamic_blocks_definitions=dynamic_blocks_definitions,requested_execution_engine_version=requested_execution_engine_version,)returngzip_response_if_requested(request=request,response=result)@app.get("/workflows/definition/schema",response_model=WorkflowsBlocksSchemaDescription,summary="Endpoint to fetch the workflows block schema",description="Endpoint to fetch the schema of all available blocks. This information can be ""used to validate workflow definitions and suggest syntax in the JSON editor.",)@with_route_exceptionsasyncdefget_workflow_schema()->WorkflowsBlocksSchemaDescription:returnget_workflow_schema_description()@app.post("/workflows/blocks/dynamic_outputs",response_model=List[OutputDefinition],summary="[EXPERIMENTAL] Endpoint to get definition of dynamic output for workflow step",description="Endpoint to be used when step outputs can be discovered only after ""filling manifest with data.",)@with_route_exceptionsasyncdefget_dynamic_block_outputs(step_manifest:Dict[str,Any],)->List[OutputDefinition]:# TODO: get rid of async: https://github.com/roboflow/inference/issues/569# Potentially TODO: dynamic blocks do not support dynamic outputs, but if it changes# we need to provide dynamic blocks manifests heredummy_workflow_definition={"version":"1.0","inputs":[],"steps":[step_manifest],"outputs":[],}available_blocks=load_workflow_blocks()parsed_definition=parse_workflow_definition(raw_workflow_definition=dummy_workflow_definition,available_blocks=available_blocks,)parsed_manifest=parsed_definition.steps[0]returnparsed_manifest.get_actual_outputs()@app.post("/workflows/validate",response_model=WorkflowValidationStatus,summary="[EXPERIMENTAL] Endpoint to validate",description="Endpoint provides a way to check validity of JSON workflow definition.",)@with_route_exceptionsasyncdefvalidate_workflow(specification:dict,)->WorkflowValidationStatus:# TODO: get rid of async: https://github.com/roboflow/inference/issues/569step_execution_mode=StepExecutionMode(WORKFLOWS_STEP_EXECUTION_MODE)workflow_init_parameters={"workflows_core.model_manager":model_manager,"workflows_core.api_key":None,"workflows_core.background_tasks":None,"workflows_core.step_execution_mode":step_execution_mode,}_=ExecutionEngine.init(workflow_definition=specification,init_parameters=workflow_init_parameters,max_concurrent_steps=WORKFLOWS_MAX_CONCURRENT_STEPS,prevent_local_images_loading=True,)returnWorkflowValidationStatus(status="ok")ifENABLE_STREAM_API:@app.get("/inference_pipelines/list",response_model=ListPipelinesResponse,summary="[EXPERIMENTAL] List active InferencePipelines",description="[EXPERIMENTAL] Listing all active InferencePipelines processing videos",)@with_route_exceptionsasyncdeflist_pipelines(_:Request)->ListPipelinesResponse:returnawaitself.stream_manager_client.list_pipelines()@app.get("/inference_pipelines/{pipeline_id}/status",response_model=InferencePipelineStatusResponse,summary="[EXPERIMENTAL] Get status of InferencePipeline",description="[EXPERIMENTAL] Get status of InferencePipeline",)@with_route_exceptionsasyncdefget_status(pipeline_id:str)->InferencePipelineStatusResponse:returnawaitself.stream_manager_client.get_status(pipeline_id=pipeline_id)@app.post("/inference_pipelines/initialise",response_model=CommandResponse,summary="[EXPERIMENTAL] Starts new InferencePipeline",description="[EXPERIMENTAL] Starts new InferencePipeline",)@with_route_exceptionsasyncdefinitialise(request:InitialisePipelinePayload)->CommandResponse:returnawaitself.stream_manager_client.initialise_pipeline(initialisation_request=request)@app.post("/inference_pipelines/initialise_webrtc",response_model=InitializeWebRTCPipelineResponse,summary="[EXPERIMENTAL] Establishes WebRTC peer connection and starts new InferencePipeline consuming video track",description="[EXPERIMENTAL] Establishes WebRTC peer connection and starts new InferencePipeline consuming video track",)@with_route_exceptionsasyncdefinitialise_webrtc_inference_pipeline(request:InitialiseWebRTCPipelinePayload,)->CommandResponse:resp=awaitself.stream_manager_client.initialise_webrtc_pipeline(initialisation_request=request)returnresp@app.post("/inference_pipelines/{pipeline_id}/pause",response_model=CommandResponse,summary="[EXPERIMENTAL] Pauses the InferencePipeline",description="[EXPERIMENTAL] Pauses the InferencePipeline",)@with_route_exceptionsasyncdefpause(pipeline_id:str)->CommandResponse:returnawaitself.stream_manager_client.pause_pipeline(pipeline_id=pipeline_id)@app.post("/inference_pipelines/{pipeline_id}/resume",response_model=CommandResponse,summary="[EXPERIMENTAL] Resumes the InferencePipeline",description="[EXPERIMENTAL] Resumes the InferencePipeline",)@with_route_exceptionsasyncdefresume(pipeline_id:str)->CommandResponse:returnawaitself.stream_manager_client.resume_pipeline(pipeline_id=pipeline_id)@app.post("/inference_pipelines/{pipeline_id}/terminate",response_model=CommandResponse,summary="[EXPERIMENTAL] Terminates the InferencePipeline",description="[EXPERIMENTAL] Terminates the InferencePipeline",)@with_route_exceptionsasyncdefterminate(pipeline_id:str)->CommandResponse:returnawaitself.stream_manager_client.terminate_pipeline(pipeline_id=pipeline_id)@app.get("/inference_pipelines/{pipeline_id}/consume",response_model=ConsumePipelineResponse,summary="[EXPERIMENTAL] Consumes InferencePipeline result",description="[EXPERIMENTAL] Consumes InferencePipeline result",)@with_route_exceptionsasyncdefconsume(pipeline_id:str,request:Optional[ConsumeResultsPayload]=None,)->ConsumePipelineResponse:ifrequestisNone:request=ConsumeResultsPayload()returnawaitself.stream_manager_client.consume_pipeline_result(pipeline_id=pipeline_id,excluded_fields=request.excluded_fields,)# Enable preloading models at startupif((PRELOAD_MODELSorDEDICATED_DEPLOYMENT_WORKSPACE_URL)andAPI_KEYandnot(LAMBDAorGCP_SERVERLESS)):classModelInitState:"""Class to track model initialization state."""def__init__(self):self.is_ready=Falseself.lock=asyncio.Lock()# For thread-safe updatesself.initialization_errors=[]# Track errors per modelmodel_init_state=ModelInitState()asyncdefinitialize_models(state:ModelInitState):"""Perform asynchronous initialization tasks to load models."""# Limit the number of concurrent tasks to prevent resource exhaustionsemaphore=asyncio.Semaphore(2)# Adjust the limit as neededasyncdefload_model(model_id):try:asyncwithsemaphore:# Add a timeout to prevent indefinite hangingawaitasyncio.wait_for(model_add(AddModelRequest(model_id=model_id,model_type=None,api_key=API_KEY,)),timeout=300,# Timeout after 5 minutes)logger.info(f"Model {model_id} loaded successfully.")exceptasyncio.TimeoutError:error_msg=f"Timeout while loading model {model_id}"logger.error(error_msg)asyncwithstate.lock:state.initialization_errors.append((model_id,error_msg))exceptExceptionase:error_msg=f"Error loading model {model_id}: {e}"logger.error(error_msg)asyncwithstate.lock:state.initialization_errors.append((model_id,str(e)))ifPRELOAD_MODELS:# Create tasks for each model to be loadedtasks=[load_model(model_id)formodel_idinPRELOAD_MODELS]# Wait for all tasks to complete, collecting exceptionsawaitasyncio.gather(*tasks,return_exceptions=True)# Update the readiness state in a thread-safe mannerasyncwithstate.lock:state.is_ready=True@app.on_event("startup")asyncdefstartup_model_init():"""Initialize the models on startup."""asyncio.create_task(initialize_models(model_init_state))logger.info("Model initialization started in the background.")@app.get("/readiness",status_code=200)asyncdefreadiness(state:ModelInitState=Depends(lambda:model_init_state),):"""Readiness endpoint for Kubernetes readiness probe."""asyncwithstate.lock:ifstate.is_ready:return{"status":"ready"}else:returnJSONResponse(content={"status":"not ready"},status_code=503)@app.get("/healthz",status_code=200)asyncdefhealthz():"""Health endpoint for Kubernetes liveness probe."""return{"status":"healthy"}ifCORE_MODELS_ENABLED:ifCORE_MODEL_CLIP_ENABLED:@app.post("/clip/embed_image",response_model=ClipEmbeddingResponse,summary="CLIP Image Embeddings",description="Run the Open AI CLIP model to embed image data.",)@with_route_exceptions@usage_collector("request")asyncdefclip_embed_image(inference_request:ClipImageEmbeddingRequest,request:Request,api_key:Optional[str]=Query(None,description="Roboflow API Key that will be passed to the model during initialization for artifact retrieval",),):""" Embeds image data using the OpenAI CLIP model. Args: inference_request (ClipImageEmbeddingRequest): The request containing the image to be embedded. api_key (Optional[str], default None): Roboflow API Key passed to the model during initialization for artifact retrieval. request (Request, default Body()): The HTTP request. Returns: ClipEmbeddingResponse: The response containing the embedded image. """logger.debug(f"Reached /clip/embed_image")clip_model_id=load_clip_model(inference_request,api_key=api_key)response=awaitself.model_manager.infer_from_request(clip_model_id,inference_request)ifLAMBDA:actor=request.scope["aws.event"]["requestContext"]["authorizer"]["lambda"]["actor"]trackUsage(clip_model_id,actor)returnresponse@app.post("/clip/embed_text",response_model=ClipEmbeddingResponse,summary="CLIP Text Embeddings",description="Run the Open AI CLIP model to embed text data.",)@with_route_exceptions@usage_collector("request")asyncdefclip_embed_text(inference_request:ClipTextEmbeddingRequest,request:Request,api_key:Optional[str]=Query(None,description="Roboflow API Key that will be passed to the model during initialization for artifact retrieval",),):""" Embeds text data using the OpenAI CLIP model. Args: inference_request (ClipTextEmbeddingRequest): The request containing the text to be embedded. api_key (Optional[str], default None): Roboflow API Key passed to the model during initialization for artifact retrieval. request (Request, default Body()): The HTTP request. Returns: ClipEmbeddingResponse: The response containing the embedded text. """logger.debug(f"Reached /clip/embed_text")clip_model_id=load_clip_model(inference_request,api_key=api_key)response=awaitself.model_manager.infer_from_request(clip_model_id,inference_request)ifLAMBDA:actor=request.scope["aws.event"]["requestContext"]["authorizer"]["lambda"]["actor"]trackUsage(clip_model_id,actor)returnresponse@app.post("/clip/compare",response_model=ClipCompareResponse,summary="CLIP Compare",description="Run the Open AI CLIP model to compute similarity scores.",)@with_route_exceptions@usage_collector("request")asyncdefclip_compare(inference_request:ClipCompareRequest,request:Request,api_key:Optional[str]=Query(None,description="Roboflow API Key that will be passed to the model during initialization for artifact retrieval",),):""" Computes similarity scores using the OpenAI CLIP model. Args: inference_request (ClipCompareRequest): The request containing the data to be compared. api_key (Optional[str], default None): Roboflow API Key passed to the model during initialization for artifact retrieval. request (Request, default Body()): The HTTP request. Returns: ClipCompareResponse: The response containing the similarity scores. """logger.debug(f"Reached /clip/compare")clip_model_id=load_clip_model(inference_request,api_key=api_key)response=awaitself.model_manager.infer_from_request(clip_model_id,inference_request)ifLAMBDA:actor=request.scope["aws.event"]["requestContext"]["authorizer"]["lambda"]["actor"]trackUsage(clip_model_id,actor,n=2)returnresponseifCORE_MODEL_GROUNDINGDINO_ENABLED:@app.post("/grounding_dino/infer",response_model=ObjectDetectionInferenceResponse,summary="Grounding DINO inference.",description="Run the Grounding DINO zero-shot object detection model.",)@with_route_exceptions@usage_collector("request")asyncdefgrounding_dino_infer(inference_request:GroundingDINOInferenceRequest,request:Request,api_key:Optional[str]=Query(None,description="Roboflow API Key that will be passed to the model during initialization for artifact retrieval",),):""" Embeds image data using the Grounding DINO model. Args: inference_request GroundingDINOInferenceRequest): The request containing the image on which to run object detection. api_key (Optional[str], default None): Roboflow API Key passed to the model during initialization for artifact retrieval. request (Request, default Body()): The HTTP request. Returns: ObjectDetectionInferenceResponse: The object detection response. """logger.debug(f"Reached /grounding_dino/infer")grounding_dino_model_id=load_grounding_dino_model(inference_request,api_key=api_key)response=awaitself.model_manager.infer_from_request(grounding_dino_model_id,inference_request)ifLAMBDA:actor=request.scope["aws.event"]["requestContext"]["authorizer"]["lambda"]["actor"]trackUsage(grounding_dino_model_id,actor)returnresponseifCORE_MODEL_YOLO_WORLD_ENABLED:@app.post("/yolo_world/infer",response_model=ObjectDetectionInferenceResponse,summary="YOLO-World inference.",description="Run the YOLO-World zero-shot object detection model.",response_model_exclude_none=True,)@with_route_exceptions@usage_collector("request")asyncdefyolo_world_infer(inference_request:YOLOWorldInferenceRequest,request:Request,api_key:Optional[str]=Query(None,description="Roboflow API Key that will be passed to the model during initialization for artifact retrieval",),):""" Runs the YOLO-World zero-shot object detection model. Args: inference_request (YOLOWorldInferenceRequest): The request containing the image on which to run object detection. api_key (Optional[str], default None): Roboflow API Key passed to the model during initialization for artifact retrieval. request (Request, default Body()): The HTTP request. Returns: ObjectDetectionInferenceResponse: The object detection response. """logger.debug(f"Reached /yolo_world/infer. Loading model")yolo_world_model_id=load_yolo_world_model(inference_request,api_key=api_key)logger.debug("YOLOWorld model loaded. Staring the inference.")response=awaitself.model_manager.infer_from_request(yolo_world_model_id,inference_request)logger.debug("YOLOWorld prediction available.")ifLAMBDA:actor=request.scope["aws.event"]["requestContext"]["authorizer"]["lambda"]["actor"]trackUsage(yolo_world_model_id,actor)logger.debug("Usage of YOLOWorld denoted.")returnresponseifCORE_MODEL_DOCTR_ENABLED:@app.post("/doctr/ocr",response_model=OCRInferenceResponse,summary="DocTR OCR response",description="Run the DocTR OCR model to retrieve text in an image.",)@with_route_exceptions@usage_collector("request")asyncdefdoctr_retrieve_text(inference_request:DoctrOCRInferenceRequest,request:Request,api_key:Optional[str]=Query(None,description="Roboflow API Key that will be passed to the model during initialization for artifact retrieval",),):""" Embeds image data using the DocTR model. Args: inference_request (M.DoctrOCRInferenceRequest): The request containing the image from which to retrieve text. api_key (Optional[str], default None): Roboflow API Key passed to the model during initialization for artifact retrieval. request (Request, default Body()): The HTTP request. Returns: M.OCRInferenceResponse: The response containing the embedded image. """logger.debug(f"Reached /doctr/ocr")doctr_model_id=load_doctr_model(inference_request,api_key=api_key)response=awaitself.model_manager.infer_from_request(doctr_model_id,inference_request)ifLAMBDA:actor=request.scope["aws.event"]["requestContext"]["authorizer"]["lambda"]["actor"]trackUsage(doctr_model_id,actor)returnresponseifCORE_MODEL_SAM_ENABLED:@app.post("/sam/embed_image",response_model=SamEmbeddingResponse,summary="SAM Image Embeddings",description="Run the Meta AI Segmant Anything Model to embed image data.",)@with_route_exceptions@usage_collector("request")asyncdefsam_embed_image(inference_request:SamEmbeddingRequest,request:Request,api_key:Optional[str]=Query(None,description="Roboflow API Key that will be passed to the model during initialization for artifact retrieval",),):""" Embeds image data using the Meta AI Segmant Anything Model (SAM). Args: inference_request (SamEmbeddingRequest): The request containing the image to be embedded. api_key (Optional[str], default None): Roboflow API Key passed to the model during initialization for artifact retrieval. request (Request, default Body()): The HTTP request. Returns: M.SamEmbeddingResponse or Response: The response containing the embedded image. """logger.debug(f"Reached /sam/embed_image")sam_model_id=load_sam_model(inference_request,api_key=api_key)model_response=awaitself.model_manager.infer_from_request(sam_model_id,inference_request)ifLAMBDA:actor=request.scope["aws.event"]["requestContext"]["authorizer"]["lambda"]["actor"]trackUsage(sam_model_id,actor)ifinference_request.format=="binary":returnResponse(content=model_response.embeddings,headers={"Content-Type":"application/octet-stream"},)returnmodel_response@app.post("/sam/segment_image",response_model=SamSegmentationResponse,summary="SAM Image Segmentation",description="Run the Meta AI Segmant Anything Model to generate segmenations for image data.",)@with_route_exceptions@usage_collector("request")asyncdefsam_segment_image(inference_request:SamSegmentationRequest,request:Request,api_key:Optional[str]=Query(None,description="Roboflow API Key that will be passed to the model during initialization for artifact retrieval",),):""" Generates segmentations for image data using the Meta AI Segmant Anything Model (SAM). Args: inference_request (SamSegmentationRequest): The request containing the image to be segmented. api_key (Optional[str], default None): Roboflow API Key passed to the model during initialization for artifact retrieval. request (Request, default Body()): The HTTP request. Returns: M.SamSegmentationResponse or Response: The response containing the segmented image. """logger.debug(f"Reached /sam/segment_image")sam_model_id=load_sam_model(inference_request,api_key=api_key)model_response=awaitself.model_manager.infer_from_request(sam_model_id,inference_request)ifLAMBDA:actor=request.scope["aws.event"]["requestContext"]["authorizer"]["lambda"]["actor"]trackUsage(sam_model_id,actor)ifinference_request.format=="binary":returnResponse(content=model_response,headers={"Content-Type":"application/octet-stream"},)returnmodel_responseifCORE_MODEL_SAM2_ENABLED:@app.post("/sam2/embed_image",response_model=Sam2EmbeddingResponse,summary="SAM2 Image Embeddings",description="Run the Meta AI Segment Anything 2 Model to embed image data.",)@with_route_exceptions@usage_collector("request")asyncdefsam2_embed_image(inference_request:Sam2EmbeddingRequest,request:Request,api_key:Optional[str]=Query(None,description="Roboflow API Key that will be passed to the model during initialization for artifact retrieval",),):""" Embeds image data using the Meta AI Segment Anything Model (SAM). Args: inference_request (SamEmbeddingRequest): The request containing the image to be embedded. api_key (Optional[str], default None): Roboflow API Key passed to the model during initialization for artifact retrieval. request (Request, default Body()): The HTTP request. Returns: M.Sam2EmbeddingResponse or Response: The response affirming the image has been embedded """logger.debug(f"Reached /sam2/embed_image")sam2_model_id=load_sam2_model(inference_request,api_key=api_key)model_response=awaitself.model_manager.infer_from_request(sam2_model_id,inference_request)returnmodel_response@app.post("/sam2/segment_image",response_model=Sam2SegmentationResponse,summary="SAM2 Image Segmentation",description="Run the Meta AI Segment Anything 2 Model to generate segmenations for image data.",)@with_route_exceptions@usage_collector("request")asyncdefsam2_segment_image(inference_request:Sam2SegmentationRequest,request:Request,api_key:Optional[str]=Query(None,description="Roboflow API Key that will be passed to the model during initialization for artifact retrieval",),):""" Generates segmentations for image data using the Meta AI Segment Anything Model (SAM). Args: inference_request (Sam2SegmentationRequest): The request containing the image to be segmented. api_key (Optional[str], default None): Roboflow API Key passed to the model during initialization for artifact retrieval. request (Request, default Body()): The HTTP request. Returns: M.SamSegmentationResponse or Response: The response containing the segmented image. """logger.debug(f"Reached /sam2/segment_image")sam2_model_id=load_sam2_model(inference_request,api_key=api_key)model_response=awaitself.model_manager.infer_from_request(sam2_model_id,inference_request)ifinference_request.format=="binary":returnResponse(content=model_response,headers={"Content-Type":"application/octet-stream"},)returnmodel_responseifCORE_MODEL_OWLV2_ENABLED:@app.post("/owlv2/infer",response_model=ObjectDetectionInferenceResponse,summary="Owlv2 image prompting",description="Run the google owlv2 model to few-shot object detect",)@with_route_exceptions@usage_collector("request")asyncdefowlv2_infer(inference_request:OwlV2InferenceRequest,request:Request,api_key:Optional[str]=Query(None,description="Roboflow API Key that will be passed to the model during initialization for artifact retrieval",),):""" Embeds image data using the Meta AI Segmant Anything Model (SAM). Args: inference_request (SamEmbeddingRequest): The request containing the image to be embedded. api_key (Optional[str], default None): Roboflow API Key passed to the model during initialization for artifact retrieval. request (Request, default Body()): The HTTP request. Returns: M.Sam2EmbeddingResponse or Response: The response affirming the image has been embedded """logger.debug(f"Reached /owlv2/infer")owl2_model_id=load_owlv2_model(inference_request,api_key=api_key)model_response=awaitself.model_manager.infer_from_request(owl2_model_id,inference_request)returnmodel_responseifCORE_MODEL_GAZE_ENABLED:@app.post("/gaze/gaze_detection",response_model=List[GazeDetectionInferenceResponse],summary="Gaze Detection",description="Run the gaze detection model to detect gaze.",)@with_route_exceptions@usage_collector("request")asyncdefgaze_detection(inference_request:GazeDetectionInferenceRequest,request:Request,api_key:Optional[str]=Query(None,description="Roboflow API Key that will be passed to the model during initialization for artifact retrieval",),):""" Detect gaze using the gaze detection model. Args: inference_request (M.GazeDetectionRequest): The request containing the image to be detected. api_key (Optional[str], default None): Roboflow API Key passed to the model during initialization for artifact retrieval. request (Request, default Body()): The HTTP request. Returns: M.GazeDetectionResponse: The response containing all the detected faces and the corresponding gazes. """logger.debug(f"Reached /gaze/gaze_detection")gaze_model_id=load_gaze_model(inference_request,api_key=api_key)response=awaitself.model_manager.infer_from_request(gaze_model_id,inference_request)ifLAMBDA:actor=request.scope["aws.event"]["requestContext"]["authorizer"]["lambda"]["actor"]trackUsage(gaze_model_id,actor)returnresponseifCORE_MODEL_TROCR_ENABLED:@app.post("/ocr/trocr",response_model=OCRInferenceResponse,summary="TrOCR OCR response",description="Run the TrOCR model to retrieve text in an image.",)@with_route_exceptions@usage_collector("request")asyncdeftrocr_retrieve_text(inference_request:TrOCRInferenceRequest,request:Request,api_key:Optional[str]=Query(None,description="Roboflow API Key that will be passed to the model during initialization for artifact retrieval",),):""" Retrieves text from image data using the TrOCR model. Args: inference_request (TrOCRInferenceRequest): The request containing the image from which to retrieve text. api_key (Optional[str], default None): Roboflow API Key passed to the model during initialization for artifact retrieval. request (Request, default Body()): The HTTP request. Returns: OCRInferenceResponse: The response containing the retrieved text. """logger.debug(f"Reached /trocr/ocr")trocr_model_id=load_trocr_model(inference_request,api_key=api_key)response=awaitself.model_manager.infer_from_request(trocr_model_id,inference_request)ifLAMBDA:actor=request.scope["aws.event"]["requestContext"]["authorizer"]["lambda"]["actor"]trackUsage(trocr_model_id,actor)returnresponseifnot(LAMBDAorGCP_SERVERLESS):@app.get("/notebook/start",summary="Jupyter Lab Server Start",description="Starts a jupyter lab server for running development code",)@with_route_exceptionsasyncdefnotebook_start(browserless:bool=False):"""Starts a jupyter lab server for running development code. Args: inference_request (NotebookStartRequest): The request containing the necessary details for starting a jupyter lab server. background_tasks: (BackgroundTasks) pool of fastapi background tasks Returns: NotebookStartResponse: The response containing the URL of the jupyter lab server. """logger.debug(f"Reached /notebook/start")ifNOTEBOOK_ENABLED:start_notebook()ifbrowserless:return{"success":True,"message":f"Jupyter Lab server started at http://localhost:{NOTEBOOK_PORT}?token={NOTEBOOK_PASSWORD}",}else:sleep(2)returnRedirectResponse(f"http://localhost:{NOTEBOOK_PORT}/lab/tree/quickstart.ipynb?token={NOTEBOOK_PASSWORD}")else:ifbrowserless:return{"success":False,"message":"Notebook server is not enabled. Enable notebooks via the NOTEBOOK_ENABLED environment variable.",}else:returnRedirectResponse(f"/notebook-instructions.html")ifENABLE_BUILDER:frominference.core.interfaces.http.builder.routesimport(routerasbuilder_router,)# Allow CORS on only the API, but not the builder UI/iframe (where the CSRF is passed)app.add_middleware(PathAwareCORSMiddleware,match_paths=r"^/build/api.*",allow_origins=[BUILDER_ORIGIN],allow_methods=["*"],allow_headers=["*"],allow_credentials=True,)# Attach all routes from builder to the /build prefixapp.include_router(builder_router,prefix="/build",tags=["builder"])ifLEGACY_ROUTE_ENABLED:# Legacy object detection inference path for backwards compatability@app.get("/{dataset_id}/{version_id:str}",# Order matters in this response model Union. It will use the first matching model. For example, Object Detection Inference Response is a subset of Instance segmentation inference response, so instance segmentation must come first in order for the matching logic to work.response_model=Union[InstanceSegmentationInferenceResponse,KeypointsDetectionInferenceResponse,ObjectDetectionInferenceResponse,ClassificationInferenceResponse,MultiLabelClassificationInferenceResponse,StubResponse,Any,],response_model_exclude_none=True,)@app.post("/{dataset_id}/{version_id:str}",# Order matters in this response model Union. It will use the first matching model. For example, Object Detection Inference Response is a subset of Instance segmentation inference response, so instance segmentation must come first in order for the matching logic to work.response_model=Union[InstanceSegmentationInferenceResponse,KeypointsDetectionInferenceResponse,ObjectDetectionInferenceResponse,ClassificationInferenceResponse,MultiLabelClassificationInferenceResponse,StubResponse,Any,],response_model_exclude_none=True,)@with_route_exceptions@usage_collector("request")asyncdeflegacy_infer_from_request(background_tasks:BackgroundTasks,request:Request,dataset_id:str=Path(description="ID of a Roboflow dataset corresponding to the model to use for inference OR workspace ID"),version_id:str=Path(description="ID of a Roboflow dataset version corresponding to the model to use for inference OR model ID"),api_key:Optional[str]=Query(None,description="Roboflow API Key that will be passed to the model during initialization for artifact retrieval",),confidence:float=Query(0.4,description="The confidence threshold used to filter out predictions",),keypoint_confidence:float=Query(0.0,description="The confidence threshold used to filter out keypoints that are not visible based on model confidence",),format:str=Query("json",description="One of 'json' or 'image'. If 'json' prediction data is return as a JSON string. If 'image' prediction data is visualized and overlayed on the original input image.",),image:Optional[str]=Query(None,description="The publically accessible URL of an image to use for inference.",),image_type:Optional[str]=Query("base64",description="One of base64 or numpy. Note, numpy input is not supported for Roboflow Hosted Inference.",),labels:Optional[bool]=Query(False,description="If true, labels will be include in any inference visualization.",),mask_decode_mode:Optional[str]=Query("accurate",description="One of 'accurate' or 'fast'. If 'accurate' the mask will be decoded using the original image size. If 'fast' the mask will be decoded using the original mask size. 'accurate' is slower but more accurate.",),tradeoff_factor:Optional[float]=Query(0.0,description="The amount to tradeoff between 0='fast' and 1='accurate'",),max_detections:int=Query(300,description="The maximum number of detections to return. This is used to limit the number of predictions returned by the model. The model may return more predictions than this number, but only the top `max_detections` predictions will be returned.",),overlap:float=Query(0.3,description="The IoU threhsold that must be met for a box pair to be considered duplicate during NMS",),stroke:int=Query(1,description="The stroke width used when visualizing predictions"),countinference:Optional[bool]=Query(True,description="If false, does not track inference against usage.",include_in_schema=False,),service_secret:Optional[str]=Query(None,description="Shared secret used to authenticate requests to the inference server from internal services (e.g. to allow disabling inference usage tracking via the `countinference` query parameter)",include_in_schema=False,),disable_preproc_auto_orient:Optional[bool]=Query(False,description="If true, disables automatic image orientation"),disable_preproc_contrast:Optional[bool]=Query(False,description="If true, disables automatic contrast adjustment"),disable_preproc_grayscale:Optional[bool]=Query(False,description="If true, disables automatic grayscale conversion",),disable_preproc_static_crop:Optional[bool]=Query(False,description="If true, disables automatic static crop"),disable_active_learning:Optional[bool]=Query(default=False,description="If true, the predictions will be prevented from registration by Active Learning (if the functionality is enabled)",),active_learning_target_dataset:Optional[str]=Query(default=None,description="Parameter to be used when Active Learning data registration should happen against different dataset than the one pointed by model_id",),source:Optional[str]=Query("external",description="The source of the inference request",),source_info:Optional[str]=Query("external",description="The detailed source information of the inference request",),):""" Legacy inference endpoint for object detection, instance segmentation, and classification. Args: background_tasks: (BackgroundTasks) pool of fastapi background tasks dataset_id (str): ID of a Roboflow dataset corresponding to the model to use for inference OR workspace ID version_id (str): ID of a Roboflow dataset version corresponding to the model to use for inference OR model ID api_key (Optional[str], default None): Roboflow API Key passed to the model during initialization for artifact retrieval. # Other parameters described in the function signature... Returns: Union[InstanceSegmentationInferenceResponse, KeypointsDetectionInferenceRequest, ObjectDetectionInferenceResponse, ClassificationInferenceResponse, MultiLabelClassificationInferenceResponse, Any]: The response containing the inference results. """logger.debug(f"Reached legacy route /:dataset_id/:version_id with {dataset_id}/{version_id}")model_id=f"{dataset_id}/{version_id}"ifconfidence>=1:confidence/=100elifconfidence<0.01:confidence=0.01ifoverlap>=1:overlap/=100ifimageisnotNone:request_image=InferenceRequestImage(type="url",value=image)else:if"Content-Type"notinrequest.headers:raiseContentTypeMissing(f"Request must include a Content-Type header")if"multipart/form-data"inrequest.headers["Content-Type"]:form_data=awaitrequest.form()base64_image_str=awaitform_data["file"].read()base64_image_str=base64.b64encode(base64_image_str)request_image=InferenceRequestImage(type="base64",value=base64_image_str.decode("ascii"))elif("application/x-www-form-urlencoded"inrequest.headers["Content-Type"]or"application/json"inrequest.headers["Content-Type"]):data=awaitrequest.body()request_image=InferenceRequestImage(type=image_type,value=data)else:raiseContentTypeInvalid(f"Invalid Content-Type: {request.headers['Content-Type']}")ifnotcountinferenceandservice_secret!=ROBOFLOW_SERVICE_SECRET:raiseMissingServiceSecretError("Service secret is required to disable inference usage tracking")ifLAMBDA:logger.debug("request.scope: %s",request.scope)request_model_id=(request.scope["aws.event"]["requestContext"]["authorizer"]["lambda"]["model"]["endpoint"].replace("--","/").replace("rf-","").replace("nu-",""))actor=request.scope["aws.event"]["requestContext"]["authorizer"]["lambda"]["actor"]ifcountinference:trackUsage(request_model_id,actor)else:ifservice_secret!=ROBOFLOW_SERVICE_SECRET:raiseMissingServiceSecretError("Service secret is required to disable inference usage tracking")logger.info("Not counting inference for usage")else:request_model_id=model_idlogger.debug(f"State of model registry: {self.model_manager.describe_models()}")self.model_manager.add_model(request_model_id,api_key,model_id_alias=model_id)task_type=self.model_manager.get_task_type(model_id,api_key=api_key)inference_request_type=ObjectDetectionInferenceRequestargs=dict()iftask_type=="instance-segmentation":inference_request_type=InstanceSegmentationInferenceRequestargs={"mask_decode_mode":mask_decode_mode,"tradeoff_factor":tradeoff_factor,}eliftask_type=="classification":inference_request_type=ClassificationInferenceRequesteliftask_type=="keypoint-detection":inference_request_type=KeypointsDetectionInferenceRequestargs={"keypoint_confidence":keypoint_confidence}inference_request=inference_request_type(api_key=api_key,model_id=model_id,image=request_image,confidence=confidence,iou_threshold=overlap,max_detections=max_detections,visualization_labels=labels,visualization_stroke_width=stroke,visualize_predictions=(format=="image"orformat=="image_and_json"),disable_preproc_auto_orient=disable_preproc_auto_orient,disable_preproc_contrast=disable_preproc_contrast,disable_preproc_grayscale=disable_preproc_grayscale,disable_preproc_static_crop=disable_preproc_static_crop,disable_active_learning=disable_active_learning,active_learning_target_dataset=active_learning_target_dataset,source=source,source_info=source_info,usage_billable=countinference,**args,)inference_response=awaitself.model_manager.infer_from_request(inference_request.model_id,inference_request,active_learning_eligible=True,background_tasks=background_tasks,)logger.debug("Response ready.")ifformat=="image":returnResponse(content=inference_response.visualization,media_type="image/jpeg",)else:returnorjson_response(inference_response)ifnot(LAMBDAorGCP_SERVERLESS):# Legacy clear cache endpoint for backwards compatability@app.get("/clear_cache",response_model=str)asyncdeflegacy_clear_cache():""" Clears the model cache. This endpoint provides a way to clear the cache of loaded models. Returns: str: A string indicating that the cache has been cleared. """logger.debug(f"Reached /clear_cache")awaitmodel_clear()return"Cache Cleared"# Legacy add model endpoint for backwards compatability@app.get("/start/{dataset_id}/{version_id}")asyncdefmodel_add_legacy(dataset_id:str,version_id:str,api_key:str=None):""" Starts a model inference session. This endpoint initializes and starts an inference session for the specified model version. Args: dataset_id (str): ID of a Roboflow dataset corresponding to the model. version_id (str): ID of a Roboflow dataset version corresponding to the model. api_key (str, optional): Roboflow API Key for artifact retrieval. Returns: JSONResponse: A response object containing the status and a success message. """logger.debug(f"Reached /start/{dataset_id}/{version_id} with {dataset_id}/{version_id}")model_id=f"{dataset_id}/{version_id}"self.model_manager.add_model(model_id,api_key)returnJSONResponse({"status":200,"message":"inference session started from local memory.",})app.mount("/",StaticFiles(directory="./inference/landing/out",html=True),name="root",)
defwith_route_exceptions(route):""" A decorator that wraps a FastAPI route to handle specific exceptions. If an exception is caught, it returns a JSON response with the error message. Args: route (Callable): The FastAPI route to be wrapped. Returns: Callable: The wrapped route. """@wraps(route)asyncdefwrapped_route(*args,**kwargs):try:returnawaitroute(*args,**kwargs)exceptContentTypeInvalidaserror:logger.error("%s: %s",type(error).__name__,error)resp=JSONResponse(status_code=400,content={"message":"Invalid Content-Type header provided with request."},)traceback.print_exc()exceptContentTypeMissingaserror:logger.error("%s: %s",type(error).__name__,error)resp=JSONResponse(status_code=400,content={"message":"Content-Type header not provided with request."},)traceback.print_exc()exceptInputImageLoadErroraserror:logger.error("%s: %s",type(error).__name__,error)resp=JSONResponse(status_code=400,content={"message":f"Could not load input image. Cause: {error.get_public_error_details()}"},)traceback.print_exc()exceptInvalidModelIDErroraserror:logger.error("%s: %s",type(error).__name__,error)resp=JSONResponse(status_code=400,content={"message":"Invalid Model ID sent in request."},)traceback.print_exc()exceptInvalidMaskDecodeArgumentaserror:logger.error("%s: %s",type(error).__name__,error)resp=JSONResponse(status_code=400,content={"message":"Invalid mask decode argument sent. tradeoff_factor must be in [0.0, 1.0], ""mask_decode_mode: must be one of ['accurate', 'fast', 'tradeoff']"},)traceback.print_exc()exceptMissingApiKeyErroraserror:logger.error("%s: %s",type(error).__name__,error)resp=JSONResponse(status_code=400,content={"message":"Required Roboflow API key is missing. Visit https://docs.roboflow.com/api-reference/authentication#retrieve-an-api-key ""to learn how to retrieve one."},)traceback.print_exc()except(WorkflowSyntaxError,InvalidReferenceTargetError,ExecutionGraphStructureError,StepInputDimensionalityError,)aserror:logger.error("%s: %s",type(error).__name__,error)content=WorkflowErrorResponse(message=str(error.public_message),error_type=error.__class__.__name__,context=str(error.context),inner_error_type=str(error.inner_error_type),inner_error_message=str(error.inner_error),blocks_errors=error.blocks_errors,)resp=JSONResponse(status_code=400,content=content.model_dump())except(WorkflowDefinitionError,ReferenceTypeError,RuntimeInputError,InvalidInputTypeError,OperationTypeNotRecognisedError,DynamicBlockError,WorkflowExecutionEngineVersionError,NotSupportedExecutionEngineError,)aserror:logger.error("%s: %s",type(error).__name__,error)resp=JSONResponse(status_code=400,content={"message":error.public_message,"error_type":error.__class__.__name__,"context":error.context,"inner_error_type":error.inner_error_type,"inner_error_message":str(error.inner_error),},)except(ProcessesManagerInvalidPayload,MalformedPayloadError,MessageToBigError,)aserror:logger.error("%s: %s",type(error).__name__,error)resp=JSONResponse(status_code=400,content={"message":error.public_message,"error_type":error.__class__.__name__,"inner_error_type":error.inner_error_type,},)except(RoboflowAPINotAuthorizedError,ProcessesManagerAuthorisationError,)aserror:logger.error("%s: %s",type(error).__name__,error)resp=JSONResponse(status_code=401,content={"message":"Unauthorized access to roboflow API - check API key and make sure the key is valid for ""workspace you use. Visit https://docs.roboflow.com/api-reference/authentication#retrieve-an-api-key ""to learn how to retrieve one."},)traceback.print_exc()except(RoboflowAPINotNotFoundError,InferenceModelNotFound)aserror:logger.error("%s: %s",type(error).__name__,error)resp=JSONResponse(status_code=404,content={"message":"Requested Roboflow resource not found. Make sure that workspace, project or model ""you referred in request exists."},)traceback.print_exc()exceptProcessesManagerNotFoundErroraserror:logger.error("%s: %s",type(error).__name__,error)resp=JSONResponse(status_code=404,content={"message":error.public_message,"error_type":error.__class__.__name__,"inner_error_type":error.inner_error_type,},)traceback.print_exc()except(InvalidEnvironmentVariableError,MissingServiceSecretError,ServiceConfigurationError,)aserror:logger.error("%s: %s",type(error).__name__,error)resp=JSONResponse(status_code=500,content={"message":"Service misconfiguration."})traceback.print_exc()except(PreProcessingError,PostProcessingError,)aserror:logger.error("%s: %s",type(error).__name__,error)resp=JSONResponse(status_code=500,content={"message":"Model configuration related to pre- or post-processing is invalid."},)traceback.print_exc()exceptModelArtefactErroraserror:logger.error("%s: %s",type(error).__name__,error)resp=JSONResponse(status_code=500,content={"message":"Model package is broken."})traceback.print_exc()exceptOnnxProviderNotAvailableaserror:logger.error("%s: %s",type(error).__name__,error)resp=JSONResponse(status_code=501,content={"message":"Could not find requested ONNX Runtime Provider. Check that you are using ""the correct docker image on a supported device."},)traceback.print_exc()except(MalformedRoboflowAPIResponseError,RoboflowAPIUnsuccessfulRequestError,WorkspaceLoadError,MalformedWorkflowResponseError,)aserror:logger.error("%s: %s",type(error).__name__,error)resp=JSONResponse(status_code=502,content={"message":"Internal error. Request to Roboflow API failed."},)traceback.print_exc()exceptRoboflowAPIConnectionErroraserror:logger.error("%s: %s",type(error).__name__,error)resp=JSONResponse(status_code=503,content={"message":"Internal error. Could not connect to Roboflow API."},)traceback.print_exc()exceptRoboflowAPITimeoutErroraserror:logger.error("%s: %s",type(error).__name__,error)resp=JSONResponse(status_code=504,content={"message":"Timeout when attempting to connect to Roboflow API."},)traceback.print_exc()exceptStepExecutionErroraserror:logger.error("%s: %s",type(error).__name__,error)content=WorkflowErrorResponse(message=str(error.public_message),error_type=error.__class__.__name__,context=str(error.context),inner_error_type=str(error.inner_error_type),inner_error_message=str(error.inner_error),blocks_errors=[WorkflowBlockError(block_id=error.block_id,block_type=error.block_type,),],)resp=JSONResponse(status_code=500,content=content.model_dump(),)traceback.print_exc()exceptWorkflowErroraserror:logger.error("%s: %s",type(error).__name__,error)resp=JSONResponse(status_code=500,content={"message":error.public_message,"error_type":error.__class__.__name__,"context":error.context,"inner_error_type":error.inner_error_type,"inner_error_message":str(error.inner_error),},)traceback.print_exc()except(ProcessesManagerClientError,CommunicationProtocolError,)aserror:logger.error("%s: %s",type(error).__name__,error)resp=JSONResponse(status_code=500,content={"message":error.public_message,"error_type":error.__class__.__name__,"inner_error_type":error.inner_error_type,},)traceback.print_exc()exceptExceptionaserror:logger.error("%s: %s",type(error).__name__,error)resp=JSONResponse(status_code=500,content={"message":"Internal error."})traceback.print_exc()returnrespreturnwrapped_route