Webrtc

`OnDemandVideoTrack` ¶

Bases: MediaStreamTrack

Lazy video track that decodes frames on-demand without pre-buffering.

Unlike MediaPlayer, which spawns a background thread to decode ALL frames into an unbounded queue, this class decodes one frame per recv() call. This keeps memory usage constant (~50-100MB) regardless of video length.

Use this for video file processing when realtime_processing=False. For throttled playback (realtime_processing=True), use MediaPlayer instead.

Source code in inference/core/interfaces/webrtc_worker/webrtc.py

class OnDemandVideoTrack(MediaStreamTrack):
    """Lazy video track that decodes frames on-demand without pre-buffering.

    Unlike MediaPlayer which spawns a background thread to decode ALL frames
    into an unbounded queue, this class decodes one frame per recv() call.
    This keeps memory usage constant (~50-100MB) regardless of video length.

    Use this for video file processing when realtime_processing=False.
    For throttled playback (realtime_processing=True), use MediaPlayer instead.
    """

    kind = "video"

    def __init__(self, filepath: str):
        """Open ``filepath`` and prepare a lazy decoder for its first video stream.

        Args:
            filepath: Path handed to ``av.open``.

        Raises:
            Whatever ``av.open`` or the stream lookup raises (for example
            ``IndexError`` when the file has no video stream). The container
            is closed before the error propagates.
        """
        super().__init__()
        import av

        self._container = av.open(filepath)
        try:
            self._stream = self._container.streams.video[0]
            self._iterator = self._container.decode(self._stream)
        except Exception:
            # Nobody will call stop() on a half-constructed track, so release
            # the container here instead of leaking it.
            self._container.close()
            self._container = None
            raise

    async def recv(self) -> VideoFrame:
        """Decode and return the next frame.

        Raises:
            MediaStreamError: when the file is exhausted or the track has
                already been stopped.
        """
        if self._container is None:
            # stop() has closed the container; never touch the decoder again.
            raise MediaStreamError("Track has been stopped")
        loop = asyncio.get_running_loop()
        # Decoding is blocking work, so it runs in the default executor.
        frame = await loop.run_in_executor(None, lambda: next(self._iterator, None))
        if frame is None:
            self.stop()
            raise MediaStreamError("End of video file")
        return frame

    def stop(self):
        """Stop the track and close the underlying container (idempotent)."""
        super().stop()
        if self._container is not None:
            self._container.close()
            self._container = None

`VideoFileUploadHandler` ¶

Handles video file uploads via data channel.

Protocol: `[chunk_index:u32][total_chunks:u32][payload]`. Auto-completes when all chunks have been received.

Source code in inference/core/interfaces/webrtc_worker/webrtc.py

class VideoFileUploadHandler:
    """Handles video file uploads via data channel.

    Protocol: [chunk_index:u32][total_chunks:u32][payload]
    Auto-completes when all chunks received.
    """

    def __init__(self):
        # Chunks received so far, keyed by chunk index.
        self._chunks: Dict[int, bytes] = {}
        # Total announced by the first chunk; None until an upload starts.
        self._total_chunks: Optional[int] = None
        # Path of the reassembled file; None before completion / after cleanup.
        self._temp_file_path: Optional[str] = None
        self._state = VideoFileUploadState.IDLE
        # Set once the reassembled file has been written.
        self.upload_complete_event = asyncio.Event()

    @property
    def temp_file_path(self) -> Optional[str]:
        """Path of the reassembled temp file, or None if not available."""
        return self._temp_file_path

    def handle_chunk(self, chunk_index: int, total_chunks: int, data: bytes) -> None:
        """Handle a chunk. Auto-completes when all chunks received.

        Args:
            chunk_index: Zero-based position of this chunk in the file.
            total_chunks: Number of chunks in the upload. The value from the
                first chunk is authoritative for the whole upload.
            data: Chunk payload.

        Chunks that arrive after the upload completed, or whose index lies
        outside ``[0, total)``, are logged and ignored.
        """
        if self.upload_complete_event.is_set():
            # The file is already written; buffering more data would only
            # leak memory or trigger a second reassembly.
            logger.warning(
                "Ignoring chunk %s received after upload completed", chunk_index
            )
            return

        if self._total_chunks is None:
            self._total_chunks = total_chunks
            self._state = VideoFileUploadState.UPLOADING
            logger.info(f"Starting video upload: {total_chunks} chunks")

        expected_chunks = self._total_chunks
        if not 0 <= chunk_index < expected_chunks:
            # An out-of-range index would make the count reach the total while
            # a real index is still missing, failing later during reassembly.
            logger.warning(
                "Ignoring chunk with out-of-range index %s (total chunks: %s)",
                chunk_index,
                expected_chunks,
            )
            return

        self._chunks[chunk_index] = data

        if chunk_index % 100 == 0:
            logger.info(
                "Upload progress: %s/%s chunks", len(self._chunks), expected_chunks
            )

        # Auto-complete when all chunks received
        # TODO: Handle the file writing without keeping all chunks in memory
        if len(self._chunks) == expected_chunks:
            self._write_to_temp_file()
            self._state = VideoFileUploadState.COMPLETE
            self.upload_complete_event.set()

    def _write_to_temp_file(self) -> None:
        """Reassemble chunks in index order and write them to a temp file."""
        import tempfile

        total_size = 0
        # delete=False: the file must outlive this handle; cleanup() removes it.
        with tempfile.NamedTemporaryFile(mode="wb", suffix=".mp4", delete=False) as f:
            for i in range(self._total_chunks):
                chunk_data = self._chunks[i]
                f.write(chunk_data)
                total_size += len(chunk_data)
            self._temp_file_path = f.name

        logger.info(
            "Video upload complete: %s bytes -> %s", total_size, self._temp_file_path
        )
        self._chunks.clear()  # Free memory

    def try_start_processing(self) -> Optional[str]:
        """Atomically check if upload is complete and transition to PROCESSING.

        Returns video path if processing should start, None otherwise.
        This ensures process_video_file() is only triggered once.
        """
        if self._state == VideoFileUploadState.COMPLETE:
            self._state = VideoFileUploadState.PROCESSING
            return self._temp_file_path
        return None

    async def cleanup(self) -> None:
        """Delete the temp file (best effort) and forget its path."""
        if self._temp_file_path:
            import os

            path_to_delete = self._temp_file_path
            # Clear first so a concurrent/repeated cleanup does not retry.
            self._temp_file_path = None
            try:
                await asyncio.to_thread(os.unlink, path_to_delete)
            except FileNotFoundError:
                # Already gone - nothing left to clean up.
                pass
            except OSError as exc:
                logger.warning(
                    "Failed to delete temp file %s: %s", path_to_delete, exc
                )

`cleanup()` `async` ¶

Clean up temp file.

Source code in inference/core/interfaces/webrtc_worker/webrtc.py

async def cleanup(self) -> None:
    """Delete the temp file (best effort) and forget its path.

    Does nothing when no temp file path is recorded. A file that is already
    missing is ignored; any other OS-level failure is logged, not raised.
    """
    if self._temp_file_path:
        import os

        path_to_delete = self._temp_file_path
        # Clear first so a repeated cleanup does not try the same path again.
        self._temp_file_path = None
        try:
            # Run the blocking unlink off the event loop.
            await asyncio.to_thread(os.unlink, path_to_delete)
        except FileNotFoundError:
            # Already gone - nothing left to clean up.
            pass
        except OSError as exc:
            logger.warning("Failed to delete temp file %s: %s", path_to_delete, exc)

`handle_chunk(chunk_index, total_chunks, data)` ¶

Handle a chunk. Auto-completes when all chunks received.

Source code in inference/core/interfaces/webrtc_worker/webrtc.py

def handle_chunk(self, chunk_index: int, total_chunks: int, data: bytes) -> None:
    """Handle a chunk. Auto-completes when all chunks received.

    Args:
        chunk_index: Zero-based position of this chunk in the file.
        total_chunks: Number of chunks in the upload. The value from the
            first chunk is authoritative for the whole upload.
        data: Chunk payload.

    Chunks that arrive after the upload completed, or whose index lies
    outside ``[0, total)``, are logged and ignored.
    """
    if self.upload_complete_event.is_set():
        # The file is already written; buffering more data would only leak
        # memory or trigger a second reassembly.
        logger.warning(
            "Ignoring chunk %s received after upload completed", chunk_index
        )
        return

    if self._total_chunks is None:
        self._total_chunks = total_chunks
        self._state = VideoFileUploadState.UPLOADING
        logger.info(f"Starting video upload: {total_chunks} chunks")

    expected_chunks = self._total_chunks
    if not 0 <= chunk_index < expected_chunks:
        # An out-of-range index would make the count reach the total while a
        # real index is still missing, failing later during reassembly.
        logger.warning(
            "Ignoring chunk with out-of-range index %s (total chunks: %s)",
            chunk_index,
            expected_chunks,
        )
        return

    self._chunks[chunk_index] = data

    if chunk_index % 100 == 0:
        logger.info(
            "Upload progress: %s/%s chunks", len(self._chunks), expected_chunks
        )

    # Auto-complete when all chunks received
    # TODO: Handle the file writing without keeping all chunks in memory
    if len(self._chunks) == expected_chunks:
        self._write_to_temp_file()
        self._state = VideoFileUploadState.COMPLETE
        self.upload_complete_event.set()

`try_start_processing()` ¶

Atomically check if upload is complete and transition to PROCESSING.

Returns video path if processing should start, None otherwise. This ensures process_video_file() is only triggered once.

Source code in inference/core/interfaces/webrtc_worker/webrtc.py

def try_start_processing(self) -> Optional[str]:
    """Claim a completed upload for processing.

    When the handler is in the COMPLETE state it is moved to PROCESSING and
    the temp file path is returned; in any other state nothing changes and
    None is returned. Because the state leaves COMPLETE on the first
    successful call, process_video_file() is only triggered once.
    """
    if self._state != VideoFileUploadState.COMPLETE:
        return None

    self._state = VideoFileUploadState.PROCESSING
    return self._temp_file_path

`VideoFrameProcessor` ¶

Base class for processing video frames through workflow.

Can be used independently for data-only processing (no video track output) or as a base for VideoTransformTrackWithLoop when video output is needed.

Source code in inference/core/interfaces/webrtc_worker/webrtc.py

class VideoFrameProcessor:
    """Base class for processing video frames through workflow.

    Can be used independently for data-only processing (no video track output)
    or as a base for VideoTransformTrackWithLoop when video output is needed.
    """

    def __init__(
        self,
        asyncio_loop: asyncio.AbstractEventLoop,
        workflow_configuration: WorkflowConfiguration,
        api_key: str,
        model_manager: Optional[ModelManager] = None,
        data_output: Optional[List[str]] = None,
        stream_output: Optional[str] = None,
        has_video_track: bool = True,
        declared_fps: float = 30,
        termination_date: Optional[datetime.datetime] = None,
        terminate_event: Optional[asyncio.Event] = None,
        heartbeat_callback: Optional[Callable[[], None]] = None,
        realtime_processing: bool = True,
    ):
        """Store configuration and build the inference pipeline.

        Args:
            asyncio_loop: Event loop kept on the instance.
            workflow_configuration: Workflow definition passed to
                ``InferencePipeline.init_with_workflow``.
            api_key: API key passed to the inference pipeline.
            model_manager: Optional model manager passed to the pipeline.
            data_output: ``None`` or an empty list disables data output,
                ``["*"]`` selects all outputs, any other list selects the
                named outputs. Empty entries are dropped.
            stream_output: Name of the workflow output used as video stream.
            has_video_track: Stored flag, see callers for its meaning.
            declared_fps: FPS value reported in outgoing video metadata.
            termination_date: Optional deadline checked by
                ``_check_termination``.
            terminate_event: Optional event checked (and set) by
                ``_check_termination``.
            heartbeat_callback: Optional callable invoked periodically while
                processing and waiting.
            realtime_processing: When True, ACK pacing is skipped and
                PlayerStreamTrack queues are drained.

        Raises:
            WebRTCConfigurationError: if ``data_output`` is neither a list nor
                ``None``, or if requested output names are not defined by the
                workflow specification.
        """
        self._loop = asyncio_loop
        self._termination_date = termination_date
        self._terminate_event = terminate_event
        self.track: Optional[MediaStreamTrack] = None
        self._track_active: bool = False
        self._av_logging_set: bool = False
        self._received_frames = 0
        self._declared_fps = declared_fps
        self._stop_processing = False
        self.heartbeat_callback = heartbeat_callback

        self.has_video_track = has_video_track
        self.stream_output = stream_output
        self.data_channel: Optional[RTCDataChannel] = None

        # Video file upload support
        self.video_upload_handler: Optional[VideoFileUploadHandler] = None
        self._track_ready_event: asyncio.Event = asyncio.Event()
        self.realtime_processing = realtime_processing

        # Optional receiver-paced flow control (enabled only after first ACK is received)
        self._ack_last: int = 0
        # If ack=1 and window=4, server may produce/send up to frame 5.
        # Configurable via WEBRTC_DATACHANNEL_ACK_WINDOW env var.
        self._ack_window: int = WEBRTC_DATA_CHANNEL_ACK_WINDOW
        self._ack_event: asyncio.Event = asyncio.Event()

        if data_output is None:
            self.data_output = None
            self._data_mode = DataOutputMode.NONE
        elif isinstance(data_output, list):
            self.data_output = [f for f in data_output if f]
            if self.data_output == ["*"]:
                self._data_mode = DataOutputMode.ALL
            elif len(self.data_output) == 0:
                self._data_mode = DataOutputMode.NONE
            else:
                self._data_mode = DataOutputMode.SPECIFIC
        else:
            raise WebRTCConfigurationError(
                f"data_output must be list or None, got {type(data_output).__name__}"
            )

        self._validate_output_fields(workflow_configuration)

        self._inference_pipeline = InferencePipeline.init_with_workflow(
            video_reference=VideoFrameProducer,
            workflow_specification=workflow_configuration.workflow_specification,
            workspace_name=workflow_configuration.workspace_name,
            workflow_id=workflow_configuration.workflow_id,
            api_key=api_key,
            image_input_name=workflow_configuration.image_input_name,
            workflows_parameters=workflow_configuration.workflows_parameters,
            workflows_thread_pool_workers=workflow_configuration.workflows_thread_pool_workers,
            cancel_thread_pool_tasks_on_exit=workflow_configuration.cancel_thread_pool_tasks_on_exit,
            video_metadata_input_name=workflow_configuration.video_metadata_input_name,
            model_manager=model_manager,
        )

    def set_track(self, track: MediaStreamTrack):
        """Attach the source track (first call wins) and signal readiness."""
        if not self.track:
            self.track = track
            self._track_ready_event.set()

    async def close(self):
        """Stop processing and clean up the upload handler, if any."""
        self._track_active = False
        self._stop_processing = True
        # Clean up video upload handler if present
        if self.video_upload_handler is not None:
            await self.video_upload_handler.cleanup()

    def record_ack(self, ack: int) -> None:
        """Record cumulative ACK from the client.

        ACK semantics: client has fully handled all frames <= ack.
        Backwards compatible: pacing is disabled until we receive the first ACK.
        """
        try:
            ack_int = int(ack)
        except (TypeError, ValueError):
            logger.warning("Invalid ACK value: %s", ack)
            return
        if ack_int < 0:
            logger.warning("Invalid ACK value: %s", ack)
            return
        if ack_int > self._ack_last:
            if ack_int % 100 == 1:
                logger.info("ACK received: %s", ack_int)
            self._ack_last = ack_int
            self._ack_event.set()

    async def _wait_for_ack_window(self, next_frame_id: int) -> None:
        """Block frame production when too far ahead of client ACKs.

        Allows up to (_ack_window) frames in flight beyond the last ACK.
        Only active for non-realtime processing (video file uploads).
        """
        if self.realtime_processing:
            return
        if self._ack_last == 0:
            return
        wait_counter = 0
        while not self._stop_processing and next_frame_id > (
            self._ack_last + self._ack_window
        ):
            if self._check_termination():
                return
            if self.heartbeat_callback:
                self.heartbeat_callback()
            # Wait briefly for an ACK; timeout keeps heartbeats flowing.
            self._ack_event.clear()
            try:
                await asyncio.wait_for(self._ack_event.wait(), timeout=0.2)
            except asyncio.TimeoutError:
                wait_counter += 1
                if wait_counter % 5 == 1:
                    logger.info(
                        "Timeout waiting for ACK window (next_frame_id=%s, ack_last=%s, ack_window=%s)",
                        next_frame_id,
                        self._ack_last,
                        self._ack_window,
                    )

    def _check_termination(self):
        """Check if we should terminate based on timeout.

        Returns:
            True when the termination date has passed or the terminate event
            is set; False otherwise. On termination the terminate event, if
            one was provided, is set.
        """
        deadline_passed = (
            self._termination_date is not None
            and self._termination_date < datetime.datetime.now()
        )
        event_set = (
            self._terminate_event is not None and self._terminate_event.is_set()
        )
        if not (deadline_passed or event_set):
            return False

        logger.info("Timeout reached, terminating inference pipeline")
        # terminate_event is optional; a deadline may be configured without it.
        if self._terminate_event is not None:
            self._terminate_event.set()
        return True

    @staticmethod
    def serialize_outputs_sync(
        fields_to_send: List[str],
        workflow_output: Dict[str, Any],
        data_output_mode: DataOutputMode,
    ) -> Tuple[Dict[str, Any], List[str]]:
        """Serialize workflow outputs in a thread to avoid blocking the event loop.

        Returns:
            ``(serialized, errors)``. Missing fields and per-field
            serialization failures are reported in ``errors``; a failed field
            is represented by a ``{"__serialization_error__": ...}`` entry.
            In ALL mode, ``WorkflowImageData`` outputs are skipped.
        """
        serialized = {}
        serialization_errors = []

        for field_name in fields_to_send:
            if field_name not in workflow_output:
                serialization_errors.append(
                    f"Requested output '{field_name}' not found in workflow outputs"
                )
                continue

            output_data = workflow_output[field_name]

            if data_output_mode == DataOutputMode.ALL and isinstance(
                output_data, WorkflowImageData
            ):
                continue

            try:
                serialized_value = serialize_wildcard_kind(output_data)
                serialized[field_name] = serialized_value
            except Exception as e:
                serialization_errors.append(f"{field_name}: {e}")
                serialized[field_name] = {"__serialization_error__": str(e)}

        return serialized, serialization_errors

    async def _send_data_output(
        self,
        workflow_output: Dict[str, Any],
        frame_timestamp: datetime.datetime,
        frame: VideoFrame,
        errors: List[str],
    ):
        """Send per-frame metadata and selected outputs over the data channel.

        Does nothing unless a data channel is attached and open.
        """
        if not self.data_channel or self.data_channel.readyState != "open":
            return

        video_metadata = WebRTCVideoMetadata(
            frame_id=self._received_frames,
            received_at=frame_timestamp.isoformat(),
            pts=frame.pts,
            time_base=frame.time_base,
            declared_fps=self._declared_fps,
        )

        webrtc_output = WebRTCOutput(
            serialized_output_data=None,
            video_metadata=video_metadata,
            errors=errors.copy(),
        )

        if self._data_mode == DataOutputMode.NONE:
            # Even empty responses use binary protocol
            json_bytes = await asyncio.to_thread(
                lambda: json.dumps(webrtc_output.model_dump()).encode("utf-8")
            )
            await send_chunked_data(
                self.data_channel,
                self._received_frames,
                json_bytes,
                heartbeat_callback=self.heartbeat_callback,
            )
            return

        if self._data_mode == DataOutputMode.ALL:
            fields_to_send = list(workflow_output.keys())
        else:
            fields_to_send = self.data_output

        # Offload CPU-intensive serialization (especially image base64 encoding) to thread
        serialized_outputs, serialization_errors = await asyncio.to_thread(
            VideoFrameProcessor.serialize_outputs_sync,
            fields_to_send,
            workflow_output,
            self._data_mode,
        )
        webrtc_output.errors.extend(serialization_errors)

        # Set serialized outputs
        if serialized_outputs:
            webrtc_output.serialized_output_data = serialized_outputs

        # Send using binary chunked protocol
        json_bytes = await asyncio.to_thread(
            lambda: json.dumps(webrtc_output.model_dump(mode="json")).encode("utf-8")
        )
        await send_chunked_data(
            self.data_channel,
            self._received_frames,
            json_bytes,
            heartbeat_callback=self.heartbeat_callback,
        )

    async def _send_processing_complete(self):
        """Send final message indicating processing is complete."""
        if not self.data_channel or self.data_channel.readyState != "open":
            return

        completion_output = WebRTCOutput(
            processing_complete=True,
            video_metadata=WebRTCVideoMetadata(
                frame_id=self._received_frames,
                received_at=datetime.datetime.now().isoformat(),
            ),
        )
        json_bytes = json.dumps(completion_output.model_dump()).encode("utf-8")
        # The completion message is sent under the frame id after the last frame.
        await send_chunked_data(
            self.data_channel, self._received_frames + 1, json_bytes
        )
        logger.info(
            "Sent processing_complete signal after %s frames", self._received_frames
        )

    async def process_frames_data_only(self):
        """Process frames for data extraction only, without video track output.

        This is used when stream_output=[] and no video track is needed.
        """
        # Silencing swscaler warnings in multi-threading environment
        if not self._av_logging_set:
            av_logging.set_libav_level(av_logging.ERROR)
            self._av_logging_set = True

        logger.info(
            "Starting data-only frame processing. This mode is used when stream_output=[] and no video track is needed."
        )

        try:
            while not self._stop_processing:
                await self._wait_for_ack_window(next_frame_id=self._received_frames + 1)
                if self._check_termination():
                    break
                if self.heartbeat_callback:
                    self.heartbeat_callback()

                # Get frame from media track (existing behavior)
                if not self.track or self.track.readyState == "ended":
                    break

                # Drain queue if using PlayerStreamTrack (RTSP)
                if (
                    isinstance(self.track, PlayerStreamTrack)
                    and self.realtime_processing
                ):
                    while self.track._queue.qsize() > 30:
                        self.track._queue.get_nowait()

                frame = await self.track.recv()
                self._received_frames += 1

                frame_timestamp = datetime.datetime.now()

                workflow_output, _, errors = await self._process_frame_async(
                    frame=frame,
                    frame_id=self._received_frames,
                    render_output=False,
                    include_errors_on_frame=False,
                )

                # Send data via data channel (await for backpressure)
                await self._send_data_output(
                    workflow_output, frame_timestamp, frame, errors
                )

        except asyncio.CancelledError as exc:
            logger.info("Data-only processing cancelled: %s", exc)
        except MediaStreamError as exc:
            logger.info("Stream ended in data-only processing: %s", exc)
        except Exception as exc:
            logger.error("Error in data-only processing: %s", exc)
        finally:
            # Send completion signal to client
            await self._send_processing_complete()

    @staticmethod
    def _ensure_workflow_specification(
        workflow_configuration: WorkflowConfiguration, api_key: str
    ) -> None:
        """Make sure ``workflow_configuration`` carries a workflow specification.

        When only workspace name and workflow id are given, the specification
        is fetched and stored on the configuration, and both identifiers are
        reset to ``None``.

        Raises:
            WebRTCConfigurationError: if neither a specification nor both
                identifiers are provided, or if fetching fails.
        """
        has_specification = workflow_configuration.workflow_specification is not None
        has_workspace_and_workflow_id = (
            workflow_configuration.workspace_name is not None
            and workflow_configuration.workflow_id is not None
        )

        if not has_specification and not has_workspace_and_workflow_id:
            raise WebRTCConfigurationError(
                "Either 'workflow_specification' or both 'workspace_name' and 'workflow_id' must be provided"
            )

        if not has_specification and has_workspace_and_workflow_id:
            try:
                workflow_configuration.workflow_specification = (
                    get_workflow_specification(
                        api_key=api_key,
                        workspace_id=workflow_configuration.workspace_name,
                        workflow_id=workflow_configuration.workflow_id,
                    )
                )
                workflow_configuration.workspace_name = None
                workflow_configuration.workflow_id = None
            except Exception as e:
                # Chain the cause so the original failure stays in the traceback.
                raise WebRTCConfigurationError(
                    f"Failed to fetch workflow specification from API: {str(e)}"
                ) from e

    def _validate_output_fields(
        self, workflow_configuration: WorkflowConfiguration
    ) -> None:
        """Check requested output names against the workflow specification.

        Skipped when the configuration has no specification.

        Raises:
            WebRTCConfigurationError: for unknown ``data_output`` entries (in
                SPECIFIC mode) or an unknown ``stream_output``.
        """
        if workflow_configuration.workflow_specification is None:
            return

        workflow_outputs = workflow_configuration.workflow_specification.get(
            "outputs", []
        )
        available_output_names = [o.get("name") for o in workflow_outputs]

        if self._data_mode == DataOutputMode.SPECIFIC:
            invalid_fields = [
                field
                for field in self.data_output
                if field not in available_output_names
            ]
            if invalid_fields:
                raise WebRTCConfigurationError(
                    f"Invalid data_output fields: {invalid_fields}. "
                    f"Available workflow outputs: {available_output_names}"
                )

        if self.stream_output and self.stream_output not in available_output_names:
            raise WebRTCConfigurationError(
                f"Invalid stream_output field: '{self.stream_output}'. "
                f"Available workflow outputs: {available_output_names}"
            )

    async def _process_frame_async(
        self,
        frame: VideoFrame,
        frame_id: int,
        stream_output: Optional[str] = None,
        render_output: bool = True,
        include_errors_on_frame: bool = True,
    ) -> Tuple[Dict[str, Any], Optional[VideoFrame], List[str]]:
        """Async wrapper for process_frame using executor."""
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            None,
            process_frame,
            frame,
            frame_id,
            self._inference_pipeline,
            stream_output,
            render_output,
            include_errors_on_frame,
        )

`process_frames_data_only()` `async` ¶

Process frames for data extraction only, without video track output.

This is used when stream_output=[] and no video track is needed.

Source code in inference/core/interfaces/webrtc_worker/webrtc.py

async def process_frames_data_only(self):
    """Process frames for data extraction only, without video track output.

    This is used when stream_output=[] and no video track is needed.

    Loops until processing is stopped, termination is signalled, or the
    source track is missing/ended. Each frame is received from the track,
    run through the workflow without rendering, and the result is sent over
    the data channel. Cancellation, stream end and unexpected errors are
    logged rather than raised; a completion message is always attempted on
    exit.
    """
    # Silencing swscaler warnings in multi-threading environment
    if not self._av_logging_set:
        av_logging.set_libav_level(av_logging.ERROR)
        self._av_logging_set = True

    logger.info(
        "Starting data-only frame processing. This mode is used when stream_output=[] and no video track is needed."
    )

    try:
        while not self._stop_processing:
            # ACK pacing: may block here until the client catches up.
            await self._wait_for_ack_window(next_frame_id=self._received_frames + 1)
            if self._check_termination():
                break
            if self.heartbeat_callback:
                self.heartbeat_callback()

            # Stop when there is no source track or it has ended.
            if not self.track or self.track.readyState == "ended":
                break

            # Drain queue if using PlayerStreamTrack (RTSP): in realtime mode
            # queued frames beyond 30 are discarded from the front of the queue.
            if (
                isinstance(self.track, PlayerStreamTrack)
                and self.realtime_processing
            ):
                while self.track._queue.qsize() > 30:
                    self.track._queue.get_nowait()

            frame = await self.track.recv()
            # Frame ids are 1-based: incremented before the frame is processed.
            self._received_frames += 1

            frame_timestamp = datetime.datetime.now()

            # No rendered frame is needed in data-only mode, so the second
            # element of the result is ignored.
            workflow_output, _, errors = await self._process_frame_async(
                frame=frame,
                frame_id=self._received_frames,
                render_output=False,
                include_errors_on_frame=False,
            )

            # Send data via data channel (await for backpressure)
            await self._send_data_output(
                workflow_output, frame_timestamp, frame, errors
            )

    except asyncio.CancelledError as exc:
        logger.info("Data-only processing cancelled: %s", exc)
    except MediaStreamError as exc:
        logger.info("Stream ended in data-only processing: %s", exc)
    except Exception as exc:
        logger.error("Error in data-only processing: %s", exc)
    finally:
        # Send completion signal to client
        await self._send_processing_complete()

`record_ack(ack)` ¶

Record cumulative ACK from the client.

ACK semantics: client has fully handled all frames <= ack. Backwards compatible: pacing is disabled until we receive the first ACK.

Source code in inference/core/interfaces/webrtc_worker/webrtc.py

def record_ack(self, ack: int) -> None:
    """Store the newest cumulative ACK reported by the client.

    An ACK of N means the client has fully handled every frame <= N.
    Values that cannot be converted to a non-negative integer are logged and
    dropped; values that do not advance the last recorded ACK are ignored.
    Pacing stays disabled until the first ACK arrives, which keeps clients
    that never send ACKs working.
    """
    try:
        parsed = int(ack)
    except (TypeError, ValueError):
        parsed = None

    if parsed is None or parsed < 0:
        logger.warning("Invalid ACK value: %s", ack)
        return

    # Only a strictly newer ACK moves the window forward.
    if parsed <= self._ack_last:
        return

    if parsed % 100 == 1:
        logger.info("ACK received: %s", parsed)
    self._ack_last = parsed
    self._ack_event.set()

`serialize_outputs_sync(fields_to_send, workflow_output, data_output_mode)` `staticmethod` ¶

Serialize workflow outputs in a thread to avoid blocking the event loop.

Source code in inference/core/interfaces/webrtc_worker/webrtc.py

@staticmethod
def serialize_outputs_sync(
    fields_to_send: List[str],
    workflow_output: Dict[str, Any],
    data_output_mode: DataOutputMode,
) -> Tuple[Dict[str, Any], List[str]]:
    """Turn the requested workflow outputs into serializable values.

    Synchronous on purpose so it can be run in a worker thread and keep the
    event loop free.

    Returns:
        A ``(payload, problems)`` pair. ``payload`` maps field names to their
        serialized values (or to a ``__serialization_error__`` marker when a
        field failed); ``problems`` lists missing fields and per-field
        failures. In ALL mode, ``WorkflowImageData`` outputs are left out.
    """
    payload = {}
    problems = []
    skip_images = data_output_mode == DataOutputMode.ALL

    for name in fields_to_send:
        if name not in workflow_output:
            problems.append(
                f"Requested output '{name}' not found in workflow outputs"
            )
            continue

        raw_value = workflow_output[name]
        if skip_images and isinstance(raw_value, WorkflowImageData):
            continue

        try:
            converted = serialize_wildcard_kind(raw_value)
        except Exception as e:
            problems.append(f"{name}: {e}")
            payload[name] = {"__serialization_error__": str(e)}
        else:
            payload[name] = converted

    return payload, problems

`VideoTransformTrackWithLoop` ¶

Bases: VideoStreamTrack, VideoFrameProcessor

Video track that processes frames through workflow and sends video back.

Inherits from both VideoStreamTrack (for WebRTC video track functionality) and VideoFrameProcessor (for workflow processing logic).

Source code in inference/core/interfaces/webrtc_worker/webrtc.py

class VideoTransformTrackWithLoop(VideoStreamTrack, VideoFrameProcessor):
    """Video track that processes frames through workflow and sends video back.

    Inherits from both VideoStreamTrack (for WebRTC video track functionality)
    and VideoFrameProcessor (for workflow processing logic).
    """

    def __init__(
        self,
        asyncio_loop: asyncio.AbstractEventLoop,
        workflow_configuration: WorkflowConfiguration,
        api_key: str,
        model_manager: Optional[ModelManager] = None,
        data_output: Optional[List[str]] = None,
        stream_output: Optional[str] = None,
        has_video_track: bool = True,
        declared_fps: float = 30,
        termination_date: Optional[datetime.datetime] = None,
        terminate_event: Optional[asyncio.Event] = None,
        heartbeat_callback: Optional[Callable[[], None]] = None,
        realtime_processing: bool = True,
        *args,
        **kwargs,
    ):
        """Initialise both base classes explicitly.

        The named parameters are forwarded unchanged to
        ``VideoFrameProcessor.__init__``; extra ``*args`` / ``**kwargs`` go to
        ``VideoStreamTrack.__init__``.
        """
        # Both bases are initialised by hand (no cooperative super()) because
        # they take different arguments.
        VideoStreamTrack.__init__(self, *args, **kwargs)
        VideoFrameProcessor.__init__(
            self,
            asyncio_loop=asyncio_loop,
            workflow_configuration=workflow_configuration,
            api_key=api_key,
            data_output=data_output,
            stream_output=stream_output,
            has_video_track=has_video_track,
            declared_fps=declared_fps,
            termination_date=termination_date,
            terminate_event=terminate_event,
            model_manager=model_manager,
            heartbeat_callback=heartbeat_callback,
            realtime_processing=realtime_processing,
        )

    async def _auto_detect_stream_output(
        self, frame: VideoFrame, frame_id: int
    ) -> None:
        """Pick ``stream_output`` by running one frame through the workflow.

        The frame is processed without rendering and the result is handed to
        ``detect_image_output``. ``stream_output`` becomes the detected name,
        or an empty string when nothing is detected.
        """
        workflow_output_for_detect, _, _ = await self._process_frame_async(
            frame=frame,
            frame_id=frame_id,
            render_output=False,
            include_errors_on_frame=False,
        )
        detected_output = detect_image_output(workflow_output_for_detect)
        if detected_output:
            self.stream_output = detected_output
            logger.info(f"Auto-detected stream_output: {detected_output}")
        else:
            logger.warning("No image output detected, will use fallback")
            # Empty string (not None) so auto-detection is not attempted again.
            self.stream_output = ""

    async def recv(self):
        """Receive one source frame, process it, and return the output frame.

        The processed frame keeps the source frame's ``pts`` and
        ``time_base``. The workflow output is also sent via the data channel.

        Raises:
            MediaStreamError: when termination is signalled, or when no
                source track is available after the readiness wait.
        """
        # Silencing swscaler warnings in multi-threading environment
        if not self._av_logging_set:
            av_logging.set_libav_level(av_logging.ERROR)
            self._av_logging_set = True

        if self.heartbeat_callback:
            self.heartbeat_callback()

        # Check if we should terminate
        if self._check_termination():
            raise MediaStreamError("Processing terminated due to timeout")

        # Wait for track to be ready (video file upload case)
        if self.track is None:
            logger.info("Waiting for track to be ready...")
            await self._track_ready_event.wait()
            if self.track is None:
                raise MediaStreamError("Track not available after wait")

        # Optional ACK pacing: block producing the next frame if we're too far ahead.
        await self._wait_for_ack_window(next_frame_id=self._received_frames + 1)

        # Drain queue if using PlayerStreamTrack (RTSP/video file)
        if isinstance(self.track, PlayerStreamTrack) and self.realtime_processing:
            while self.track._queue.qsize() > 30:
                self.track._queue.get_nowait()

        frame: VideoFrame = await self.track.recv()
        # Frame ids are 1-based: incremented before the frame is processed.
        self._received_frames += 1
        frame_timestamp = datetime.datetime.now()

        # Auto-detection only happens on the first frame, and only when no
        # stream_output was configured.
        if self.stream_output is None and self._received_frames == 1:
            await self._auto_detect_stream_output(frame, self._received_frames)

        workflow_output, new_frame, errors = await self._process_frame_async(
            frame=frame,
            frame_id=self._received_frames,
            stream_output=self.stream_output,
            render_output=True,
            include_errors_on_frame=True,
        )

        # Carry over the source timing onto the processed frame.
        new_frame.pts = frame.pts
        new_frame.time_base = frame.time_base

        await self._send_data_output(workflow_output, frame_timestamp, frame, errors)

        return new_frame

`create_chunked_binary_message(frame_id, chunk_index, total_chunks, payload)` ¶

Create a binary message with standard 12-byte header.

Format: [frame_id: 4][chunk_index: 4][total_chunks: 4][payload: N] All integers are uint32 little-endian.

Source code in inference/core/interfaces/webrtc_worker/webrtc.py

def create_chunked_binary_message(
    frame_id: int, chunk_index: int, total_chunks: int, payload: bytes
) -> bytes:
    """Prefix ``payload`` with the 12-byte chunk header and return the message.

    Wire layout: [frame_id: 4][chunk_index: 4][total_chunks: 4][payload: N],
    where the three header fields are little-endian uint32 values.
    """
    # "<III": three unsigned 32-bit integers, little-endian, no padding.
    prefix = struct.pack("<III", frame_id, chunk_index, total_chunks)
    return b"".join((prefix, payload))

`send_chunked_data(data_channel, frame_id, payload_bytes, chunk_size=CHUNK_SIZE, heartbeat_callback=None)` `async` ¶

Send payload via data channel with rate limiting.

Automatically chunks large payloads and rate limits to prevent SCTP buffer overflow.

Parameters:

Name	Type	Description	Default
`data_channel`	`RTCDataChannel`	RTCDataChannel to send on	required
`frame_id`	`int`	Frame identifier	required
`payload_bytes`	`bytes`	Data to send (JPEG, JSON UTF-8, etc.)	required
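`chunk_size`	`int`	Maximum chunk size (default 48KB)	`CHUNK_SIZE`
`heartbeat_callback`	`Optional[Callable[[], None]]`	Optional callable invoked on each wait iteration while the data channel buffer drains	`None`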

Source code in inference/core/interfaces/webrtc_worker/webrtc.py

async def send_chunked_data(
    data_channel: RTCDataChannel,
    frame_id: int,
    payload_bytes: bytes,
    chunk_size: int = CHUNK_SIZE,
    heartbeat_callback: Optional[Callable[[], None]] = None,
) -> None:
    """Send payload via data channel with rate limiting.

    Automatically chunks large payloads and rate limits to prevent
    SCTP buffer overflow. Sending is abandoned (with a warning) as soon as
    the channel is observed to be in any state other than "open", including
    while waiting for the send buffer to drain.

    An empty payload yields zero chunks, so nothing is sent for it.

    Args:
        data_channel: RTCDataChannel to send on
        frame_id: Frame identifier
        payload_bytes: Data to send (JPEG, JSON UTF-8, etc.)
        chunk_size: Maximum chunk size (defaults to ``CHUNK_SIZE``)
        heartbeat_callback: Optional callable invoked on every wait
            iteration while the channel buffer is above the limit.
    """
    if data_channel.readyState != "open":
        logger.warning(f"Cannot send response for frame {frame_id}, channel not open")
        return

    sleep_count = 0

    async def wait_for_buffer_drain() -> bool:
        """Wait until the buffer is below the limit; return False if the channel closed."""
        nonlocal sleep_count
        while data_channel.bufferedAmount > WEBRTC_DATA_CHANNEL_BUFFER_SIZE_LIMIT:
            # A closed channel may never drain its buffer; without this check
            # the loop could spin forever while still emitting heartbeats.
            if data_channel.readyState != "open":
                return False
            sleep_count += 1
            if sleep_count % 10 == 0:
                logger.debug(
                    "Waiting for data channel buffer to drain. Data channel buffer size: %s",
                    data_channel.bufferedAmount,
                )
            if heartbeat_callback:
                heartbeat_callback()
            await asyncio.sleep(WEBRTC_DATA_CHANNEL_BUFFER_DRAINING_DELAY)
        return data_channel.readyState == "open"

    if not await wait_for_buffer_drain():
        logger.warning("Channel closed while sending frame %s", frame_id)
        return

    total_chunks = (
        len(payload_bytes) + chunk_size - 1
    ) // chunk_size  # Ceiling division

    if frame_id % 100 == 1:
        logger.info(
            f"Sending response for frame {frame_id}: {total_chunks} chunk(s), {len(payload_bytes)} bytes"
        )

    # memoryview slicing avoids copying the payload for every chunk.
    view = memoryview(payload_bytes)
    for chunk_index in range(total_chunks):
        # The helper re-checks readyState after any wait, so the channel is
        # known to be open immediately before send() is called.
        if not await wait_for_buffer_drain():
            logger.warning("Channel closed while sending frame %s", frame_id)
            return

        start = chunk_index * chunk_size
        end = min(start + chunk_size, len(payload_bytes))
        chunk_data = view[start:end]

        message = create_chunked_binary_message(
            frame_id, chunk_index, total_chunks, chunk_data
        )
        data_channel.send(message)
        # Yield to the event loop so the transport can flush between chunks.
        await asyncio.sleep(0)

Webrtc

OnDemandVideoTrack ¶

VideoFileUploadHandler ¶

cleanup() async ¶

handle_chunk(chunk_index, total_chunks, data) ¶

try_start_processing() ¶

VideoFrameProcessor ¶

process_frames_data_only() async ¶

record_ack(ack) ¶

serialize_outputs_sync(fields_to_send, workflow_output, data_output_mode) staticmethod ¶

VideoTransformTrackWithLoop ¶

create_chunked_binary_message(frame_id, chunk_index, total_chunks, payload) ¶

send_chunked_data(data_channel, frame_id, payload_bytes, chunk_size=CHUNK_SIZE, heartbeat_callback=None) async ¶

`OnDemandVideoTrack` ¶

`VideoFileUploadHandler` ¶

`cleanup()` `async` ¶

`handle_chunk(chunk_index, total_chunks, data)` ¶

`try_start_processing()` ¶

`VideoFrameProcessor` ¶

`process_frames_data_only()` `async` ¶

`record_ack(ack)` ¶

`serialize_outputs_sync(fields_to_send, workflow_output, data_output_mode)` `staticmethod` ¶

`VideoTransformTrackWithLoop` ¶

`create_chunked_binary_message(frame_id, chunk_index, total_chunks, payload)` ¶

`send_chunked_data(data_channel, frame_id, payload_bytes, chunk_size=CHUNK_SIZE, heartbeat_callback=None)` `async` ¶