Skip to content

Video source

VideoConsumer

This class should be consumed as part of internal implementation. It provides abstraction around stream consumption strategies.

It must always be given the same video source for consecutive invocations, otherwise the internal state does not make sense.

Source code in inference/core/interfaces/camera/video_source.py
 741
 742
 743
 744
 745
 746
 747
 748
 749
 750
 751
 752
 753
 754
 755
 756
 757
 758
 759
 760
 761
 762
 763
 764
 765
 766
 767
 768
 769
 770
 771
 772
 773
 774
 775
 776
 777
 778
 779
 780
 781
 782
 783
 784
 785
 786
 787
 788
 789
 790
 791
 792
 793
 794
 795
 796
 797
 798
 799
 800
 801
 802
 803
 804
 805
 806
 807
 808
 809
 810
 811
 812
 813
 814
 815
 816
 817
 818
 819
 820
 821
 822
 823
 824
 825
 826
 827
 828
 829
 830
 831
 832
 833
 834
 835
 836
 837
 838
 839
 840
 841
 842
 843
 844
 845
 846
 847
 848
 849
 850
 851
 852
 853
 854
 855
 856
 857
 858
 859
 860
 861
 862
 863
 864
 865
 866
 867
 868
 869
 870
 871
 872
 873
 874
 875
 876
 877
 878
 879
 880
 881
 882
 883
 884
 885
 886
 887
 888
 889
 890
 891
 892
 893
 894
 895
 896
 897
 898
 899
 900
 901
 902
 903
 904
 905
 906
 907
 908
 909
 910
 911
 912
 913
 914
 915
 916
 917
 918
 919
 920
 921
 922
 923
 924
 925
 926
 927
 928
 929
 930
 931
 932
 933
 934
 935
 936
 937
 938
 939
 940
 941
 942
 943
 944
 945
 946
 947
 948
 949
 950
 951
 952
 953
 954
 955
 956
 957
 958
 959
 960
 961
 962
 963
 964
 965
 966
 967
 968
 969
 970
 971
 972
 973
 974
 975
 976
 977
 978
 979
 980
 981
 982
 983
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
class VideoConsumer:
    """
    This class should be consumed as part of internal implementation.
    It provides abstraction around stream consumption strategies: each call to
    `consume_frame(...)` grabs one frame from the source and decides whether
    that frame is decoded into the buffer or dropped (because of FPS
    sub-sampling, buffering being disallowed, the ADAPTIVE strategy, or a full
    buffer).

    It must always be given the same video source for consecutive invocations,
    otherwise the internal state (frame counter, pace monitors, cached source
    properties) does not make sense.
    """

    @classmethod
    def init(
        cls,
        buffer_filling_strategy: Optional[BufferFillingStrategy],
        adaptive_mode_stream_pace_tolerance: float,
        adaptive_mode_reader_pace_tolerance: float,
        minimum_adaptive_mode_samples: int,
        maximum_adaptive_frames_dropped_in_row: int,
        status_update_handlers: List[Callable[[StatusUpdate], None]],
        desired_fps: Optional[Union[float, int]] = None,
    ) -> "VideoConsumer":
        """
        Build a consumer together with the three pace monitors it needs.

        Args:
            buffer_filling_strategy: Strategy to use when filling the buffer;
                `None` postpones the choice until `reset(...)` is called.
            adaptive_mode_stream_pace_tolerance: Maximum accepted difference
                (announced FPS - grabbing FPS) before ADAPTIVE mode drops a frame.
            adaptive_mode_reader_pace_tolerance: Maximum accepted difference
                (decoding FPS - reader FPS) before ADAPTIVE mode drops a frame.
            minimum_adaptive_mode_samples: Number of monitor samples that must
                be exceeded before ADAPTIVE mode may drop frames. Values
                below 2 are raised to 2.
            maximum_adaptive_frames_dropped_in_row: Upper bound for consecutive
                ADAPTIVE drops.
            status_update_handlers: Handlers notified about frame events.
            desired_fps: Optional target frame rate used for sub-sampling.

        Returns: Instance of `VideoConsumer` class
        """
        minimum_adaptive_mode_samples = max(minimum_adaptive_mode_samples, 2)
        # Every monitor keeps a window ten times larger than the minimum
        # number of samples required by the adaptive mode.
        monitor_window = 10 * minimum_adaptive_mode_samples
        reader_pace_monitor = sv.FPSMonitor(sample_size=monitor_window)
        stream_consumption_pace_monitor = sv.FPSMonitor(sample_size=monitor_window)
        decoding_pace_monitor = sv.FPSMonitor(sample_size=monitor_window)
        return cls(
            buffer_filling_strategy=buffer_filling_strategy,
            adaptive_mode_stream_pace_tolerance=adaptive_mode_stream_pace_tolerance,
            adaptive_mode_reader_pace_tolerance=adaptive_mode_reader_pace_tolerance,
            minimum_adaptive_mode_samples=minimum_adaptive_mode_samples,
            maximum_adaptive_frames_dropped_in_row=maximum_adaptive_frames_dropped_in_row,
            status_update_handlers=status_update_handlers,
            reader_pace_monitor=reader_pace_monitor,
            stream_consumption_pace_monitor=stream_consumption_pace_monitor,
            decoding_pace_monitor=decoding_pace_monitor,
            desired_fps=desired_fps,
        )

    def __init__(
        self,
        buffer_filling_strategy: Optional[BufferFillingStrategy],
        adaptive_mode_stream_pace_tolerance: float,
        adaptive_mode_reader_pace_tolerance: float,
        minimum_adaptive_mode_samples: int,
        maximum_adaptive_frames_dropped_in_row: int,
        status_update_handlers: List[Callable[[StatusUpdate], None]],
        reader_pace_monitor: sv.FPSMonitor,
        stream_consumption_pace_monitor: sv.FPSMonitor,
        decoding_pace_monitor: sv.FPSMonitor,
        desired_fps: Optional[Union[float, int]],
    ):
        """
        Store the configuration and monitors as given. Unlike `init(...)`, this
        constructor does not create monitors and does not adjust
        `minimum_adaptive_mode_samples`.
        """
        self._buffer_filling_strategy = buffer_filling_strategy
        # Number of frames successfully grabbed so far; doubles as frame id.
        self._frame_counter = 0
        self._adaptive_mode_stream_pace_tolerance = adaptive_mode_stream_pace_tolerance
        self._adaptive_mode_reader_pace_tolerance = adaptive_mode_reader_pace_tolerance
        self._minimum_adaptive_mode_samples = minimum_adaptive_mode_samples
        self._maximum_adaptive_frames_dropped_in_row = (
            maximum_adaptive_frames_dropped_in_row
        )
        self._adaptive_frames_dropped_in_row = 0
        self._reader_pace_monitor = reader_pace_monitor
        self._stream_consumption_pace_monitor = stream_consumption_pace_monitor
        self._decoding_pace_monitor = decoding_pace_monitor
        self._desired_fps = desired_fps
        # Both values are discovered lazily on the first `consume_frame(...)`.
        self._declared_source_fps = None
        self._is_source_video_file = None
        self._status_update_handlers = status_update_handlers
        # Id of the next frame that passes FPS sub-sampling.
        self._next_frame_from_video_to_accept = 1

    @property
    def buffer_filling_strategy(self) -> Optional[BufferFillingStrategy]:
        """Strategy currently in use (`None` until given or chosen in `reset`)."""
        return self._buffer_filling_strategy

    def reset(self, source_properties: SourceProperties) -> None:
        """
        Prepare the consumer for a (re)started source: pick the default buffer
        filling strategy if none was configured, clear all pace monitors and
        the adaptive drop counter, and make the very next grabbed frame
        eligible to pass FPS sub-sampling.
        """
        if source_properties.is_file:
            self._set_file_mode_buffering_strategies()
        else:
            self._set_stream_mode_buffering_strategies()
        self._reader_pace_monitor.reset()
        self.reset_stream_consumption_pace()
        self._decoding_pace_monitor.reset()
        self._adaptive_frames_dropped_in_row = 0
        self._next_frame_from_video_to_accept = self._frame_counter + 1

    def reset_stream_consumption_pace(self) -> None:
        """Forget all samples of the frame grabbing pace."""
        self._stream_consumption_pace_monitor.reset()

    def notify_frame_consumed(self) -> None:
        """Register that the reader took a frame (feeds the reader pace monitor)."""
        self._reader_pace_monitor.tick()

    def consume_frame(
        self,
        video: VideoFrameProducer,
        declared_source_fps: Optional[float],
        is_source_video_file: Optional[bool],
        buffer: Queue,
        frames_buffering_allowed: bool,
        source_id: Optional[int] = None,
    ) -> bool:
        """
        Grab a single frame from `video` and either decode it into `buffer`
        or drop it.

        Args:
            video: Producer to grab the frame from.
            declared_source_fps: FPS announced by the source (may be `None`).
            is_source_video_file: Flag telling if the source is a video file.
            buffer: Queue receiving decoded frames.
            frames_buffering_allowed: When `False` the frame is grabbed but
                never decoded.
            source_id: Optional source identifier attached to emitted updates.

        Returns: `False` if the frame could not be grabbed; `True` if the frame
            was deliberately dropped; otherwise the status returned by the
            decoding step.
        """
        if self._is_source_video_file is None:
            # First invocation - cache source properties for FPS sub-sampling.
            source_properties = video.discover_source_properties()
            self._is_source_video_file = source_properties.is_file
            self._declared_source_fps = source_properties.fps
        frame_timestamp = datetime.now()
        success = video.grab()
        # The grab attempt is counted even if it failed.
        self._stream_consumption_pace_monitor.tick()
        if not success:
            return False
        self._frame_counter += 1
        send_video_source_status_update(
            severity=UpdateSeverity.DEBUG,
            event_type=FRAME_CAPTURED_EVENT,
            payload={
                "frame_timestamp": frame_timestamp,
                "frame_id": self._frame_counter,
                "source_id": source_id,
            },
            status_update_handlers=self._status_update_handlers,
        )
        measured_source_fps = declared_source_fps
        if not is_source_video_file:
            # For streams the real pace is what we observe while grabbing.
            measured_source_fps = self._read_monitor_fps(
                self._stream_consumption_pace_monitor
            )
        if self._video_fps_should_be_sub_sampled():
            return True
        return self._consume_stream_frame(
            video=video,
            declared_source_fps=declared_source_fps,
            measured_source_fps=measured_source_fps,
            is_source_video_file=is_source_video_file,
            frame_timestamp=frame_timestamp,
            buffer=buffer,
            frames_buffering_allowed=frames_buffering_allowed,
            source_id=source_id,
        )

    @staticmethod
    def _read_monitor_fps(fps_monitor: sv.FPSMonitor) -> float:
        """
        Read the current FPS from a monitor, using the `fps` attribute when the
        monitor exposes one and falling back to calling the monitor otherwise.
        """
        if hasattr(fps_monitor, "fps"):
            return fps_monitor.fps
        return fps_monitor()

    def _set_file_mode_buffering_strategies(self) -> None:
        """Default to WAIT for video files unless a strategy is already set."""
        if self._buffer_filling_strategy is None:
            self._buffer_filling_strategy = BufferFillingStrategy.WAIT

    def _set_stream_mode_buffering_strategies(self) -> None:
        """Default to ADAPTIVE_DROP_OLDEST for streams unless already set."""
        if self._buffer_filling_strategy is None:
            self._buffer_filling_strategy = BufferFillingStrategy.ADAPTIVE_DROP_OLDEST

    def _video_fps_should_be_sub_sampled(self) -> bool:
        """
        Decide if the frame that was just grabbed must be skipped to bring the
        source down to `desired_fps`.

        Returns: `True` when the frame should be skipped, `False` when it
            should be processed (always `False` if no `desired_fps` is set).
        """
        if self._desired_fps is None:
            return False
        if self._frame_counter != self._next_frame_from_video_to_accept:
            # skipping frame - no need to estimate the source FPS at all
            return True
        stride = calculate_video_file_stride(
            actual_fps=self._estimate_source_fps(),
            desired_fps=self._desired_fps,
        )
        self._next_frame_from_video_to_accept += stride
        return False

    def _estimate_source_fps(self) -> Optional[float]:
        """
        Return the FPS used to compute the sub-sampling stride: the FPS
        declared by the source for files, and for streams the measured
        grabbing pace once the monitor window is at least 90% full (the
        declared FPS is used before that).
        """
        if self._is_source_video_file:
            return self._declared_source_fps
        timestamps = self._stream_consumption_pace_monitor.all_timestamps
        if len(timestamps) / timestamps.maxlen < 0.9:
            return self._declared_source_fps
        return self._read_monitor_fps(self._stream_consumption_pace_monitor)

    def _report_frame_drop(
        self,
        frame_timestamp: datetime,
        cause: str,
        source_id: Optional[int],
    ) -> None:
        """Emit a frame-drop status update for the current frame."""
        send_frame_drop_update(
            frame_timestamp=frame_timestamp,
            frame_id=self._frame_counter,
            cause=cause,
            status_update_handlers=self._status_update_handlers,
            source_id=source_id,
        )

    def _consume_stream_frame(
        self,
        video: VideoFrameProducer,
        declared_source_fps: Optional[float],
        measured_source_fps: Optional[float],
        is_source_video_file: Optional[bool],
        frame_timestamp: datetime,
        buffer: Queue,
        frames_buffering_allowed: bool,
        source_id: Optional[int],
    ) -> bool:
        """
        Apply the buffer filling strategy to an already grabbed frame.

        Returns: boolean flag with success status (`True` also when the frame
            was dropped on purpose)
        """
        if not frames_buffering_allowed:
            self._report_frame_drop(
                frame_timestamp=frame_timestamp,
                cause="Buffering not allowed at the moment",
                source_id=source_id,
            )
            return True
        if self._frame_should_be_adaptively_dropped(
            declared_source_fps=declared_source_fps
        ):
            self._adaptive_frames_dropped_in_row += 1
            self._report_frame_drop(
                frame_timestamp=frame_timestamp,
                cause="ADAPTIVE strategy",
                source_id=source_id,
            )
            return True
        self._adaptive_frames_dropped_in_row = 0
        # WAIT decodes regardless of buffer occupancy; any waiting for a free
        # slot is presumably done inside `decode_video_frame_to_buffer` -
        # TODO confirm.
        if (
            not buffer.full()
            or self._buffer_filling_strategy is BufferFillingStrategy.WAIT
        ):
            return decode_video_frame_to_buffer(
                frame_timestamp=frame_timestamp,
                frame_id=self._frame_counter,
                video=video,
                buffer=buffer,
                decoding_pace_monitor=self._decoding_pace_monitor,
                source_id=source_id,
                declared_source_fps=declared_source_fps,
                measured_source_fps=measured_source_fps,
                comes_from_video_file=is_source_video_file,
            )
        if self._buffer_filling_strategy in DROP_OLDEST_STRATEGIES:
            return self._process_stream_frame_dropping_oldest(
                frame_timestamp=frame_timestamp,
                video=video,
                buffer=buffer,
                source_id=source_id,
                is_video_file=is_source_video_file,
                declared_source_fps=declared_source_fps,
                measured_source_fps=measured_source_fps,
            )
        self._report_frame_drop(
            frame_timestamp=frame_timestamp,
            cause="DROP_LATEST strategy",
            source_id=source_id,
        )
        return True

    def _has_too_few_samples(self, fps_monitor: sv.FPSMonitor) -> bool:
        """Tell if a monitor has not yet exceeded the minimum sample count."""
        return len(fps_monitor.all_timestamps) <= self._minimum_adaptive_mode_samples

    def _frame_should_be_adaptively_dropped(
        self, declared_source_fps: Optional[float]
    ) -> bool:
        """
        Decide if the ADAPTIVE strategy wants the current frame dropped.

        A frame is dropped when frames are grabbed slower than the announced
        stream FPS, or when decoding runs faster than the reader consumes -
        in both cases by more than the configured tolerance. Nothing is
        dropped outside ADAPTIVE strategies, once the limit of consecutive
        drops is reached, or while the monitors lack enough samples.
        """
        if self._buffer_filling_strategy not in ADAPTIVE_STRATEGIES:
            return False
        if (
            self._adaptive_frames_dropped_in_row
            >= self._maximum_adaptive_frames_dropped_in_row
        ):
            return False
        if self._has_too_few_samples(self._stream_consumption_pace_monitor):
            # not enough observations
            return False
        stream_consumption_pace = self._read_monitor_fps(
            self._stream_consumption_pace_monitor
        )
        # Without a usable declared FPS the measured pace is compared with
        # itself, which never triggers the stream-pace drop.
        announced_stream_fps = stream_consumption_pace
        if declared_source_fps is not None and declared_source_fps > 0:
            announced_stream_fps = declared_source_fps
        if (
            announced_stream_fps - stream_consumption_pace
            > self._adaptive_mode_stream_pace_tolerance
        ):
            # cannot keep up with stream emission
            return True
        if self._has_too_few_samples(
            self._reader_pace_monitor
        ) or self._has_too_few_samples(self._decoding_pace_monitor):
            # not enough observations
            return False
        actual_reader_pace = get_fps_if_tick_happens_now(
            fps_monitor=self._reader_pace_monitor
        )
        decoding_pace = self._read_monitor_fps(self._decoding_pace_monitor)
        if (
            decoding_pace - actual_reader_pace
            > self._adaptive_mode_reader_pace_tolerance
        ):
            # we are too fast for the reader - time to save compute on decoding
            return True
        return False

    def _process_stream_frame_dropping_oldest(
        self,
        frame_timestamp: datetime,
        video: VideoFrameProducer,
        buffer: Queue,
        source_id: Optional[int],
        is_video_file: bool,
        declared_source_fps: Optional[float] = None,
        measured_source_fps: Optional[float] = None,
    ) -> bool:
        """
        Make room in a full buffer by dropping a single buffered frame, then
        decode the current frame into the buffer.

        `declared_source_fps` and `measured_source_fps` are forwarded to the
        decoding step so that frames buffered through this path carry the same
        FPS metadata as frames buffered while the buffer had free space.

        Returns: status returned by the decoding step
        """
        drop_single_frame_from_buffer(
            buffer=buffer,
            cause="DROP_OLDEST strategy",
            status_update_handlers=self._status_update_handlers,
        )
        return decode_video_frame_to_buffer(
            frame_timestamp=frame_timestamp,
            frame_id=self._frame_counter,
            video=video,
            buffer=buffer,
            decoding_pace_monitor=self._decoding_pace_monitor,
            source_id=source_id,
            declared_source_fps=declared_source_fps,
            measured_source_fps=measured_source_fps,
            comes_from_video_file=is_video_file,
        )

VideoSource

Source code in inference/core/interfaces/camera/video_source.py
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
class VideoSource:
    @classmethod
    def init(
        cls,
        video_reference: VideoSourceIdentifier,
        buffer_size: int = DEFAULT_BUFFER_SIZE,
        status_update_handlers: Optional[List[Callable[[StatusUpdate], None]]] = None,
        buffer_filling_strategy: Optional[BufferFillingStrategy] = None,
        buffer_consumption_strategy: Optional[BufferConsumptionStrategy] = None,
        adaptive_mode_stream_pace_tolerance: float = DEFAULT_ADAPTIVE_MODE_STREAM_PACE_TOLERANCE,
        adaptive_mode_reader_pace_tolerance: float = DEFAULT_ADAPTIVE_MODE_READER_PACE_TOLERANCE,
        minimum_adaptive_mode_samples: int = DEFAULT_MINIMUM_ADAPTIVE_MODE_SAMPLES,
        maximum_adaptive_frames_dropped_in_row: int = DEFAULT_MAXIMUM_ADAPTIVE_FRAMES_DROPPED_IN_ROW,
        video_source_properties: Optional[Dict[str, float]] = None,
        source_id: Optional[int] = None,
        desired_fps: Optional[Union[float, int]] = None,
    ):
        """
        `VideoSource` is an abstraction over video sources - both video files and on-line streams - that
        can be consumed and used by other components of the `inference` library.

        Before digging into the details of the class behaviour, it is advised to get familiar with the
        following concepts and implementation assumptions:

        1. A video file can be accessed from local (or remote) storage at a pace dictated by the processing
            capabilities of the consumer. If processing is faster than the frame rate of the video, the work
            may finish in less time than the video playback would take. In the opposite case the consumer
            may freely decode and process frames at its own pace, with no risk of failures caused by
            temporal dependencies - this is the classical offline processing example.
        2. Video streams, on the other hand, usually need to be consumed at a pace close to their frame rate -
            in other words, this is the on-line processing example. A consumer faster than the incoming
            stream cannot use its resources to the full extent, as it would need data that is not delivered
            yet. A slow consumer, however, may not be able to process everything on time and keep up with
            the stream - some frames need to be dropped. Otherwise the consumer could, over time, go out of
            sync with the stream, causing decoding failures or unpredictable behaviour.

        To fit both types of sources, `VideoSource` introduces the concept of buffered decoding of the
        video stream (as on YouTube - the player buffers some frames that are soon to be displayed).
        The way the buffer is filled and consumed dictates the behaviour of `VideoSource`.

        Starting with `BufferFillingStrategy` - there are 3 basic options:
        * WAIT: in case of slow video consumption, when the buffer is full, `VideoSource` waits for
        an empty spot in the buffer before the next frame is processed - suitable when we want to make
        sure that EACH FRAME of the video is processed
        * DROP_OLDEST: when the buffer is full, the frame that has been sitting there the longest is
        dropped - suitable when we want to process the most recent frames possible
        * DROP_LATEST: when the buffer is full, the newly decoded frame is dropped - useful when
        processing performance drops are expected, but we would like to consume portions of the video
        that are locally smooth - this is probably the least common use-case.

        On top of that there are two ADAPTIVE strategies: ADAPTIVE_DROP_OLDEST and ADAPTIVE_DROP_LATEST,
        which are equivalent to DROP_OLDEST and DROP_LATEST with the adaptive decoding feature enabled.
        That mode is described further below.

        Naturally, decoded frames must also be consumed. `VideoSource` provides a handy interface for
        reading frames of a video source by a SINGLE consumer. The consumption strategy can also be
        dictated, via `BufferConsumptionStrategy`:
        * LAZY - consume all the frames from the decoding buffer one-by-one
        * EAGER - at each readout take all frames already buffered and drop all of them apart from the
        most recent one

        In consequence there are various combinations of `BufferFillingStrategy` and
        `BufferConsumptionStrategy`. The most popular would be:
        * `BufferFillingStrategy.WAIT` and `BufferConsumptionStrategy.LAZY` - to always decode and process
            each and every frame of the source (useful while processing video files - and the default
            behaviour enforced by `inference` if there is no explicit configuration)
        * `BufferFillingStrategy.DROP_OLDEST` and `BufferConsumptionStrategy.EAGER` - to always process the
            most recent frames of the source (useful while processing video streams when low latency
            [real-time experience] is required - the ADAPTIVE version of this is the default for streams)

        ADAPTIVE strategies were introduced to handle corner-cases in which the consumer hardware is not
        capable of consuming the video stream and processing frames at the same time (for instance Nvidia
        Jetson devices running processing against hi-res streams with a high FPS ratio). They treat the
        buffer in nearly the same way as the `DROP_OLDEST` and `DROP_LATEST` strategies, but two more
        conditions may cause a frame drop:
        * announced rate of the source - which in fact dictates the pace of grabbing frames from the
        incoming stream that MUST be met by the consumer to avoid strange decoding issues causing the
        decoder to fail - if the pace of frame grabbing deviates too much, decoding is postponed and
        frames are dropped in order to grab the next ones sooner
        * consumption rate - in a resource-constrained environment not only decoding is problematic from
        the performance perspective, but also heavy processing. If the consumer is not quick enough,
        allocating more resources to decode frames that may never be processed is a waste. That is why -
        if decoding happens more frequently than frames are consumed - ADAPTIVE mode slows decoding down
        and more frames are just grabbed and dropped on the floor.
        ADAPTIVE mode increases latency slightly, but may be the only way to operate in some cases.
        The behaviour of adaptive mode - including the maximum acceptable deviation of the frame grabbing
        pace from the source pace, the reader pace tolerance and the maximum number of consecutive frames
        dropped in ADAPTIVE mode - is configurable by clients, with reasonable defaults being set.

        `VideoSource` emits events regarding its activity, which can be intercepted by custom handlers.
        Take into account that handlers are always executed in the context of the thread invoking them
        (and should be fast to complete, otherwise they may block the flow of stream consumption). All
        errors raised are emitted as logger warnings only.

        The `VideoSource` implementation is multithreaded by nature - one thread decodes the video and
        a different one consumes it and manipulates the source state. The user interface is thread-safe,
        although the stream is meant to be consumed by a single thread only.

        ENV variables involved:
        * VIDEO_SOURCE_BUFFER_SIZE - default: 64
        * VIDEO_SOURCE_ADAPTIVE_MODE_STREAM_PACE_TOLERANCE - default: 0.1
        * VIDEO_SOURCE_ADAPTIVE_MODE_READER_PACE_TOLERANCE - default: 5.0
        * VIDEO_SOURCE_MINIMUM_ADAPTIVE_MODE_SAMPLES - default: 10
        * VIDEO_SOURCE_MAXIMUM_ADAPTIVE_FRAMES_DROPPED_IN_ROW - default: 16

        As an `inference` user, please use the .init() method instead of the constructor to instantiate
        objects.

        Args:
            video_reference (Union[str, int]): Either str with file or stream reference, or int representing
                device ID
            buffer_size (int): Size of the decoding buffer
            status_update_handlers (Optional[List[Callable[[StatusUpdate], None]]]): List of handlers for
                status updates (an empty list is used when not given)
            buffer_filling_strategy (Optional[BufferFillingStrategy]): Settings for buffer filling strategy -
                if not given, an automatic choice based on the source type is applied
            buffer_consumption_strategy (Optional[BufferConsumptionStrategy]): Settings for buffer consumption
                strategy - if not given, an automatic choice based on the source type is applied
            adaptive_mode_stream_pace_tolerance (float): Maximum deviation between the frame grabbing pace and
                the stream pace that will not trigger an adaptive mode frame drop
            adaptive_mode_reader_pace_tolerance (float): Maximum deviation between the decoding pace and the
                stream consumption pace that will not trigger an adaptive mode frame drop
            minimum_adaptive_mode_samples (int): Minimal number of frames used to establish the actual pace
                of processing before adaptive mode can drop any frame
            maximum_adaptive_frames_dropped_in_row (int): Maximum number of frames dropped in a row due to
                the adaptive strategy
            video_source_properties (Optional[dict[str, float]]): Optional dictionary with video source
                properties corresponding to OpenCV VideoCapture properties cv2.CAP_PROP_* to set values for
                the video source.
            source_id (Optional[int]): Optional identifier of the video source - mainly useful to recognise
                a specific source when multiple ones are in use. The identifier will be added to emitted
                frames and updates. It is advised to keep it unique within all sources in use.
            desired_fps (Optional[Union[float, int]]): Optional target frame rate, handed over to the
                `VideoConsumer`, which uses it to sub-sample grabbed frames. No sub-sampling happens when
                not given.

        Returns: Instance of `VideoSource` class
        """
        decoded_frames_queue = Queue(maxsize=buffer_size)
        # Consumer and source must share the very same list of handlers.
        handlers = [] if status_update_handlers is None else status_update_handlers
        consumer = VideoConsumer.init(
            buffer_filling_strategy=buffer_filling_strategy,
            adaptive_mode_stream_pace_tolerance=adaptive_mode_stream_pace_tolerance,
            adaptive_mode_reader_pace_tolerance=adaptive_mode_reader_pace_tolerance,
            minimum_adaptive_mode_samples=minimum_adaptive_mode_samples,
            maximum_adaptive_frames_dropped_in_row=maximum_adaptive_frames_dropped_in_row,
            status_update_handlers=handlers,
            desired_fps=desired_fps,
        )
        return cls(
            stream_reference=video_reference,
            frames_buffer=decoded_frames_queue,
            status_update_handlers=handlers,
            buffer_consumption_strategy=buffer_consumption_strategy,
            video_consumer=consumer,
            video_source_properties=video_source_properties,
            source_id=source_id,
        )

    def __init__(
        self,
        stream_reference: VideoSourceIdentifier,
        frames_buffer: Queue,
        status_update_handlers: List[Callable[[StatusUpdate], None]],
        buffer_consumption_strategy: Optional[BufferConsumptionStrategy],
        video_consumer: "VideoConsumer",
        video_source_properties: Optional[Dict[str, float]],
        source_id: Optional[int],
    ):
        """
        Wire together already-built collaborators. The source is left in the
        `NOT_STARTED` state; no video producer or consumption thread is created
        here.
        """
        # Identity and configuration handed in by the caller.
        self._source_id = source_id
        self._stream_reference = stream_reference
        self._video_source_properties = (
            video_source_properties if video_source_properties else {}
        )
        self._buffer_consumption_strategy = buffer_consumption_strategy
        self._status_update_handlers = status_update_handlers

        # Collaborators used to decode and buffer frames.
        self._frames_buffer = frames_buffer
        self._video_consumer = video_consumer

        # Runtime state - nothing is opened or running yet.
        self._video: Optional[VideoFrameProducer] = None
        self._source_properties: Optional[SourceProperties] = None
        self._stream_consumption_thread: Optional[Thread] = None
        self._state = StreamState.NOT_STARTED
        self._frames_buffering_allowed = True

        # Synchronisation primitives.
        self._playback_allowed = Event()
        self._state_change_lock = Lock()

    @property
    def source_id(self) -> Optional[int]:
        return self._source_id

    @lock_state_transition
    def restart(
        self, wait_on_frames_consumption: bool = True, purge_frames_buffer: bool = False
    ) -> None:
        """
        Restart consumption of the source. Allowed in states:
        [MUTED, RUNNING, PAUSED, ENDED, ERROR].
        End state:
        * INITIALISING - that should change into RUNNING once first frame is ready to be grabbed
        * ERROR - if it was not possible to connect with source

        Thread safe - only one transition of states possible at the time.

        Args:
            wait_on_frames_consumption (bool): Flag telling if all frames from buffer must be consumed before
                completion of this operation.
            purge_frames_buffer (bool): Forwarded unchanged to `_restart(...)`; presumably tells whether
                frames still sitting in the buffer should be discarded - confirm against `_restart`.

        Returns: None
        Throws:
            * StreamOperationNotAllowedError: if executed in context of incorrect state of the source
            * SourceConnectionError: if source cannot be connected
        """
        if self._state in RESTART_ELIGIBLE_STATES:
            self._restart(
                wait_on_frames_consumption=wait_on_frames_consumption,
                purge_frames_buffer=purge_frames_buffer,
            )
            return None
        raise StreamOperationNotAllowedError(
            f"Could not RESTART stream in state: {self._state}"
        )

    @lock_state_transition
    def start(self) -> None:
        """
        Start consumption of the source. Allowed in states:
        [NOT_STARTED, ENDED, (RESTARTING - which is internal state only)]
        End state:
        * INITIALISING - that should change into RUNNING once first frame is ready to be grabbed
        * ERROR - if it was not possible to connect with source

        Thread safe - only one transition of states possible at the time.

        Returns: None
        Throws:
            * StreamOperationNotAllowedError: if executed in context of incorrect state of the source
            * SourceConnectionError: if source cannot be connected
        """
        if self._state in START_ELIGIBLE_STATES:
            self._start()
            return None
        raise StreamOperationNotAllowedError(
            f"Could not START stream in state: {self._state}"
        )

    @lock_state_transition
    def terminate(
        self, wait_on_frames_consumption: bool = True, purge_frames_buffer: bool = False
    ) -> None:
        """
        Terminate consumption of the source. Allowed in states:
        [MUTED, RUNNING, PAUSED, ENDED, ERROR, (RESTARTING - which is internal state only)]
        End state:
        * ENDED - indicating success of the process
        * ERROR - if error with processing occurred

        Must be used to properly dispose resources at the end.

        Thread safe - only one transition of states possible at the time.

        Args:
            wait_on_frames_consumption (bool): Flag telling if all frames from buffer must be consumed before
                completion of this operation.
            purge_frames_buffer (bool): Forwarded unchanged to `_terminate(...)`; presumably tells whether
                frames still sitting in the buffer should be discarded - confirm against `_terminate`.

        Returns: None
        Throws:
            * StreamOperationNotAllowedError: if executed in context of incorrect state of the source
        """
        if self._state in TERMINATE_ELIGIBLE_STATES:
            self._terminate(
                wait_on_frames_consumption=wait_on_frames_consumption,
                purge_frames_buffer=purge_frames_buffer,
            )
            return None
        raise StreamOperationNotAllowedError(
            f"Could not TERMINATE stream in state: {self._state}"
        )

    @lock_state_transition
    def pause(self) -> None:
        """
        Put source consumption on hold. While paused, the consumption thread waits and no new
        frames are consumed. Keeping an on-line stream paused for too long may cause stream
        disconnection.
        Eligible to be used in states:
        [RUNNING]
        End state:
        * PAUSED

        Thread safe - only one transition of states possible at the time.

        Returns: None
        Throws:
            * StreamOperationNotAllowedError: if executed in context of incorrect state of the source
        """
        if self._state in PAUSE_ELIGIBLE_STATES:
            self._pause()
            return None
        raise StreamOperationNotAllowedError(
            f"Could not PAUSE stream in state: {self._state}"
        )

    @lock_state_transition
    def mute(self) -> None:
        """
        Mute source consumption. Muting is the stream-friendly counterpart of pause: frames
        grabbing is not put on hold, only decoding and buffering of new frames is disallowed,
        so intermediate frames get dropped. May be also used against files, although arguably
        less useful.
        Eligible to be used in states:
        [RUNNING]
        End state:
        * MUTED

        Thread safe - only one transition of states possible at the time.

        Returns: None
        Throws:
            * StreamOperationNotAllowedError: if executed in context of incorrect state of the source
        """
        if self._state in MUTE_ELIGIBLE_STATES:
            self._mute()
            return None
        raise StreamOperationNotAllowedError(
            f"Could not MUTE stream in state: {self._state}"
        )

    @lock_state_transition
    def resume(self) -> None:
        """
        Bring the source back from pause or mute into running state.
        Eligible to be used in states:
        [PAUSED, MUTED]
        End state:
        * RUNNING

        Thread safe - only one transition of states possible at the time.

        Returns: None
        Throws:
            * StreamOperationNotAllowedError: if executed in context of incorrect state of the source
        """
        if self._state in RESUME_ELIGIBLE_STATES:
            self._resume()
            return None
        raise StreamOperationNotAllowedError(
            f"Could not RESUME stream in state: {self._state}"
        )

    def get_state(self) -> StreamState:
        """
        Report the state the `VideoSource` is currently in.

        Returns: StreamState
        """
        current_state = self._state
        return current_state

    def frame_ready(self) -> bool:
        """
        Tell whether the frames buffer currently holds anything for the consumer.

        Returns: boolean flag indicating frame readiness
        """
        buffer_is_empty = self._frames_buffer.empty()
        return not buffer_is_empty

    def read_frame(self, timeout: Optional[float] = None) -> Optional[VideoFrame]:
        """
        Fetch the next decoded frame from the frames buffer on behalf of the consumer.

        Args:
            timeout (Optional[float]): Passed as-is to `get_from_queue(...)`; presumably the maximum
                time to wait for a frame, with `None` meaning a blocking wait - verify against
                `get_from_queue`.

        Returns: VideoFrame object with decoded frame and its metadata, or None when
            `get_from_queue(...)` did not provide any element.
        Throws:
            * EndOfStreamError: when trying to get the frame from closed source.
        """
        # EAGER strategy asks the queue helper to purge the buffer on read.
        eager_consumption = (
            self._buffer_consumption_strategy is BufferConsumptionStrategy.EAGER
        )
        buffer_item: Optional[Union[VideoFrame, str]] = get_from_queue(
            queue=self._frames_buffer,
            on_successful_read=self._video_consumer.notify_frame_consumed,
            timeout=timeout,
            purge=eager_consumption,
        )
        if buffer_item is None:
            return None
        if buffer_item == POISON_PILL:
            # The consumption thread enqueues POISON_PILL once it stops producing frames.
            raise EndOfStreamError(
                "Attempted to retrieve frame from stream that already ended."
            )
        send_video_source_status_update(
            severity=UpdateSeverity.DEBUG,
            event_type=FRAME_CONSUMED_EVENT,
            payload={
                "frame_timestamp": buffer_item.frame_timestamp,
                "frame_id": buffer_item.frame_id,
                "source_id": buffer_item.source_id,
            },
            status_update_handlers=self._status_update_handlers,
        )
        return buffer_item

    def describe_source(self) -> SourceMetadata:
        """
        Assemble a `SourceMetadata` snapshot of this source: its properties, reference,
        buffer size, state, buffer strategies and identifier.

        A callable source reference is reported as its `str(...)` representation.

        Returns: SourceMetadata
        """
        source_reference = self._stream_reference
        if callable(source_reference):
            source_reference = str(source_reference)
        return SourceMetadata(
            source_properties=self._source_properties,
            source_reference=source_reference,
            buffer_size=self._frames_buffer.maxsize,
            state=self._state,
            buffer_filling_strategy=self._video_consumer.buffer_filling_strategy,
            buffer_consumption_strategy=self._buffer_consumption_strategy,
            source_id=self._source_id,
        )

    def _restart(
        self, wait_on_frames_consumption: bool = True, purge_frames_buffer: bool = False
    ) -> None:
        """
        Terminate the current consumption, reset per-run fields and start the source again.

        Args:
            wait_on_frames_consumption (bool): Forwarded to `_terminate(...)`.
            purge_frames_buffer (bool): Forwarded to `_terminate(...)`.

        Returns: None
        """
        self._terminate(
            purge_frames_buffer=purge_frames_buffer,
            wait_on_frames_consumption=wait_on_frames_consumption,
        )
        self._change_state(target_state=StreamState.RESTARTING)
        # Bring per-run fields back to their pre-start values before connecting again.
        self._source_properties: Optional[SourceProperties] = None
        self._video: Optional[VideoFrameProducer] = None
        self._frames_buffering_allowed = True
        self._playback_allowed = Event()
        self._start()

    def _start(self) -> None:
        """
        Connect to the source, discover its properties and spawn the consumption thread.

        The state is set to INITIALISING first; if the producer reports it is not opened, the
        state becomes ERROR and `SourceConnectionError` is raised.

        Returns: None
        Throws:
            * SourceConnectionError: if source cannot be connected
        """
        self._change_state(target_state=StreamState.INITIALISING)
        reference = self._stream_reference
        # A callable reference acts as a producer factory, anything else goes to OpenCV producer.
        self._video = (
            reference() if callable(reference) else CV2VideoFrameProducer(reference)
        )
        if not self._video.isOpened():
            self._change_state(target_state=StreamState.ERROR)
            raise SourceConnectionError(
                f"Cannot connect to video source under reference: {self._stream_reference}"
            )
        producer = self._video
        producer.initialize_source_properties(self._video_source_properties)
        discovered_properties = producer.discover_source_properties()
        self._source_properties = discovered_properties
        self._video_consumer.reset(source_properties=discovered_properties)
        if discovered_properties.is_file:
            self._set_file_mode_consumption_strategies()
        else:
            self._set_stream_mode_consumption_strategies()
        # Playback must be allowed before the thread starts, as the thread waits on this event.
        self._playback_allowed.set()
        consumption_thread = Thread(target=self._consume_video)
        self._stream_consumption_thread = consumption_thread
        consumption_thread.start()

    def _terminate(
        self, wait_on_frames_consumption: bool, purge_frames_buffer: bool
    ) -> None:
        """
        Stop the consumption thread and settle the final state of the source.

        Args:
            wait_on_frames_consumption (bool): If set, `join()` is called on the frames buffer
                after the consumption thread is joined.
            purge_frames_buffer (bool): If set, the frames buffer is read with `purge=True`
                before the consumption thread is joined.

        Returns: None
        """
        if self._state in RESUME_ELIGIBLE_STATES:
            # Resume first, so that a paused consumption thread is not left waiting for playback.
            self._resume()
        state_before_termination = self._state
        self._change_state(target_state=StreamState.TERMINATING)
        if purge_frames_buffer:
            get_from_queue(queue=self._frames_buffer, timeout=0.0, purge=True)
        consumption_thread = self._stream_consumption_thread
        if consumption_thread is not None:
            consumption_thread.join()
        if wait_on_frames_consumption:
            self._frames_buffer.join()
        if state_before_termination is StreamState.ERROR:
            # Source that was in ERROR before termination is not moved to ENDED here.
            return None
        self._change_state(target_state=StreamState.ENDED)

    def _pause(self) -> None:
        """
        Disallow playback and mark the source as PAUSED.

        Returns: None
        """
        # The consumption thread waits on this event before consuming each frame.
        self._playback_allowed.clear()
        self._change_state(target_state=StreamState.PAUSED)

    def _mute(self) -> None:
        """
        Disallow frames buffering and mark the source as MUTED.

        Returns: None
        """
        # The flag is handed to the video consumer with every frame consumption call.
        self._frames_buffering_allowed = False
        self._change_state(target_state=StreamState.MUTED)

    def _resume(self) -> None:
        """
        Switch the source to RUNNING and undo whatever pause or mute has changed.

        Returns: None
        """
        state_before_resume = self._state
        self._change_state(target_state=StreamState.RUNNING)
        if state_before_resume is StreamState.PAUSED:
            # Pace statistics are reset before the consumption thread is let through again.
            self._video_consumer.reset_stream_consumption_pace()
            self._playback_allowed.set()
        elif state_before_resume is StreamState.MUTED:
            self._frames_buffering_allowed = True

    def _set_file_mode_consumption_strategies(self) -> None:
        """
        Default the buffer consumption strategy to LAZY when none was configured.

        Returns: None
        """
        if self._buffer_consumption_strategy is not None:
            # Explicitly configured strategy is never overridden.
            return None
        self._buffer_consumption_strategy = BufferConsumptionStrategy.LAZY

    def _set_stream_mode_consumption_strategies(self) -> None:
        """
        Default the buffer consumption strategy to EAGER when none was configured.

        Returns: None
        """
        if self._buffer_consumption_strategy is not None:
            # Explicitly configured strategy is never overridden.
            return None
        self._buffer_consumption_strategy = BufferConsumptionStrategy.EAGER

    def _consume_video(self) -> None:
        """
        Body of the stream consumption thread spawned by `_start()`.

        Emits status updates on start and finish, keeps asking the video consumer to consume
        frames into the frames buffer until the producer is no longer opened, termination is
        requested or frame consumption reports failure, then enqueues POISON_PILL, releases
        the producer and moves the source into ENDED state.

        Any exception raised on the way moves the source into ERROR state, is reported as
        a status update and is logged - it is not propagated out of the thread.

        Returns: None
        """
        send_video_source_status_update(
            severity=UpdateSeverity.INFO,
            event_type=VIDEO_CONSUMPTION_STARTED_EVENT,
            status_update_handlers=self._status_update_handlers,
            payload={"source_id": self._source_id},
        )
        logger.info(f"Video consumption started")
        try:
            # Termination may have been requested before the thread got to this point.
            if self._state is not StreamState.TERMINATING:
                self._change_state(target_state=StreamState.RUNNING)
            declared_source_fps, is_video_file = None, None
            if self._source_properties is not None:
                declared_source_fps = self._source_properties.fps
                is_video_file = self._source_properties.is_file
            while self._video.isOpened():
                if self._state is StreamState.TERMINATING:
                    break
                # Blocks while the source is paused (`_pause()` clears the event, `_resume()` sets it).
                self._playback_allowed.wait()
                success = self._video_consumer.consume_frame(
                    video=self._video,
                    declared_source_fps=declared_source_fps,
                    is_source_video_file=is_video_file,
                    buffer=self._frames_buffer,
                    frames_buffering_allowed=self._frames_buffering_allowed,
                    source_id=self._source_id,
                )
                if not success:
                    break
            # POISON_PILL makes `read_frame()` raise EndOfStreamError for the reader.
            self._frames_buffer.put(POISON_PILL)
            self._video.release()
            self._change_state(target_state=StreamState.ENDED)
            send_video_source_status_update(
                severity=UpdateSeverity.INFO,
                event_type=VIDEO_CONSUMPTION_FINISHED_EVENT,
                status_update_handlers=self._status_update_handlers,
                payload={"source_id": self._source_id},
            )
            logger.info(f"Video consumption finished")
        except Exception as error:
            # NOTE(review): on this path no POISON_PILL is enqueued and the producer is not
            # released - confirm that readers blocked in `read_frame()` are handled elsewhere.
            self._change_state(target_state=StreamState.ERROR)
            payload = {
                "source_id": self._source_id,
                "error_type": error.__class__.__name__,
                "error_message": str(error),
                "error_context": "stream_consumer_thread",
            }
            send_video_source_status_update(
                severity=UpdateSeverity.ERROR,
                event_type=SOURCE_ERROR_EVENT,
                payload=payload,
                status_update_handlers=self._status_update_handlers,
            )
            logger.exception("Encountered error in video consumption thread")

    def _change_state(self, target_state: StreamState) -> None:
        """
        Set the new state of the source and emit a state update event carrying both the
        previous and the new state.

        Args:
            target_state (StreamState): State to be set.

        Returns: None
        """
        # Values for the event are captured before the state gets overwritten.
        state_before_change = self._state
        source_id = self._source_id
        self._state = target_state
        send_video_source_status_update(
            severity=UpdateSeverity.INFO,
            event_type=SOURCE_STATE_UPDATE_EVENT,
            payload={
                "previous_state": state_before_change,
                "new_state": target_state,
                "source_id": source_id,
            },
            status_update_handlers=self._status_update_handlers,
        )

    def __iter__(self) -> "VideoSource":
        """
        Return the source itself as the iterator - frames are provided by `__next__()`.

        Returns: VideoSource
        """
        return self

    def __next__(self) -> VideoFrame:
        """
        Iterator protocol support - lets `VideoSource` be used directly in a `for` loop.
        Each step is a `read_frame()` call; end of stream finishes the iteration.

        Returns: VideoFrame

        Example:
            ```python
            source = VideoSource.init(video_reference="./some.mp4")
            source.start()

            for frame in source:
                 pass
            ```
        """
        try:
            next_frame = self.read_frame()
        except EndOfStreamError:
            raise StopIteration()
        return next_frame

__next__()

Method allowing VideoSource to be used conveniently as an iterator when reading frames

Returns: VideoFrame

Example
source = VideoSource.init(video_reference="./some.mp4")
source.start()

for frame in source:
     pass
Source code in inference/core/interfaces/camera/video_source.py
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
def __next__(self) -> VideoFrame:
    """
    Iterator protocol support - lets `VideoSource` be used directly in a `for` loop.
    Each step is a `read_frame()` call; end of stream finishes the iteration.

    Returns: VideoFrame

    Example:
        ```python
        source = VideoSource.init(video_reference="./some.mp4")
        source.start()

        for frame in source:
             pass
        ```
    """
    try:
        next_frame = self.read_frame()
    except EndOfStreamError:
        raise StopIteration()
    return next_frame

frame_ready()

Method to check if decoded frame is ready for consumer

Returns: boolean flag indicating frame readiness

Source code in inference/core/interfaces/camera/video_source.py
518
519
520
521
522
523
524
def frame_ready(self) -> bool:
    """
    Tell whether the frames buffer currently holds anything for the consumer.

    Returns: boolean flag indicating frame readiness
    """
    buffer_is_empty = self._frames_buffer.empty()
    return not buffer_is_empty

get_state()

Method to get current state of the VideoSource

Returns: StreamState

Source code in inference/core/interfaces/camera/video_source.py
510
511
512
513
514
515
516
def get_state(self) -> StreamState:
    """
    Report the state the `VideoSource` is currently in.

    Returns: StreamState
    """
    current_state = self._state
    return current_state

init(video_reference, buffer_size=DEFAULT_BUFFER_SIZE, status_update_handlers=None, buffer_filling_strategy=None, buffer_consumption_strategy=None, adaptive_mode_stream_pace_tolerance=DEFAULT_ADAPTIVE_MODE_STREAM_PACE_TOLERANCE, adaptive_mode_reader_pace_tolerance=DEFAULT_ADAPTIVE_MODE_READER_PACE_TOLERANCE, minimum_adaptive_mode_samples=DEFAULT_MINIMUM_ADAPTIVE_MODE_SAMPLES, maximum_adaptive_frames_dropped_in_row=DEFAULT_MAXIMUM_ADAPTIVE_FRAMES_DROPPED_IN_ROW, video_source_properties=None, source_id=None, desired_fps=None) classmethod

This class is meant to represent abstraction over video sources - both video files and on-line streams that are possible to be consumed and used by other components of inference library.

Before digging into details of the class behaviour, it is advised to familiarise with the following concepts and implementation assumptions:

  1. Video file can be accessed from local (or remote) storage by the consumer in a pace dictated by its processing capabilities. If processing is faster than the frame rate of video, operations may be executed in a time shorter than the time of video playback. In the opposite case - consumer may freely decode and process frames in its own pace, without risk for failures due to temporal dependencies of processing - this is classical offline processing example.
  2. Video streams, on the other hand, usually need to be consumed in a pace near to their frame-rate - in other words - this is on-line processing example. Consumer being faster than incoming stream frames cannot utilise its resources to the full extent as not-yet-delivered data would be needed. Slow consumer, however, may not be able to process everything on time and to keep up with the pace of stream - some frames would need to be dropped. Otherwise - over time, consumer could go out of sync with the stream causing decoding failures or unpredictable behavior.

To fit those two types of video sources, VideoSource introduces the concept of buffered decoding of video stream (like at the YouTube - player buffers some frames that are soon to be displayed). The way on how buffer is filled and consumed dictates the behavior of VideoSource.

Starting from BufferFillingStrategy - we have 3 basic options:

  * WAIT: in case of slow video consumption, when buffer is full - VideoSource will wait for the empty spot in buffer before next frame will be processed - this is suitable in cases when we want to ensure EACH FRAME of the video to be processed
  * DROP_OLDEST: when buffer is full, the frame that sits there for the longest time will be dropped - this is suitable for cases when we want to process the most recent frames possible
  * DROP_LATEST: when buffer is full, the newly decoded frame is dropped - useful in cases when it is expected to have processing performance drops, but we would like to consume portions of video that are locally smooth - but this is probably the least common use-case.

On top of that - there are two ADAPTIVE strategies: ADAPTIVE_DROP_OLDEST and ADAPTIVE_DROP_LATEST, which are equivalent to DROP_OLDEST and DROP_LATEST with adaptive decoding feature enabled. The notion of that mode will be described later.

Naturally, decoded frames must also be consumed. VideoSource provides a handy interface for reading the frames of a video source by a SINGLE consumer. Consumption strategy can also be dictated via BufferConsumptionStrategy:

  * LAZY - consume all the frames from decoding buffer one-by-one
  * EAGER - at each readout - take all frames already buffered, drop all of them apart from the most recent

In consequence - there are various combinations of BufferFillingStrategy and BufferConsumptionStrategy. The most popular would be:

  * BufferFillingStrategy.WAIT and BufferConsumptionStrategy.LAZY - to always decode and process each and every frame of the source (useful while processing video files - and default behaviour enforced by inference if there is no explicit configuration)
  * BufferFillingStrategy.DROP_OLDEST and BufferConsumptionStrategy.EAGER - to always process the most recent frames of source (useful while processing video streams when low latency [real-time experience] is required - ADAPTIVE version of this is default for streams)

ADAPTIVE strategies were introduced to handle corner-cases, when consumer hardware is not capable to consume video stream and process frames at the same time (for instance - Nvidia Jetson devices running processing against hi-res streams with high FPS ratio). It acts with buffer in nearly the same way as DROP_OLDEST and DROP_LATEST strategies, but there are two more conditions that may influence frame drop: * announced rate of source - which in fact dictate the pace of frames grabbing from incoming stream that MUST be met by consumer to avoid strange decoding issues causing decoder to fail - if the pace of frame grabbing deviates too much - decoding will be postponed, and frames dropped to grab next ones sooner * consumption rate - in resource constraints environment, not only decoding is problematic from the performance perspective - but also heavy processing. If consumer is not quick enough - allocating more useful resources for decoding frames that may never be processed is a waste. That's why - if decoding happens more frequently than consumption of frame - ADAPTIVE mode causes decoding to be done in a slower pace and more frames are just grabbed and dropped on the floor. ADAPTIVE mode increases latency slightly, but may be the only way to operate in some cases. Behaviour of adaptive mode, including the maximum acceptable deviations of frames grabbing pace from source, reader pace and maximum number of consecutive frames dropped in ADAPTIVE mode are configurable by clients, with reasonable defaults being set.

VideoSource emits events regarding its activity - which can be intercepted by custom handlers. Take into account that they are always executed in context of thread invoking them (and should be fast to complete, otherwise may block the flow of stream consumption). All errors raised will be emitted as logger warnings only.

VideoSource implementation is naturally multithreaded, with one thread decoding the video and a different one consuming it and manipulating the source state. Implementation of the user interface is thread-safe, although the stream is meant to be consumed by a single thread only.

ENV variables involved:

  * VIDEO_SOURCE_BUFFER_SIZE - default: 64
  * VIDEO_SOURCE_ADAPTIVE_MODE_STREAM_PACE_TOLERANCE - default: 0.1
  * VIDEO_SOURCE_ADAPTIVE_MODE_READER_PACE_TOLERANCE - default: 5.0
  * VIDEO_SOURCE_MINIMUM_ADAPTIVE_MODE_SAMPLES - default: 10
  * VIDEO_SOURCE_MAXIMUM_ADAPTIVE_FRAMES_DROPPED_IN_ROW - default: 16

As an inference user, please use .init() method instead of constructor to instantiate objects.

Parameters:

Name Type Description Default
video_reference Union[str, int]

Either str with file or stream reference, or int representing device ID

required
buffer_size int

size of decoding buffer

DEFAULT_BUFFER_SIZE
status_update_handlers Optional[List[Callable[[StatusUpdate], None]]]

List of handlers for status updates

None
buffer_filling_strategy Optional[BufferFillingStrategy]

Settings for buffer filling strategy - if not given - automatic choice regarding source type will be applied

None
buffer_consumption_strategy Optional[BufferConsumptionStrategy]

Settings for buffer consumption strategy, if not given - automatic choice regarding source type will be applied

None
adaptive_mode_stream_pace_tolerance float

Maximum deviation between frames grabbing pace and stream pace that will not trigger adaptive mode frame drop

DEFAULT_ADAPTIVE_MODE_STREAM_PACE_TOLERANCE
adaptive_mode_reader_pace_tolerance float

Maximum deviation between decoding pace and stream consumption pace that will not trigger adaptive mode frame drop

DEFAULT_ADAPTIVE_MODE_READER_PACE_TOLERANCE
minimum_adaptive_mode_samples int

Minimal number of frames to be used to establish actual pace of processing, before adaptive mode can drop any frame

DEFAULT_MINIMUM_ADAPTIVE_MODE_SAMPLES
maximum_adaptive_frames_dropped_in_row int

Maximum number of frames dropped in row due to application of adaptive strategy

DEFAULT_MAXIMUM_ADAPTIVE_FRAMES_DROPPED_IN_ROW
video_source_properties Optional[dict[str, float]]

Optional dictionary with video source properties corresponding to OpenCV VideoCapture properties cv2.CAP_PROP_* to set values for the video source.

None
source_id Optional[int]

Optional identifier of video source - mainly useful to recognise specific source when multiple ones are in use. Identifier will be added to emitted frames and updates. It is advised to keep it unique within all sources in use.

None

Returns: Instance of VideoSource class

Source code in inference/core/interfaces/camera/video_source.py
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
@classmethod
def init(
    cls,
    video_reference: VideoSourceIdentifier,
    buffer_size: int = DEFAULT_BUFFER_SIZE,
    status_update_handlers: Optional[List[Callable[[StatusUpdate], None]]] = None,
    buffer_filling_strategy: Optional[BufferFillingStrategy] = None,
    buffer_consumption_strategy: Optional[BufferConsumptionStrategy] = None,
    adaptive_mode_stream_pace_tolerance: float = DEFAULT_ADAPTIVE_MODE_STREAM_PACE_TOLERANCE,
    adaptive_mode_reader_pace_tolerance: float = DEFAULT_ADAPTIVE_MODE_READER_PACE_TOLERANCE,
    minimum_adaptive_mode_samples: int = DEFAULT_MINIMUM_ADAPTIVE_MODE_SAMPLES,
    maximum_adaptive_frames_dropped_in_row: int = DEFAULT_MAXIMUM_ADAPTIVE_FRAMES_DROPPED_IN_ROW,
    video_source_properties: Optional[Dict[str, float]] = None,
    source_id: Optional[int] = None,
    desired_fps: Optional[Union[float, int]] = None,
) -> "VideoSource":
    """
    This class is meant to represent abstraction over video sources - both video files and
    on-line streams that are possible to be consumed and used by other components of `inference`
    library.

    Before digging into details of the class behaviour, it is advised to familiarise with the following
    concepts and implementation assumptions:

    1. Video file can be accessed from local (or remote) storage by the consumer in a pace dictated by
        its processing capabilities. If processing is faster than the frame rate of video, operations
        may be executed in a time shorter than the time of video playback. In the opposite case - consumer
        may freely decode and process frames in its own pace, without risk for failures due to temporal
        dependencies of processing - this is classical offline processing example.
    2. Video streams, on the other hand, usually need to be consumed in a pace near to their frame-rate -
        in other words - this is on-line processing example. Consumer being faster than incoming stream
        frames cannot utilise its resources to the full extent as not-yet-delivered data would be needed.
        Slow consumer, however, may not be able to process everything on time and to keep up with the pace
        of stream - some frames would need to be dropped. Otherwise - over time, consumer could go out of
        sync with the stream causing decoding failures or unpredictable behavior.

    To fit those two types of video sources, `VideoSource` introduces the concept of buffered decoding of
    video stream (like at the YouTube - player buffers some frames that are soon to be displayed).
    The way on how buffer is filled and consumed dictates the behavior of `VideoSource`.

    Starting from `BufferFillingStrategy` - we have 3 basic options:
    * WAIT: in case of slow video consumption, when buffer is full - `VideoSource` will wait for
    the empty spot in buffer before next frame will be processed - this is suitable in cases when
    we want to ensure EACH FRAME of the video to be processed
    * DROP_OLDEST: when buffer is full, the frame that sits there for the longest time will be dropped -
    this is suitable for cases when we want to process the most recent frames possible
    * DROP_LATEST: when buffer is full, the newly decoded frame is dropped - useful in cases when
    it is expected to have processing performance drops, but we would like to consume portions of
    video that are locally smooth - but this is probably the least common use-case.

    On top of that - there are two ADAPTIVE strategies: ADAPTIVE_DROP_OLDEST and ADAPTIVE_DROP_LATEST,
    which are equivalent to DROP_OLDEST and DROP_LATEST with adaptive decoding feature enabled. The notion
    of that mode will be described later.

    Naturally, decoded frames must also be consumed. `VideoSource` provides a handy interface for reading
    a video source frames by a SINGLE consumer. Consumption strategy can also be dictated via
    `BufferConsumptionStrategy`:
    * LAZY - consume all the frames from decoding buffer one-by-one
    * EAGER - at each readout - take all frames already buffered, drop all of them apart from the most recent

    In consequence - there are various combinations of `BufferFillingStrategy` and `BufferConsumptionStrategy`.
    The most popular would be:
    * `BufferFillingStrategy.WAIT` and `BufferConsumptionStrategy.LAZY` - to always decode and process each and
        every frame of the source (useful while processing video files - and default behaviour enforced by
        `inference` if there is no explicit configuration)
    * `BufferFillingStrategy.DROP_OLDEST` and `BufferConsumptionStrategy.EAGER` - to always process the most
        recent frames of source (useful while processing video streams when low latency [real-time experience]
        is required - ADAPTIVE version of this is default for streams)

    ADAPTIVE strategies were introduced to handle corner-cases, when consumer hardware is not capable to consume
    video stream and process frames at the same time (for instance - Nvidia Jetson devices running processing
    against hi-res streams with high FPS ratio). It acts with buffer in nearly the same way as `DROP_OLDEST`
    and `DROP_LATEST` strategies, but there are two more conditions that may influence frame drop:
    * announced rate of source - which in fact dictates the pace of frames grabbing from incoming stream that
    MUST be met by consumer to avoid strange decoding issues causing decoder to fail - if the pace of frame grabbing
    deviates too much - decoding will be postponed, and frames dropped to grab next ones sooner
    * consumption rate - in resource constraints environment, not only decoding is problematic from the performance
    perspective - but also heavy processing. If consumer is not quick enough - allocating more useful resources
    for decoding frames that may never be processed is a waste. That's why - if decoding happens more frequently
    than consumption of frame - ADAPTIVE mode causes decoding to be done in a slower pace and more frames are just
    grabbed and dropped on the floor.
    ADAPTIVE mode increases latency slightly, but may be the only way to operate in some cases.
    Behaviour of adaptive mode, including the maximum acceptable deviations of frames grabbing pace from source,
    reader pace and maximum number of consecutive frames dropped in ADAPTIVE mode are configurable by clients,
    with reasonable defaults being set.

    `VideoSource` emits events regarding its activity - which can be intercepted by custom handlers. Take
    into account that they are always executed in context of thread invoking them (and should be fast to complete,
    otherwise may block the flow of stream consumption). All errors raised will be emitted as logger warnings only.

    `VideoSource` implementation is naturally multithreaded, with one thread decoding the video and a different
    one consuming it and manipulating source state. Implementation of user interface is thread-safe, although
    the stream is meant to be consumed by a single thread only.

    ENV variables involved:
    * VIDEO_SOURCE_BUFFER_SIZE - default: 64
    * VIDEO_SOURCE_ADAPTIVE_MODE_STREAM_PACE_TOLERANCE - default: 0.1
    * VIDEO_SOURCE_ADAPTIVE_MODE_READER_PACE_TOLERANCE - default: 5.0
    * VIDEO_SOURCE_MINIMUM_ADAPTIVE_MODE_SAMPLES - default: 10
    * VIDEO_SOURCE_MAXIMUM_ADAPTIVE_FRAMES_DROPPED_IN_ROW - default: 16

    As an `inference` user, please use .init() method instead of constructor to instantiate objects.

    Args:
        video_reference (Union[str, int]): Either str with file or stream reference, or int representing device ID
        buffer_size (int): size of decoding buffer
        status_update_handlers (Optional[List[Callable[[StatusUpdate], None]]]): List of handlers for status updates
        buffer_filling_strategy (Optional[BufferFillingStrategy]): Settings for buffer filling strategy - if not
            given - automatic choice regarding source type will be applied
        buffer_consumption_strategy (Optional[BufferConsumptionStrategy]): Settings for buffer consumption strategy,
            if not given - automatic choice regarding source type will be applied
        adaptive_mode_stream_pace_tolerance (float): Maximum deviation between frames grabbing pace and stream pace
            that will not trigger adaptive mode frame drop
        adaptive_mode_reader_pace_tolerance (float): Maximum deviation between decoding pace and stream consumption
            pace that will not trigger adaptive mode frame drop
        minimum_adaptive_mode_samples (int): Minimal number of frames to be used to establish actual pace of
            processing, before adaptive mode can drop any frame
        maximum_adaptive_frames_dropped_in_row (int): Maximum number of frames dropped in row due to application of
            adaptive strategy
        video_source_properties (Optional[dict[str, float]]): Optional dictionary with video source properties
            corresponding to OpenCV VideoCapture properties cv2.CAP_PROP_* to set values for the video source.
        source_id (Optional[int]): Optional identifier of video source - mainly useful to recognise specific source
            when multiple ones are in use. Identifier will be added to emitted frames and updates. It is advised
            to keep it unique within all sources in use.
        desired_fps (Optional[Union[float, int]]): Value forwarded unchanged to `VideoConsumer.init(...)` -
            presumably the frame rate at which frames should be delivered; confirm against `VideoConsumer`.

    Returns: Instance of `VideoSource` class
    """
    # Bounded queue shared between the decoding side and the reader of frames.
    frames_buffer = Queue(maxsize=buffer_size)
    if status_update_handlers is None:
        status_update_handlers = []
    # The very same handlers list is given to both the consumer and the source.
    video_consumer = VideoConsumer.init(
        buffer_filling_strategy=buffer_filling_strategy,
        adaptive_mode_stream_pace_tolerance=adaptive_mode_stream_pace_tolerance,
        adaptive_mode_reader_pace_tolerance=adaptive_mode_reader_pace_tolerance,
        minimum_adaptive_mode_samples=minimum_adaptive_mode_samples,
        maximum_adaptive_frames_dropped_in_row=maximum_adaptive_frames_dropped_in_row,
        status_update_handlers=status_update_handlers,
        desired_fps=desired_fps,
    )
    return cls(
        stream_reference=video_reference,
        frames_buffer=frames_buffer,
        status_update_handlers=status_update_handlers,
        buffer_consumption_strategy=buffer_consumption_strategy,
        video_consumer=video_consumer,
        video_source_properties=video_source_properties,
        source_id=source_id,
    )

mute()

Method to be used to mute source consumption. Muting is an equivalent of pause for stream - where frames grabbing is not put on hold, just new frames decoding and buffering is not allowed - causing intermediate frames to be dropped. May be also used against files, although arguably less useful.

Eligible to be used in states: [RUNNING]

End state:

  * MUTED

Thread safe - only one transition of states possible at the time.

Throws: * StreamOperationNotAllowedError: if executed in context of incorrect state of the source

Source code in inference/core/interfaces/camera/video_source.py
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
@lock_state_transition
def mute(self) -> None:
    """
    Mute source consumption.

    Muting is the stream equivalent of a pause: frames are still grabbed, but decoding and
    buffering of new frames is not allowed, so intermediate frames get dropped. It may also be
    applied to files, although that is arguably less useful.

    Eligible to be used in states:
    [RUNNING]
    End state:
    * MUTED

    Thread safe - only one transition of states possible at the time.

    Returns: None
    Throws:
        * StreamOperationNotAllowedError: if executed in context of incorrect state of the source
    """
    current_state = self._state
    if current_state in MUTE_ELIGIBLE_STATES:
        # Happy path: delegate the actual transition to the private implementation.
        self._mute()
        return
    raise StreamOperationNotAllowedError(
        f"Could not MUTE stream in state: {current_state}"
    )

pause()

Method to be used to pause source consumption. During a pause, no new frames are consumed. Pausing an on-line stream for too long may cause stream disconnection. Eligible to be used in states: [RUNNING] End state: * PAUSED

Thread safe - only one transition of states possible at the time.

Throws: * StreamOperationNotAllowedError: if executed in context of incorrect state of the source

Source code in inference/core/interfaces/camera/video_source.py
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
@lock_state_transition
def pause(self) -> None:
    """
    Pause source consumption.

    While paused, no new frames are consumed. Keeping an on-line stream paused for too long
    may cause stream disconnection.

    Eligible to be used in states:
    [RUNNING]
    End state:
    * PAUSED

    Thread safe - only one transition of states possible at the time.

    Returns: None
    Throws:
        * StreamOperationNotAllowedError: if executed in context of incorrect state of the source
    """
    current_state = self._state
    if current_state in PAUSE_ELIGIBLE_STATES:
        # Happy path: delegate the actual transition to the private implementation.
        self._pause()
        return
    raise StreamOperationNotAllowedError(
        f"Could not PAUSE stream in state: {current_state}"
    )

read_frame(timeout=None)

Method to be used by the consumer to get decoded source frame.

Throws: * EndOfStreamError: when trying to get the frame from closed source.

Source code in inference/core/interfaces/camera/video_source.py
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
def read_frame(self, timeout: Optional[float] = None) -> Optional[VideoFrame]:
    """
    Fetch the next decoded frame from the source's frames buffer.

    When the buffer consumption strategy is `BufferConsumptionStrategy.EAGER`, the buffer is
    purged and only the most recent element is returned. Each successful read emits a DEBUG
    status update carrying the frame timestamp, frame id and source id.

    Args:
        timeout (Optional[float]): Passed to `get_from_queue(...)` as the wait limit for
            the first element; `None` is forwarded unchanged.

    Returns: VideoFrame object with decoded frame and its metadata, or None when
        `get_from_queue(...)` yielded nothing (e.g. the timeout elapsed).
    Throws:
        * EndOfStreamError: when trying to get the frame from closed source.
    """
    eager_consumption = (
        self._buffer_consumption_strategy is BufferConsumptionStrategy.EAGER
    )
    # The buffer may hold either a frame or the POISON_PILL marker signalling end of stream.
    buffer_item: Optional[Union[VideoFrame, str]] = get_from_queue(
        queue=self._frames_buffer,
        timeout=timeout,
        on_successful_read=self._video_consumer.notify_frame_consumed,
        purge=eager_consumption,
    )
    if buffer_item == POISON_PILL:
        raise EndOfStreamError(
            "Attempted to retrieve frame from stream that already ended."
        )
    if buffer_item is None:
        # Nothing was read - no consumption event to report.
        return buffer_item
    consumption_details = {
        "frame_timestamp": buffer_item.frame_timestamp,
        "frame_id": buffer_item.frame_id,
        "source_id": buffer_item.source_id,
    }
    send_video_source_status_update(
        severity=UpdateSeverity.DEBUG,
        event_type=FRAME_CONSUMED_EVENT,
        payload=consumption_details,
        status_update_handlers=self._status_update_handlers,
    )
    return buffer_item

restart(wait_on_frames_consumption=True, purge_frames_buffer=False)

Method to restart source consumption. Eligible to be used in states: [MUTED, RUNNING, PAUSED, ENDED, ERROR]. End state: * INITIALISING - that should change into RUNNING once first frame is ready to be grabbed * ERROR - if it was not possible to connect with source

Thread safe - only one transition of states possible at the time.

Parameters:

Name Type Description Default
wait_on_frames_consumption bool

Flag telling if all frames from buffer must be consumed before completion of this operation.

True

Throws: * StreamOperationNotAllowedError: if executed in context of incorrect state of the source * SourceConnectionError: if source cannot be connected

Source code in inference/core/interfaces/camera/video_source.py
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
@lock_state_transition
def restart(
    self, wait_on_frames_consumption: bool = True, purge_frames_buffer: bool = False
) -> None:
    """
    Restart source consumption.

    Eligible to be used in states:
    [MUTED, RUNNING, PAUSED, ENDED, ERROR].
    End state:
    * INITIALISING - that should change into RUNNING once first frame is ready to be grabbed
    * ERROR - if it was not possible to connect with source

    Thread safe - only one transition of states possible at the time.

    Args:
        wait_on_frames_consumption (bool): Flag telling if all frames from buffer must be consumed before
            completion of this operation.
        purge_frames_buffer (bool): Forwarded unchanged to the private restart routine.
            NOTE(review): presumably requests discarding frames still held in the buffer - confirm.

    Returns: None
    Throws:
        * StreamOperationNotAllowedError: if executed in context of incorrect state of the source
        * SourceConnectionError: if source cannot be connected
    """
    current_state = self._state
    if current_state in RESTART_ELIGIBLE_STATES:
        # Happy path: delegate the actual transition to the private implementation.
        self._restart(
            wait_on_frames_consumption=wait_on_frames_consumption,
            purge_frames_buffer=purge_frames_buffer,
        )
        return
    raise StreamOperationNotAllowedError(
        f"Could not RESTART stream in state: {current_state}"
    )

resume()

Method to recover from pause or mute into running state. Eligible to be used in states: [PAUSED, MUTED] End state: * RUNNING

Thread safe - only one transition of states possible at the time.

Throws: * StreamOperationNotAllowedError: if executed in context of incorrect state of the source

Source code in inference/core/interfaces/camera/video_source.py
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
@lock_state_transition
def resume(self) -> None:
    """
    Recover from pause or mute back into the running state.

    Eligible to be used in states:
    [PAUSED, MUTED]
    End state:
    * RUNNING

    Thread safe - only one transition of states possible at the time.

    Returns: None
    Throws:
        * StreamOperationNotAllowedError: if executed in context of incorrect state of the source
    """
    current_state = self._state
    if current_state in RESUME_ELIGIBLE_STATES:
        # Happy path: delegate the actual transition to the private implementation.
        self._resume()
        return
    raise StreamOperationNotAllowedError(
        f"Could not RESUME stream in state: {current_state}"
    )

start()

Method to be used to start source consumption. Eligible to be used in states: [NOT_STARTED, ENDED, (RESTARTING - which is internal state only)] End state: * INITIALISING - that should change into RUNNING once first frame is ready to be grabbed * ERROR - if it was not possible to connect with source

Thread safe - only one transition of states possible at the time.

Throws: * StreamOperationNotAllowedError: if executed in context of incorrect state of the source * SourceConnectionError: if source cannot be connected

Source code in inference/core/interfaces/camera/video_source.py
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
@lock_state_transition
def start(self) -> None:
    """
    Start source consumption.

    Eligible to be used in states:
    [NOT_STARTED, ENDED, (RESTARTING - which is internal state only)]
    End state:
    * INITIALISING - that should change into RUNNING once first frame is ready to be grabbed
    * ERROR - if it was not possible to connect with source

    Thread safe - only one transition of states possible at the time.

    Returns: None
    Throws:
        * StreamOperationNotAllowedError: if executed in context of incorrect state of the source
        * SourceConnectionError: if source cannot be connected
    """
    current_state = self._state
    if current_state in START_ELIGIBLE_STATES:
        # Happy path: delegate the actual transition to the private implementation.
        self._start()
        return
    raise StreamOperationNotAllowedError(
        f"Could not START stream in state: {current_state}"
    )

terminate(wait_on_frames_consumption=True, purge_frames_buffer=False)

Method to be used to terminate source consumption. Eligible to be used in states: [MUTED, RUNNING, PAUSED, ENDED, ERROR, (RESTARTING - which is internal state only)] End state: * ENDED - indicating success of the process * ERROR - if error with processing occurred

Must be used to properly dispose resources at the end.

Thread safe - only one transition of states possible at the time.

Parameters:

Name Type Description Default
wait_on_frames_consumption bool

Flag telling if all frames from buffer must be consumed before completion of this operation.

True

Throws: * StreamOperationNotAllowedError: if executed in context of incorrect state of the source

Source code in inference/core/interfaces/camera/video_source.py
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
@lock_state_transition
def terminate(
    self, wait_on_frames_consumption: bool = True, purge_frames_buffer: bool = False
) -> None:
    """
    Terminate source consumption.

    Must be used to properly dispose resources at the end.

    Eligible to be used in states:
    [MUTED, RUNNING, PAUSED, ENDED, ERROR, (RESTARTING - which is internal state only)]
    End state:
    * ENDED - indicating success of the process
    * ERROR - if error with processing occurred

    Thread safe - only one transition of states possible at the time.

    Args:
        wait_on_frames_consumption (bool): Flag telling if all frames from buffer must be consumed before
            completion of this operation.
        purge_frames_buffer (bool): Forwarded unchanged to the private terminate routine.
            NOTE(review): presumably requests discarding frames still held in the buffer - confirm.

    Returns: None
    Throws:
        * StreamOperationNotAllowedError: if executed in context of incorrect state of the source
    """
    current_state = self._state
    if current_state in TERMINATE_ELIGIBLE_STATES:
        # Happy path: delegate the actual transition to the private implementation.
        self._terminate(
            wait_on_frames_consumption=wait_on_frames_consumption,
            purge_frames_buffer=purge_frames_buffer,
        )
        return
    raise StreamOperationNotAllowedError(
        f"Could not TERMINATE stream in state: {current_state}"
    )

get_from_queue(queue, timeout=None, on_successful_read=lambda: None, purge=False)

Function takes an element from the queue, waiting up to timeout for the first element to appear. To skip to the very last element of the queue and return it, set purge to True. While purging, no additional waiting for new elements happens, and the purge stops once the queue is empty, returning the last element consumed. queue.task_done() and on_successful_read(...) are called for each received element.

Source code in inference/core/interfaces/camera/video_source.py
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
def get_from_queue(
    queue: Queue,
    timeout: Optional[float] = None,
    on_successful_read: Callable[[], None] = lambda: None,
    purge: bool = False,
) -> Optional[Any]:
    """
    Take an element from the queue, optionally skipping ahead to the most recent one.

    When the queue is empty, or `purge` is False, the function waits up to `timeout` for the
    first element to appear. When `purge` is True, every element that is immediately available
    is then drained and the last one consumed is returned. The drain never waits for new
    elements and stops as soon as the queue reports no more items.

    `queue.task_done()` and `on_successful_read()` are called once for each received element.

    Args:
        queue (Queue): Queue to read from.
        timeout (Optional[float]): Maximum time to wait for the first element; `None` blocks
            until an element is available.
        on_successful_read (Callable[[], None]): Callback invoked after every element taken
            from the queue.
        purge (bool): If True, drain the queue and return only the last element consumed.

    Returns: The element consumed last, or None if nothing could be read.
    """
    result = None
    if queue.empty() or not purge:
        # Keep the try body minimal so that an `Empty` raised by the callback is not
        # mistaken for a read timeout and silently swallowed.
        try:
            result = queue.get(timeout=timeout)
        except Empty:
            pass
        else:
            queue.task_done()
            on_successful_read()
    if purge:
        # `queue.empty()` followed by a blocking `get()` is racy: the queue may be drained
        # in between and the call would then block indefinitely. A non-blocking read makes
        # the drain safe.
        while True:
            try:
                drained = queue.get_nowait()
            except Empty:
                break
            result = drained
            queue.task_done()
            on_successful_read()
    return result