Skip to content

metrics

get_container_stats(docker_socket_path)

Gets the container stats.

Returns:

Name Type Description
dict dict

A dictionary containing the container stats.

Source code in inference/core/managers/metrics.py
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
def get_container_stats(docker_socket_path: str) -> dict:
    """
    Gets the container stats.

    Runs ``curl`` against the given unix socket and requests a single
    (non-streaming) stats snapshot for the container whose identifier is this
    machine's hostname.

    Args:
        docker_socket_path (str): Path of the unix socket handed to
            ``curl --unix-socket``.

    Returns:
        dict: A dictionary containing the container stats under the ``"stats"`` key.

    Raises:
        Exception: If ``curl`` cannot be executed, exits with a non-zero
            status, or its output is not valid JSON. The underlying error is
            logged and attached as the exception's cause.
    """
    try:
        # The hostname is used as the container identifier in the request URL.
        container_id = socket.gethostname()
        result = subprocess.run(
            [
                "curl",
                "--unix-socket",
                docker_socket_path,
                f"http://localhost/containers/{container_id}/stats?stream=false",
            ],
            capture_output=True,
            text=True,
            check=False,  # the return code is inspected explicitly below
        )
        if result.returncode != 0:
            raise Exception(result.stderr)
        stats = json.loads(result.stdout.strip())
        return {"stats": stats}
    except Exception as e:
        logger.exception(e)
        # Chain the original error so callers can still inspect the root cause.
        raise Exception("An error occurred while fetching container stats.") from e

get_model_metrics(inference_server_id, model_id, min=-1, max=float('inf'))

Gets the metrics for a given model between a specified time range.

Parameters:

Name Type Description Default
inference_server_id str

The identifier of the inference server.

required
model_id str

The identifier of the model.

required
min float

The starting timestamp of the time range. Defaults to -1.

-1
max float

The ending timestamp of the time range. Defaults to float("inf").

float('inf')

Returns:

Name Type Description
dict dict

A dictionary containing the metrics of the model: - num_inferences (int): The number of inferences made. - avg_inference_time (float): The average inference time. - num_errors (int): The number of errors that occurred.

Source code in inference/core/managers/metrics.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
def get_model_metrics(
    inference_server_id: str, model_id: str, min: float = -1, max: float = float("inf")
) -> dict:
    """
    Gets the metrics for a given model between a specified time range.

    Args:
        inference_server_id (str): The identifier of the inference server.
        model_id (str): The identifier of the model.
        min (float, optional): The starting timestamp of the time range. Defaults to -1.
        max (float, optional): The ending timestamp of the time range. Defaults to float("inf").

    Returns:
        dict: A dictionary containing the metrics of the model:
              - num_inferences (int): The number of inferences made.
              - avg_inference_time (float): The average inference time.
              - num_errors (int): The number of errors that occurred.
    """
    # NOTE: `min` / `max` shadow the builtins; the names are kept because they
    # are part of the public signature and may be passed by keyword.
    inferences_with_times = cache.zrangebyscore(
        f"inference:{inference_server_id}:{model_id}", min=min, max=max, withscores=True
    )
    num_inferences = len(inferences_with_times)

    # A cached entry's "response" is either a single mapping or a list of
    # mappings; collect every "time" value that is present.
    inference_times = []
    for inference, _ in inferences_with_times:
        response = inference["response"]
        if isinstance(response, list):
            inference_times.extend(r["time"] for r in response if "time" in r)
        elif "time" in response:
            inference_times.append(response["time"])

    # Guard against division by zero when no timing data was recorded.
    avg_inference_time = (
        sum(inference_times) / len(inference_times) if inference_times else 0
    )

    errors_with_times = cache.zrangebyscore(
        f"error:{inference_server_id}:{model_id}", min=min, max=max, withscores=True
    )
    num_errors = len(errors_with_times)
    return {
        "num_inferences": num_inferences,
        "avg_inference_time": avg_inference_time,
        "num_errors": num_errors,
    }

get_system_info()

Collects system information such as platform, architecture, hostname, IP address, MAC address, and processor details.

Returns:

Name Type Description
dict dict

A dictionary containing detailed system information.

Source code in inference/core/managers/metrics.py
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
def get_system_info() -> dict:
    """Collects system information such as platform, architecture, hostname, IP address, MAC address, and processor details.

    Collection is best-effort: if any lookup raises an ``Exception``, the error
    is logged and the entries gathered up to that point are returned.

    Returns:
        dict: A dictionary containing detailed system information.
    """
    info = {}
    try:
        info["platform"] = platform.system()
        info["platform_release"] = platform.release()
        info["platform_version"] = platform.version()
        info["architecture"] = platform.machine()
        info["hostname"] = socket.gethostname()
        info["ip_address"] = socket.gethostbyname(socket.gethostname())
        # Format the node value as 12 hex digits, split into colon-separated pairs.
        info["mac_address"] = ":".join(re.findall("..", "%012x" % uuid.getnode()))
        info["processor"] = platform.processor()
    except Exception as e:
        logger.exception(e)
    # Return outside of `finally`: a `return` there would also swallow
    # KeyboardInterrupt / SystemExit raised while collecting the data.
    return info