geopm 3.1.1.dev456+g3ba31824
GEOPM - Global Extensible Open Power Manager
Loading...
Searching...
No Matches
LevelZero.hpp
Go to the documentation of this file.
1/*
2 * Copyright (c) 2015 - 2024 Intel Corporation
3 * SPDX-License-Identifier: BSD-3-Clause
4 */
5
6#ifndef LEVELZERO_HPP_INCLUDE
7#define LEVELZERO_HPP_INCLUDE
8
9#include <vector>
10#include <string>
11#include <cstdint>
12
13#include "geopm_topo.h"
14
15namespace geopm
16{
18 {
19 public:
26
27 LevelZero() = default;
28 virtual ~LevelZero() = default;
31 virtual int num_gpu() const = 0;
35 virtual int num_gpu(int domain) const = 0;
39 virtual int frequency_domain_count(unsigned int l0_device_idx,
40 int l0_domain) const = 0;
48 virtual double frequency_status(unsigned int l0_device_idx,
49 int l0_domain, int l0_domain_idx) const = 0;
57 virtual double frequency_efficient(unsigned int l0_device_idx,
58 int l0_domain, int l0_domain_idx) const = 0;
66 virtual double frequency_min(unsigned int l0_device_idx, int l0_domain,
67 int l0_domain_idx) const = 0;
75 virtual double frequency_max(unsigned int l0_device_idx, int l0_domain,
76 int l0_domain_idx) const = 0;
84 virtual std::vector<double> frequency_supported(unsigned int l0_device_idx, int l0_domain,
85 int l0_domain_idx) const = 0;
93 virtual uint32_t frequency_throttle_reasons(unsigned int l0_device_idx, int l0_domain,
94 int l0_domain_idx) const = 0;
103 virtual std::pair<double, double> frequency_range(unsigned int l0_device_idx, int l0_domain,
104 int l0_domain_idx) const = 0;
110 virtual int temperature_domain_count(unsigned int l0_device_idx, int l0_domain) const = 0;
118 virtual double temperature_max(unsigned int l0_device_idx, int l0_domain,
119 int l0_domain_idx) const = 0;
123 virtual int engine_domain_count(unsigned int l0_device_idx, int l0_domain) const = 0;
131 virtual std::pair<uint64_t, uint64_t> active_time_pair(unsigned int l0_device_idx, int l0_domain,
132 int l0_domain_idx) const = 0;
140 virtual uint64_t active_time(unsigned int l0_device_idx, int l0_domain,
141 int l0_domain_idx) const = 0;
150 virtual uint64_t active_time_timestamp(unsigned int l0_device_idx,
151 int l0_domain, int l0_domain_idx) const = 0;
157 virtual int power_domain_count(int geopm_domain, unsigned int l0_device_idx,
158 int l0_domain) const = 0;
163 virtual int performance_domain_count(unsigned int l0_device_idx,
164 int l0_domain) const = 0;
171 virtual double performance_factor(unsigned int l0_device_idx,
172 int l0_domain, int l0_domain_idx) const = 0;
177 virtual int32_t power_limit_tdp(unsigned int l0_device_idx) const = 0;
182 virtual int32_t power_limit_min(unsigned int l0_device_idx) const = 0;
187 virtual int32_t power_limit_max(unsigned int l0_device_idx) const = 0;
196 virtual std::pair<uint64_t, uint64_t> energy_pair(int geopm_domain, unsigned int l0_device_idx,
197 int l0_domain_idx) const = 0;
206 virtual uint64_t energy(int geopm_domain, unsigned int l0_device_idx, int l0_domain,
207 int l0_domain_idx) const = 0;
216 virtual uint64_t energy_timestamp(int geopm_domain, unsigned int l0_device_idx, int l0_domain,
217 int l0_domain_idx) const = 0;
225 virtual void frequency_control(unsigned int l0_device_idx, int l0_domain,
226 int l0_domain_idx, double range_min,
227 double range_max) const = 0;
234 virtual void performance_factor_control(unsigned int l0_device_idx,
235 int l0_domain,
236 int l0_domain_idx,
237 double setting) const = 0;
241 virtual int ras_domain_count(unsigned int l0_device_idx,
242 int l0_domain) const = 0;
249 virtual double ras_reset_count_correctable(unsigned int l0_device_idx,
250 int l0_domain, int l0_domain_idx) const = 0;
257 virtual double ras_programming_errcount_correctable(unsigned int l0_device_idx,
258 int l0_domain, int l0_domain_idx) const = 0;
265 virtual double ras_driver_errcount_correctable(unsigned int l0_device_idx,
266 int l0_domain, int l0_domain_idx) const = 0;
273 virtual double ras_compute_errcount_correctable(unsigned int l0_device_idx,
274 int l0_domain, int l0_domain_idx) const = 0;
281 virtual double ras_noncompute_errcount_correctable(unsigned int l0_device_idx,
282 int l0_domain, int l0_domain_idx) const = 0;
289 virtual double ras_cache_errcount_correctable(unsigned int l0_device_idx,
290 int l0_domain, int l0_domain_idx) const = 0;
297 virtual double ras_display_errcount_correctable(unsigned int l0_device_idx,
298 int l0_domain, int l0_domain_idx) const = 0;
305 virtual double ras_reset_count_uncorrectable(unsigned int l0_device_idx,
306 int l0_domain, int l0_domain_idx) const = 0;
313 virtual double ras_programming_errcount_uncorrectable(unsigned int l0_device_idx,
314 int l0_domain, int l0_domain_idx) const = 0;
321 virtual double ras_driver_errcount_uncorrectable(unsigned int l0_device_idx,
322 int l0_domain, int l0_domain_idx) const = 0;
329 virtual double ras_compute_errcount_uncorrectable(unsigned int l0_device_idx,
330 int l0_domain, int l0_domain_idx) const = 0;
337 virtual double ras_noncompute_errcount_uncorrectable(unsigned int l0_device_idx,
338 int l0_domain, int l0_domain_idx) const = 0;
345 virtual double ras_cache_errcount_uncorrectable(unsigned int l0_device_idx,
346 int l0_domain, int l0_domain_idx) const = 0;
353 virtual double ras_display_errcount_uncorrectable(unsigned int l0_device_idx,
354 int l0_domain, int l0_domain_idx) const = 0;
355 };
356
357 const LevelZero &levelzero();
358}
359#endif
Definition LevelZero.hpp:18
virtual uint64_t active_time_timestamp(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the cached LevelZero device timestamp for the active time value in microseconds.
virtual double frequency_status(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero device actual frequency in MHz.
virtual ~LevelZero()=default
virtual double ras_driver_errcount_correctable(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero count of number of low-level driver communication correctable errors that have occurred.
virtual uint64_t active_time(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero device active time in microseconds.
virtual int frequency_domain_count(unsigned int l0_device_idx, int l0_domain) const =0
Get the number of LevelZero frequency domains of a certain type.
virtual int32_t power_limit_min(unsigned int l0_device_idx) const =0
Get the LevelZero device minimum power limit in milliwatts.
virtual int engine_domain_count(unsigned int l0_device_idx, int l0_domain) const =0
Get the number of LevelZero engine domains.
virtual std::pair< uint64_t, uint64_t > active_time_pair(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero device active time and timestamp in microseconds.
virtual double ras_display_errcount_correctable(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero count of number of correctable errors that have occurred in the display.
virtual std::vector< double > frequency_supported(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero device supported frequencies in MHz.
virtual double ras_driver_errcount_uncorrectable(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero count of number of low-level driver communication uncorrectable errors that have occurred.
virtual double ras_compute_errcount_correctable(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero count of number of correctable errors that have occurred in the compute accelerator...
virtual double ras_programming_errcount_uncorrectable(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero count of number of uncorrectable hardware exceptions generated by the way workloads...
virtual double ras_reset_count_uncorrectable(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero count of number of uncorrectable accelerator engine resets attempted by the driver.
virtual double ras_compute_errcount_uncorrectable(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero count of number of uncorrectable errors that have occurred in the compute accelerat...
virtual int32_t power_limit_tdp(unsigned int l0_device_idx) const =0
Get the LevelZero device default power limit in milliwatts.
virtual double ras_programming_errcount_correctable(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero count of number of correctable hardware exceptions generated by the way workloads h...
virtual double ras_cache_errcount_uncorrectable(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero count of number of uncorrectable errors that have occurred in caches (L1/L3/registe...
virtual std::pair< uint64_t, uint64_t > energy_pair(int geopm_domain, unsigned int l0_device_idx, int l0_domain_idx) const =0
Get the LevelZero device energy and timestamp in microjoules and microseconds.
virtual uint64_t energy_timestamp(int geopm_domain, unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero device energy cached timestamp in microseconds.
virtual int num_gpu() const =0
Number of GPUs on the platform.
virtual int temperature_domain_count(unsigned int l0_device_idx, int l0_domain) const =0
Get the number of LevelZero temperature domains.
virtual std::pair< double, double > frequency_range(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero device minimum and maximum frequency control range in MHz.
virtual double ras_noncompute_errcount_correctable(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero count of number of correctable errors that have occurred in the fixed-function acce...
geopm_levelzero_domain_e
Definition LevelZero.hpp:20
@ M_DOMAIN_MEMORY
Definition LevelZero.hpp:23
@ M_DOMAIN_COMPUTE
Definition LevelZero.hpp:22
@ M_DOMAIN_SIZE
Definition LevelZero.hpp:24
@ M_DOMAIN_ALL
Definition LevelZero.hpp:21
virtual double temperature_max(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero device maximum temperature in Celsius.
virtual uint64_t energy(int geopm_domain, unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero device energy in microjoules.
virtual void performance_factor_control(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx, double setting) const =0
Set the performance factor for the LevelZero device.
LevelZero()=default
virtual int num_gpu(int domain) const =0
Number of GPUs on the platform.
virtual int performance_domain_count(unsigned int l0_device_idx, int l0_domain) const =0
Get the number of LevelZero perf domains of a certain type.
virtual int ras_domain_count(unsigned int l0_device_idx, int l0_domain) const =0
Get the number of LevelZero RAS domains of a certain type.
virtual double ras_cache_errcount_correctable(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero count of number of correctable errors that have occurred in caches (L1/L3/register ...
virtual double frequency_min(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero device minimum frequency in MHz.
virtual int power_domain_count(int geopm_domain, unsigned int l0_device_idx, int l0_domain) const =0
Get the number of LevelZero power domains of a certain type.
virtual double ras_display_errcount_uncorrectable(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero count of number of uncorrectable errors that have occurred in the display.
virtual double performance_factor(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the performance factor value of various LevelZero domains.
virtual void frequency_control(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx, double range_min, double range_max) const =0
Set min and max frequency for LevelZero device.
virtual uint32_t frequency_throttle_reasons(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero device frequency throttle reasons.
virtual double frequency_efficient(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero device efficient frequency in MHz.
virtual double ras_reset_count_correctable(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero count of number of correctable accelerator engine resets attempted by the driver.
virtual int32_t power_limit_max(unsigned int l0_device_idx) const =0
Get the LevelZero device maximum power limit in milliwatts.
virtual double frequency_max(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero device maximum frequency in MHz.
virtual double ras_noncompute_errcount_uncorrectable(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero count of number of uncorrectable errors that have occurred in the fixed-function ac...
Definition Agg.cpp:20
const LevelZero & levelzero()
Definition LevelZero.cpp:28