geopm  3.1.1.dev296+g5916b956
GEOPM - Global Extensible Open Power Manager
LevelZero.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2015 - 2024 Intel Corporation
3  * SPDX-License-Identifier: BSD-3-Clause
4  */
5 
6 #ifndef LEVELZERO_HPP_INCLUDE
7 #define LEVELZERO_HPP_INCLUDE
8 
9 #include <vector>
10 #include <string>
11 #include <cstdint>
12 
13 #include "geopm_topo.h"
14 
15 namespace geopm
16 {
/// @brief Abstract interface for reading LevelZero GPU device metrics
///        (frequency, temperature, engine active time, power, energy,
///        performance factor, RAS error counts) and applying frequency and
///        performance-factor controls. Every member function below is pure
///        virtual, so a concrete implementation must be supplied elsewhere.
17  class LevelZero
18  {
19  public:
    // NOTE(review): this listing omits original lines 20-23, which per the
    // cross-reference section open `enum geopm_levelzero_domain_e` and declare
    // M_DOMAIN_ALL, M_DOMAIN_COMPUTE and M_DOMAIN_MEMORY. Only the final
    // enumerator and the closing brace are visible here.
24  M_DOMAIN_SIZE = 3
25  };
26 
27  LevelZero() = default;
28  virtual ~LevelZero() = default;
    /// @brief Number of GPUs on the platform.
31  virtual int num_gpu() const = 0;
    /// @brief Number of GPUs on the platform.
    // NOTE(review): the listing gives the same brief as the overload without
    // arguments; the effect of `domain` is not described here -- confirm.
35  virtual int num_gpu(int domain) const = 0;
    /// @brief Get the number of LevelZero frequency domains of a certain type.
39  virtual int frequency_domain_count(unsigned int l0_device_idx,
40  int l0_domain) const = 0;
    /// @brief Get the LevelZero device actual frequency in MHz.
48  virtual double frequency_status(unsigned int l0_device_idx,
49  int l0_domain, int l0_domain_idx) const = 0;
    /// @brief Get the LevelZero device efficient frequency in MHz.
57  virtual double frequency_efficient(unsigned int l0_device_idx,
58  int l0_domain, int l0_domain_idx) const = 0;
    /// @brief Get the LevelZero device minimum frequency in MHz.
66  virtual double frequency_min(unsigned int l0_device_idx, int l0_domain,
67  int l0_domain_idx) const = 0;
    /// @brief Get the LevelZero device maximum frequency in MHz.
75  virtual double frequency_max(unsigned int l0_device_idx, int l0_domain,
76  int l0_domain_idx) const = 0;
    /// @brief Get the LevelZero device supported frequencies in MHz.
84  virtual std::vector<double> frequency_supported(unsigned int l0_device_idx, int l0_domain,
85  int l0_domain_idx) const = 0;
    /// @brief Get the LevelZero device frequency throttle reasons.
93  virtual uint32_t frequency_throttle_reasons(unsigned int l0_device_idx, int l0_domain,
94  int l0_domain_idx) const = 0;
    /// @brief Get the LevelZero device minimum and maximum frequency control
    ///        range in MHz.
103  virtual std::pair<double, double> frequency_range(unsigned int l0_device_idx, int l0_domain,
104  int l0_domain_idx) const = 0;
    /// @brief Get the number of LevelZero temperature domains.
110  virtual int temperature_domain_count(unsigned int l0_device_idx, int l0_domain) const = 0;
    /// @brief Get the LevelZero device maximum temperature in Celsius.
118  virtual double temperature_max(unsigned int l0_device_idx, int l0_domain,
119  int l0_domain_idx) const = 0;
    /// @brief Get the number of LevelZero engine domains.
123  virtual int engine_domain_count(unsigned int l0_device_idx, int l0_domain) const = 0;
    /// @brief Get the LevelZero device active time and timestamp in
    ///        microseconds.
131  virtual std::pair<uint64_t, uint64_t> active_time_pair(unsigned int l0_device_idx, int l0_domain,
132  int l0_domain_idx) const = 0;
    /// @brief Get the LevelZero device active time in microseconds.
140  virtual uint64_t active_time(unsigned int l0_device_idx, int l0_domain,
141  int l0_domain_idx) const = 0;
    /// @brief Get the cached LevelZero device timestamp for the active time
    ///        value in microseconds.
150  virtual uint64_t active_time_timestamp(unsigned int l0_device_idx,
151  int l0_domain, int l0_domain_idx) const = 0;
    /// @brief Get the number of LevelZero power domains of a certain type.
157  virtual int power_domain_count(int geopm_domain, unsigned int l0_device_idx,
158  int l0_domain) const = 0;
    /// @brief Get the number of LevelZero perf domains of a certain type.
163  virtual int performance_domain_count(unsigned int l0_device_idx,
164  int l0_domain) const = 0;
    /// @brief Get the performance factor value of various LevelZero domains.
171  virtual double performance_factor(unsigned int l0_device_idx,
172  int l0_domain, int l0_domain_idx) const = 0;
    /// @brief Get the LevelZero device default power limit in milliwatts.
177  virtual int32_t power_limit_tdp(unsigned int l0_device_idx) const = 0;
    /// @brief Get the LevelZero device minimum power limit in milliwatts.
182  virtual int32_t power_limit_min(unsigned int l0_device_idx) const = 0;
    /// @brief Get the LevelZero device maximum power limit in milliwatts.
187  virtual int32_t power_limit_max(unsigned int l0_device_idx) const = 0;
    /// @brief Get the LevelZero device energy and timestamp in microjoules and
    ///        microseconds.
    // Unlike energy() and energy_timestamp(), this signature has no l0_domain
    // parameter.
196  virtual std::pair<uint64_t, uint64_t> energy_pair(int geopm_domain, unsigned int l0_device_idx,
197  int l0_domain_idx) const = 0;
    /// @brief Get the LevelZero device energy in microjoules.
206  virtual uint64_t energy(int geopm_domain, unsigned int l0_device_idx, int l0_domain,
207  int l0_domain_idx) const = 0;
    /// @brief Get the LevelZero device energy cached timestamp in microseconds.
216  virtual uint64_t energy_timestamp(int geopm_domain, unsigned int l0_device_idx, int l0_domain,
217  int l0_domain_idx) const = 0;
    /// @brief Set min and max frequency for LevelZero device.
225  virtual void frequency_control(unsigned int l0_device_idx, int l0_domain,
226  int l0_domain_idx, double range_min,
227  double range_max) const = 0;
    /// @brief Set the performance factor for the LevelZero device.
234  virtual void performance_factor_control(unsigned int l0_device_idx,
235  int l0_domain,
236  int l0_domain_idx,
237  double setting) const = 0;
    /// @brief Get the number of LevelZero RAS domains of a certain type.
241  virtual int ras_domain_count(unsigned int l0_device_idx,
242  int l0_domain) const = 0;
    /// @brief Get the LevelZero count of correctable accelerator engine resets
    ///        attempted by the driver.
249  virtual double ras_reset_count_correctable(unsigned int l0_device_idx,
250  int l0_domain, int l0_domain_idx) const = 0;
    /// @brief Get the LevelZero count of correctable hardware exceptions
    ///        generated by the way workloads ... (brief is truncated in this
    ///        listing).
257  virtual double ras_programming_errcount_correctable(unsigned int l0_device_idx,
258  int l0_domain, int l0_domain_idx) const = 0;
    /// @brief Get the LevelZero count of low level driver communication
    ///        correctable errors that have occurred.
265  virtual double ras_driver_errcount_correctable(unsigned int l0_device_idx,
266  int l0_domain, int l0_domain_idx) const = 0;
    /// @brief Get the LevelZero count of correctable errors that have occurred
    ///        in the compute accelerator (brief is truncated in this listing).
273  virtual double ras_compute_errcount_correctable(unsigned int l0_device_idx,
274  int l0_domain, int l0_domain_idx) const = 0;
    /// @brief Get the LevelZero count of correctable errors that have occurred
    ///        in the fixed-function ... (brief is truncated in this listing).
281  virtual double ras_noncompute_errcount_correctable(unsigned int l0_device_idx,
282  int l0_domain, int l0_domain_idx) const = 0;
    /// @brief Get the LevelZero count of correctable errors that have occurred
    ///        in caches (L1/L3/register ...; brief is truncated in this
    ///        listing).
289  virtual double ras_cache_errcount_correctable(unsigned int l0_device_idx,
290  int l0_domain, int l0_domain_idx) const = 0;
    /// @brief Get the LevelZero count of correctable errors that have occurred
    ///        in the display.
297  virtual double ras_display_errcount_correctable(unsigned int l0_device_idx,
298  int l0_domain, int l0_domain_idx) const = 0;
    /// @brief Get the LevelZero count of uncorrectable accelerator engine
    ///        resets attempted by the driver.
305  virtual double ras_reset_count_uncorrectable(unsigned int l0_device_idx,
306  int l0_domain, int l0_domain_idx) const = 0;
    /// @brief Get the LevelZero count of uncorrectable hardware exceptions
    ///        generated by the way workloads ... (brief is truncated in this
    ///        listing).
313  virtual double ras_programming_errcount_uncorrectable(unsigned int l0_device_idx,
314  int l0_domain, int l0_domain_idx) const = 0;
    /// @brief Get the LevelZero count of low level driver communication
    ///        uncorrectable errors that have occurred.
321  virtual double ras_driver_errcount_uncorrectable(unsigned int l0_device_idx,
322  int l0_domain, int l0_domain_idx) const = 0;
    /// @brief Get the LevelZero count of uncorrectable errors that have
    ///        occurred in the compute accelerator (brief is truncated in this
    ///        listing).
329  virtual double ras_compute_errcount_uncorrectable(unsigned int l0_device_idx,
330  int l0_domain, int l0_domain_idx) const = 0;
    /// @brief Get the LevelZero count of uncorrectable errors that have
    ///        occurred in the fixed-function ... (brief is truncated in this
    ///        listing).
337  virtual double ras_noncompute_errcount_uncorrectable(unsigned int l0_device_idx,
338  int l0_domain, int l0_domain_idx) const = 0;
    /// @brief Get the LevelZero count of uncorrectable errors that have
    ///        occurred in caches (L1/L3/register ...; brief is truncated in
    ///        this listing).
345  virtual double ras_cache_errcount_uncorrectable(unsigned int l0_device_idx,
346  int l0_domain, int l0_domain_idx) const = 0;
    /// @brief Get the LevelZero count of uncorrectable errors that have
    ///        occurred in the display.
353  virtual double ras_display_errcount_uncorrectable(unsigned int l0_device_idx,
354  int l0_domain, int l0_domain_idx) const = 0;
355  };
356 
    /// @brief Free-function accessor returning a const reference to a
    ///        LevelZero object. Only the declaration appears in this header;
    ///        the listing's cross-reference places the definition at
    ///        LevelZero.cpp:28.
357  const LevelZero &levelzero();
358 }
359 #endif
Definition: LevelZero.hpp:18
virtual uint64_t active_time_timestamp(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the cached LevelZero device timestamp for the active time value in microseconds.
virtual std::pair< uint64_t, uint64_t > energy_pair(int geopm_domain, unsigned int l0_device_idx, int l0_domain_idx) const =0
Get the LevelZero device energy and timestamp in microjoules and microseconds.
virtual double frequency_status(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero device actual frequency in MHz.
virtual ~LevelZero()=default
virtual double ras_driver_errcount_correctable(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero count of the number of low-level driver communication correctable errors that have occurred.
virtual uint64_t active_time(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero device active time in microseconds.
virtual int frequency_domain_count(unsigned int l0_device_idx, int l0_domain) const =0
Get the number of LevelZero frequency domains of a certain type.
virtual int32_t power_limit_min(unsigned int l0_device_idx) const =0
Get the LevelZero device minimum power limit in milliwatts.
virtual int engine_domain_count(unsigned int l0_device_idx, int l0_domain) const =0
Get the number of LevelZero engine domains.
virtual double ras_display_errcount_correctable(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero count of number of correctable errors that have occurred in the display.
virtual double ras_driver_errcount_uncorrectable(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero count of number of low level driver communication uncorrectable errors have occurre...
virtual double ras_compute_errcount_correctable(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero count of number of correctable errors that have occurred in the compute accelerator...
virtual double ras_programming_errcount_uncorrectable(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero count of number of uncorrectable hardware exceptions generated by the way workloads...
virtual double ras_reset_count_uncorrectable(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero count of number of uncorrectable accelerator engine resets attempted by the driver.
virtual double ras_compute_errcount_uncorrectable(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero count of number of uncorrectable errors that have occurred in the compute accelerat...
virtual int32_t power_limit_tdp(unsigned int l0_device_idx) const =0
Get the LevelZero device default power limit in milliwatts.
virtual double ras_programming_errcount_correctable(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero count of number of correctable hardware exceptions generated by the way workloads h...
virtual double ras_cache_errcount_uncorrectable(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero count of number of uncorrectable errors that have occurred in caches (L1/L3/registe...
virtual uint64_t energy_timestamp(int geopm_domain, unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero device energy cached timestamp in microseconds.
virtual int num_gpu() const =0
Number of GPUs on the platform.
virtual int temperature_domain_count(unsigned int l0_device_idx, int l0_domain) const =0
Get the number of LevelZero temperature domains.
virtual std::pair< uint64_t, uint64_t > active_time_pair(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero device active time and timestamp in microseconds.
virtual double ras_noncompute_errcount_correctable(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero count of number of correctable errors that have occurred in the fixed-function acce...
geopm_levelzero_domain_e
Definition: LevelZero.hpp:20
@ M_DOMAIN_MEMORY
Definition: LevelZero.hpp:23
@ M_DOMAIN_COMPUTE
Definition: LevelZero.hpp:22
@ M_DOMAIN_SIZE
Definition: LevelZero.hpp:24
@ M_DOMAIN_ALL
Definition: LevelZero.hpp:21
virtual std::vector< double > frequency_supported(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero device supported frequencies in MHz.
virtual std::pair< double, double > frequency_range(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero device minimum and maximum frequency control range in MHz.
virtual double temperature_max(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero device maximum temperature in Celsius.
virtual uint64_t energy(int geopm_domain, unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero device energy in microjoules.
virtual void performance_factor_control(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx, double setting) const =0
Set the performance factor for the LevelZero device.
LevelZero()=default
virtual int num_gpu(int domain) const =0
Number of GPUs on the platform.
virtual int performance_domain_count(unsigned int l0_device_idx, int l0_domain) const =0
Get the number of LevelZero perf domains of a certain type.
virtual int ras_domain_count(unsigned int l0_device_idx, int l0_domain) const =0
Get the number of LevelZero RAS domains of a certain type.
virtual double ras_cache_errcount_correctable(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero count of number of correctable errors that have occurred in caches (L1/L3/register ...
virtual double frequency_min(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero device minimum frequency in MHz.
virtual int power_domain_count(int geopm_domain, unsigned int l0_device_idx, int l0_domain) const =0
Get the number of LevelZero power domains of a certain type.
virtual double ras_display_errcount_uncorrectable(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero count of number of uncorrectable errors that have occurred in the display.
virtual double performance_factor(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the performance factor value of various LevelZero domains.
virtual void frequency_control(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx, double range_min, double range_max) const =0
Set min and max frequency for LevelZero device.
virtual uint32_t frequency_throttle_reasons(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero device frequency throttle reasons.
virtual double frequency_efficient(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero device efficient frequency in MHz.
virtual double ras_reset_count_correctable(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero count of number of correctable accelerator engine resets attempted by the driver.
virtual int32_t power_limit_max(unsigned int l0_device_idx) const =0
Get the LevelZero device maximum power limit in milliwatts.
virtual double frequency_max(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero device maximum frequency in MHz.
virtual double ras_noncompute_errcount_uncorrectable(unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
Get the LevelZero count of number of uncorrectable errors that have occurred in the fixed-function ac...
Definition: Agg.cpp:20
const LevelZero & levelzero()
Definition: LevelZero.cpp:28