geopm  3.1.1.dev214+gba4f9f6d
GEOPM - Global Extensible Open Power Manager
LevelZeroDevicePool.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2015 - 2024 Intel Corporation
3  * SPDX-License-Identifier: BSD-3-Clause
4  */
5 
6 #ifndef LEVELZERODEVICEPOOL_HPP_INCLUDE
7 #define LEVELZERODEVICEPOOL_HPP_INCLUDE
8 
9 #include <vector>
10 #include <string>
11 #include <cstdint>
12 
13 #include "geopm_sched.h"
14 #include "geopm_topo.h"
15 
16 namespace geopm
17 {
19  {
// Abstract interface to a pool of LevelZero devices: every member below is
// pure virtual, so concrete behavior lives in a derived class not shown here.
// The meaning of the shared parameters is not documented in this listing;
// presumably `domain` is a GEOPM topology domain type, `domain_idx` an index
// within that domain, and `l0_domain` a LevelZero sub-domain selector
// -- TODO confirm against the implementation.
20  public:
21  LevelZeroDevicePool() = default;
22  virtual ~LevelZeroDevicePool() = default;
// Number of GPUs on the platform.
26  virtual int num_gpu(int domain_type) const = 0;
27  // FREQUENCY SIGNAL FUNCTIONS
// Get the LevelZero device actual frequency in MHz.
34  virtual double frequency_status(int domain, unsigned int domain_idx,
35  int l0_domain) const = 0;
// Get the LevelZero device efficient frequency in MHz.
42  virtual double frequency_efficient(int domain, unsigned int domain_idx,
43  int l0_domain) const = 0;
// Get the LevelZero device minimum frequency in MHz.
50  virtual double frequency_min(int domain, unsigned int domain_idx,
51  int l0_domain) const = 0;
// Get the LevelZero device maximum frequency in MHz.
58  virtual double frequency_max(int domain, unsigned int domain_idx,
59  int l0_domain) const = 0;
// Get the LevelZero device frequency step in MHz.
66  virtual double frequency_step(int domain, unsigned int domain_idx,
67  int l0_domain) const = 0;
// Get the LevelZero device frequency throttle reasons.
74  virtual uint32_t frequency_throttle_reasons(int domain, unsigned int domain_idx,
75  int l0_domain) const = 0;
// Undocumented in the rendered listing; presumably returns the (min, max)
// frequency range as a pair -- TODO confirm ordering and units.
// NOTE(review): std::pair is used here but <utility> is not among the
// includes visible in this header; it may arrive transitively -- confirm.
76  virtual std::pair<double, double> frequency_range(int domain,
77  unsigned int domain_idx,
78  int l0_domain) const = 0;
// Get the LevelZero domain maximum temperature in Celsius.
85  virtual double temperature_max(int domain, unsigned int domain_idx,
86  int l0_domain) const = 0;
87  // UTILIZATION SIGNAL FUNCTIONS
// Get the LevelZero device active time and timestamp in microseconds.
94  virtual std::pair<uint64_t, uint64_t> active_time_pair(int domain, unsigned int domain_idx,
95  int l0_domain) const = 0;
// Get the LevelZero device active time; presumably in microseconds like
// active_time_pair -- TODO confirm.
99  virtual uint64_t active_time(int domain, unsigned int domain_idx,
100  int l0_domain) const = 0;
// Get the LevelZero device timestamp for the active time value in microseconds.
107  virtual uint64_t active_time_timestamp(int domain, unsigned int domain_idx,
108  int l0_domain) const = 0;
109  // POWER SIGNAL FUNCTIONS
// Get the LevelZero device default power limit in milliwatts.
116  virtual int32_t power_limit_tdp(int domain, unsigned int domain_idx,
117  int l0_domain) const = 0;
// Get the LevelZero device minimum power limit in milliwatts.
124  virtual int32_t power_limit_min(int domain, unsigned int domain_idx,
125  int l0_domain) const = 0;
// Get the LevelZero device maximum power limit in milliwatts.
132  virtual int32_t power_limit_max(int domain, unsigned int domain_idx,
133  int l0_domain) const = 0;
134  // ENERGY SIGNAL FUNCTIONS
// Get the LevelZero device energy in microjoules and timestamp in microseconds.
141  virtual std::pair<uint64_t, uint64_t> energy_pair(int domain, unsigned int domain_idx,
142  int l0_domain) const = 0;
// Get the LevelZero device energy in microjoules.
149  virtual uint64_t energy(int domain, unsigned int domain_idx,
150  int l0_domain) const = 0;
// Get the LevelZero device energy timestamp in microseconds.
157  virtual uint64_t energy_timestamp(int domain, unsigned int domain_idx,
158  int l0_domain) const = 0;
// Get the LevelZero device performance factor.
165  virtual double performance_factor(int domain,
166  unsigned int domain_idx,
167  int l0_domain) const = 0;
168  // FREQUENCY CONTROL FUNCTIONS
// Set min and max frequency for LevelZero device; presumably range_min and
// range_max are in MHz like the frequency signals -- TODO confirm.
176  virtual void frequency_control(int domain, unsigned int domain_idx,
177  int l0_domain, double range_min,
178  double range_max) const = 0;
// Set performance factor for LevelZero device.
185  virtual void performance_factor_control(int domain, unsigned int domain_idx,
186  int l0_domain,
187  double setting) const = 0;
188  // RAS SIGNAL FUNCTIONS
// Get the LevelZero count of correctable accelerator engine resets attempted
// by the driver.
196  virtual double ras_reset_count_correctable(int domain, unsigned int domain_idx,
197  int l0_domain) const = 0;
// Get the LevelZero count of correctable hardware exceptions generated by
// workloads (the rendered description is truncated).
205  virtual double ras_programming_errcount_correctable(int domain, unsigned int domain_idx,
206  int l0_domain) const = 0;
// Get the LevelZero count of correctable low level driver communication
// errors that have occurred.
214  virtual double ras_driver_errcount_correctable(int domain, unsigned int domain_idx,
215  int l0_domain) const = 0;
// Get the LevelZero count of correctable errors that have occurred in the
// compute accelerator (the rendered description is truncated).
223  virtual double ras_compute_errcount_correctable(int domain, unsigned int domain_idx,
224  int l0_domain) const = 0;
// Get the LevelZero count of correctable errors that have occurred in
// fixed-function hardware (the rendered description is truncated).
232  virtual double ras_noncompute_errcount_correctable(int domain, unsigned int domain_idx,
233  int l0_domain) const = 0;
// Get the LevelZero count of correctable errors that have occurred in caches
// (the rendered description is truncated).
241  virtual double ras_cache_errcount_correctable(int domain, unsigned int domain_idx,
242  int l0_domain) const = 0;
// Get the LevelZero count of correctable errors that have occurred in the
// display.
250  virtual double ras_display_errcount_correctable(int domain, unsigned int domain_idx,
251  int l0_domain) const = 0;
// Get the LevelZero count of uncorrectable accelerator engine resets
// attempted by the driver.
259  virtual double ras_reset_count_uncorrectable(int domain, unsigned int domain_idx,
260  int l0_domain) const = 0;
// Get the LevelZero count of uncorrectable hardware exceptions generated by
// workloads (the rendered description is truncated).
268  virtual double ras_programming_errcount_uncorrectable(int domain, unsigned int domain_idx,
269  int l0_domain) const = 0;
// Get the LevelZero count of uncorrectable low level driver communication
// errors that have occurred.
277  virtual double ras_driver_errcount_uncorrectable(int domain, unsigned int domain_idx,
278  int l0_domain) const = 0;
// Get the LevelZero count of uncorrectable errors that have occurred in the
// compute accelerator (the rendered description is truncated).
286  virtual double ras_compute_errcount_uncorrectable(int domain, unsigned int domain_idx,
287  int l0_domain) const = 0;
// Get the LevelZero count of uncorrectable errors that have occurred in
// fixed-function hardware (the rendered description is truncated).
295  virtual double ras_noncompute_errcount_uncorrectable(int domain, unsigned int domain_idx,
296  int l0_domain) const = 0;
// Get the LevelZero count of uncorrectable errors that have occurred in
// caches (the rendered description is truncated).
304  virtual double ras_cache_errcount_uncorrectable(int domain, unsigned int domain_idx,
305  int l0_domain) const = 0;
// Get the LevelZero count of uncorrectable errors that have occurred in the
// display.
313  virtual double ras_display_errcount_uncorrectable(int domain, unsigned int domain_idx,
314  int l0_domain) const = 0;
315  private:
316  };
317 
319 }
320 #endif
Definition: LevelZeroDevicePool.hpp:19
virtual uint64_t energy(int domain, unsigned int domain_idx, int l0_domain) const =0
Get the LevelZero device energy in microjoules.
virtual double ras_driver_errcount_correctable(int domain, unsigned int domain_idx, int l0_domain) const =0
Get the LevelZero count of correctable low level driver communication errors that have occurred.
virtual std::pair< uint64_t, uint64_t > active_time_pair(int domain, unsigned int domain_idx, int l0_domain) const =0
Get the LevelZero device active time and timestamp in microseconds.
virtual double ras_cache_errcount_correctable(int domain, unsigned int domain_idx, int l0_domain) const =0
Get the LevelZero count of number of correctable errors that have occurred in caches (L1/L3/register ...
virtual double ras_reset_count_uncorrectable(int domain, unsigned int domain_idx, int l0_domain) const =0
Get the LevelZero count of number of uncorrectable accelerator engine resets attempted by the driver.
virtual uint64_t active_time(int domain, unsigned int domain_idx, int l0_domain) const =0
Get the LevelZero device active time in microseconds.
virtual std::pair< uint64_t, uint64_t > energy_pair(int domain, unsigned int domain_idx, int l0_domain) const =0
Get the LevelZero device energy in microjoules and timestamp in microseconds.
virtual double ras_noncompute_errcount_uncorrectable(int domain, unsigned int domain_idx, int l0_domain) const =0
Get the LevelZero count of number of uncorrectable errors that have occurred in the fixed-function ac...
virtual int32_t power_limit_max(int domain, unsigned int domain_idx, int l0_domain) const =0
Get the LevelZero device maximum power limit in milliwatts.
virtual uint32_t frequency_throttle_reasons(int domain, unsigned int domain_idx, int l0_domain) const =0
Get the LevelZero device frequency throttle reasons.
virtual double temperature_max(int domain, unsigned int domain_idx, int l0_domain) const =0
Get the LevelZero domain maximum temperature in Celsius.
virtual double ras_display_errcount_uncorrectable(int domain, unsigned int domain_idx, int l0_domain) const =0
Get the LevelZero count of number of uncorrectable errors that have occurred in the display.
virtual double frequency_max(int domain, unsigned int domain_idx, int l0_domain) const =0
Get the LevelZero device maximum frequency in MHz.
virtual double performance_factor(int domain, unsigned int domain_idx, int l0_domain) const =0
Get the LevelZero device performance factor.
virtual int32_t power_limit_tdp(int domain, unsigned int domain_idx, int l0_domain) const =0
Get the LevelZero device default power limit in milliwatts.
virtual std::pair< double, double > frequency_range(int domain, unsigned int domain_idx, int l0_domain) const =0
virtual double ras_cache_errcount_uncorrectable(int domain, unsigned int domain_idx, int l0_domain) const =0
Get the LevelZero count of number of uncorrectable errors that have occurred in caches (L1/L3/registe...
virtual double ras_noncompute_errcount_correctable(int domain, unsigned int domain_idx, int l0_domain) const =0
Get the LevelZero count of number of correctable errors that have occurred in the fixed-function acce...
virtual int num_gpu(int domain_type) const =0
Number of GPUs on the platform.
virtual double ras_programming_errcount_uncorrectable(int domain, unsigned int domain_idx, int l0_domain) const =0
Get the LevelZero count of number of uncorrectable hardware exceptions generated by the way workloads...
virtual double ras_driver_errcount_uncorrectable(int domain, unsigned int domain_idx, int l0_domain) const =0
Get the LevelZero count of number of low level driver communication uncorrectable errors have occurre...
virtual void frequency_control(int domain, unsigned int domain_idx, int l0_domain, double range_min, double range_max) const =0
Set min and max frequency for LevelZero device.
virtual double ras_display_errcount_correctable(int domain, unsigned int domain_idx, int l0_domain) const =0
Get the LevelZero count of number of correctable errors that have occurred in the display.
virtual double frequency_step(int domain, unsigned int domain_idx, int l0_domain) const =0
Get the LevelZero device frequency step in MHz.
virtual ~LevelZeroDevicePool()=default
virtual double frequency_efficient(int domain, unsigned int domain_idx, int l0_domain) const =0
Get the LevelZero device efficient frequency in MHz.
virtual int32_t power_limit_min(int domain, unsigned int domain_idx, int l0_domain) const =0
Get the LevelZero device minimum power limit in milliwatts.
virtual double ras_programming_errcount_correctable(int domain, unsigned int domain_idx, int l0_domain) const =0
Get the LevelZero count of number of correctable hardware exceptions generated by the way workloads h...
virtual double ras_compute_errcount_correctable(int domain, unsigned int domain_idx, int l0_domain) const =0
Get the LevelZero count of number of correctable errors that have occurred in the compute accelerator...
virtual double ras_reset_count_correctable(int domain, unsigned int domain_idx, int l0_domain) const =0
Get the LevelZero count of number of correctable accelerator engine resets attempted by the driver.
virtual void performance_factor_control(int domain, unsigned int domain_idx, int l0_domain, double setting) const =0
Set performance factor for LevelZero device.
virtual double frequency_min(int domain, unsigned int domain_idx, int l0_domain) const =0
Get the LevelZero device minimum frequency in MHz.
virtual double frequency_status(int domain, unsigned int domain_idx, int l0_domain) const =0
Get the LevelZero device actual frequency in MHz.
virtual uint64_t active_time_timestamp(int domain, unsigned int domain_idx, int l0_domain) const =0
Get the LevelZero device timestamp for the active time value in microseconds.
virtual double ras_compute_errcount_uncorrectable(int domain, unsigned int domain_idx, int l0_domain) const =0
Get the LevelZero count of number of uncorrectable errors that have occurred in the compute accelerat...
virtual uint64_t energy_timestamp(int domain, unsigned int domain_idx, int l0_domain) const =0
Get the LevelZero device energy timestamp in microseconds.
Definition: Agg.cpp:20
const LevelZeroDevicePool & levelzero_device_pool()
Definition: LevelZeroDevicePool.cpp:20