geopm  3.1.1.dev214+gba4f9f6d
GEOPM - Global Extensible Open Power Manager
Public Types | Public Member Functions | List of all members
geopm::LevelZero Class Reference (abstract)

#include <LevelZero.hpp>

Inheritance diagram for geopm::LevelZero:
Inheritance graph
[legend]

Public Types

enum  geopm_levelzero_domain_e { M_DOMAIN_ALL = 0 , M_DOMAIN_COMPUTE = 1 , M_DOMAIN_MEMORY = 2 , M_DOMAIN_SIZE = 3 }
 

Public Member Functions

 LevelZero ()=default
 
virtual ~LevelZero ()=default
 
virtual int num_gpu () const =0
 Number of GPUs on the platform. More...
 
virtual int num_gpu (int domain) const =0
 Number of GPUs on the platform. More...
 
virtual int frequency_domain_count (unsigned int l0_device_idx, int l0_domain) const =0
 Get the number of LevelZero frequency domains of a certain type. More...
 
virtual double frequency_status (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
 Get the LevelZero device actual frequency in MHz. More...
 
virtual double frequency_efficient (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
 Get the LevelZero device efficient frequency in MHz. More...
 
virtual double frequency_min (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
 Get the LevelZero device minimum frequency in MHz. More...
 
virtual double frequency_max (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
 Get the LevelZero device maximum frequency in MHz. More...
 
virtual std::vector< double > frequency_supported (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
 Get the LevelZero device supported frequencies in MHz. More...
 
virtual uint32_t frequency_throttle_reasons (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
 Get the LevelZero device frequency throttle reasons. More...
 
virtual std::pair< double, double > frequency_range (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
 Get the LevelZero device minimum and maximum frequency control range in MHz. More...
 
virtual int temperature_domain_count (unsigned int l0_device_idx, int l0_domain) const =0
 Get the number of LevelZero temperature domains. More...
 
virtual double temperature_max (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
 Get the LevelZero device maximum temperature in Celsius. More...
 
virtual int engine_domain_count (unsigned int l0_device_idx, int l0_domain) const =0
 Get the number of LevelZero engine domains. More...
 
virtual std::pair< uint64_t, uint64_t > active_time_pair (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
 Get the LevelZero device active time and timestamp in microseconds. More...
 
virtual uint64_t active_time (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
 Get the LevelZero device active time in microseconds. More...
 
virtual uint64_t active_time_timestamp (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
 Get the cached LevelZero device timestamp for the active time value in microseconds. More...
 
virtual int power_domain_count (int geopm_domain, unsigned int l0_device_idx, int l0_domain) const =0
 Get the number of LevelZero power domains of a certain type. More...
 
virtual int performance_domain_count (unsigned int l0_device_idx, int l0_domain) const =0
 Get the number of LevelZero perf domains of a certain type. More...
 
virtual double performance_factor (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
 Get the performance factor value of various LevelZero domains. More...
 
virtual int32_t power_limit_tdp (unsigned int l0_device_idx) const =0
 Get the LevelZero device default power limit in milliwatts. More...
 
virtual int32_t power_limit_min (unsigned int l0_device_idx) const =0
 Get the LevelZero device minimum power limit in milliwatts. More...
 
virtual int32_t power_limit_max (unsigned int l0_device_idx) const =0
 Get the LevelZero device maximum power limit in milliwatts. More...
 
virtual std::pair< uint64_t, uint64_t > energy_pair (int geopm_domain, unsigned int l0_device_idx, int l0_domain_idx) const =0
 Get the LevelZero device energy and timestamp in microjoules and microseconds. More...
 
virtual uint64_t energy (int geopm_domain, unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
 Get the LevelZero device energy in microjoules. More...
 
virtual uint64_t energy_timestamp (int geopm_domain, unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
 Get the LevelZero device energy cached timestamp in microseconds. More...
 
virtual void frequency_control (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx, double range_min, double range_max) const =0
 Set min and max frequency for LevelZero device. More...
 
virtual void performance_factor_control (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx, double setting) const =0
 Set the performance factor for the LevelZero device. More...
 
virtual int ras_domain_count (unsigned int l0_device_idx, int l0_domain) const =0
 Get the number of LevelZero RAS domains of a certain type. More...
 
virtual double ras_reset_count_correctable (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
 Get the LevelZero count of number of correctable accelerator engine resets attempted by the driver. More...
 
virtual double ras_programming_errcount_correctable (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
 Get the LevelZero count of number of correctable hardware exceptions generated by the way workloads have programmed the hardware. More...
 
virtual double ras_driver_errcount_correctable (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
 Get the LevelZero count of number of low level driver communication correctable errors that have occurred. More...
 
virtual double ras_compute_errcount_correctable (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
 Get the LevelZero count of number of correctable errors that have occurred in the compute accelerator hardware. More...
 
virtual double ras_noncompute_errcount_correctable (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
 Get the LevelZero count of number of correctable errors that have occurred in the fixed-function accelerator hardware. More...
 
virtual double ras_cache_errcount_correctable (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
 Get the LevelZero count of number of correctable errors that have occurred in caches (L1/L3/register file/shared local memory/sampler) More...
 
virtual double ras_display_errcount_correctable (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
 Get the LevelZero count of number of correctable errors that have occurred in the display. More...
 
virtual double ras_reset_count_uncorrectable (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
 Get the LevelZero count of number of uncorrectable accelerator engine resets attempted by the driver. More...
 
virtual double ras_programming_errcount_uncorrectable (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
 Get the LevelZero count of number of uncorrectable hardware exceptions generated by the way workloads have programmed the hardware. More...
 
virtual double ras_driver_errcount_uncorrectable (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
 Get the LevelZero count of number of low level driver communication uncorrectable errors that have occurred. More...
 
virtual double ras_compute_errcount_uncorrectable (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
 Get the LevelZero count of number of uncorrectable errors that have occurred in the compute accelerator hardware. More...
 
virtual double ras_noncompute_errcount_uncorrectable (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
 Get the LevelZero count of number of uncorrectable errors that have occurred in the fixed-function accelerator hardware. More...
 
virtual double ras_cache_errcount_uncorrectable (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
 Get the LevelZero count of number of uncorrectable errors that have occurred in caches (L1/L3/register file/shared local memory/sampler) More...
 
virtual double ras_display_errcount_uncorrectable (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const =0
 Get the LevelZero count of number of uncorrectable errors that have occurred in the display. More...
 

Member Enumeration Documentation

◆ geopm_levelzero_domain_e

Enumerator
M_DOMAIN_ALL 
M_DOMAIN_COMPUTE 
M_DOMAIN_MEMORY 
M_DOMAIN_SIZE 

Constructor & Destructor Documentation

◆ LevelZero()

geopm::LevelZero::LevelZero ( )
default

◆ ~LevelZero()

virtual geopm::LevelZero::~LevelZero ( )
virtual default

Member Function Documentation

◆ active_time()

virtual uint64_t geopm::LevelZero::active_time ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
pure virtual

Get the LevelZero device active time in microseconds.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
[in]l0_domain_idxThe LevelZero index indicating a particular domain of the GPU.
Returns
GPU active time in microseconds.

Implemented in geopm::LevelZeroImp.

◆ active_time_pair()

virtual std::pair<uint64_t, uint64_t> geopm::LevelZero::active_time_pair ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
pure virtual

Get the LevelZero device active time and timestamp in microseconds.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
[in]l0_domain_idxThe LevelZero index indicating a particular domain of the GPU.
Returns
GPU active time and timestamp in microseconds.

Implemented in geopm::LevelZeroImp.

◆ active_time_timestamp()

virtual uint64_t geopm::LevelZero::active_time_timestamp ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
pure virtual

Get the cached LevelZero device timestamp for the active time value in microseconds.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
[in]l0_domain_idxThe LevelZero index indicating a particular domain of the GPU.
Returns
GPU device timestamp for the active time value in microseconds.

Implemented in geopm::LevelZeroImp.

◆ energy()

virtual uint64_t geopm::LevelZero::energy ( int  geopm_domain,
unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
pure virtual

Get the LevelZero device energy in microjoules.

Parameters
[in]geopm_domainThe GEOPM domain being targeted
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
[in]l0_domain_idxThe index indicating a particular Level Zero domain.
Returns
GPU energy in microjoules.

Implemented in geopm::LevelZeroImp.

◆ energy_pair()

virtual std::pair<uint64_t, uint64_t> geopm::LevelZero::energy_pair ( int  geopm_domain,
unsigned int  l0_device_idx,
int  l0_domain_idx 
) const
pure virtual

Get the LevelZero device energy and timestamp in microjoules and microseconds.

Parameters
[in]geopm_domainThe GEOPM domain being targeted
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domain_idxThe index indicating a particular Level Zero domain.
Returns
GPU energy in microjoules and timestamp in microseconds

Implemented in geopm::LevelZeroImp.

◆ energy_timestamp()

virtual uint64_t geopm::LevelZero::energy_timestamp ( int  geopm_domain,
unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
pure virtual

Get the LevelZero device energy cached timestamp in microseconds.

Parameters
[in]geopm_domainThe GEOPM domain being targeted
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
[in]l0_domain_idxThe index indicating a particular Level Zero domain.
Returns
Accelerator energy timestamp in microseconds

Implemented in geopm::LevelZeroImp.

◆ engine_domain_count()

virtual int geopm::LevelZero::engine_domain_count ( unsigned int  l0_device_idx,
int  l0_domain 
) const
pure virtual

Get the number of LevelZero engine domains.

Parameters
[in]l0_domainThe LevelZero domain type being targeted
Returns
GPU engine domain count.

Implemented in geopm::LevelZeroImp.

◆ frequency_control()

virtual void geopm::LevelZero::frequency_control ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx,
double  range_min,
double  range_max 
) const
pure virtual

Set min and max frequency for LevelZero device.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe domain type being targeted
[in]l0_domain_idxThe domain being targeted
[in]range_minMin target frequency in MHz.
[in]range_maxMax target frequency in MHz.

Implemented in geopm::LevelZeroImp.

◆ frequency_domain_count()

virtual int geopm::LevelZero::frequency_domain_count ( unsigned int  l0_device_idx,
int  l0_domain 
) const
pure virtual

Get the number of LevelZero frequency domains of a certain type.

Parameters
[in]l0_domainThe LevelZero domain type being targeted
Returns
GPU frequency domain count.

Implemented in geopm::LevelZeroImp.

◆ frequency_efficient()

virtual double geopm::LevelZero::frequency_efficient ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
pure virtual

Get the LevelZero device efficient frequency in MHz.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
[in]l0_domain_idxThe LevelZero index indicating a particular domain of the GPU.
Returns
GPU device efficient clock rate in MHz.

Implemented in geopm::LevelZeroImp.

◆ frequency_max()

virtual double geopm::LevelZero::frequency_max ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
pure virtual

Get the LevelZero device maximum frequency in MHz.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
[in]l0_domain_idxThe LevelZero index indicating a particular domain of the GPU.
Returns
GPU maximum frequency in MHz.

Implemented in geopm::LevelZeroImp.

◆ frequency_min()

virtual double geopm::LevelZero::frequency_min ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
pure virtual

Get the LevelZero device minimum frequency in MHz.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
[in]l0_domain_idxThe LevelZero index indicating a particular domain of the GPU.
Returns
GPU minimum frequency in MHz.

Implemented in geopm::LevelZeroImp.

◆ frequency_range()

virtual std::pair<double, double> geopm::LevelZero::frequency_range ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
pure virtual

Get the LevelZero device minimum and maximum frequency control range in MHz.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
[in]l0_domain_idxThe LevelZero index indicating a particular domain of the GPU.
Returns
GPU minimum and maximum frequency range in MHz.

Implemented in geopm::LevelZeroImp.

◆ frequency_status()

virtual double geopm::LevelZero::frequency_status ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
pure virtual

Get the LevelZero device actual frequency in MHz.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
[in]l0_domain_idxThe LevelZero index indicating a particular domain of the GPU.
Returns
GPU device core clock rate in MHz.

Implemented in geopm::LevelZeroImp.

◆ frequency_supported()

virtual std::vector<double> geopm::LevelZero::frequency_supported ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
pure virtual

Get the LevelZero device supported frequencies in MHz.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
[in]l0_domain_idxThe LevelZero index indicating a particular domain of the GPU.
Returns
GPU supported frequencies in MHz.

Implemented in geopm::LevelZeroImp.

◆ frequency_throttle_reasons()

virtual uint32_t geopm::LevelZero::frequency_throttle_reasons ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
pure virtual

Get the LevelZero device frequency throttle reasons.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
[in]l0_domain_idxThe LevelZero index indicating a particular domain of the GPU.
Returns
Frequency throttle reasons

Implemented in geopm::LevelZeroImp.

◆ num_gpu() [1/2]

virtual int geopm::LevelZero::num_gpu ( ) const
pure virtual

Number of GPUs on the platform.

Returns
Number of LevelZero GPUs.

Implemented in geopm::LevelZeroImp.

◆ num_gpu() [2/2]

virtual int geopm::LevelZero::num_gpu ( int  domain) const
pure virtual

Number of GPUs on the platform.

Parameters
[in]domainThe GEOPM domain type being targeted
Returns
Number of LevelZero GPUs or GPU chips.

Implemented in geopm::LevelZeroImp.

◆ performance_domain_count()

virtual int geopm::LevelZero::performance_domain_count ( unsigned int  l0_device_idx,
int  l0_domain 
) const
pure virtual

Get the number of LevelZero perf domains of a certain type.

Parameters
[in]l0_device_idxThe LevelZero device being targeted
[in]l0_domainThe LevelZero domain type being targeted
Returns
GPU perf domain count.

Implemented in geopm::LevelZeroImp.

◆ performance_factor()

virtual double geopm::LevelZero::performance_factor ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
pure virtual

Get the performance factor value of various LevelZero domains.

Parameters
[in]l0_device_idxThe LevelZero device being targeted
[in]l0_domainThe LevelZero domain type being targeted
[in]l0_domain_idxThe index indicating a particular Level Zero domain.
Returns
Subdevice performance factor value

Implemented in geopm::LevelZeroImp.

◆ performance_factor_control()

virtual void geopm::LevelZero::performance_factor_control ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx,
double  setting 
) const
pure virtual

Set the performance factor for the LevelZero device.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero accelerator.
[in]l0_domainThe level zero domain type being targeted
[in]l0_domain_idxThe level zero domain being targeted
[in]settingThe performance factor value, 0-100

Implemented in geopm::LevelZeroImp.

◆ power_domain_count()

virtual int geopm::LevelZero::power_domain_count ( int  geopm_domain,
unsigned int  l0_device_idx,
int  l0_domain 
) const
pure virtual

Get the number of LevelZero power domains of a certain type.

Parameters
[in]geopm_domainThe GEOPM domain being targeted
[in]l0_device_idxThe LevelZero device being targeted
[in]l0_domainThe LevelZero domain type being targeted
Returns
GPU power domain count.

Implemented in geopm::LevelZeroImp.

◆ power_limit_max()

virtual int32_t geopm::LevelZero::power_limit_max ( unsigned int  l0_device_idx) const
pure virtual

Get the LevelZero device maximum power limit in milliwatts.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
Returns
GPU maximum power limit in milliwatts

Implemented in geopm::LevelZeroImp.

◆ power_limit_min()

virtual int32_t geopm::LevelZero::power_limit_min ( unsigned int  l0_device_idx) const
pure virtual

Get the LevelZero device minimum power limit in milliwatts.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
Returns
GPU minimum power limit in milliwatts

Implemented in geopm::LevelZeroImp.

◆ power_limit_tdp()

virtual int32_t geopm::LevelZero::power_limit_tdp ( unsigned int  l0_device_idx) const
pure virtual

Get the LevelZero device default power limit in milliwatts.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
Returns
GPU default power limit in milliwatts

Implemented in geopm::LevelZeroImp.

◆ ras_cache_errcount_correctable()

virtual double geopm::LevelZero::ras_cache_errcount_correctable ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
pure virtual

Get the LevelZero count of number of correctable errors that have occurred in caches (L1/L3/register file/shared local memory/sampler)

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
Returns
Cache Error Count

Implemented in geopm::LevelZeroImp.

◆ ras_cache_errcount_uncorrectable()

virtual double geopm::LevelZero::ras_cache_errcount_uncorrectable ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
pure virtual

Get the LevelZero count of number of uncorrectable errors that have occurred in caches (L1/L3/register file/shared local memory/sampler)

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
Returns
Cache Error Count

Implemented in geopm::LevelZeroImp.

◆ ras_compute_errcount_correctable()

virtual double geopm::LevelZero::ras_compute_errcount_correctable ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
pure virtual

Get the LevelZero count of number of correctable errors that have occurred in the compute accelerator hardware.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
Returns
Compute Error Count

Implemented in geopm::LevelZeroImp.

◆ ras_compute_errcount_uncorrectable()

virtual double geopm::LevelZero::ras_compute_errcount_uncorrectable ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
pure virtual

Get the LevelZero count of number of uncorrectable errors that have occurred in the compute accelerator hardware.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
Returns
Compute Error Count

Implemented in geopm::LevelZeroImp.

◆ ras_display_errcount_correctable()

virtual double geopm::LevelZero::ras_display_errcount_correctable ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
pure virtual

Get the LevelZero count of number of correctable errors that have occurred in the display.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
Returns
Display Error Count

Implemented in geopm::LevelZeroImp.

◆ ras_display_errcount_uncorrectable()

virtual double geopm::LevelZero::ras_display_errcount_uncorrectable ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
pure virtual

Get the LevelZero count of number of uncorrectable errors that have occurred in the display.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
Returns
Display Error Count

Implemented in geopm::LevelZeroImp.

◆ ras_domain_count()

virtual int geopm::LevelZero::ras_domain_count ( unsigned int  l0_device_idx,
int  l0_domain 
) const
pure virtual

Get the number of LevelZero RAS domains of a certain type.

Parameters
[in]l0_domainThe LevelZero domain type being targeted
Returns
RAS domain count.

Implemented in geopm::LevelZeroImp.

◆ ras_driver_errcount_correctable()

virtual double geopm::LevelZero::ras_driver_errcount_correctable ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
pure virtual

Get the LevelZero count of number of low level driver communication correctable errors that have occurred.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
Returns
Driver Error Count

Implemented in geopm::LevelZeroImp.

◆ ras_driver_errcount_uncorrectable()

virtual double geopm::LevelZero::ras_driver_errcount_uncorrectable ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
pure virtual

Get the LevelZero count of number of low level driver communication uncorrectable errors that have occurred.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
Returns
Driver Error Count

Implemented in geopm::LevelZeroImp.

◆ ras_noncompute_errcount_correctable()

virtual double geopm::LevelZero::ras_noncompute_errcount_correctable ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
pure virtual

Get the LevelZero count of number of correctable errors that have occurred in the fixed-function accelerator hardware.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
Returns
Non Compute Error Count

Implemented in geopm::LevelZeroImp.

◆ ras_noncompute_errcount_uncorrectable()

virtual double geopm::LevelZero::ras_noncompute_errcount_uncorrectable ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
pure virtual

Get the LevelZero count of number of uncorrectable errors that have occurred in the fixed-function accelerator hardware.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
Returns
Non Compute Error Count

Implemented in geopm::LevelZeroImp.

◆ ras_programming_errcount_correctable()

virtual double geopm::LevelZero::ras_programming_errcount_correctable ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
pure virtual

Get the LevelZero count of number of correctable hardware exceptions generated by the way workloads have programmed the hardware.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
Returns
Programming Error Count

Implemented in geopm::LevelZeroImp.

◆ ras_programming_errcount_uncorrectable()

virtual double geopm::LevelZero::ras_programming_errcount_uncorrectable ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
pure virtual

Get the LevelZero count of number of uncorrectable hardware exceptions generated by the way workloads have programmed the hardware.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
Returns
Programming Error Count

Implemented in geopm::LevelZeroImp.

◆ ras_reset_count_correctable()

virtual double geopm::LevelZero::ras_reset_count_correctable ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
pure virtual

Get the LevelZero count of number of correctable accelerator engine resets attempted by the driver.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
Returns
Accelerator Engine Reset count

Implemented in geopm::LevelZeroImp.

◆ ras_reset_count_uncorrectable()

virtual double geopm::LevelZero::ras_reset_count_uncorrectable ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
pure virtual

Get the LevelZero count of number of uncorrectable accelerator engine resets attempted by the driver.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
Returns
Accelerator Engine Reset count

Implemented in geopm::LevelZeroImp.

◆ temperature_domain_count()

virtual int geopm::LevelZero::temperature_domain_count ( unsigned int  l0_device_idx,
int  l0_domain 
) const
pure virtual

Get the number of LevelZero temperature domains.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
Returns
GPU temperature domain count.

Implemented in geopm::LevelZeroImp.

◆ temperature_max()

virtual double geopm::LevelZero::temperature_max ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
pure virtual

Get the LevelZero device maximum temperature in Celsius.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
[in]l0_domain_idxThe LevelZero index indicating a particular domain of the GPU.
Returns
Domain maximum temperature in Celsius.

Implemented in geopm::LevelZeroImp.


The documentation for this class was generated from the following file: