geopm  3.1.1.dev214+gba4f9f6d
GEOPM - Global Extensible Open Power Manager
Classes | Public Member Functions | List of all members
geopm::LevelZeroImp Class Reference

#include <LevelZeroImp.hpp>

Inheritance diagram for geopm::LevelZeroImp:
Inheritance graph
[legend]
Collaboration diagram for geopm::LevelZeroImp:
Collaboration graph
[legend]

Public Member Functions

 LevelZeroImp ()
 
virtual ~LevelZeroImp ()=default
 
int num_gpu (void) const override
 Number of GPUs on the platform. More...
 
int num_gpu (int domain) const override
 Number of GPUs or GPU chips on the platform for the given GEOPM domain type. More...
 
int frequency_domain_count (unsigned int l0_device_idx, int domain) const override
 Get the number of LevelZero frequency domains of a certain type. More...
 
double frequency_status (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const override
 Get the LevelZero device actual frequency in MHz. More...
 
double frequency_efficient (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const override
 Get the LevelZero device efficient frequency in MHz. More...
 
double frequency_min (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const override
 Get the LevelZero device minimum frequency in MHz. More...
 
double frequency_max (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const override
 Get the LevelZero device maximum frequency in MHz. More...
 
std::vector< double > frequency_supported (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const override
 Get the LevelZero device supported frequencies in MHz. More...
 
uint32_t frequency_throttle_reasons (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const override
 Get the LevelZero device frequency throttle reasons. More...
 
std::pair< double, double > frequency_range (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const override
 Get the LevelZero device minimum and maximum frequency control range in MHz. More...
 
int temperature_domain_count (unsigned int l0_device_idx, int l0_domain) const override
 Get the number of LevelZero temperature domains. More...
 
double temperature_max (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const override
 Get the LevelZero device maximum temperature in Celsius. More...
 
int engine_domain_count (unsigned int l0_device_idx, int domain) const override
 Get the number of LevelZero engine domains. More...
 
std::pair< uint64_t, uint64_t > active_time_pair (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const override
 Get the LevelZero device active time and timestamp in microseconds. More...
 
uint64_t active_time (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const override
 Get the LevelZero device active time in microseconds. More...
 
uint64_t active_time_timestamp (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const override
 Get the cached LevelZero device timestamp for the active time value in microseconds. More...
 
int power_domain_count (int geopm_domain, unsigned int l0_device_idx, int l0_domain) const override
 Get the number of LevelZero power domains of a certain type. More...
 
std::pair< uint64_t, uint64_t > energy_pair (int geopm_domain, unsigned int l0_device_idx, int l0_domain_idx) const override
 Get the LevelZero device energy and timestamp in microjoules and microseconds. More...
 
uint64_t energy (int geopm_domain, unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const override
 Get the LevelZero device energy in microjoules. More...
 
uint64_t energy_timestamp (int geopm_domain, unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const override
 Get the LevelZero device energy cached timestamp in microseconds. More...
 
int performance_domain_count (unsigned int l0_device_idx, int l0_domain) const override
 Get the number of LevelZero perf domains of a certain type. More...
 
double performance_factor (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const override
 Get the performance factor value of various LevelZero domains. More...
 
int32_t power_limit_tdp (unsigned int l0_device_idx) const override
 Get the LevelZero device default power limit in milliwatts. More...
 
int32_t power_limit_min (unsigned int l0_device_idx) const override
 Get the LevelZero device minimum power limit in milliwatts. More...
 
int32_t power_limit_max (unsigned int l0_device_idx) const override
 Get the LevelZero device maximum power limit in milliwatts. More...
 
void frequency_control (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx, double range_min, double range_max) const override
 Set min and max frequency for LevelZero device. More...
 
void performance_factor_control (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx, double setting) const override
 Set the performance factor for the LevelZero device. More...
 
int ras_domain_count (unsigned int l0_device_idx, int l0_domain) const override
 Get the number of LevelZero RAS domains of a certain type. More...
 
double ras_reset_count_correctable (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const override
 Get the LevelZero count of number of correctable accelerator engine resets attempted by the driver. More...
 
double ras_programming_errcount_correctable (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const override
 Get the LevelZero count of number of correctable hardware exceptions generated by the way workloads have programmed the hardware. More...
 
double ras_driver_errcount_correctable (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const override
 Get the LevelZero count of correctable low-level driver communication errors that have occurred. More...
 
double ras_compute_errcount_correctable (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const override
 Get the LevelZero count of number of correctable errors that have occurred in the compute accelerator hardware. More...
 
double ras_noncompute_errcount_correctable (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const override
 Get the LevelZero count of number of correctable errors that have occurred in the fixed-function accelerator hardware. More...
 
double ras_cache_errcount_correctable (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const override
 Get the LevelZero count of number of correctable errors that have occurred in caches (L1/L3/register file/shared local memory/sampler) More...
 
double ras_display_errcount_correctable (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const override
 Get the LevelZero count of number of correctable errors that have occurred in the display. More...
 
double ras_reset_count_uncorrectable (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const override
 Get the LevelZero count of number of uncorrectable accelerator engine resets attempted by the driver. More...
 
double ras_programming_errcount_uncorrectable (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const override
 Get the LevelZero count of number of uncorrectable hardware exceptions generated by the way workloads have programmed the hardware. More...
 
double ras_driver_errcount_uncorrectable (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const override
 Get the LevelZero count of uncorrectable low-level driver communication errors that have occurred. More...
 
double ras_compute_errcount_uncorrectable (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const override
 Get the LevelZero count of number of uncorrectable errors that have occurred in the compute accelerator hardware. More...
 
double ras_noncompute_errcount_uncorrectable (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const override
 Get the LevelZero count of number of uncorrectable errors that have occurred in the fixed-function accelerator hardware. More...
 
double ras_cache_errcount_uncorrectable (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const override
 Get the LevelZero count of number of uncorrectable errors that have occurred in caches (L1/L3/register file/shared local memory/sampler) More...
 
double ras_display_errcount_uncorrectable (unsigned int l0_device_idx, int l0_domain, int l0_domain_idx) const override
 Get the LevelZero count of number of uncorrectable errors that have occurred in the display. More...
 
- Public Member Functions inherited from geopm::LevelZero
 LevelZero ()=default
 
virtual ~LevelZero ()=default
 

Additional Inherited Members

- Public Types inherited from geopm::LevelZero
enum  geopm_levelzero_domain_e { M_DOMAIN_ALL = 0 , M_DOMAIN_COMPUTE = 1 , M_DOMAIN_MEMORY = 2 , M_DOMAIN_SIZE = 3 }
 

Constructor & Destructor Documentation

◆ LevelZeroImp()

geopm::LevelZeroImp::LevelZeroImp ( )

◆ ~LevelZeroImp()

virtual geopm::LevelZeroImp::~LevelZeroImp ( )
virtualdefault

Member Function Documentation

◆ active_time()

uint64_t geopm::LevelZeroImp::active_time ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
overridevirtual

Get the LevelZero device active time in microseconds.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
[in]l0_domain_idxThe LevelZero index indicating a particular domain of the GPU.
Returns
GPU active time in microseconds.

Implements geopm::LevelZero.

◆ active_time_pair()

std::pair< uint64_t, uint64_t > geopm::LevelZeroImp::active_time_pair ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
overridevirtual

Get the LevelZero device active time and timestamp in microseconds.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
[in]l0_domain_idxThe LevelZero index indicating a particular domain of the GPU.
Returns
GPU active time and timestamp in microseconds.

Implements geopm::LevelZero.

◆ active_time_timestamp()

uint64_t geopm::LevelZeroImp::active_time_timestamp ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
overridevirtual

Get the cached LevelZero device timestamp for the active time value in microseconds.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
[in]l0_domain_idxThe LevelZero index indicating a particular domain of the GPU.
Returns
GPU device timestamp for the active time value in microseconds.

Implements geopm::LevelZero.

◆ energy()

uint64_t geopm::LevelZeroImp::energy ( int  geopm_domain,
unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
overridevirtual

Get the LevelZero device energy in microjoules.

Parameters
[in]geopm_domainThe GEOPM domain being targeted
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
[in]l0_domain_idxThe index indicating a particular Level Zero domain.
Returns
GPU energy in microjoules.

Implements geopm::LevelZero.

◆ energy_pair()

std::pair< uint64_t, uint64_t > geopm::LevelZeroImp::energy_pair ( int  geopm_domain,
unsigned int  l0_device_idx,
int  l0_domain_idx 
) const
overridevirtual

Get the LevelZero device energy and timestamp in microjoules and microseconds.

Parameters
[in]geopm_domainThe GEOPM domain being targeted
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domain_idxThe index indicating a particular Level Zero domain.
Returns
GPU energy in microjoules and timestamp in microseconds

Implements geopm::LevelZero.

◆ energy_timestamp()

uint64_t geopm::LevelZeroImp::energy_timestamp ( int  geopm_domain,
unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
overridevirtual

Get the LevelZero device energy cached timestamp in microseconds.

Parameters
[in]geopm_domainThe GEOPM domain being targeted
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
[in]l0_domain_idxThe index indicating a particular Level Zero domain.
Returns
GPU energy timestamp in microseconds.

Implements geopm::LevelZero.

◆ engine_domain_count()

int geopm::LevelZeroImp::engine_domain_count ( unsigned int  l0_device_idx,
int  l0_domain 
) const
overridevirtual

Get the number of LevelZero engine domains.

Parameters
[in]l0_device_idxThe LevelZero device being targeted
[in]l0_domainThe LevelZero domain type being targeted
Returns
GPU engine domain count.

Implements geopm::LevelZero.

◆ frequency_control()

void geopm::LevelZeroImp::frequency_control ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx,
double  range_min,
double  range_max 
) const
overridevirtual

Set min and max frequency for LevelZero device.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe domain type being targeted
[in]l0_domain_idxThe domain being targeted
[in]range_minMin target frequency in MHz.
[in]range_maxMax target frequency in MHz.

Implements geopm::LevelZero.

◆ frequency_domain_count()

int geopm::LevelZeroImp::frequency_domain_count ( unsigned int  l0_device_idx,
int  l0_domain 
) const
overridevirtual

Get the number of LevelZero frequency domains of a certain type.

Parameters
[in]l0_device_idxThe LevelZero device being targeted
[in]l0_domainThe LevelZero domain type being targeted
Returns
GPU frequency domain count.

Implements geopm::LevelZero.

◆ frequency_efficient()

double geopm::LevelZeroImp::frequency_efficient ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
overridevirtual

Get the LevelZero device efficient frequency in MHz.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
[in]l0_domain_idxThe LevelZero index indicating a particular domain of the GPU.
Returns
GPU device efficient clock rate in MHz.

Implements geopm::LevelZero.

◆ frequency_max()

double geopm::LevelZeroImp::frequency_max ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
overridevirtual

Get the LevelZero device maximum frequency in MHz.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
[in]l0_domain_idxThe LevelZero index indicating a particular domain of the GPU.
Returns
GPU maximum frequency in MHz.

Implements geopm::LevelZero.

◆ frequency_min()

double geopm::LevelZeroImp::frequency_min ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
overridevirtual

Get the LevelZero device minimum frequency in MHz.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
[in]l0_domain_idxThe LevelZero index indicating a particular domain of the GPU.
Returns
GPU minimum frequency in MHz.

Implements geopm::LevelZero.

◆ frequency_range()

std::pair< double, double > geopm::LevelZeroImp::frequency_range ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
overridevirtual

Get the LevelZero device minimum and maximum frequency control range in MHz.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
[in]l0_domain_idxThe LevelZero index indicating a particular domain of the GPU.
Returns
GPU minimum and maximum frequency range in MHz.

Implements geopm::LevelZero.

◆ frequency_status()

double geopm::LevelZeroImp::frequency_status ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
overridevirtual

Get the LevelZero device actual frequency in MHz.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
[in]l0_domain_idxThe LevelZero index indicating a particular domain of the GPU.
Returns
GPU device core clock rate in MHz.

Implements geopm::LevelZero.

◆ frequency_supported()

std::vector< double > geopm::LevelZeroImp::frequency_supported ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
overridevirtual

Get the LevelZero device supported frequencies in MHz.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
[in]l0_domain_idxThe LevelZero index indicating a particular domain of the GPU.
Returns
GPU supported frequencies in MHz.

Implements geopm::LevelZero.

◆ frequency_throttle_reasons()

uint32_t geopm::LevelZeroImp::frequency_throttle_reasons ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
overridevirtual

Get the LevelZero device frequency throttle reasons.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
[in]l0_domain_idxThe LevelZero index indicating a particular domain of the GPU.
Returns
Frequency throttle reasons

Implements geopm::LevelZero.

◆ num_gpu() [1/2]

int geopm::LevelZeroImp::num_gpu ( int  domain) const
overridevirtual

Number of GPUs or GPU chips on the platform for the given GEOPM domain type.

Parameters
[in]domainThe GEOPM domain type being targeted
Returns
Number of LevelZero GPUs or GPU chips.

Implements geopm::LevelZero.

◆ num_gpu() [2/2]

int geopm::LevelZeroImp::num_gpu ( void  ) const
overridevirtual

Number of GPUs on the platform.

Returns
Number of LevelZero GPUs.

Implements geopm::LevelZero.

◆ performance_domain_count()

int geopm::LevelZeroImp::performance_domain_count ( unsigned int  l0_device_idx,
int  l0_domain 
) const
overridevirtual

Get the number of LevelZero perf domains of a certain type.

Parameters
[in]l0_device_idxThe LevelZero device being targeted
[in]l0_domainThe LevelZero domain type being targeted
Returns
GPU perf domain count.

Implements geopm::LevelZero.

◆ performance_factor()

double geopm::LevelZeroImp::performance_factor ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
overridevirtual

Get the performance factor value of various LevelZero domains.

Parameters
[in]l0_device_idxThe LevelZero device being targeted
[in]l0_domainThe LevelZero domain type being targeted
[in]l0_domain_idxThe index indicating a particular Level Zero domain.
Returns
Subdevice performance factor value

Implements geopm::LevelZero.

◆ performance_factor_control()

void geopm::LevelZeroImp::performance_factor_control ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx,
double  setting 
) const
overridevirtual

Set the performance factor for the LevelZero device.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe level zero domain type being targeted
[in]l0_domain_idxThe level zero domain being targeted
[in]settingThe performance factor value, 0-100

Implements geopm::LevelZero.

◆ power_domain_count()

int geopm::LevelZeroImp::power_domain_count ( int  geopm_domain,
unsigned int  l0_device_idx,
int  l0_domain 
) const
overridevirtual

Get the number of LevelZero power domains of a certain type.

Parameters
[in]geopm_domainThe GEOPM domain being targeted
[in]l0_device_idxThe LevelZero device being targeted
[in]l0_domainThe LevelZero domain type being targeted
Returns
GPU power domain count.

Implements geopm::LevelZero.

◆ power_limit_max()

int32_t geopm::LevelZeroImp::power_limit_max ( unsigned int  l0_device_idx) const
overridevirtual

Get the LevelZero device maximum power limit in milliwatts.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
Returns
GPU maximum power limit in milliwatts

Implements geopm::LevelZero.

◆ power_limit_min()

int32_t geopm::LevelZeroImp::power_limit_min ( unsigned int  l0_device_idx) const
overridevirtual

Get the LevelZero device minimum power limit in milliwatts.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
Returns
GPU minimum power limit in milliwatts

Implements geopm::LevelZero.

◆ power_limit_tdp()

int32_t geopm::LevelZeroImp::power_limit_tdp ( unsigned int  l0_device_idx) const
overridevirtual

Get the LevelZero device default power limit in milliwatts.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
Returns
GPU default power limit in milliwatts

Implements geopm::LevelZero.

◆ ras_cache_errcount_correctable()

double geopm::LevelZeroImp::ras_cache_errcount_correctable ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
overridevirtual

Get the LevelZero count of number of correctable errors that have occurred in caches (L1/L3/register file/shared local memory/sampler)

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
Returns
Cache Error Count

Implements geopm::LevelZero.

◆ ras_cache_errcount_uncorrectable()

double geopm::LevelZeroImp::ras_cache_errcount_uncorrectable ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
overridevirtual

Get the LevelZero count of number of uncorrectable errors that have occurred in caches (L1/L3/register file/shared local memory/sampler)

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
Returns
Cache Error Count

Implements geopm::LevelZero.

◆ ras_compute_errcount_correctable()

double geopm::LevelZeroImp::ras_compute_errcount_correctable ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
overridevirtual

Get the LevelZero count of number of correctable errors that have occurred in the compute accelerator hardware.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
Returns
Compute Error Count

Implements geopm::LevelZero.

◆ ras_compute_errcount_uncorrectable()

double geopm::LevelZeroImp::ras_compute_errcount_uncorrectable ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
overridevirtual

Get the LevelZero count of number of uncorrectable errors that have occurred in the compute accelerator hardware.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
Returns
Compute Error Count

Implements geopm::LevelZero.

◆ ras_display_errcount_correctable()

double geopm::LevelZeroImp::ras_display_errcount_correctable ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
overridevirtual

Get the LevelZero count of number of correctable errors that have occurred in the display.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
Returns
Display Error Count

Implements geopm::LevelZero.

◆ ras_display_errcount_uncorrectable()

double geopm::LevelZeroImp::ras_display_errcount_uncorrectable ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
overridevirtual

Get the LevelZero count of number of uncorrectable errors that have occurred in the display.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
Returns
Display Error Count

Implements geopm::LevelZero.

◆ ras_domain_count()

int geopm::LevelZeroImp::ras_domain_count ( unsigned int  l0_device_idx,
int  l0_domain 
) const
overridevirtual

Get the number of LevelZero RAS domains of a certain type.

Parameters
[in]l0_device_idxThe LevelZero device being targeted
[in]l0_domainThe LevelZero domain type being targeted
Returns
RAS domain count.

Implements geopm::LevelZero.

◆ ras_driver_errcount_correctable()

double geopm::LevelZeroImp::ras_driver_errcount_correctable ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
overridevirtual

Get the LevelZero count of correctable low-level driver communication errors that have occurred.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
Returns
Driver Error Count

Implements geopm::LevelZero.

◆ ras_driver_errcount_uncorrectable()

double geopm::LevelZeroImp::ras_driver_errcount_uncorrectable ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
overridevirtual

Get the LevelZero count of uncorrectable low-level driver communication errors that have occurred.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
Returns
Driver Error Count

Implements geopm::LevelZero.

◆ ras_noncompute_errcount_correctable()

double geopm::LevelZeroImp::ras_noncompute_errcount_correctable ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
overridevirtual

Get the LevelZero count of number of correctable errors that have occurred in the fixed-function accelerator hardware.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
Returns
Non Compute Error Count

Implements geopm::LevelZero.

◆ ras_noncompute_errcount_uncorrectable()

double geopm::LevelZeroImp::ras_noncompute_errcount_uncorrectable ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
overridevirtual

Get the LevelZero count of number of uncorrectable errors that have occurred in the fixed-function accelerator hardware.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
Returns
Non Compute Error Count

Implements geopm::LevelZero.

◆ ras_programming_errcount_correctable()

double geopm::LevelZeroImp::ras_programming_errcount_correctable ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
overridevirtual

Get the LevelZero count of number of correctable hardware exceptions generated by the way workloads have programmed the hardware.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
Returns
Programming Error Count

Implements geopm::LevelZero.

◆ ras_programming_errcount_uncorrectable()

double geopm::LevelZeroImp::ras_programming_errcount_uncorrectable ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
overridevirtual

Get the LevelZero count of number of uncorrectable hardware exceptions generated by the way workloads have programmed the hardware.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
Returns
Programming Error Count

Implements geopm::LevelZero.

◆ ras_reset_count_correctable()

double geopm::LevelZeroImp::ras_reset_count_correctable ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
overridevirtual

Get the LevelZero count of number of correctable accelerator engine resets attempted by the driver.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
Returns
Accelerator Engine Reset count

Implements geopm::LevelZero.

◆ ras_reset_count_uncorrectable()

double geopm::LevelZeroImp::ras_reset_count_uncorrectable ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
overridevirtual

Get the LevelZero count of number of uncorrectable accelerator engine resets attempted by the driver.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
Returns
Accelerator Engine Reset count

Implements geopm::LevelZero.

◆ temperature_domain_count()

int geopm::LevelZeroImp::temperature_domain_count ( unsigned int  l0_device_idx,
int  l0_domain 
) const
overridevirtual

Get the number of LevelZero temperature domains.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
Returns
GPU temperature domain count.

Implements geopm::LevelZero.

◆ temperature_max()

double geopm::LevelZeroImp::temperature_max ( unsigned int  l0_device_idx,
int  l0_domain,
int  l0_domain_idx 
) const
overridevirtual

Get the LevelZero device maximum temperature in Celsius.

Parameters
[in]l0_device_idxThe index indicating a particular Level Zero GPU.
[in]l0_domainThe LevelZero domain type being targeted
[in]l0_domain_idxThe LevelZero index indicating a particular domain of the GPU.
Returns
Domain maximum temperature in Celsius.

Implements geopm::LevelZero.


The documentation for this class was generated from the following files: