geopm  3.1.1.dev296+g5916b956
GEOPM - Global Extensible Open Power Manager
NVMLIOGroup.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2015 - 2024 Intel Corporation
3  * SPDX-License-Identifier: BSD-3-Clause
4  */
5 
6 #ifndef NVMLIOGROUP_HPP_INCLUDE
7 #define NVMLIOGROUP_HPP_INCLUDE
8 
9 #include <map>
10 #include <vector>
11 #include <string>
12 #include <memory>
13 
14 #include "geopm/IOGroup.hpp"
15 
16 namespace geopm
17 {
18  class PlatformTopo;
19  class NVMLDevicePool;
20  class SaveControl;
21 
23  class NVMLIOGroup : public IOGroup
24  {
25  public:
26  NVMLIOGroup();
28  const NVMLDevicePool &device_pool,
29  std::shared_ptr<SaveControl> save_control);
30  virtual ~NVMLIOGroup() = default;
31  std::set<std::string> signal_names(void) const override;
32  std::set<std::string> control_names(void) const override;
33  bool is_valid_signal(const std::string &signal_name) const override;
34  bool is_valid_control(const std::string &control_name) const override;
35  int signal_domain_type(const std::string &signal_name) const override;
36  int control_domain_type(const std::string &control_name) const override;
37  int push_signal(const std::string &signal_name, int domain_type, int domain_idx) override;
38  int push_control(const std::string &control_name, int domain_type, int domain_idx) override;
39  void read_batch(void) override;
40  void write_batch(void) override;
41  double sample(int batch_idx) override;
42  void adjust(int batch_idx, double setting) override;
43  double read_signal(const std::string &signal_name, int domain_type, int domain_idx) override;
44  void write_control(const std::string &control_name, int domain_type, int domain_idx, double setting) override;
45  void save_control(void) override;
46  void restore_control(void) override;
47  std::function<double(const std::vector<double> &)> agg_function(const std::string &signal_name) const override;
48  std::function<std::string(double)> format_function(const std::string &signal_name) const override;
49  std::string signal_description(const std::string &signal_name) const override;
50  std::string control_description(const std::string &control_name) const override;
51  int signal_behavior(const std::string &signal_name) const override;
52  void save_control(const std::string &save_path) override;
53  void restore_control(const std::string &save_path) override;
54  std::string name(void) const override;
55  static std::string plugin_name(void);
56  static std::unique_ptr<geopm::IOGroup> make_plugin(void);
57  private:
58  void register_signal_alias(const std::string &alias_name, const std::string &signal_name);
59  void register_control_alias(const std::string &alias_name, const std::string &control_name);
60 
61  std::map<pid_t, double> gpu_process_map(void) const;
62  double cpu_gpu_affinity(int cpu_idx, std::map<pid_t, double> process_map) const;
63 
64  static const std::string M_PLUGIN_NAME;
65  static const std::string M_NAME_PREFIX;
66  const PlatformTopo &m_platform_topo;
67  const NVMLDevicePool &m_nvml_device_pool;
68  bool m_is_batch_read;
69  std::vector<double> m_frequency_max_control_request;
70  std::vector<double> m_frequency_min_control_request;
71  std::vector<double> m_initial_power_limit;
72  std::vector<std::vector<unsigned int> > m_supported_freq;
73  std::vector<double> m_frequency_step;
74 
75  struct signal_s
76  {
77  double m_value;
78  bool m_do_read;
79  };
80 
81  struct control_s
82  {
83  double m_setting;
84  bool m_is_adjusted;
85  };
86 
87  struct signal_info {
88  std::string m_description;
89  std::vector<std::shared_ptr<signal_s> > signals;
90  int domain;
91  std::function<double(const std::vector<double> &)> agg_function;
92  int behavior;
93  std::function<std::string(double)> format_function;
94  };
95 
96  struct control_info {
97  std::string m_description;
98  std::vector<std::shared_ptr<control_s> > controls;
99  int domain;
100  std::function<double(const std::vector<double> &)> agg_function;
101  std::function<std::string(double)> format_function;
102  };
103 
104  std::map<std::string, signal_info> m_signal_available;
105  std::map<std::string, control_info> m_control_available;
106  std::vector<std::shared_ptr<signal_s> > m_signal_pushed;
107  std::vector<std::shared_ptr<control_s> > m_control_pushed;
108 
109  std::shared_ptr<SaveControl> m_mock_save_ctl;
110  };
111 }
112 #endif
Definition: IOGroup.hpp:21
Definition: NVMLDevicePool.hpp:19
IOGroup that provides signals and controls for NVML GPUs.
Definition: NVMLIOGroup.hpp:24
NVMLIOGroup()
Definition: NVMLIOGroup.cpp:33
bool is_valid_signal(const std::string &signal_name) const override
Test if signal_name refers to a signal supported by the group.
Definition: NVMLIOGroup.cpp:361
void adjust(int batch_idx, double setting) override
Adjust a setting for a particular control that was previously pushed with push_control()....
Definition: NVMLIOGroup.cpp:609
std::string name(void) const override
Get the IOGroup name.
Definition: NVMLIOGroup.cpp:893
double read_signal(const std::string &signal_name, int domain_type, int domain_idx) override
Read from platform and interpret into SI units a signal given its name and domain....
Definition: NVMLIOGroup.cpp:621
virtual ~NVMLIOGroup()=default
std::function< double(const std::vector< double > &)> agg_function(const std::string &signal_name) const override
Return a function that should be used when aggregating the given signal.
Definition: NVMLIOGroup.cpp:804
int push_signal(const std::string &signal_name, int domain_type, int domain_idx) override
Add a signal to the list of signals that is read by read_batch() and sampled by sample().
Definition: NVMLIOGroup.cpp:395
int push_control(const std::string &control_name, int domain_type, int domain_idx) override
Add a control to the list of controls that is written by write_batch() and configured with adjust().
Definition: NVMLIOGroup.cpp:439
std::function< std::string(double)> format_function(const std::string &signal_name) const override
Returns a function that can be used to convert a signal of the given name into a printable string....
Definition: NVMLIOGroup.cpp:816
bool is_valid_control(const std::string &control_name) const override
Test if control_name refers to a control supported by the group.
Definition: NVMLIOGroup.cpp:367
void restore_control(void) override
Restore all controls to the values recorded in the previous call to save_control().
Definition: NVMLIOGroup.cpp:781
static std::string plugin_name(void)
Definition: NVMLIOGroup.cpp:899
std::set< std::string > control_names(void) const override
Returns the names of all controls provided by the IOGroup.
Definition: NVMLIOGroup.cpp:351
std::string signal_description(const std::string &signal_name) const override
Returns a description of the signal. This string can be used by tools to generate help text for users...
Definition: NVMLIOGroup.cpp:828
int control_domain_type(const std::string &control_name) const override
Query the domain for a named control.
Definition: NVMLIOGroup.cpp:384
void save_control(void) override
Save the state of all controls so that any subsequent changes made through the IOGroup can be undone ...
Definition: NVMLIOGroup.cpp:771
std::string control_description(const std::string &control_name) const override
Returns a description of the control. This string can be used by tools to generate help text for user...
Definition: NVMLIOGroup.cpp:839
std::set< std::string > signal_names(void) const override
Returns the names of all signals provided by the IOGroup.
Definition: NVMLIOGroup.cpp:341
static std::unique_ptr< geopm::IOGroup > make_plugin(void)
Definition: NVMLIOGroup.cpp:905
void write_batch(void) override
Write all of the pushed controls so that values previously given to adjust() are written to the platf...
Definition: NVMLIOGroup.cpp:558
int signal_behavior(const std::string &signal_name) const override
Returns a hint about how a signal will change as a function of time.
Definition: NVMLIOGroup.cpp:850
void read_batch(void) override
Read all pushed signals from the platform so that the next call to sample() will reflect the updated ...
Definition: NVMLIOGroup.cpp:529
double sample(int batch_idx) override
Retrieve signal value from data read by last call to read_batch() for a particular signal previously ...
Definition: NVMLIOGroup.cpp:593
void write_control(const std::string &control_name, int domain_type, int domain_idx, double setting) override
Interpret the setting and write setting to the platform. Does not modify the values stored by calling...
Definition: NVMLIOGroup.cpp:716
int signal_domain_type(const std::string &signal_name) const override
Query the domain for a named signal.
Definition: NVMLIOGroup.cpp:373
Definition: PlatformTopo.hpp:28
Definition: Agg.cpp:20
const PlatformTopo & platform_topo(void)
Definition: PlatformTopo.cpp:81