geopm  3.1.1.dev296+g5916b956
GEOPM - Global Extensible Open Power Manager
GPUActivityAgent.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2015 - 2024 Intel Corporation
3  * SPDX-License-Identifier: BSD-3-Clause
4  */
5 
6 #ifndef GPUACTIVITYAGENT_HPP_INCLUDE
7 #define GPUACTIVITYAGENT_HPP_INCLUDE
8 
9 #include <functional>
10 #include <vector>
11 
12 #include "geopm/Agent.hpp"
13 
14 namespace geopm
15 {
16  class PlatformTopo;
17  class PlatformIO;
18  class Waiter;
19 
// Agent implementation declared for GPU-related control.  Its state holds
// batch handles for GPU core activity, GPU utilization, GPU energy and time
// signals, plus GPU minimum/maximum frequency controls.  The policy has a
// single entry (M_POLICY_GPU_PHI) and no samples are defined (M_NUM_SAMPLE
// is the first enumerator of m_sample_e).
// NOTE(review): the name suggests frequency is steered from observed GPU
// activity -- confirm against GPUActivityAgent.cpp.
21  class GPUActivityAgent : public Agent
22  {
23  public:
    // Construct the agent from the platform I/O interface, the platform
    // topology and a shared Waiter.
    // NOTE(review): presumably stored in m_platform_io, m_platform_topo and
    // m_waiter respectively -- confirm in the .cpp.
25  GPUActivityAgent(PlatformIO &plat_io, const PlatformTopo &topo,
26  std::shared_ptr<Waiter> waiter);
27  virtual ~GPUActivityAgent() = default;
    // Set the level where this Agent is active and push signals/controls
    // for that level.
28  void init(int level, const std::vector<int> &fan_in, bool is_level_root) override;
    // Called by Controller to validate incoming policy values and configure
    // requested defaults.  in_policy is non-const, so it may be modified.
29  void validate_policy(std::vector<double> &in_policy) const override;
    // Called by Controller to split policy for children at the next level
    // down the tree.
30  void split_policy(const std::vector<double> &in_policy,
31  std::vector<std::vector<double> > &out_policy) override;
    // Called by Controller to determine if new policy values should be sent
    // down the tree.
32  bool do_send_policy(void) const override;
    // Aggregate samples from children for the next level up the tree.
33  void aggregate_sample(const std::vector<std::vector<double> > &in_sample,
34  std::vector<double> &out_sample) override;
    // NOTE(review): presumably tells the Controller whether samples should
    // be sent up the tree -- confirm against Agent.hpp.
35  bool do_send_sample(void) const override;
    // Adjust the platform settings based on the policy from above.
36  void adjust_platform(const std::vector<double> &in_policy) override;
    // Called by the Controller to decide whether to call write_batch() to
    // update platform controls.
37  bool do_write_batch(void) const override;
    // Read signals from the platform and interpret/aggregate them to create
    // a sample in out_sample.
38  void sample_platform(std::vector<double> &out_sample) override;
    // Called by Controller to wait for the sample period to elapse.
39  void wait(void) override;
    // Custom fields that will be added to the report header when this agent
    // is used.
40  std::vector<std::pair<std::string, std::string> > report_header(void) const override;
    // Custom fields for the host section of the report.
41  std::vector<std::pair<std::string, std::string> > report_host(void) const override;
    // Custom fields for each region in the report, keyed by a 64-bit value.
    // NOTE(review): key is presumably the region hash -- confirm.
42  std::map<uint64_t, std::vector<std::pair<std::string, std::string> > > report_region(void) const override;
    // Column headers to be added to the trace.
43  std::vector<std::string> trace_names(void) const override;
    // Called by Controller to get latest values to be added to the trace.
44  void trace_values(std::vector<double> &values) override;
    // Enforce the policy one time with PlatformIO::write_control().
45  void enforce_policy(const std::vector<double> &policy) const override;
    // Returns a formatting function for each column added to the trace.
46  std::vector<std::function<std::string(double)> > trace_formats(void) const override;
47 
    // Static plugin entry points.
    // NOTE(review): presumably used to register this agent with the plugin
    // factory; policy_names()/sample_names() must stay in step with
    // m_policy_e/m_sample_e below.
48  static std::string plugin_name(void);
49  static std::unique_ptr<Agent> make_plugin(void);
50  static std::vector<std::string> policy_names(void);
51  static std::vector<std::string> sample_names(void);
52  private:
    // Platform interfaces held by reference; the referenced objects must
    // outlive this agent.
53  PlatformIO &m_platform_io;
54  const PlatformTopo &m_platform_topo;
55  static constexpr double M_WAIT_SEC = 0.020; // 20ms wait default
    // Per-instance constants: const and non-static with no in-class
    // initializer, so they must be set in the constructor's initializer list.
    // NOTE(review): counts are presumably queried from the topology -- confirm.
56  const double M_POLICY_PHI_DEFAULT;
57  const int M_NUM_GPU;
58  const int M_NUM_GPU_CHIP;
59  const int M_NUM_CHIP_PER_GPU;
    // NOTE(review): presumably the values returned by do_write_batch() and
    // do_send_policy() -- confirm.
60  bool m_do_write_batch;
61  bool m_do_send_policy;
62 
    // NOTE(review): presumably the domain type the agent operates on and the
    // number of such domains -- confirm.
63  int m_agent_domain_count;
64  int m_agent_domain;
65 
    // Pairs a batch index with a value.
    // NOTE(review): presumably a pushed PlatformIO signal handle and its most
    // recently sampled value -- confirm.
66  struct m_signal
67  {
68  int batch_idx;
69  double value;
70  };
71 
    // Pairs a batch index with the last setting.
    // NOTE(review): presumably a pushed PlatformIO control handle and the
    // value last written to it -- confirm.
72  struct m_control
73  {
74  int batch_idx;
75  double last_setting;
76  };
77 
78  // Policy indices; must match policy_names()
79  enum m_policy_e {
80  M_POLICY_GPU_PHI,
81  M_NUM_POLICY
82  };
83 
84  // Sample indices; must match sample_names()
85  enum m_sample_e {
86  M_NUM_SAMPLE
87  };
88 
    // Map from a name to a double.
    // NOTE(review): meaning of the keys/values is not visible here -- confirm.
89  std::map<std::string, double> m_policy_available;
90 
    // GPU frequency bookkeeping.
    // NOTE(review): units and exact meaning of each field are not visible in
    // this header -- confirm in the .cpp.
91  double m_gpu_frequency_requests;
92  double m_gpu_frequency_clipped;
93  double m_freq_gpu_min;
94  double m_freq_gpu_max;
95  double m_freq_gpu_efficient;
96  double m_resolved_f_gpu_max;
97  double m_resolved_f_gpu_efficient;
98  double m_f_range;
99 
    // Signal records: one vector each for GPU core activity, GPU utilization
    // and GPU energy, plus a single time signal.
    // NOTE(review): vectors are presumably indexed by agent domain -- confirm.
100  std::vector<m_signal> m_gpu_core_activity;
101  std::vector<m_signal> m_gpu_utilization;
102  std::vector<m_signal> m_gpu_energy;
103  m_signal m_time;
104 
    // Control records for the GPU minimum and maximum frequency, and the
    // shared Waiter.
    // NOTE(review): the Waiter is presumably what wait() blocks on -- confirm.
105  std::vector<m_control> m_gpu_freq_min_control;
106  std::vector<m_control> m_gpu_freq_max_control;
107  std::shared_ptr<Waiter> m_waiter;
108 
    // Private helper with no arguments or return value.
    // NOTE(review): presumably pushes the signals/controls above onto
    // m_platform_io -- confirm.
109  void init_platform_io(void);
110  };
111 }
112 #endif
Definition: Agent.hpp:20
Agent.
Definition: GPUActivityAgent.hpp:22
void adjust_platform(const std::vector< double > &in_policy) override
Adjust the platform settings based on the policy from above.
Definition: GPUActivityAgent.cpp:220
static std::string plugin_name(void)
Definition: GPUActivityAgent.cpp:431
void split_policy(const std::vector< double > &in_policy, std::vector< std::vector< double > > &out_policy) override
Called by Controller to split policy for children at next level down the tree.
Definition: GPUActivityAgent.cpp:192
std::map< uint64_t, std::vector< std::pair< std::string, std::string > > > report_region(void) const override
Custom fields for each region in the report.
Definition: GPUActivityAgent.cpp:405
bool do_send_sample(void) const override
Definition: GPUActivityAgent.cpp:215
void validate_policy(std::vector< double > &in_policy) const override
Called by Controller to validate incoming policy values and configure defaults requested in incoming ...
Definition: GPUActivityAgent.cpp:172
bool do_send_policy(void) const override
Called by Controller to determine if new policy values should be sent down the tree to the Agent's children.
Definition: GPUActivityAgent.cpp:203
std::vector< std::pair< std::string, std::string > > report_header(void) const override
Custom fields that will be added to the report header when this agent is used.
Definition: GPUActivityAgent.cpp:384
static std::unique_ptr< Agent > make_plugin(void)
Definition: GPUActivityAgent.cpp:437
static std::vector< std::string > sample_names(void)
Definition: GPUActivityAgent.cpp:449
static std::vector< std::string > policy_names(void)
Definition: GPUActivityAgent.cpp:443
virtual ~GPUActivityAgent()=default
void wait(void) override
Called by Controller to wait for sample period to elapse. This controls the cadence of the Controller...
Definition: GPUActivityAgent.cpp:378
void trace_values(std::vector< double > &values) override
Called by Controller to get latest values to be added to the trace.
Definition: GPUActivityAgent.cpp:417
std::vector< std::pair< std::string, std::string > > report_host(void) const override
Custom fields for the host section of the report.
Definition: GPUActivityAgent.cpp:390
void sample_platform(std::vector< double > &out_sample) override
Read signals from the platform and interpret/aggregate these signals to create a sample which can be ...
Definition: GPUActivityAgent.cpp:356
GPUActivityAgent()
Definition: GPUActivityAgent.cpp:28
std::vector< std::function< std::string(double)> > trace_formats(void) const override
Returns format string for each column added to the trace.
Definition: GPUActivityAgent.cpp:425
void aggregate_sample(const std::vector< std::vector< double > > &in_sample, std::vector< double > &out_sample) override
Aggregate samples from children for the next level up the tree.
Definition: GPUActivityAgent.cpp:208
void init(int level, const std::vector< int > &fan_in, bool is_level_root) override
Set the level where this Agent is active and push signals/controls for that level.
Definition: GPUActivityAgent.cpp:63
bool do_write_batch(void) const override
Called by the Controller to decide whether to call write_batch() to update platform controls.
Definition: GPUActivityAgent.cpp:350
std::vector< std::string > trace_names(void) const override
Column headers to be added to the trace.
Definition: GPUActivityAgent.cpp:411
void enforce_policy(const std::vector< double > &policy) const override
Enforce the policy one time with PlatformIO::write_control(). Called to enforce static policies in th...
Definition: GPUActivityAgent.cpp:421
Definition: Accumulator.cpp:12