P4C
The P4 Compiler
Loading...
Searching...
No Matches
mau_power.h
1
19#ifndef BF_P4C_MAU_MAU_POWER_H_
20#define BF_P4C_MAU_MAU_POWER_H_
21
22#include <fstream>
23#include <map>
24#include <ostream>
25#include <string>
26#include <vector>
27
28#include "backends/tofino/bf-p4c/device.h"
29#include "ir/ir.h"
30#include "ir/unique_id.h"
31#include "lib/dyn_vector.h"
32#include "power_schema.h"
33
34namespace MauPower {
35
36using namespace P4;
37
// Dependency kinds with fixed numeric values 0, 1 and 2. Later in this header the
// type is used for a stage's "dependency type to previous stage"
// (see stage_dep_to_previous_ and get/set_dependency_for_gress_stage).
38enum mau_dep_t { DEP_CONCURRENT = 0, DEP_ACTION = 1, DEP_MATCH = 2 };
// Identifies one feature of a stage. Passed as the `feature` argument of
// stage_has_feature() and stage_has_chained_feature(). Values are fixed at 0..6.
// NOTE(review): the enumerators appear to parallel the has_exact_, has_tcam_,
// has_stats_, has_selector_, has_meter_lpf_or_wred_ and has_stateful_ bit vectors,
// with WIDE_SEL presumably tied to max_selector_words_ -- confirm in mau_power.cpp.
39enum stage_feature_t {
40 HAS_EXACT = 0,
41 HAS_TCAM = 1,
42 HAS_STATS = 2,
43 HAS_SEL = 3,
44 HAS_LPF_OR_WRED = 4,
45 HAS_STFUL = 5,
46 WIDE_SEL = 6
47};
// Identifies a lookup-table kind; passed as the `type` argument of need_to_emit().
// NOTE(review): presumably corresponds to the mpr_next_table_, mpr_global_exec_ and
// mpr_long_branch_ tables respectively -- confirm in mau_power.cpp.
48enum lut_t { NEXT_TABLE_LUT = 0, GLOB_EXEC_LUT = 1, LONG_BRANCH_LUT = 2 };
49
50std::ostream &operator<<(std::ostream &, mau_dep_t);
51// The internal encoding of stage numbers is:
52// 0 to n-1 for ingress stages
53// n to 2n-1 for egress stages
54// 2n to 3n-1 for ghost stages
55// However, note that function signatures assume the stage argument
56// runs from 0 to n-1. This is checked.
57std::string float2str(double d);
58
65 public:
66 static const int kNumberGress = GRESS_T_COUNT;
67 using PowerLogging = Logging::Power_Schema_Logger;
68 // For calculating latencies
69 // map from gress and stage number to Boolean indicating if resource type is in use.
70 bitvec has_exact_[kNumberGress];
71 // has_tcam_ would also include ternary result buses in ram array.
72 bitvec has_tcam_[kNumberGress];
73 bitvec has_meter_lpf_or_wred_[kNumberGress];
74 bitvec has_selector_[kNumberGress];
75 dyn_vector<int> max_selector_words_[kNumberGress]; // max across tables in stage
76 bitvec has_stateful_[kNumberGress];
77 bitvec has_stats_[kNumberGress];
78
79 // map from UniqueId to Boolean indicating
80 // if it will run at EOP.
81 // The UniqueId is the attached table's UniqueId.
82 std::map<UniqueId, bool> counter_runs_at_eop_;
83 std::map<UniqueId, bool> meter_runs_at_eop_;
84 // Tracks, per attached table UniqueId, whether the 'meter' is an LPF or WRED.
85 std::map<UniqueId, bool> meter_is_lpf_or_wred_;
86 // stores maximum number of selector members
87 // (more than 120 requires multiple RAM words)
88 std::map<UniqueId, int> selector_group_size_;
89
90 // Maps stage number to vector of logical tables found in the stage
91 dyn_vector<std::vector<const IR::MAU::Table *>> stage_to_tables_[kNumberGress];
92 // Maps UniqueId to stage number it's in.
93 std::map<UniqueId, int> table_to_stage_;
94 // Maps UniqueId back to the table
95 std::map<UniqueId, const IR::MAU::Table *> uid_to_table_;
96
97 private:
98 // Maps stage number to its dependency type to previous stage.
99 // Note the encoding is as described above.
100 std::map<int, mau_dep_t> stage_dep_to_previous_[kNumberGress];
101
102 public:
109 bool stage_has_feature(gress_t gress, int stage, stage_feature_t feature) const;
117 int get_max_selector_words(gress_t gress, int stage) const;
123 mau_dep_t get_dependency_for_gress_stage(gress_t g, int stage) const;
124 void set_dependency_for_gress_stage(gress_t g, int stage, mau_dep_t dep);
137
142 int compute_pipe_latency(gress_t gress) const;
148 int compute_stage_latency(gress_t gress, int stage) const;
154 int compute_pred_cycle(gress_t gress, int stage) const;
164 bool stage_has_chained_feature(gress_t gress, int stage, stage_feature_t feature) const;
168 void print_features(std::ostream &out, gress_t gress) const;
172 void print_latency(std::ostream &out, gress_t gress) const;
176 void log_json_stage_characteristics(gress_t g, PowerLogging *logger) const;
181 std::ostream &emit_dep_asm(std::ostream &out, gress_t g, int stage) const;
186 bool requires_dep_asm(gress_t g, int stage) const;
191 bool are_there_more_tables(gress_t gress, int start_stage) const;
192};
193
202 public:
203 const gress_t gress_;
204 MauFeatures &mau_features_;
205 MprSettings(gress_t gress, MauFeatures &);
212 void set_mpr_stage(int stage, int mpr_stage);
213 int get_mpr_stage(int stage) const;
221 void set_mpr_next_table(int stage, int logical_id, int id_vector);
222 void set_or_mpr_next_table(int stage, int logical_id, int id_vector);
223 int get_mpr_next_table(int stage, int logical_id) const;
231 void set_mpr_global_exec(int stage, int exec_bit, int id_vector);
232 void set_or_mpr_global_exec(int stage, int exec_bit, int id_vector);
233 int get_mpr_global_exec(int stage, int exec_bit) const;
241 void set_mpr_long_branch(int stage, int tag_id, int id_vector);
242 int get_mpr_long_branch(int stage, int tag_id) const;
251 void set_or_mpr_long_branch(int stage, int tag_id, int id_vector);
257 void set_mpr_always_run(int stage, int id_vector);
264 void set_or_mpr_always_run(int stage, int id_vector);
265 int get_mpr_always_run_for_stage(int stage) const;
278 void set_mpr_bus_dep_glob_exec(int stage, int id_vector);
279 void set_or_mpr_bus_dep_glob_exec(int stage, int id_vector);
280 int get_mpr_bus_dep_glob_exec(int stage) const;
293 void set_mpr_bus_dep_long_brch(int stage, int id_vector);
294 void set_or_mpr_bus_dep_long_brch(int stage, int id_vector);
295 int get_mpr_bus_dep_long_brch(int stage) const;
296
297 friend std::ostream &operator<<(std::ostream &out, const MprSettings &m);
304 bool need_to_emit(lut_t type, int stage) const;
305 std::ostream &emit_stage_asm(std::ostream &out, int stage) const;
306
307 private:
319 dyn_vector<int> mpr_stage_id_;
320
330 dyn_vector<dyn_vector<int>> mpr_next_table_;
331
339 dyn_vector<dyn_vector<int>> mpr_global_exec_;
340
347 dyn_vector<dyn_vector<int>> mpr_long_branch_;
348
353 dyn_vector<int> mpr_always_run_;
354
363 dyn_vector<int> mpr_bus_dep_glob_exec_;
364
373 dyn_vector<int> mpr_bus_dep_long_brch_;
374
375 void print_data(std::ostream &out, int cols, std::string id_name, std::vector<int> data,
376 bool use_bin) const;
377
378 public:
379 /* for each stage, which mpr_glob_exec bits output by this stage are needed by later stages.
380 * for action dependent stages, this will be identical to the previous match dependent stage.
381 */
382 dyn_vector<int> glob_exec_use;
383 /* for each stage, which mpr_long_branch tags output by this stage are needed by later stages.
384 * for action dependent stages, this will be identical to the previous match dependent stage.
385 */
386 dyn_vector<int> long_branch_use;
387};
388
389/*
390 * ram_read
391 * The total number of RAMs read.
392 * ram_write
393 * The total number of RAMs written. Only attached synth-2-port
394 * resources can write RAMs.
395 * tcam_read
396 * The total number of TCAMs read. Ternary tables have to read
397 * all TCAMs.
398 * map_ram_read
399 * The total number of MapRAMs read. MapRAMs are read by synth-2-port,
400 * idletime, and meter color.
401 * map_ram_write
402 * The total number of MapRAMs written. MapRAMs are written by
403 * synth-2-port, idletime, and meter color.
404 * deferred_ram_read
405 * The total number of deferred RAMs read. Deferred RAMs are read
406 * by meters and counters that run at EOP time.
407 * deferred_ram_write
408 * The total number of deferred RAMs written. Deferred RAMs are written
409 * by meters and counters that run at EOP time.
410 */
412 using PowerLogging = Logging::Power_Schema_Logger;
413 int ram_read = 0;
414 int ram_write = 0;
415 int tcam_read = 0;
416 int map_ram_read = 0;
417 int map_ram_write = 0;
418 int deferred_ram_read = 0;
419 int deferred_ram_write = 0;
420
422 : ram_read(0),
423 ram_write(0),
424 tcam_read(0),
425 map_ram_read(0),
426 map_ram_write(0),
427 deferred_ram_read(0),
428 deferred_ram_write(0) {}
429
430 explicit PowerMemoryAccess(int ram_read, int ram_write, int tcam_read, int map_ram_read,
431 int map_ram_write, int deferred_ram_read, int deferred_ram_write)
432 : ram_read(ram_read),
433 ram_write(ram_write),
434 tcam_read(tcam_read),
435 map_ram_read(map_ram_read),
436 map_ram_write(map_ram_write),
437 deferred_ram_read(deferred_ram_read),
438 deferred_ram_write(deferred_ram_write) {}
439
440 friend std::ostream &operator<<(std::ostream &out, const PowerMemoryAccess &p) {
441 out << "Memory access:" << std::endl;
442 out << " RAMs read " << p.ram_read << std::endl;
443 out << " RAMs write " << p.ram_write << std::endl;
444 out << " TCAMs read " << p.tcam_read << std::endl;
445 out << " MapRAMs read " << p.map_ram_read << std::endl;
446 out << " MapRAMs write " << p.map_ram_write << std::endl;
447 out << " Deferred RAMs read " << p.deferred_ram_read << std::endl;
448 out << " Deferred RAMs write " << p.deferred_ram_write << std::endl;
449 return out;
450 }
451
452 PowerMemoryAccess &operator+=(const PowerMemoryAccess &p) {
453 ram_read += p.ram_read;
454 ram_write += p.ram_write;
455 tcam_read += p.tcam_read;
456 map_ram_read += p.map_ram_read;
457 map_ram_write += p.map_ram_write;
458 deferred_ram_read += p.deferred_ram_read;
459 deferred_ram_write += p.deferred_ram_write;
460 return *this;
461 }
462
463 PowerMemoryAccess operator+(const PowerMemoryAccess &p) const {
464 PowerMemoryAccess rv = *this;
465 rv += p;
466 return rv;
467 }
468
488 double compute_table_power(int num_pipes) const;
489
498 double compute_table_weight(double computed_power, int num_pipes) const;
499 // Sets up JSON logging information.
500 void log_json_memories(PowerLogging::StageDetails *) const;
501};
502
503} // end namespace MauPower
504
505#endif /* BF_P4C_MAU_MAU_POWER_H_ */
Definition mau_power.h:64
int compute_stage_latency(gress_t gress, int stage) const
Definition mau_power.cpp:185
void log_json_stage_characteristics(gress_t g, PowerLogging *logger) const
Definition mau_power.cpp:375
bool stage_has_chained_feature(gress_t gress, int stage, stage_feature_t feature) const
Definition mau_power.cpp:208
std::ostream & emit_dep_asm(std::ostream &out, gress_t g, int stage) const
Definition mau_power.cpp:83
bool try_convert_to_match_dep()
Definition mau_power.cpp:111
int compute_pred_cycle(gress_t gress, int stage) const
Definition mau_power.cpp:170
bool requires_dep_asm(gress_t g, int stage) const
Definition mau_power.cpp:93
bool stage_has_feature(gress_t gress, int stage, stage_feature_t feature) const
Definition mau_power.cpp:61
void update_deps_for_device()
Definition mau_power.cpp:132
void print_features(std::ostream &out, gress_t gress) const
Definition mau_power.cpp:249
int compute_pipe_latency(gress_t gress) const
Definition mau_power.cpp:154
int get_max_selector_words(gress_t gress, int stage) const
Definition mau_power.cpp:57
void print_latency(std::ostream &out, gress_t gress) const
Definition mau_power.cpp:321
bool are_there_more_tables(gress_t gress, int start_stage) const
Definition mau_power.cpp:242
mau_dep_t get_dependency_for_gress_stage(gress_t g, int stage) const
Definition mau_power.cpp:104
Definition mau_power.h:201
void set_mpr_global_exec(int stage, int exec_bit, int id_vector)
Definition mau_power.cpp:511
void set_or_mpr_always_run(int stage, int id_vector)
Definition mau_power.cpp:488
void set_mpr_long_branch(int stage, int tag_id, int id_vector)
Definition mau_power.cpp:527
void set_mpr_always_run(int stage, int id_vector)
Definition mau_power.cpp:483
void set_or_mpr_long_branch(int stage, int tag_id, int id_vector)
Definition mau_power.cpp:537
void set_mpr_next_table(int stage, int logical_id, int id_vector)
Definition mau_power.cpp:495
void set_mpr_stage(int stage, int mpr_stage)
Definition mau_power.cpp:476
bool need_to_emit(lut_t type, int stage) const
Definition mau_power.cpp:419
void set_mpr_bus_dep_glob_exec(int stage, int id_vector)
Definition mau_power.cpp:543
void set_mpr_bus_dep_long_brch(int stage, int id_vector)
Definition mau_power.cpp:557
Definition bitvec.h:120
Definition dyn_vector.h:27
Definition mau/asm_output.h:39
TODO: this is not really specific to BMV2, it should reside somewhere else.
Definition applyOptionsPragmas.cpp:24
Definition mau_power.h:411
double compute_table_weight(double computed_power, int num_pipes) const
Definition mau_power.cpp:739
double compute_table_power(int num_pipes) const
Definition mau_power.cpp:722