P4C
The P4 Compiler
Loading...
Searching...
No Matches
allocators.h
1
19#ifndef BACKENDS_TOFINO_BF_P4C_PARDE_COMMON_ALLOCATORS_H_
20#define BACKENDS_TOFINO_BF_P4C_PARDE_COMMON_ALLOCATORS_H_
21
22#include "backends/tofino/bf-p4c/common/ir_utils.h"
23#include "backends/tofino/bf-p4c/parde/clot/clot_info.h"
24#include "backends/tofino/bf-p4c/parde/parde_visitor.h"
25#include "ir/ir.h"
26
31 void sort_state_primitives() {
32 for (auto p : state->extracts) {
33 if (auto e = p->to<IR::BFN::LoweredExtractPhv>()) {
34 phv_extracts.push_back(e);
35 } else if (auto e = p->to<IR::BFN::LoweredExtractClot>()) {
36 clot_extracts.push_back(e);
37 }
38 }
39 for (auto p : state->checksums) {
40 if (auto c = p->to<IR::BFN::LoweredParserChecksum>()) {
41 checksums.push_back(c);
42 }
43 }
44 for (auto p : state->counters) {
45 if (auto pc = p->to<IR::BFN::ParserCounterPrimitive>()) {
46 counters.push_back(pc);
47 }
48 }
49 }
50
51 struct OutOfBuffer : Inspector {
52 bool lo = false, hi = false;
53 bool preorder(const IR::BFN::LoweredPacketRVal *rval) override {
54 auto max = Device::pardeSpec().byteInputBufferSize() * 8;
55 hi |= rval->range.hi >= max;
56 lo |= rval->range.lo >= max;
57 return false;
58 }
59 };
60
61 template <typename T>
62 static bool within_buffer(const T *p) {
63 OutOfBuffer oob;
64 p->apply(oob);
65 return !oob.hi;
66 }
67
// Common base of the per-resource allocators. A concrete allocator decides in
// allocate() which statements stay in the current match and which are spilled;
// add_to_result() then publishes both lists to the owning
// LoweredParserMatchAllocator.
68 struct Allocator {
// Owning allocator whose current_statements / spilled_statements receive the
// result.
69 LoweredParserMatchAllocator &lpm_allocator;
70
// NOTE(review): `current` and `spilled`, which add_to_result() iterates, are
// not declared in the lines visible in this listing (the original line 71 is
// absent) -- confirm their declaration against the real header.
72
73 explicit Allocator(LoweredParserMatchAllocator &lpm_allocator)
74 : lpm_allocator(lpm_allocator) {}
75
// Implemented by subclasses: fill `current` and `spilled`.
76 virtual void allocate() = 0;
77
// Appends every entry of `current` to lpm_allocator.current_statements and
// every entry of `spilled` to lpm_allocator.spilled_statements, in order.
78 void add_to_result() {
79 for (auto c : current) lpm_allocator.current_statements.push_back(c);
80
81 for (auto s : spilled) lpm_allocator.spilled_statements.push_back(s);
82 }
83 };
84
85 struct ExtractAllocator : Allocator {
87 container_to_extracts;
88
// Device hook: for an extract from the input buffer into a container of
// `container_size` bits, returns {extractor size, number of extractors used}.
89 virtual std::pair<size_t, unsigned> inbuf_extractor_use(size_t container_size) = 0;
90
// Device hook: for writing constant `value` into a container of
// `container_size` bits, returns the possible ways to do it as a map from
// extractor size to the number of extractors of that size required.
91 virtual std::map<size_t, unsigned> constant_extractor_use_choices(
92 uint32_t value, size_t container_size) = 0;
93
// Convenience overload working on the extracts of one container: obtains the
// constant via merge_const_source() and, if that constant is non-zero or any
// constant extract uses clear-on-write, asks the device hook for the extractor
// choices for `container.size()`. Returns an empty map otherwise.
// NOTE(review): the line declaring the second parameter (`extracts`) is
// missing from this listing -- confirm the full signature in the real header.
94 std::map<size_t, unsigned> constant_extractor_use_choices(
95 PHV::Container container,
97 std::map<size_t, unsigned> rv;
98 bool has_clr_on_write = false;
99
100 unsigned c = merge_const_source(extracts, has_clr_on_write);
101
// A zero constant only needs an extractor when a clear-on-write extract is
// present.
102 if (c || has_clr_on_write) {
103 rv = constant_extractor_use_choices(c, container.size());
104
105 LOG4("constant: " << c);
106
107 for (auto &[size, count] : rv)
108 LOG4("extractors needed: " << size << " : " << count);
109 }
110
111 return rv;
112 }
113
// Returns the extractors needed for the input-buffer sourced extracts of one
// container: a single {size -> count} entry from inbuf_extractor_use() when at
// least one extract reads the input buffer, otherwise an empty map.
// NOTE(review): the line declaring the second parameter (`extracts`) is
// missing from this listing -- confirm the full signature in the real header.
114 std::map<size_t, unsigned> inbuf_extractor_needed(
115 PHV::Container container,
117 std::map<size_t, unsigned> rv;
118
119 if (has_inbuf_extract(extracts)) {
120 auto iu = inbuf_extractor_use(container.size());
121 rv.insert(iu);
122 }
123
124 return rv;
125 }
126
127 bool has_inbuf_extract(const ordered_set<const IR::BFN::LoweredExtractPhv *> &extracts) {
128 for (auto e : extracts) {
129 if (e->source->is<IR::BFN::LoweredInputBufferRVal>()) return true;
130 }
131 return false;
132 }
133
134 unsigned merge_const_source(const ordered_set<const IR::BFN::LoweredExtractPhv *> &extracts,
135 bool &has_clr_on_write) {
136 unsigned merged = 0;
137
138 for (auto e : extracts) {
139 if (auto c = e->source->to<IR::BFN::LoweredConstantRVal>()) {
140 merged = c->constant;
141
142 if (e->write_mode == IR::BFN::ParserWriteMode::CLEAR_ON_WRITE)
143 has_clr_on_write = true;
144 }
145 }
146
147 return merged;
148 }
149
150 bool extract_out_of_buffer(const IR::BFN::LoweredExtractPhv *e) {
151 GetExtractBufferPos get_buf_pos;
152 e->apply(get_buf_pos);
153
154 return get_buf_pos.max > Device::pardeSpec().byteInputBufferSize() * 8;
155 }
156
// Decides, container by container, whether all extracts into that container
// can stay in the current match or must be spilled.
//
// Steps:
//   1. Reserve extractors for checksum results already present in
//      lpm_allocator.current_statements.
//   2. For every destination container, work out the extractors needed for
//      input-buffer extracts and for constants, check them against the device
//      extractor budget and the constant limits, and check that no extract
//      reads beyond the input buffer.
//   3. Append the container's extracts to `current` if everything fits,
//      otherwise to `spilled` (logging the reason).
// Extracts of one container are always kept together.
157 void allocate() override {
// Extractors claimed so far, keyed by extractor size.
158 std::map<size_t, unsigned> extractors_by_size;
159
// Extractors claimed so far for constants, keyed by extractor size.
160 std::map<size_t, unsigned> constants_by_size;
161
// Checksum result destinations reserved so far, keyed by container size.
162 std::map<size_t, unsigned> csum_verify_by_size;
163
164 // reserve extractor for checksum verification
165 for (auto c : lpm_allocator.current_statements) {
166 if (auto lpc = c->to<IR::BFN::LoweredParserChecksum>()) {
// Destination is phv_dest, or the csum_err container for a VERIFY
// checksum that reports an error bit; checksums without either are skipped.
167 const IR::BFN::ContainerRef *dest = lpc->phv_dest;
168 if (lpc->type == IR::BFN::ChecksumMode::VERIFY && lpc->csum_err)
169 dest = lpc->csum_err->container;
170 if (!dest) continue;
171
172 auto container = dest->container;
173 BUG_CHECK(container.size() != 32,
174 "checksum verification cannot be 32-bit container");
175
176 extractors_by_size[container.size()]++;
177 csum_verify_by_size[container.size()]++;
178
179 // reserve a dummy for checksum verification
180 // see MODEL-210 for discussion.
181
182 if (Device::currentDevice() == Device::TOFINO)
183 extractors_by_size[container.size()]++;
184
185 LOG2("reserved " << container.size()
186 << "b extractor for checksum verification");
187 }
188 }
189
190 for (auto &kv : container_to_extracts) {
191 auto container = kv.first;
192 auto &extracts = kv.second;
193
// Extractors required for input-buffer extracts (empty if none).
194 auto ibuf_needed = inbuf_extractor_needed(container, extracts);
195
// Alternative ways to produce the constant, extractor size -> count
// (empty if no constant has to be written).
196 auto constant_choices = constant_extractor_use_choices(container, extracts);
197
198 bool constant_avail = true;
199
200 if (!constant_choices.empty()) {
201 if (Device::currentDevice() == Device::TOFINO) {
// Keep only the choices that respect the Tofino constant restrictions.
202 std::map<size_t, unsigned> valid_choices;
203
204 for (auto &choice : constant_choices) {
// At most 2 constants in total for 16b and for 32b extractors.
205 if (choice.first == 16 || choice.first == 32) {
206 if (choice.second + constants_by_size[choice.first] > 2) continue;
207 }
208 // For narrow-to-wide extractions: verify
209 // extractors with constants are correctly aligned.
210 //
211 // Only a problem when generating 1 checksum validation result
212 // because this pushes extractor results back by 1 position.
213 // E.g.,
214 // extractor 0's output -> result bus 1
215 // extractor 1's output -> result bus 2
216 // The narrow-to-wide pair is not on an even+odd result bus pair.
217 //
218 // Only consider 16b extractors: limited to 2 constants.
219 // Don't consider 32b extractors: no 64b containers.
220 // Don't consider 8b extractors: all support constants.
221 if (choice.first < container.size() &&
222 csum_verify_by_size[choice.first] == 1 && choice.first == 16)
223 continue;
224
225 valid_choices.insert(choice);
226 }
227
228 constant_choices = valid_choices;
229 constant_avail = !valid_choices.empty();
230 } else if (Device::currentDevice() == Device::JBAY) {
// Only the 16b entry is consulted here; at most 2 constants in total.
231 constant_avail = constant_choices.at(16) + constants_by_size[16] <= 2;
232 }
233 }
234
235 bool extractor_avail = true;
236
// Chosen constant option as {extractor size, count}; stays {0, 0} when no
// constant is needed or no option fits.
237 std::pair<size_t, unsigned> constant_needed;
238
// Combined extractor demand (input buffer + chosen constant option).
239 std::map<size_t, unsigned> total_needed;
240
241 if (!constant_choices.empty()) {
242 extractor_avail = false;
243
// std::map iterates in ascending key order, so the reverse walk tries
// the largest extractor size first and takes the first option that fits.
244 for (auto it = constant_choices.rbegin(); it != constant_choices.rend(); ++it) {
245 auto choice = *it;
246
247 total_needed = ibuf_needed;
248
249 if (total_needed.count(choice.first))
250 total_needed[choice.first] += choice.second;
251 else
252 total_needed.insert(choice);
253
254 bool choice_ok = true;
255
// Every size in the demand must fit in what is left of the device budget.
256 for (auto &kv : total_needed) {
257 auto avail = Device::pardeSpec().extractorSpec().at(kv.first);
258 if (extractors_by_size[kv.first] + kv.second > avail) {
259 choice_ok = false;
260 break;
261 }
262 }
263
264 if (choice_ok) {
265 extractor_avail = true;
266 constant_needed = choice;
267 break;
268 }
269 }
270 } else {
// No constant involved: only the input-buffer demand has to fit.
271 total_needed = ibuf_needed;
272
273 for (auto &kv : total_needed) {
274 auto avail = Device::pardeSpec().extractorSpec().at(kv.first);
275 if (extractors_by_size[kv.first] + kv.second > avail) {
276 extractor_avail = false;
277 break;
278 }
279 }
280 }
281
282 bool oob = false;
283
// The buffer check is only performed when the resource checks passed.
284 if (extractor_avail && constant_avail) {
285 for (auto e : extracts) {
286 if (extract_out_of_buffer(e)) {
287 oob = true;
288 break;
289 }
290 }
291 }
292
293 if (!oob && extractor_avail && constant_avail) {
294 // allocate
295 for (auto e : extracts) current.push_back(e);
296
297 for (auto &kv : total_needed) extractors_by_size[kv.first] += kv.second;
298
299 constants_by_size[constant_needed.first] += constant_needed.second;
300 } else {
301 std::stringstream reason;
302
303 if (oob) reason << "(out of buffer) ";
304 if (!extractor_avail) reason << "(ran out of extractors) ";
305 if (!constant_avail) reason << "(ran out of constants) ";
306
307 for (auto e : extracts) LOG3("spill " << e << " { " << reason.str() << "}");
308
309 // spill
310 for (auto e : extracts) spilled.push_back(e);
311 }
312 }
313 }
314
315 explicit ExtractAllocator(LoweredParserMatchAllocator &lpm_allocator)
316 : Allocator(lpm_allocator) {
317 PHV::FieldUse use(PHV::FieldUse::WRITE);
318 for (auto e : lpm_allocator.phv_extracts) {
319 auto container = e->dest->container;
320 container_to_extracts[container].insert(e);
321 }
322 }
323 };
324
325 class TofinoExtractAllocator : public ExtractAllocator {
// Returns true when constant `val` can be produced with extractors of
// `extractor_size` bits, according to the following checks:
//   - 0 is always accepted, for any extractor size;
//   - 32: some 32-bit rotation of `val` has bit 0 set and fits in 3 bits;
//   - 16: each 16-bit half of `val` is either 0 or has a 16-bit rotation
//         with bit 0 set that fits in 4 bits;
//   - 8:  any value is accepted;
//   - any other size is rejected.
//
// Fix: in the 16-bit case a zero low half combined with a valid non-zero high
// half (e.g. 0x000F0000) used to be rejected, because the rotation loop can
// never succeed on 0. A zero half is now accepted, consistent with the
// `val == 0` shortcut.
bool can_extract(unsigned val, unsigned extractor_size) {
    if (val == 0) return true;

    // True when some rotation of `bits` within a `width`-bit field has its
    // lowest bit set and lies entirely inside `window_mask`.
    const auto fits_rotated = [](unsigned bits, unsigned width, unsigned field_mask,
                                 unsigned window_mask) {
        for (unsigned i = 0; i < width; ++i) {
            if ((bits & 1) && (bits & window_mask) == bits) return true;
            bits = ((bits >> 1) | (bits << (width - 1))) & field_mask;
        }
        return false;
    };

    switch (extractor_size) {
        case 32:
            return fits_rotated(val, 32, 0xffffffffU, 0x7);
        case 16: {
            const auto half_ok = [&fits_rotated](unsigned half) {
                return half == 0 || fits_rotated(half, 16, 0xffffU, 0xf);
            };
            // `val` is non-zero here, so at least one half is really checked.
            return half_ok(val >> 16) && half_ok(val & 0xffff);
        }
        case 8:
            return true;
    }

    return false;
}
350
351 std::map<size_t, unsigned> constant_extractor_use_choices(unsigned value,
352 size_t container_size) override {
353 std::map<size_t, unsigned> rv;
354
355 for (const auto extractor_size : {PHV::Size::b32, PHV::Size::b16, PHV::Size::b8}) {
356 // can not use larger extractor on smaller container
357 if (container_size < size_t(extractor_size)) continue;
358
359 if (can_extract(value, unsigned(extractor_size)))
360 rv[size_t(extractor_size)] = container_size / unsigned(extractor_size);
361 }
362
363 BUG_CHECK(!rv.empty(), "Impossible constant value write in parser: %1%", value);
364
365 return rv;
366 }
367
368 std::pair<size_t, unsigned> inbuf_extractor_use(size_t container_size) override {
369 return {container_size, 1};
370 }
371
372 public:
373 explicit TofinoExtractAllocator(LoweredParserMatchAllocator &lpm_allocator)
374 : ExtractAllocator(lpm_allocator) {
375 allocate();
376 add_to_result();
377 }
378 };
379
380 class JBayExtractAllocator : public ExtractAllocator {
381 std::map<size_t, unsigned> constant_extractor_use_choices(uint32_t value,
382 size_t container_size) override {
383 std::map<size_t, unsigned> rv;
384
385 unsigned num = 0;
386
387 if (container_size == size_t(PHV::Size::b32) && value)
388 num = bool(value & 0xffff) + bool(value >> 16);
389 else
390 num = 1;
391
392 rv[16] = num;
393
394 return rv;
395 }
396
397 std::pair<size_t, unsigned> inbuf_extractor_use(size_t container_size) override {
398 return {16, container_size == 32 ? 2 : 1};
399 }
400
401 public:
402 explicit JBayExtractAllocator(LoweredParserMatchAllocator &lpm_allocator)
403 : ExtractAllocator(lpm_allocator) {
404 allocate();
405 add_to_result();
406 }
407 };
408
409 void allocate() {
410 sort_state_primitives();
411 for (const auto &checksum : checksums) current_statements.push_back(checksum);
412
413 if (Device::currentDevice() == Device::TOFINO) {
414 TofinoExtractAllocator tea(*this);
415 } else if (Device::currentDevice() == Device::JBAY) {
416 JBayExtractAllocator jea(*this);
417 } else {
418 BUG("Unknown device");
419 }
420
421 for (auto o : others) {
422 if (within_buffer(o)) {
423 current_statements.push_back(o);
424 } else {
425 spilled_statements.push_back(o);
426 LOG3("spill " << o << " (out of buffer)");
427 }
428 }
429 }
430
431 struct GetExtractBufferPos : Inspector {
432 int min = Device::pardeSpec().byteInputBufferSize() * 8;
433 int max = -1;
434
435 bool preorder(const IR::BFN::LoweredExtractPhv *extract) override {
436 if (auto rval = extract->source->to<IR::BFN::LoweredPacketRVal>()) {
437 min = std::min(min, rval->range.lo);
438 max = std::max(max, rval->range.hi);
439 }
440 return false;
441 }
442 };
443
444 public:
445 LoweredParserMatchAllocator(const IR::BFN::LoweredParserMatch *s, ClotInfo &clot)
446 : state(s), clot(clot) {
447 allocate();
448 }
449
// Lowered parser match whose primitives are being allocated.
450 const IR::BFN::LoweredParserMatch *state;
451
// CLOT information supplied by the caller; not read by the code visible in
// this listing.
452 ClotInfo &clot;
453
// Typed work lists. The first four are filled by sort_state_primitives();
// `others` is consumed by allocate() but is not filled by any code visible in
// this listing.
454 std::vector<const IR::BFN::LoweredExtractPhv *> phv_extracts;
455 std::vector<const IR::BFN::LoweredExtractClot *> clot_extracts;
456 std::vector<const IR::BFN::LoweredParserChecksum *> checksums;
457 std::vector<const IR::BFN::ParserCounterPrimitive *> counters;
458 std::vector<const IR::BFN::LoweredParserPrimitive *> others;
459
// Result of the allocation: primitives kept in this match, and primitives that
// did not fit and were spilled.
460 IR::Vector<IR::BFN::LoweredParserPrimitive> current_statements, spilled_statements;
// NOTE(review): not written by the code visible in this listing; presumably
// set or read by users of this class -- confirm.
461 bool spill_selects = false;
462};
463
464#endif /* BACKENDS_TOFINO_BF_P4C_PARDE_COMMON_ALLOCATORS_H_ */
Definition clot_info.h:41
Definition vector.h:59
Definition visitor.h:400
Definition ordered_map.h:32
Definition ordered_set.h:32
Definition phv.h:176
Definition phv.h:248
int byteInputBufferSize() const
The size of input buffer, in bytes.
Definition parde_spec.h:410
virtual const std::map< unsigned, unsigned > & extractorSpec() const =0
Definition allocators.h:30