P4C
The P4 Compiler
Loading...
Searching...
No Matches
tofino/bf-asm/jbay/deparser.cpp
1
17
/* deparser template specializations for jbay -- #included directly in top-level deparser.cpp */

/* Conditional-expansion helpers: YES(X) expands to its argument, NO(X) to
 * nothing.  Passed as the IFSHIFT/IFID/... arguments of the macros below to
 * include or omit optional register writes per instantiation. */
#define YES(X) X
#define NO(X)

/* Program REG.pov from the single POV bit attached to VAL.  Tofino2 requires
 * exactly one POV bit per deparser metadata entry; anything else is an error. */
#define JBAY_POV(GRESS, VAL, REG) \
    if (VAL.pov.size() == 1) \
        REG.pov = deparser.pov[GRESS].at(&VAL.pov.front()->reg) + VAL.pov.front()->lo; \
    else \
        error(VAL.val.lineno, "POV bit required for Tofino2");

/* Program one intrinsic metadata register: source PHV container, POV bit and,
 * when IFSHIFT is YES, the bit offset (shift) within the container. */
#define JBAY_SIMPLE_INTRINSIC(GRESS, VAL, REG, IFSHIFT) \
    REG.phv = VAL.val->reg.deparser_id(); \
    JBAY_POV(GRESS, VAL, REG) \
    IFSHIFT(REG.shft = intrin.vals[0].val->lo;)

/* Program an intrinsic replicated across every element of an array of
 * header-output slices (ARRAY); the POV bit lives in a single shared
 * register (POV) rather than per slice. */
#define JBAY_ARRAY_INTRINSIC(GRESS, VAL, ARRAY, REG, POV, IFSHIFT) \
    for (auto &r : ARRAY) { \
        r.REG.phv = VAL.val->reg.deparser_id(); \
        IFSHIFT(r.REG.shft = intrin.vals[0].val->lo;) \
    } \
    JBAY_POV(GRESS, VAL, POV)

/* Egress intrinsic handled in the deparser input phase (inp.ipp.egr). */
#define EI_INTRINSIC(NAME, IFSHIFT) \
    DEPARSER_INTRINSIC(JBay, EGRESS, NAME, 1) { \
        JBAY_SIMPLE_INTRINSIC(EGRESS, intrin.vals[0], regs.dprsrreg.inp.ipp.egr.m_##NAME, IFSHIFT) \
    }
/* Egress intrinsic replicated per header-output (ho_e) slice. */
#define HO_E_INTRINSIC(NAME, IFSHIFT) \
    DEPARSER_INTRINSIC(JBay, EGRESS, NAME, 1) { \
        JBAY_ARRAY_INTRINSIC(EGRESS, intrin.vals[0], regs.dprsrreg.ho_e, her.meta.m_##NAME, \
                             regs.dprsrreg.inp.icr.egr_meta_pov.m_##NAME, IFSHIFT) \
    }
/* Ingress intrinsic handled in the deparser input phase (inp.ipp.ingr). */
#define II_INTRINSIC(NAME, IFSHIFT) \
    DEPARSER_INTRINSIC(JBay, INGRESS, NAME, 1) { \
        JBAY_SIMPLE_INTRINSIC(INGRESS, intrin.vals[0], regs.dprsrreg.inp.ipp.ingr.m_##NAME, \
                              IFSHIFT) \
    }
/* As II_INTRINSIC, but the assembler-visible NAME differs from the register
 * field name REGNAME. */
#define II_INTRINSIC_RENAME(NAME, REGNAME, IFSHIFT) \
    DEPARSER_INTRINSIC(JBay, INGRESS, NAME, 1) { \
        JBAY_SIMPLE_INTRINSIC(INGRESS, intrin.vals[0], regs.dprsrreg.inp.ipp.ingr.m_##REGNAME, \
                              IFSHIFT) \
    }
/* Ingress intrinsic replicated per header-output (ho_i) slice. */
#define HO_I_INTRINSIC(NAME, IFSHIFT) \
    DEPARSER_INTRINSIC(JBay, INGRESS, NAME, 1) { \
        JBAY_ARRAY_INTRINSIC(INGRESS, intrin.vals[0], regs.dprsrreg.ho_i, hir.meta.m_##NAME, \
                             regs.dprsrreg.inp.icr.ingr_meta_pov.m_##NAME, IFSHIFT) \
    }
/* As HO_I_INTRINSIC, but with an asm-name to register-field-name mapping. */
#define HO_I_INTRINSIC_RENAME(NAME, REGNAME, IFSHIFT) \
    DEPARSER_INTRINSIC(JBay, INGRESS, NAME, 1) { \
        JBAY_ARRAY_INTRINSIC(INGRESS, intrin.vals[0], regs.dprsrreg.ho_i, hir.meta.m_##REGNAME, \
                             regs.dprsrreg.inp.icr.ingr_meta_pov.m_##REGNAME, IFSHIFT) \
    }
70
// Egress intrinsics in the deparser input phase.
EI_INTRINSIC(drop_ctl, YES)
EI_INTRINSIC(egress_unicast_port, NO)
// Egress intrinsics replicated per header-output slice.
HO_E_INTRINSIC(afc, YES)
HO_E_INTRINSIC(capture_tx_ts, YES)
HO_E_INTRINSIC(force_tx_err, YES)
HO_E_INTRINSIC(tx_pkt_has_offsets, YES)
HO_E_INTRINSIC(mirr_c2c_ctrl, YES)
HO_E_INTRINSIC(mirr_coal_smpl_len, YES)
HO_E_INTRINSIC(mirr_dond_ctrl, YES)
HO_E_INTRINSIC(mirr_epipe_port, YES)
HO_E_INTRINSIC(mirr_hash, YES)
HO_E_INTRINSIC(mirr_icos, YES)
HO_E_INTRINSIC(mirr_io_sel, YES)
HO_E_INTRINSIC(mirr_mc_ctrl, YES)
HO_E_INTRINSIC(mirr_qid, YES)
HO_E_INTRINSIC(mtu_trunc_err_f, YES)
HO_E_INTRINSIC(mtu_trunc_len, YES)

// Ingress intrinsics in the deparser input phase.
II_INTRINSIC(copy_to_cpu, YES)
II_INTRINSIC(drop_ctl, YES)
II_INTRINSIC(egress_unicast_port, NO)
II_INTRINSIC_RENAME(egress_multicast_group_0, mgid1, NO)
II_INTRINSIC_RENAME(egress_multicast_group_1, mgid2, NO)
II_INTRINSIC(pgen, YES)
II_INTRINSIC(pgen_len, YES)
II_INTRINSIC(pgen_addr, YES)
// Ingress intrinsics replicated per header-output slice.
HO_I_INTRINSIC(afc, YES)
HO_I_INTRINSIC(bypss_egr, YES)
HO_I_INTRINSIC(copy_to_cpu_cos, YES)
HO_I_INTRINSIC(ct_disable, YES)
HO_I_INTRINSIC(ct_mcast, YES)
HO_I_INTRINSIC(deflect_on_drop, YES)
HO_I_INTRINSIC(icos, YES)
HO_I_INTRINSIC(mirr_c2c_ctrl, YES)
HO_I_INTRINSIC(mirr_coal_smpl_len, YES)
HO_I_INTRINSIC(mirr_dond_ctrl, YES)
HO_I_INTRINSIC(mirr_epipe_port, YES)
HO_I_INTRINSIC(mirr_hash, YES)
HO_I_INTRINSIC(mirr_icos, YES)
HO_I_INTRINSIC(mirr_io_sel, YES)
HO_I_INTRINSIC(mirr_mc_ctrl, YES)
HO_I_INTRINSIC(mirr_qid, YES)
HO_I_INTRINSIC(mtu_trunc_err_f, YES)
HO_I_INTRINSIC(mtu_trunc_len, YES)
HO_I_INTRINSIC(qid, YES)
HO_I_INTRINSIC(rid, YES)
HO_I_INTRINSIC_RENAME(meter_color, pkt_color, YES)
HO_I_INTRINSIC_RENAME(xid, xid_l1, YES)
HO_I_INTRINSIC_RENAME(yid, xid_l2, YES)
HO_I_INTRINSIC_RENAME(hash_lag_ecmp_mcast_0, hash1, YES)
HO_I_INTRINSIC_RENAME(hash_lag_ecmp_mcast_1, hash2, YES)

// The helper macros are only needed for the instantiations above.
#undef EI_INTRINSIC
#undef HO_E_INTRINSIC
#undef II_INTRINSIC
#undef II_INTRINSIC_RENAME
#undef HO_I_INTRINSIC
#undef HO_I_INTRINSIC_RENAME
129
143
/* Digest (learning/mirror/resubmit/pktgen) table programming.
 * JBAY_COMMON_DIGEST opens a DEPARSER_DIGEST block and programs the selector
 * register; JBAY_DIGEST_TABLE fills in one table; the SIMPLE/ARRAY wrappers
 * close the block for a single table or for one table per ho slice. */
#define JBAY_SIMPLE_DIGEST(GRESS, NAME, TBL, SEL, IFID, CNT, REVERSE, IFIDX) \
    JBAY_COMMON_DIGEST(GRESS, NAME, TBL, SEL, IFID, CNT, REVERSE, IFIDX) \
    JBAY_DIGEST_TABLE(GRESS, NAME, TBL, IFID, YES, CNT, REVERSE, IFIDX) \
    }
#define JBAY_ARRAY_DIGEST(GRESS, NAME, ARRAY, TBL, SEL, IFID, CNT, REVERSE, IFIDX) \
    JBAY_COMMON_DIGEST(GRESS, NAME, TBL, SEL, IFID, CNT, REVERSE, IFIDX) \
    for (auto &r : ARRAY) { \
        JBAY_DIGEST_TABLE(GRESS, NAME, r.TBL, IFID, NO, CNT, REVERSE, IFIDX) \
    } \
    }

/* NOTE: deliberately leaves the DEPARSER_DIGEST block open; the two wrapper
 * macros above supply the closing brace. */
#define JBAY_COMMON_DIGEST(GRESS, NAME, TBL, SEL, IFID, CNT, REVERSE, IFIDX) \
    DEPARSER_DIGEST(JBay, GRESS, NAME, CNT, can_shift = true;) { \
        SEL.phv = data.select.val->reg.deparser_id(); \
        JBAY_POV(GRESS, data.select, SEL) \
        SEL.shft = data.shift + data.select->lo; \
        SEL.disable_ = 0;

/* Fill one digest table entry per layout set: optional id phv (IFID), then the
 * data phvs byte-by-byte, in REVERSE(d) order when requested, plus valid/len. */
#define JBAY_DIGEST_TABLE(GRESS, NAME, REG, IFID, IFVALID, CNT, REVERSE, IFIDX) \
    for (auto &set : data.layout) { \
        int id = set.first >> data.shift; \
        int idx = 0; \
        int maxidx = REG IFIDX([id]).phvs.size() - 1; \
        bool first = true; \
        int last = -1; \
        for (auto &reg : set.second) { \
            if (first) { \
                first = false; \
                IFID(REG IFIDX([id]).id_phv = reg->reg.deparser_id(); continue;) \
            } \
            /* The same 16b/32b container cannot appear consecutively, but 8b can. */ \
            if (last == reg->reg.deparser_id() && reg->reg.size != 8) { \
                error(data.lineno, "%s: %db container %s seen in consecutive locations", #NAME, \
                      reg->reg.size, reg->reg.name); \
                continue; \
            } \
            for (int i = reg->reg.size / 8; i > 0; i--) { \
                if (idx > maxidx) { \
                    error(data.lineno, "%s digest limited to %d bytes", #NAME, maxidx + 1); \
                    break; \
                } \
                REG IFIDX([id]).phvs[REVERSE(maxidx -) idx++] = reg->reg.deparser_id(); \
            } \
            last = reg->reg.deparser_id(); \
        } \
        IFVALID(REG IFIDX([id]).valid = 1;) \
        REG IFIDX([id]).len = idx; \
    }

// Instantiations: (gress, name, table, selector, has-id, count, reversed, indexed).
JBAY_SIMPLE_DIGEST(INGRESS, learning, regs.dprsrreg.inp.ipp.ingr.learn_tbl,
                   regs.dprsrreg.inp.ipp.ingr.m_learn_sel, NO, 8, YES, YES)
JBAY_ARRAY_DIGEST(INGRESS, mirror, regs.dprsrreg.ho_i, him.mirr_hdr_tbl.entry,
                  regs.dprsrreg.inp.ipp.ingr.m_mirr_sel, YES, 16, NO, YES)
JBAY_ARRAY_DIGEST(EGRESS, mirror, regs.dprsrreg.ho_e, hem.mirr_hdr_tbl.entry,
                  regs.dprsrreg.inp.ipp.egr.m_mirr_sel, YES, 16, NO, YES)
JBAY_SIMPLE_DIGEST(INGRESS, resubmit, regs.dprsrreg.inp.ipp.ingr.resub_tbl,
                   regs.dprsrreg.inp.ipp.ingr.m_resub_sel, NO, 8, NO, YES)
JBAY_SIMPLE_DIGEST(INGRESS, pktgen, regs.dprsrreg.inp.ipp.ingr.pgen_tbl,
                   regs.dprsrreg.inp.ipp.ingr.m_pgen, NO, 1, NO, NO)
203
// all the jbay deparser subtrees with a dis or disable_ bit
// FIXME -- should be a way of doing this with a smart template or other metaprogramming.
// X-macro list: M(is-array, array-prefix, register, disable-field-name).  The
// first argument is YES for per-slice (ho_i/ho_e) registers that are reached
// through the slice array, NO for singleton input-phase registers.
#define JBAY_DISABLE_REGBITS(M) \
    M(YES, regs.dprsrreg.ho_e, her.meta.m_afc, dis) \
    M(YES, regs.dprsrreg.ho_e, her.meta.m_capture_tx_ts, dis) \
    M(YES, regs.dprsrreg.ho_e, her.meta.m_force_tx_err, dis) \
    M(YES, regs.dprsrreg.ho_e, her.meta.m_mirr_c2c_ctrl, dis) \
    M(YES, regs.dprsrreg.ho_e, her.meta.m_mirr_coal_smpl_len, dis) \
    M(YES, regs.dprsrreg.ho_e, her.meta.m_mirr_dond_ctrl, dis) \
    M(YES, regs.dprsrreg.ho_e, her.meta.m_mirr_epipe_port, dis) \
    M(YES, regs.dprsrreg.ho_e, her.meta.m_mirr_hash, dis) \
    M(YES, regs.dprsrreg.ho_e, her.meta.m_mirr_icos, dis) \
    M(YES, regs.dprsrreg.ho_e, her.meta.m_mirr_io_sel, dis) \
    M(YES, regs.dprsrreg.ho_e, her.meta.m_mirr_mc_ctrl, dis) \
    M(YES, regs.dprsrreg.ho_e, her.meta.m_mirr_qid, dis) \
    M(YES, regs.dprsrreg.ho_e, her.meta.m_mtu_trunc_err_f, dis) \
    M(YES, regs.dprsrreg.ho_e, her.meta.m_mtu_trunc_len, dis) \
    M(YES, regs.dprsrreg.ho_e, her.meta.m_tx_pkt_has_offsets, dis) \
    M(YES, regs.dprsrreg.ho_i, hir.meta.m_afc, dis) \
    M(YES, regs.dprsrreg.ho_i, hir.meta.m_bypss_egr, dis) \
    M(YES, regs.dprsrreg.ho_i, hir.meta.m_copy_to_cpu_cos, dis) \
    M(YES, regs.dprsrreg.ho_i, hir.meta.m_ct_disable, dis) \
    M(YES, regs.dprsrreg.ho_i, hir.meta.m_ct_mcast, dis) \
    M(YES, regs.dprsrreg.ho_i, hir.meta.m_deflect_on_drop, dis) \
    M(YES, regs.dprsrreg.ho_i, hir.meta.m_hash1, dis) \
    M(YES, regs.dprsrreg.ho_i, hir.meta.m_hash2, dis) \
    M(YES, regs.dprsrreg.ho_i, hir.meta.m_icos, dis) \
    M(YES, regs.dprsrreg.ho_i, hir.meta.m_mirr_c2c_ctrl, dis) \
    M(YES, regs.dprsrreg.ho_i, hir.meta.m_mirr_coal_smpl_len, dis) \
    M(YES, regs.dprsrreg.ho_i, hir.meta.m_mirr_dond_ctrl, dis) \
    M(YES, regs.dprsrreg.ho_i, hir.meta.m_mirr_epipe_port, dis) \
    M(YES, regs.dprsrreg.ho_i, hir.meta.m_mirr_hash, dis) \
    M(YES, regs.dprsrreg.ho_i, hir.meta.m_mirr_icos, dis) \
    M(YES, regs.dprsrreg.ho_i, hir.meta.m_mirr_io_sel, dis) \
    M(YES, regs.dprsrreg.ho_i, hir.meta.m_mirr_mc_ctrl, dis) \
    M(YES, regs.dprsrreg.ho_i, hir.meta.m_mirr_qid, dis) \
    M(YES, regs.dprsrreg.ho_i, hir.meta.m_mtu_trunc_err_f, dis) \
    M(YES, regs.dprsrreg.ho_i, hir.meta.m_mtu_trunc_len, dis) \
    M(YES, regs.dprsrreg.ho_i, hir.meta.m_pkt_color, dis) \
    M(YES, regs.dprsrreg.ho_i, hir.meta.m_qid, dis) \
    M(YES, regs.dprsrreg.ho_i, hir.meta.m_rid, dis) \
    M(YES, regs.dprsrreg.ho_i, hir.meta.m_xid_l1, dis) \
    M(YES, regs.dprsrreg.ho_i, hir.meta.m_xid_l2, dis) \
    M(NO, , regs.dprsrreg.inp.ipp.egr.m_drop_ctl, disable_) \
    M(NO, , regs.dprsrreg.inp.ipp.egr.m_egress_unicast_port, disable_) \
    M(NO, , regs.dprsrreg.inp.ipp.egr.m_mirr_sel, disable_) \
    M(NO, , regs.dprsrreg.inp.ipp.ingr.m_copy_to_cpu, disable_) \
    M(NO, , regs.dprsrreg.inp.ipp.ingr.m_drop_ctl, disable_) \
    M(NO, , regs.dprsrreg.inp.ipp.ingr.m_egress_unicast_port, disable_) \
    M(NO, , regs.dprsrreg.inp.ipp.ingr.m_learn_sel, disable_) \
    M(NO, , regs.dprsrreg.inp.ipp.ingr.m_mgid1, disable_) \
    M(NO, , regs.dprsrreg.inp.ipp.ingr.m_mgid2, disable_) \
    M(NO, , regs.dprsrreg.inp.ipp.ingr.m_mirr_sel, disable_) \
    M(NO, , regs.dprsrreg.inp.ipp.ingr.m_pgen, disable_) \
    M(NO, , regs.dprsrreg.inp.ipp.ingr.m_pgen_addr, disable_) \
    M(NO, , regs.dprsrreg.inp.ipp.ingr.m_pgen_len, disable_) \
    M(NO, , regs.dprsrreg.inp.ipp.ingr.m_resub_sel, disable_)
261
262// Compiler workaround for TOF2LAB-44, skip certain chunk indices
263void tof2lab44_workaround(int lineno, unsigned &chunk_index) {
264 if (options.tof2lab44_workaround) {
265 static std::set<unsigned> skipped_chunks = {24, 32, 40, 48, 56, 64, 72,
266 80, 88, 96, 104, 112, 120};
267 while (skipped_chunks.count(chunk_index)) chunk_index++;
268 }
269}
270
271// INVARIANT: check_chunk is idempotent.
272bool check_chunk(int lineno, unsigned &chunk) {
273 tof2lab44_workaround(lineno, chunk);
274
275 const unsigned TOTAL_CHUNKS = Target::JBay::DEPARSER_TOTAL_CHUNKS;
276 static bool suppress_repeated = false;
277 if (chunk >= TOTAL_CHUNKS) {
278 if (!suppress_repeated)
279 error(lineno, "Ran out of chunks in field dictionary (%d)", TOTAL_CHUNKS);
280 suppress_repeated = true;
281 return false;
282 }
283 return true;
284}
285
// Callback for each non-CLOT field-dictionary entry (PHV, constant or
// checksum) as its bytes are appended to the current chunk.
using WriteChunk = std::function<void(
    unsigned & /* chunk_index */, const Phv::Slice & /* prev_pov */, int /* prev_entry_encoded */,
    int /* entry_lineno */, const Phv::Ref & /* entry_pov */,
    Deparser::FDEntry::Base * /* entry_what */, unsigned /* byte */, unsigned /* size */)>;

// Callback invoked when a chunk is complete (size limit, POV change, or a CLOT
// starting) so its per-chunk configuration can be emitted.
using FinishChunk =
    std::function<void(unsigned /* chunk_index */, unsigned /* dictionary entry number */,
                       const Phv::Slice & /* pov_bit */, unsigned /* byte */)>;

// Callback for a CLOT entry, which occupies one or more whole chunks within a
// single group; may advance both the chunk index and the entry number.
using WriteClot = std::function<void(
    unsigned & /* chunk_index */, unsigned & /* dictionary entry number */, int /* segment_tag */,
    int /* clot_tag */, const Phv::Ref & /* pov_bit */, Deparser::FDEntry::Clot * /* clot */)>;
302
// Walk the field-dictionary entries, packing them into chunks and chunk
// groups, and invoke the supplied callbacks to do the actual output.  Shared
// between register programming (output_jbay_field_dictionary) and context-json
// generation (output_jbay_field_dictionary_slice).
template <class POV, class DICT>
void output_jbay_field_dictionary_helper(int lineno, POV &pov, DICT &dict, WriteChunk write_chunk,
                                         FinishChunk finish_chunk, WriteClot write_clot) {
    const unsigned CHUNK_SIZE = Target::JBay::DEPARSER_CHUNK_SIZE;
    const unsigned CHUNK_GROUPS = Target::JBay::DEPARSER_CHUNK_GROUPS;
    const unsigned CHUNKS_PER_GROUP = Target::JBay::DEPARSER_CHUNKS_PER_GROUP;
    const unsigned CLOTS_PER_GROUP = Target::JBay::DEPARSER_CLOTS_PER_GROUP;
    // ch/entry_n: current chunk and dictionary-entry index; byte: bytes already
    // placed in the current chunk; group: current chunk group.
    unsigned ch = 0, entry_n = 0, byte = 0, group = 0, clots_in_group = 0;
    Phv::Slice prev_pov;
    int prev = -1;

    // INVARIANT: check_chunk should be called immediately before doing anything with a chunk.
    // Because check_chunk is idempotent, it is fine to call it on a chunk that has previously been
    // checked.

    for (auto &ent : dict) {
        auto *clot = dynamic_cast<Deparser::FDEntry::Clot *>(ent.what.get());
        // FIXME -- why does the following give an error from gcc?
        // auto *clot = ent.what->to<Deparser::FDEntry::Clot>();
        unsigned size = ent.what->size();

        // Finish the current chunk if needed.  A chunk ends when a CLOT
        // starts, when it would overflow CHUNK_SIZE, or when the controlling
        // POV bit changes.
        if (byte &&
            (clot || byte + size > CHUNK_SIZE || (prev_pov && *ent.pov.front() != prev_pov))) {
            finish_chunk(ch++, entry_n++, prev_pov, byte);
            byte = 0;
        }
        if (ch / CHUNKS_PER_GROUP != group) {
            // into a new group
            group = ch / CHUNKS_PER_GROUP;
            clots_in_group = 0;
        }
        if (clot) {
            // Start a new group if needed. Each group has a maximum number of CLOTs that can be
            // deparsed, and CLOTs cannot span multiple groups.
            bool out_of_clots_in_group = clots_in_group >= CLOTS_PER_GROUP;
            auto chunks_in_clot = (size + CHUNK_SIZE - 1) / CHUNK_SIZE;
            bool out_of_chunks_in_group = ch % CHUNKS_PER_GROUP + chunks_in_clot > CHUNKS_PER_GROUP;
            if (out_of_clots_in_group || out_of_chunks_in_group) {
                // go on to the next group -- round ch up to the next group boundary
                ch = (ch | (CHUNKS_PER_GROUP - 1)) + 1;
                group = ch / CHUNKS_PER_GROUP;
                clots_in_group = 0;
            }

            // Write the CLOT to the next segment in the current group.
            if (chunks_in_clot == CHUNKS_PER_GROUP && (ch % CHUNKS_PER_GROUP))
                error(clot->lineno, "--tof2lab44-workaround incompatible with clot >56 bytes");
            int clot_tag = Parser::clot_tag(clot->gress, clot->tag);
            int seg_tag = clots_in_group++;
            write_clot(ch, entry_n, seg_tag, clot_tag, ent.pov.front(), clot);

            // A CLOT breaks any repeated-container run for the next entry.
            prev = -1;
        } else {
            // Phv, Constant, or Checksum
            write_chunk(ch, prev_pov, prev, ent.lineno, ent.pov.front(), ent.what.get(), byte,
                        size);
            byte += size;
            prev = ent.what->encode();
        }
        prev_pov = *ent.pov.front();
    }

    // Flush a trailing partially-filled chunk.
    if (byte > 0) {
        finish_chunk(ch, entry_n, prev_pov, byte);
    }
}
372
373template <class REGS, class POV_FMT, class POV, class DICT>
374void output_jbay_field_dictionary(int lineno, REGS &regs, POV_FMT &pov_layout, POV &pov,
375 DICT &dict) {
376 // Initialize pov_layout.
377 unsigned byte = 0;
378 for (auto &r : pov) {
379 for (int bits = 0; bits < r.first->size; bits += 8) {
380 if (byte > pov_layout.size()) error(lineno, "Ran out of space in POV in deparser");
381 pov_layout[byte++] = r.first->deparser_id();
382 }
383 }
384 while (byte < pov_layout.size()) pov_layout[byte++] = 0xff;
385 LOG5("jbay field dictionary:");
386
387 // Declare some callback functions, and then delegate to helper.
388 auto write_chunk = [](unsigned ch, const Phv::Slice &prev_pov, int prev, int ent_lineno,
389 const Phv::Ref &ent_pov, Deparser::FDEntry::Base *ent_what, unsigned byte,
390 unsigned size) {
391 // Just do an error check here. Defer actual writing to finish_chunk.
392 LOG5(" chunk " << ch << ": " << *ent_what << " (pov " << ent_pov << ")");
393 if (dynamic_cast<Deparser::FDEntry::Phv *>(ent_what) && prev_pov == *ent_pov &&
394 int(ent_what->encode()) == prev && (size & 6))
395 error(ent_lineno, "16 and 32-bit container cannot be repeatedly deparsed");
396 };
397
398 auto finish_chunk = [&](unsigned ch, unsigned entry_n, const Phv::Slice &pov_bit,
399 unsigned byte) {
400 if (check_chunk(lineno, ch)) {
401 regs.chunk_info[ch].chunk_vld = 1;
402 regs.chunk_info[ch].pov = pov.at(&pov_bit.reg) + pov_bit.lo;
403 regs.chunk_info[ch].seg_vld = 0;
404 regs.chunk_info[ch].seg_slice = byte & 7;
405 regs.chunk_info[ch].seg_sel = byte >> 3;
406 }
407 };
408
409 auto write_clot = [&](unsigned &ch, unsigned &entry_n, int seg_tag, int clot_tag,
410 const Phv::Ref &pov_bit, Deparser::FDEntry::Clot *clot) {
411 const unsigned CHUNKS_PER_GROUP = Target::JBay::DEPARSER_CHUNKS_PER_GROUP;
412 const int group = ch / CHUNKS_PER_GROUP;
413 if (group < regs.fd_tags.size()) regs.fd_tags[group].segment_tag[seg_tag] = clot_tag;
414 LOG5(" chunk " << ch << ": " << *clot << " (pov " << pov_bit << ")");
415 for (int i = 0; i < clot->length; i += 8, ++ch) {
416 // CLOTs cannot span multiple groups.
417 BUG_CHECK(ch / CHUNKS_PER_GROUP == group || error_count > 0, "CLOT spanning groups");
418 if (check_chunk(lineno, ch)) {
419 regs.chunk_info[ch].chunk_vld = 1;
420 regs.chunk_info[ch].pov = pov.at(&pov_bit->reg) + pov_bit->lo;
421 regs.chunk_info[ch].seg_vld = 1;
422 regs.chunk_info[ch].seg_sel = seg_tag;
423 regs.chunk_info[ch].seg_slice = i / 8U;
424 }
425 }
426 };
427
428 output_jbay_field_dictionary_helper(lineno, pov, dict, write_chunk, finish_chunk, write_clot);
429}
430
// Generate the context-json description of one deparser slice's field
// dictionary (and program the slice's chunk/CLOT registers).  Mirrors the
// packing done by output_jbay_field_dictionary via the shared helper.
template <class CHUNKS, class CLOTS, class POV, class DICT>
void output_jbay_field_dictionary_slice(int lineno, CHUNKS &chunk, CLOTS &clots, POV &pov,
                                        DICT &dict, json::vector &fd_gress,
                                        json::vector &fd_entries, gress_t gress) {
    // Accumulators for the json being built; flushed per chunk/entry.
    json::map fd;
    json::map fd_entry;
    json::vector chunk_bytes;
    json::vector fd_entry_chunk_bytes;

    // Emit one json record (and one register byte-offset write) per byte of a
    // PHV/constant/checksum entry.
    auto write_chunk = [&](unsigned ch, const Phv::Slice &prev_pov, int prev, int ent_lineno,
                           const Phv::Ref &ent_pov, Deparser::FDEntry::Base *ent_what,
                           unsigned byte, unsigned size) {
        while (size--) {
            json::map chunk_byte;
            json::map fd_entry_chunk_byte;
            json::map fd_entry_chunk;
            chunk_byte["Byte"] = byte;
            fd_entry_chunk_byte["chunk_number"] = byte;
            // Encodings below CONSTANTS_PHVID_JBAY_LOW are PHV ids; the rest
            // are checksum/constant ids.
            if (ent_what->encode() < CONSTANTS_PHVID_JBAY_LOW) {
                auto *phv = dynamic_cast<Deparser::FDEntry::Phv *>(ent_what);
                auto phv_reg = phv->reg();
                write_field_name_in_json(phv_reg, &ent_pov->reg, ent_pov->lo, chunk_byte,
                                         fd_entry_chunk, 19, gress);
            } else {
                write_csum_const_in_json(ent_what->encode(), chunk_byte, fd_entry_chunk, gress);
            }
            fd_entry_chunk_byte["chunk"] = std::move(fd_entry_chunk);
            chunk_bytes.push_back(std::move(chunk_byte));
            fd_entry_chunk_bytes.push_back(std::move(fd_entry_chunk_byte));
            if (check_chunk(lineno, ch)) {
                chunk[ch].is_phv |= 1 << byte;
                chunk[ch].byte_off.phv_offset[byte++] = ent_what->encode();
            }
        }
    };

    // Close out the current chunk: record its dictionary entry/POV in json and
    // program the chunk's segment configuration.
    auto finish_chunk = [&](unsigned ch, unsigned entry_n, const Phv::Slice &pov_bit,
                            unsigned byte) {
        fd["Field Dictionary Number"] = entry_n;
        fd["Field Dictionary Chunk"] = ch;
        fd_entry["entry"] = entry_n;
        // fd_entry["fde_chunk"] = ch; -- requires compiler_interfaces change
        Deparser::write_pov_in_json(fd, fd_entry, &pov_bit.reg, pov.at(&pov_bit.reg) + pov_bit.lo,
                                    pov_bit.lo);
        if (check_chunk(lineno, ch)) {
            chunk[ch].cfg.seg_vld = 0;  // no CLOTs yet
            chunk[ch].cfg.seg_slice = byte & 7;
            chunk[ch].cfg.seg_sel = byte >> 3;
        }

        fd["Content"] = std::move(chunk_bytes);
        fd_entry["chunks"] = std::move(fd_entry_chunk_bytes);
        fd_entries.push_back(std::move(fd_entry));
        fd_gress.push_back(std::move(fd));
    };

    // Emit a CLOT: one chunk per 8 bytes, honoring PHV/checksum replacements
    // at the byte offsets recorded in the CLOT entry.
    auto write_clot = [&](unsigned &ch, unsigned &entry_n, int seg_tag, int clot_tag,
                          const Phv::Ref &pov_bit, Deparser::FDEntry::Clot *clot) {
        const unsigned CHUNKS_PER_GROUP = Target::JBay::DEPARSER_CHUNKS_PER_GROUP;
        const int group = ch / CHUNKS_PER_GROUP;
        if (group < clots.size()) clots[group].segment_tag[seg_tag] = clot_tag;
        auto phv_repl = clot->phv_replace.begin();
        auto csum_repl = clot->csum_replace.begin();
        for (int i = 0; i < clot->length; i += 8, ++ch, ++entry_n) {
            // CLOTs cannot span multiple groups.
            BUG_CHECK(ch / CHUNKS_PER_GROUP == group || error_count > 0, "CLOT spanning groups");

            fd["Field Dictionary Number"] = entry_n;
            fd["Field Dictionary Chunk"] = ch;
            fd_entry["entry"] = entry_n;
            // fd_entry["fde_chunk"] = ch; -- requires compiler_interfaces change
            Deparser::write_pov_in_json(fd, fd_entry, &pov_bit->reg,
                                        pov.at(&pov_bit->reg) + pov_bit->lo, pov_bit->lo);

            if (check_chunk(lineno, ch)) {
                chunk[ch].cfg.seg_vld = 1;
                chunk[ch].cfg.seg_sel = seg_tag;
                chunk[ch].cfg.seg_slice = i / 8U;
            }

            // Per-byte content of this chunk: replacement PHV, replacement
            // checksum, or raw CLOT data.
            for (int j = 0; j < 8 && i + j < clot->length; ++j) {
                json::map chunk_byte;
                json::map fd_entry_chunk_byte;
                json::map fd_entry_chunk;
                chunk_byte["Byte"] = j;
                fd_entry_chunk_byte["chunk_number"] = j;
                if (phv_repl != clot->phv_replace.end() && int(phv_repl->first) <= i + j) {
                    // This is PHV replaced, PHV is used
                    chunk[ch].is_phv |= 1 << j;
                    chunk[ch].byte_off.phv_offset[j] = phv_repl->second->reg.deparser_id();
                    auto phv_reg = &phv_repl->second->reg;
                    write_field_name_in_json(phv_reg, &pov_bit->reg, pov_bit->lo, chunk_byte,
                                             fd_entry_chunk, 19, gress);
                    if (int(phv_repl->first + phv_repl->second->size() / 8U) <= i + j + 1)
                        ++phv_repl;
                } else if (csum_repl != clot->csum_replace.end() &&
                           int(csum_repl->first) <= i + j) {
                    if (check_chunk(lineno, ch)) {
                        chunk[ch].is_phv |= 1 << j;
                        chunk[ch].byte_off.phv_offset[j] = csum_repl->second.encode();
                    }
                    write_csum_const_in_json(csum_repl->second.encode(), chunk_byte, fd_entry_chunk,
                                             gress);
                    // Checksums are 2 bytes wide.
                    if (int(csum_repl->first + 2) <= i + j + 1) ++csum_repl;
                } else {
                    if (check_chunk(lineno, ch)) chunk[ch].byte_off.phv_offset[j] = i + j;
                    chunk_byte["CLOT"] = clot_tag;
                    chunk_byte["CLOT_OFFSET"] = i + j;
                    fd_entry_chunk["clot_tag"] = clot_tag;
                    // fd_entry_chunk["clot_offset"] = i + j; requires compiler_interfaces change
                }
                fd_entry_chunk_byte["chunk"] = std::move(fd_entry_chunk);
                chunk_bytes.push_back(std::move(chunk_byte));
                fd_entry_chunk_bytes.push_back(std::move(fd_entry_chunk_byte));
            }
            fd["Content"] = std::move(chunk_bytes);
            fd_entry["chunks"] = std::move(fd_entry_chunk_bytes);
            fd_entries.push_back(std::move(fd_entry));
            fd_gress.push_back(std::move(fd));
        }
    };

    output_jbay_field_dictionary_helper(lineno, pov, dict, write_chunk, finish_chunk, write_clot);
}
555
// Verify that no PHV ownership group is claimed by both ingress and egress.
// Ownership granularity is a group of consecutive containers: 4 containers
// for 8/16-bit PHVs (mask 3), 2 for 32-bit PHVs (mask 1).
static void check_jbay_ownership(bitvec phv_use[2]) {
    unsigned mask = 0;
    int group = -1;
    for (auto i : phv_use[INGRESS]) {
        // Skip containers in the same ownership group we just checked.
        if ((i | mask) == (group | mask)) continue;
        switch (Phv::reg(i)->size) {
            case 8:
            case 16:
                mask = 3;
                break;
            case 32:
                mask = 1;
                break;
            default:
                BUG("unexpected size %d", Phv::reg(i)->size);
        }
        group = i & ~mask;
        // If egress uses any container in this ingress-owned group, complain.
        if (phv_use[EGRESS].getrange(group, mask + 1)) {
            error(0, "%s..%s used by both ingress and egress deparser", Phv::reg(group)->name,
                  Phv::reg(group | mask)->name);
        }
    }
}
579
580static void setup_jbay_ownership(bitvec phv_use, ubits_base &phv8, ubits_base &phv16,
581 ubits_base &phv32) {
582 std::set<unsigned> phv8_grps, phv16_grps, phv32_grps;
583
584 for (auto i : phv_use) {
585 auto *reg = Phv::reg(i);
586 switch (reg->size) {
587 case 8:
588 phv8_grps.insert(1U << ((reg->deparser_id() - 64) / 4U));
589 break;
590 case 16:
591 phv16_grps.insert(1U << ((reg->deparser_id() - 128) / 4U));
592 break;
593 case 32:
594 phv32_grps.insert(1U << (reg->deparser_id() / 2U));
595 break;
596 default:
597 BUG("unexpected size %d", reg->size);
598 }
599 }
600
601 for (auto v : phv8_grps) phv8 |= v;
602 for (auto v : phv16_grps) phv16 |= v;
603 for (auto v : phv32_grps) phv32 |= v;
604}
605
// Map from PHV deparser_id to the checksum-adder address(es) feeding it.
static short jbay_phv2cksum[224][2] = {
    // Entries 0-127 are for 32 bit PHV
    // Each 32 bit PHV uses two 16b adders
    // The even addresses are for [31:16], the odd addresses are for [15:0]
    // Note: The current CSR description of these entries for 32 bit containers is incorrect.
    // 128-191 are for 8 bit PHV
    // 192-287 are for 16 bit PHV
    // 8/16-bit containers use a single adder; their second slot is -1.
    {1, 0}, {3, 2}, {5, 4}, {7, 6}, {9, 8}, {11, 10}, {13, 12}, {15, 14},
    {17, 16}, {19, 18}, {21, 20}, {23, 22}, {25, 24}, {27, 26}, {29, 28}, {31, 30},
    {33, 32}, {35, 34}, {37, 36}, {39, 38}, {41, 40}, {43, 42}, {45, 44}, {47, 46},
    {49, 48}, {51, 50}, {53, 52}, {55, 54}, {57, 56}, {59, 58}, {61, 60}, {63, 62},
    {65, 64}, {67, 66}, {69, 68}, {71, 70}, {73, 72}, {75, 74}, {77, 76}, {79, 78},
    {81, 80}, {83, 82}, {85, 84}, {87, 86}, {89, 88}, {91, 90}, {93, 92}, {95, 94},
    {97, 96}, {99, 98}, {101, 100}, {103, 102}, {105, 104}, {107, 106}, {109, 108}, {111, 110},
    {113, 112}, {115, 114}, {117, 116}, {119, 118}, {121, 120}, {123, 122}, {125, 124}, {127, 126},
    {128, -1}, {129, -1}, {130, -1}, {131, -1}, {132, -1}, {133, -1}, {134, -1}, {135, -1},
    {136, -1}, {137, -1}, {138, -1}, {139, -1}, {140, -1}, {141, -1}, {142, -1}, {143, -1},
    {144, -1}, {145, -1}, {146, -1}, {147, -1}, {148, -1}, {149, -1}, {150, -1}, {151, -1},
    {152, -1}, {153, -1}, {154, -1}, {155, -1}, {156, -1}, {157, -1}, {158, -1}, {159, -1},
    {160, -1}, {161, -1}, {162, -1}, {163, -1}, {164, -1}, {165, -1}, {166, -1}, {167, -1},
    {168, -1}, {169, -1}, {170, -1}, {171, -1}, {172, -1}, {173, -1}, {174, -1}, {175, -1},
    {176, -1}, {177, -1}, {178, -1}, {179, -1}, {180, -1}, {181, -1}, {182, -1}, {183, -1},
    {184, -1}, {185, -1}, {186, -1}, {187, -1}, {188, -1}, {189, -1}, {190, -1}, {191, -1},
    {192, -1}, {193, -1}, {194, -1}, {195, -1}, {196, -1}, {197, -1}, {198, -1}, {199, -1},
    {200, -1}, {201, -1}, {202, -1}, {203, -1}, {204, -1}, {205, -1}, {206, -1}, {207, -1},
    {208, -1}, {209, -1}, {210, -1}, {211, -1}, {212, -1}, {213, -1}, {214, -1}, {215, -1},
    {216, -1}, {217, -1}, {218, -1}, {219, -1}, {220, -1}, {221, -1}, {222, -1}, {223, -1},
    {224, -1}, {225, -1}, {226, -1}, {227, -1}, {228, -1}, {229, -1}, {230, -1}, {231, -1},
    {232, -1}, {233, -1}, {234, -1}, {235, -1}, {236, -1}, {237, -1}, {238, -1}, {239, -1},
    {240, -1}, {241, -1}, {242, -1}, {243, -1}, {244, -1}, {245, -1}, {246, -1}, {247, -1},
    {248, -1}, {249, -1}, {250, -1}, {251, -1}, {252, -1}, {253, -1}, {254, -1}, {255, -1},
    {256, -1}, {257, -1}, {258, -1}, {259, -1}, {260, -1}, {261, -1}, {262, -1}, {263, -1},
    {264, -1}, {265, -1}, {266, -1}, {267, -1}, {268, -1}, {269, -1}, {270, -1}, {271, -1},
    {272, -1}, {273, -1}, {274, -1}, {275, -1}, {276, -1}, {277, -1}, {278, -1}, {279, -1},
    {280, -1}, {281, -1}, {282, -1}, {283, -1}, {284, -1}, {285, -1}, {286, -1}, {287, -1},
};
642
// Write one deparser checksum entry and attach the POV bit controlling it.
// Delegates the mask/swap/id programming to the target-independent
// write_checksum_entry, then records the checksum-unit-local POV bit index.
template <class ENTRIES>
static void write_jbay_checksum_entry(ENTRIES &entry, unsigned mask, int swap, int pov, int id,
                                      const char *reg = nullptr) {
    write_checksum_entry(entry, mask, swap, id, reg);
    entry.pov = pov;
}
649
// Populates pov_map which maps the bit in the main POV array [127:0]
// to bit in the checksum pov array [32:0]
// The checksum pov array is 32 bits / 4 bytes - pov_cfg.byte_set[4].
// Each element of the pov_cfg.byte_sel array maps to the byte in the main POV array
template <class POV>
void jbay_csum_pov_config(Phv::Ref povRef, POV &pov_cfg,
                          ordered_map<const Phv::Register *, unsigned> &pov,
                          std::map<unsigned, unsigned> &pov_map, unsigned *prev_byte,
                          int csum_unit) {
    // Absolute bit position of this POV bit in the main POV array.
    unsigned bit = pov.at(&povRef->reg) + povRef->lo;
    if (pov_map.count(bit)) return;  // already mapped
    // Reuse an already-selected POV byte if it covers this bit.
    for (unsigned i = 0; i < (*prev_byte); ++i) {
        if (pov_cfg.byte_sel[i] == bit / 8U) {
            pov_map[bit] = i * 8U + bit % 8U;
            break;
        }
    }
    if (pov_map.count(bit)) return;
    // Otherwise claim the next free byte_sel slot, if any remain.
    if (*prev_byte >= (int)pov_cfg.byte_sel.size()) {
        error(povRef.lineno, "Checksum unit %d exceeds %d bytes of POV", csum_unit,
              (int)pov_cfg.byte_sel.size());
        return;
    }
    pov_map[bit] = (*prev_byte) * 8U + bit % 8U;
    pov_cfg.byte_sel[(*prev_byte)++] = bit / 8U;
    return;
}
677
678template <class POV>
679void set_jbay_pov_cfg(POV &pov_cfg, std::map<unsigned, unsigned> &pov_map,
681 ordered_map<const Phv::Register *, unsigned> &pov, int csum_unit,
682 unsigned *prev_byte) {
683 for (auto &unit_entry : full_csum.entries) {
684 for (auto val : unit_entry.second) {
685 if (val.pov.size() != 1) {
686 error(val.val.lineno, "one POV bit required for Tofino2");
687 continue;
688 }
689 jbay_csum_pov_config(val.pov.front(), pov_cfg, pov, pov_map, prev_byte, csum_unit);
690 }
691 }
692 for (auto &val : full_csum.clot_entries) {
693 if (val.pov.size() != 1) {
694 error(val.val.lineno, "one POV bit required for Tofino2");
695 continue;
696 }
697 jbay_csum_pov_config(val.pov.front(), pov_cfg, pov, pov_map, prev_byte, csum_unit);
698 }
699 for (auto &checksum_pov : full_csum.pov) {
700 jbay_csum_pov_config(checksum_pov.second, pov_cfg, pov, pov_map, prev_byte, csum_unit);
701 }
702 return;
703}
704
// Program one full-checksum unit: PHV partial-checksum entries (via the
// jbay_phv2cksum adder remap), CLOT checksum tags, and checksum-invalidate
// POV entries.  pov_map must already be populated by set_jbay_pov_cfg.
template <class CSUM, class ENTRIES>
void write_jbay_full_checksum_config(
    CSUM &csum, ENTRIES &phv_entries, int unit, std::set<int> &visited,
    std::array<std::map<unsigned, unsigned>, MAX_DEPARSER_CHECKSUM_UNITS> &pov_map,
    Deparser::FullChecksumUnit &full_csum, ordered_map<const Phv::Register *, unsigned> &pov) {
    for (auto &unit_entry : full_csum.entries) {
        // Same partial checksum unit can be used in multiple full checksum unit.
        // No need to rewrite the checksum entries multiple times for the same unit
        if (visited.count(unit_entry.first)) continue;
        visited.insert(unit_entry.first);
        for (auto val : unit_entry.second) {
            // Missing POV already reported by set_jbay_pov_cfg; just skip here.
            if (val.pov.size() != 1) continue;
            int povbit =
                pov_map[unit_entry.first].at(pov.at(&val.pov.front()->reg) + val.pov.front()->lo);
            int mask = val.mask;
            int swap = val.swap;
            // 32-bit containers use two 16-bit adders; remap[1] is -1 otherwise.
            auto &remap = jbay_phv2cksum[val->reg.deparser_id()];
            write_jbay_checksum_entry(phv_entries[unit_entry.first].entry[remap[0]], mask & 3,
                                      swap & 1, povbit, unit_entry.first, val->reg.name);
            if (remap[1] >= 0)
                write_jbay_checksum_entry(phv_entries[unit_entry.first].entry[remap[1]], mask >> 2,
                                          swap >> 1, povbit, unit_entry.first, val->reg.name);
            else
                BUG_CHECK((mask >> 2 == 0) && (swap >> 1 == 0), "Invalid checksum");
        }
    }
    int tag_idx = 0;
    for (auto &val : full_csum.clot_entries) {
        if (val.pov.size() != 1) continue;
        int povbit = pov_map[unit].at(pov.at(&val.pov.front()->reg) + val.pov.front()->lo);
        // NOTE(review): when tag_idx == 16 the error is reported but the writes
        // below still index clot_entry[16]/tags[16] -- confirm the register
        // arrays are bounds-checked.
        if (tag_idx == 16) error(-1, "Ran out of clot entries in deparser checksum unit %d", unit);
        csum.clot_entry[tag_idx].pov = povbit;
        csum.clot_entry[tag_idx].vld = 1;
        csum.tags[tag_idx].tag = val.tag;
        tag_idx++;
    }
    // Checksum-invalidate POV entries.
    for (auto &checksum_pov : full_csum.pov) {
        csum.phv_entry[checksum_pov.first].pov =
            pov_map[unit].at(pov.at(&checksum_pov.second->reg) + checksum_pov.second->lo);
        csum.phv_entry[checksum_pov.first].vld = 1;
    }
    csum.zeros_as_ones.en = full_csum.zeros_as_ones_en;

    // FIXME -- use/set csum.csum_constant?
}
750// Engine 0: scratch[23:0]
751// Engine 1: { scratch2[15:0], scratch[31:24] }
752// Engine 2: { scratch[7:0] , scratch2[31:16] }
753// Engine 3: scratch[31:8]
754// So each engine gets a cfg_vector[23:0]
755// There are 16 CLOT csums and 8 PHV csums that can be inverted:
756// CLOT csum [15:0] are controlled by cfg_vector [15:0]
757// PHV csums [7:0] are controlled by cfg_vector [23:16]
758
// Program the per-engine checksum-invert bits.  Each engine's 24-bit config
// vector is scattered across the scratch/scratch2 registers (see the engine
// layout comment above); the bit placement below selects the right scratch
// register and bit offset for the given engine `unit`.
template <class SCRATCH1, class SCRATCH2, class SCRATCH3>
void write_jbay_full_checksum_invert_config(SCRATCH1 &scratch1, SCRATCH2 &scratch2,
                                            SCRATCH3 &scratch3, int unit,
                                            Deparser::FullChecksumUnit &full_csum) {
    ubits<32> value1;
    ubits<32> value2;
    ubits<32> value3;
    // PHV checksum inverts: cfg_vector bits [23:16].
    for (auto checksum_unit : full_csum.checksum_unit_invert) {
        if (unit == 0) {
            value1 |= (1 << (16 + checksum_unit));
        } else if (unit == 1) {
            value1 |= (1 << (8 + checksum_unit));
        } else if (unit == 2) {
            value3 |= (1 << checksum_unit);
        } else if (unit == 3) {
            value3 |= (1 << (24 + checksum_unit));
        }
    }
    // CLOT checksum inverts: cfg_vector bits [15:0].  For engine 1 the vector
    // straddles scratch2/scratch, so the tag selects which register.
    for (auto clot_tag : full_csum.clot_tag_invert) {
        if (unit == 0) {
            value1 |= (1 << clot_tag);
        } else if (unit == 1) {
            if (clot_tag > 7) {
                value1 |= (1 << (clot_tag - 8));
            } else {
                value3 |= (1 << (16 + clot_tag));
            }
        } else if (unit == 2) {
            value2 |= (1 << (16 + clot_tag));
        } else if (unit == 3) {
            value3 |= (1 << (8 + clot_tag));
        }
    }
    // Only touch the scratch registers when something was actually set.
    if (value1 || value2 || value3) {
        scratch1.value |= value1;
        scratch2.value |= value2;
        scratch3.value |= value3;
    }
    return;
}
799
/* Copies the gress's deparser constants (an ordered std::set, so iteration is
 * ascending) into consecutive slots of the constant-xbar register array
 * `cons`, starting at slot 0. */
template <class CONS>
void write_jbay_constant_config(CONS &cons, const std::set<int> &vals) {
    unsigned slot = 0;
    for (auto it = vals.begin(); it != vals.end(); ++it) cons[slot++] = *it;
}
808
/* Top-level deparser register configuration for JBay (Tofino2).
 *
 * Order of operations:
 *   1. disable/enable the relevant register subtrees (so untouched registers
 *      can be elided from the emitted json)
 *   2. header-xbar constants and full-checksum units, ingress then egress
 *   3. field dictionaries (POV config + per-slice chunk config) and a json log
 *   4. PHV ownership groups, after checking for ingress/egress double use
 *   5. intrinsics, resubmit mode, digests and learning-digest masks
 *   6. disable never-modified register bits and emit regs.deparser.cfg.json
 */
template <>
void Deparser::write_config(Target::JBay::deparser_regs &regs) {
    regs.dprsrreg.inp.icr.disable(); // disable this whole tree
    regs.dprsrreg.inp.icr.disabled_ = false; // then enable just certain subtrees
    regs.dprsrreg.inp.icr.csum_engine.enable();
    regs.dprsrreg.inp.icr.egr.enable();
    regs.dprsrreg.inp.icr.egr_meta_pov.enable();
    regs.dprsrreg.inp.icr.ingr.enable();
    regs.dprsrreg.inp.icr.ingr_meta_pov.enable();
    regs.dprsrreg.inp.icr.scratch.enable();
    regs.dprsrreg.inp.icr.scratch2.enable();
    regs.dprsrreg.inp.ipp.scratch.enable();
    regs.dprsrreg.inp.iim.disable();
    regs.dprsrreg.inpslice.disable();
    for (auto &r : regs.dprsrreg.ho_i) r.out_ingr.disable();
    for (auto &r : regs.dprsrreg.ho_e) r.out_egr.disable();

    // Header-xbar constants, replicated into every header-output slice.
    for (auto &r : regs.dprsrreg.ho_i)
        write_jbay_constant_config(r.hir.h.hdr_xbar_const.value, constants[INGRESS]);
    for (auto &r : regs.dprsrreg.ho_e)
        write_jbay_constant_config(r.her.h.hdr_xbar_const.value, constants[EGRESS]);
    // Ingress checksum units: first pass builds the per-unit POV byte mapping,
    // second pass (only while error-free) writes the engine and invert config.
    std::set<int> visited_i;
    std::array<std::map<unsigned, unsigned>, MAX_DEPARSER_CHECKSUM_UNITS> pov_map_i;
    for (int csum_unit = 0; csum_unit < Target::JBay::DEPARSER_CHECKSUM_UNITS; csum_unit++) {
        unsigned prev_byte = 0;
        if (full_checksum_unit[INGRESS][csum_unit].clot_entries.empty() &&
            full_checksum_unit[INGRESS][csum_unit].entries.empty())
            continue; // unit not used by this program
        set_jbay_pov_cfg(regs.dprsrreg.inp.ipp.phv_csum_pov_cfg.csum_pov_cfg[csum_unit],
                         pov_map_i[csum_unit], full_checksum_unit[INGRESS][csum_unit], pov[INGRESS],
                         csum_unit, &prev_byte);
        if (error_count > 0) break;
    }
    for (int csum_unit = 0; csum_unit < Target::JBay::DEPARSER_CHECKSUM_UNITS && error_count == 0;
         csum_unit++) {
        if (full_checksum_unit[INGRESS][csum_unit].clot_entries.empty() &&
            full_checksum_unit[INGRESS][csum_unit].entries.empty())
            continue;
        regs.dprsrreg.inp.ipp.phv_csum_pov_cfg.thread.thread[csum_unit] = INGRESS;
        write_jbay_full_checksum_config(
            regs.dprsrreg.inp.icr.csum_engine[csum_unit], regs.dprsrreg.inp.ipp_m.i_csum.engine,
            csum_unit, visited_i, pov_map_i, full_checksum_unit[INGRESS][csum_unit], pov[INGRESS]);
        write_jbay_full_checksum_invert_config(
            regs.dprsrreg.inp.icr.scratch, regs.dprsrreg.inp.icr.scratch2,
            regs.dprsrreg.inp.ipp.scratch, csum_unit, full_checksum_unit[INGRESS][csum_unit]);
    }
    // Egress checksum units: same two-pass scheme as ingress above.
    std::set<int> visited_e;
    std::array<std::map<unsigned, unsigned>, MAX_DEPARSER_CHECKSUM_UNITS> pov_map_e;
    for (int csum_unit = 0; csum_unit < Target::JBay::DEPARSER_CHECKSUM_UNITS; csum_unit++) {
        unsigned prev_byte = 0;
        if (full_checksum_unit[EGRESS][csum_unit].clot_entries.empty() &&
            full_checksum_unit[EGRESS][csum_unit].entries.empty())
            continue;
        set_jbay_pov_cfg(regs.dprsrreg.inp.ipp.phv_csum_pov_cfg.csum_pov_cfg[csum_unit],
                         pov_map_e[csum_unit], full_checksum_unit[EGRESS][csum_unit], pov[EGRESS],
                         csum_unit, &prev_byte);
        if (error_count > 0) break;
    }
    for (int csum_unit = 0; csum_unit < Target::JBay::DEPARSER_CHECKSUM_UNITS && error_count == 0;
         csum_unit++) {
        if (full_checksum_unit[EGRESS][csum_unit].clot_entries.empty() &&
            full_checksum_unit[EGRESS][csum_unit].entries.empty())
            continue;
        regs.dprsrreg.inp.ipp.phv_csum_pov_cfg.thread.thread[csum_unit] = EGRESS;
        write_jbay_full_checksum_config(
            regs.dprsrreg.inp.icr.csum_engine[csum_unit], regs.dprsrreg.inp.ipp_m.i_csum.engine,
            csum_unit, visited_e, pov_map_e, full_checksum_unit[EGRESS][csum_unit], pov[EGRESS]);
        write_jbay_full_checksum_invert_config(
            regs.dprsrreg.inp.icr.scratch, regs.dprsrreg.inp.icr.scratch2,
            regs.dprsrreg.inp.ipp.scratch, csum_unit, full_checksum_unit[EGRESS][csum_unit]);
    }

    // Field dictionaries: POV phv selection per gress, then the compressed
    // chunk config for each header-output slice.
    output_jbay_field_dictionary(lineno[INGRESS], regs.dprsrreg.inp.icr.ingr,
                                 regs.dprsrreg.inp.ipp.main_i.pov.phvs, pov[INGRESS],
                                 dictionary[INGRESS]);
    json::map field_dictionary_alloc;
    json::vector fde_entries_i;
    json::vector fde_entries_e;
    json::vector fde_entries;
    json::vector fd_gress;
    for (auto &rslice : regs.dprsrreg.ho_i) {
        output_jbay_field_dictionary_slice(lineno[INGRESS], rslice.him.fd_compress.chunk,
                                           rslice.hir.h.compress_clot_sel, pov[INGRESS],
                                           dictionary[INGRESS], fd_gress, fde_entries, INGRESS);
        // NOTE(review): fd_gress/fde_entries are moved-from on every iteration,
        // so after the loop only the last slice's json survives -- presumably
        // all slices produce identical entries; confirm before relying on it.
        field_dictionary_alloc["ingress"] = std::move(fd_gress);
        fde_entries_i = std::move(fde_entries);
    }
    output_jbay_field_dictionary(lineno[EGRESS], regs.dprsrreg.inp.icr.egr,
                                 regs.dprsrreg.inp.ipp.main_e.pov.phvs, pov[EGRESS],
                                 dictionary[EGRESS]);
    for (auto &rslice : regs.dprsrreg.ho_e) {
        output_jbay_field_dictionary_slice(lineno[EGRESS], rslice.hem.fd_compress.chunk,
                                           rslice.her.h.compress_clot_sel, pov[EGRESS],
                                           dictionary[EGRESS], fd_gress, fde_entries, EGRESS);
        field_dictionary_alloc["egress"] = std::move(fd_gress);
        fde_entries_e = std::move(fde_entries);
    }
    // Dump the field dictionary allocation for debugging when logging is on.
    if (Log::verbosity() > 0) {
        auto json_dump = open_output("logs/field_dictionary.log");
        *json_dump << &field_dictionary_alloc;
    }
    // Output deparser resources
    report_resources_deparser_json(fde_entries_i, fde_entries_e);

    // A PHV register may be owned by only one gress; report double use as an
    // error (or just a warning in match_compiler compatibility mode).
    if (Phv::use(INGRESS).intersects(Phv::use(EGRESS))) {
        if (!options.match_compiler) {
            error(lineno[INGRESS], "Registers used in both ingress and egress in pipeline: %s",
                  Phv::db_regset(Phv::use(INGRESS) & Phv::use(EGRESS)).c_str());
        } else {
            warning(lineno[INGRESS], "Registers used in both ingress and egress in pipeline: %s",
                    Phv::db_regset(Phv::use(INGRESS) & Phv::use(EGRESS)).c_str());
        }
        /* FIXME -- this only (sort-of) works because 'deparser' comes first in the alphabet,
         * FIXME -- so is the first section to have its 'output' method run. It's a hack
         * FIXME -- anyways to attempt to correct broken asm that should be an error */
        Phv::unsetuse(INGRESS, phv_use[EGRESS]);
        Phv::unsetuse(EGRESS, phv_use[INGRESS]);
    }

    check_jbay_ownership(phv_use);
    // Ingress PHV ownership groups: enable, clear, then set from phv_use.
    regs.dprsrreg.inp.icr.i_phv8_grp.enable();
    regs.dprsrreg.inp.icr.i_phv16_grp.enable();
    regs.dprsrreg.inp.icr.i_phv32_grp.enable();
    // regs.dprsrreg.inp.icr.scratch.enable();
    regs.dprsrreg.inp.icr.i_phv8_grp.val = 0;
    regs.dprsrreg.inp.icr.i_phv16_grp.val = 0;
    regs.dprsrreg.inp.icr.i_phv32_grp.val = 0;
    // regs.dprsrreg.inp.icr.scratch.value = 0;
    setup_jbay_ownership(phv_use[INGRESS], regs.dprsrreg.inp.icr.i_phv8_grp.val,
                         regs.dprsrreg.inp.icr.i_phv16_grp.val,
                         regs.dprsrreg.inp.icr.i_phv32_grp.val);
    // Egress PHV ownership groups.
    // NOTE(review): unlike the ingress groups, e_phv*_grp.val is not zeroed
    // before setup -- presumably the reset value is already 0; confirm.
    regs.dprsrreg.inp.icr.e_phv8_grp.enable();
    regs.dprsrreg.inp.icr.e_phv16_grp.enable();
    regs.dprsrreg.inp.icr.e_phv32_grp.enable();
    setup_jbay_ownership(phv_use[EGRESS], regs.dprsrreg.inp.icr.e_phv8_grp.val,
                         regs.dprsrreg.inp.icr.e_phv16_grp.val,
                         regs.dprsrreg.inp.icr.e_phv32_grp.val);

    // Let each declared intrinsic write its own registers.
    for (auto &intrin : intrinsics) intrin.type->setregs(regs, *this, intrin);

    /* resubmit_mode specifies whether this pipe can perform a resubmit operation on
       a packet. i.e. tell the IPB to resubmit a packet to the MAU pipeline for a second
       time. If the compiler determines that no resubmit is possible, then it can set this
       bit, which should lower latency in some circumstances.
       0 = Resubmit is allowed. 1 = Resubmit is not allowed */
    bool resubmit = false;
    for (auto &digest : digests) {
        if (digest.type->name == "resubmit" ||
            digest.type->name == "resubmit_preserving_field_list") {
            resubmit = true;
            break;
        }
    }
    if (resubmit)
        regs.dprsrreg.inp.ipp.ingr.resubmit_mode.mode = 0;
    else
        regs.dprsrreg.inp.ipp.ingr.resubmit_mode.mode = 1;

    // Let each digest (learning/resubmit/mirror/...) write its own registers.
    for (auto &digest : digests) digest.type->setregs(regs, *this, digest);

    /* Set learning digest mask for JBay */
    for (auto &digest : digests) {
        if (digest.type->name == "learning") {
            regs.dprsrreg.inp.icr.lrnmask.enable();
            for (auto &set : digest.layout) {
                int id = set.first;
                int len = regs.dprsrreg.inp.ipp.ingr.learn_tbl[id].len;
                if (len == 0) continue; // Allow empty param list

                // Fix for TF2LAB-37:
                // This fixes a hardware limitation where the container following
                // the last PHV used cannot be the same non 8 bit container as the last entry.
                // E.g. For len = 5, (active entries start at index 47)
                // Used - PHV[47] ... PHV[43] = 0;
                // Unused - PHV[42] ... PHV[0] = 0; // Defaults to 0
                // This causes issues in hardware as container 0 is used.
                // We fix by setting the default to 64, an 8-bit container. It can be any
                // other 8-bit container value.
                // The hardware does not cause any issues for 8-bit containers.
                for (int i = 47 - len; i >= 0; i--)
                    regs.dprsrreg.inp.ipp.ingr.learn_tbl[id].phvs[i] = 64;
                // Fix for TF2LAB-37 end

                // Create a bitvec of all phv masks stacked up next to each
                // other in big-endian. 'setregs' above stacks the digest fields
                // in a similar manner to setup the phvs per byte on learn_tbl
                // regs. To illustrate with an example - tna_digest.p4 (since
                // this is not clear based on reg descriptions);
                //
                // BFA Output:
                //
                // learning:
                //   select: { B1(0..2): B0(1) }  # L[0..2]b:
                //           ingress::ig_intr_md_for_dprsr.digest_type 0:
                //     - B1(0..2)    # L[0..2]b: ingress::ig_intr_md_for_dprsr.digest_type
                //     - MW0         # ingress::hdr.ethernet.dst_addr.16-47
                //     - MH1         # ingress::hdr.ethernet.dst_addr.0-15
                //     - MH0(0..8)   # L[0..8]b: ingress::ig_md.port
                //     - MW1         # ingress::hdr.ethernet.src_addr.16-47
                //     - MH2         # ingress::hdr.ethernet.src_addr.0-15
                //
                // PHV packing for digest,
                //
                // B1(7..0)  | MW0 (31..24) | MW0(23..16) | MW0(15..8)  |
                // MW0(7..0) | MH1 (15..8)  | MH1(7..0)   | MH0(16..8)  |
                // MH0(7..0) | MW1 (31..24) | MW1(23..16) | MW1(15..8)  |
                // MW1(7..0) | MH2 (15..8)  | MH2(7..0)   | ----------  |
                //
                // Learn Mask Regs for above digest
                // (NOTE(review): the decimal/hex pairs below do not all agree
                //  with each other -- treat as illustrative only)
                // deparser.regs.dprsrreg.inp.icr.lrnmask[0].mask[11] = 4294967047 (0x07ffffff)
                // deparser.regs.dprsrreg.inp.icr.lrnmask[0].mask[10] = 4294967295 (0xffffff01)
                // deparser.regs.dprsrreg.inp.icr.lrnmask[0].mask[9]  = 4278321151 (0xffffffff)
                // deparser.regs.dprsrreg.inp.icr.lrnmask[0].mask[8]  = 4294967040 (0xffffff00)

                bitvec lrnmask;
                int startBit = 0;
                int size = 0;
                // Stack each slice's mask; earlier fields end up in higher
                // bits because the accumulated mask is shifted left by each
                // subsequent field's full register size.
                for (auto p : set.second) {
                    if (size > 0) lrnmask <<= p->reg.size;
                    auto psliceSize = p.size();
                    startBit = p.lobit();
                    lrnmask.setrange(startBit, psliceSize);
                    size += p->reg.size;
                }
                // Pad to a 32 bit word
                auto shift = (size % 32) ? (32 - (size % 32)) : 0;
                lrnmask <<= shift;
                int num_words = (size + 31) / 32;
                // mask[] is filled from the top (index 11) downward, matching
                // the register examples above.
                int quanta_index = 11;
                for (int index = num_words - 1; index >= 0; index--) {
                    BUG_CHECK(quanta_index >= 0, "quanta_index < 0");
                    unsigned word = lrnmask.getrange(index * 32, 32);
                    regs.dprsrreg.inp.icr.lrnmask[id].mask[quanta_index--] = word;
                }
            }
        }
    }

// For every register bit listed by JBAY_DISABLE_REGBITS, set its disable bit
// if the register was never modified above.  (The macro name's "DISBALE"
// misspelling is historical; renaming it would be a code change.)
#define DISBALE_IF_NOT_SET(ISARRAY, ARRAY, REGS, DISABLE) \
    ISARRAY(for (auto &r : ARRAY)) if (!ISARRAY(r.) REGS.modified()) ISARRAY(r.) REGS.DISABLE = 1;
    JBAY_DISABLE_REGBITS(DISBALE_IF_NOT_SET)

    if (options.condense_json) regs.disable_if_reset_value();
    if (error_count == 0 && options.gen_json)
        regs.emit_json(*open_output("regs.deparser.cfg.json"));
    // Hand the configured register tree to the top-level output.
    TopLevel::regs<Target::JBay>()->reg_pipe.pardereg.dprsrreg.set("regs.deparser", &regs);
}
1056
/* Dead code (#if 0): pseudo-PHV registers for deparser checksum units and the
 * RefOrChksum lookup specialization for JBay.
 * NOTE(review) if ever re-enabled:
 *   - the snprintf() call is missing its size argument, so it won't compile
 *   - lookup() indexes tofino_checksum_units rather than the locally-declared
 *     jbay_checksum_units -- presumably a copy-paste from the Tofino1
 *     version; confirm before enabling. */
#if 0
namespace {
static struct JbayChecksumReg : public Phv::Register {
    JbayChecksumReg(int unit) : Phv::Register("", Phv::Register::CHECKSUM, unit,
                                              unit+CONSTANTS_PHVID_JBAY_HIGH, 16) {
        snprintf(name, "csum%d", unit); }
    int deparser_id() const override { return uid; }
} jbay_checksum_units[8] = { {0}, {1}, {2}, {3}, {4}, {5}, {6}, {7} };
}

template<> Phv::Slice Deparser::RefOrChksum::lookup<Target::JBay>() const {
    if (lo != hi || lo < 0 || lo >= Target::JBay::DEPARSER_CHECKSUM_UNITS) {
        error(lineno, "Invalid checksum unit number");
        return Phv::Slice(); }
    return Phv::Slice(tofino_checksum_units[lo], 0, 15);
}
#endif
1074
1075template <>
1076unsigned Deparser::FDEntry::Checksum::encode<Target::JBay>() {
1077 return CONSTANTS_PHVID_JBAY_HIGH + unit;
1078}
1079
1080template <>
1081unsigned Deparser::FDEntry::Constant::encode<Target::JBay>() {
1082 return CONSTANTS_PHVID_JBAY_LOW + Deparser::constant_idx(gress, val);
1083}
1084
/* JBay specialization: intentionally a no-op -- no learn-quanta json is
 * produced through this hook for JBay.
 * NOTE(review): presumably the learn-quanta report is generated elsewhere for
 * this target (or unsupported here); confirm against the Tofino1 version. */
template <>
void Deparser::gen_learn_quanta(Target::JBay::parser_regs &regs, json::vector &learn_quanta) {}
1087
/* JBay specialization of the chip-specific deparser process step: nothing to
 * do beyond the target-independent Deparser::process() work. */
template <>
void Deparser::process(Target::JBay *) {
    // Chip-specific code for process method
    // None for JBay
}
Definition bf-asm/phv.h:186
Definition bf-asm/phv.h:83
Definition backends/tofino/bf-asm/json.h:300
Definition backends/tofino/bf-asm/json.h:222
void process()
optionally process the data if not done during parsing
Definition tofino/bf-asm/deparser.cpp:654
Definition tofino/bf-asm/deparser.h:137
void warning(const char *format, Args &&...args)
Report a warning with the given message.
Definition lib/error.h:128
void error(const char *format, Args &&...args)
Report an error with the given message.
Definition lib/error.h:58
Definition tofino/bf-asm/deparser.cpp:34
Definition tofino/bf-asm/deparser.cpp:105
Definition tofino/bf-asm/deparser.cpp:53
Definition bf-asm/phv.h:45
Definition ubits.h:42
Definition ubits.h:82