Skip to main content

dfir_lang/graph/
flat_graph_builder.rs

1//! Build a flat graph from [`HfStatement`]s.
2
3use std::borrow::Cow;
4use std::collections::btree_map::Entry;
5use std::collections::{BTreeMap, BTreeSet};
6
7use itertools::Itertools;
8use proc_macro2::Span;
9use quote::ToTokens;
10use syn::spanned::Spanned;
11use syn::{Error, Ident, ItemUse};
12
13use crate::diagnostic::{Diagnostic, Diagnostics, Level};
14use crate::graph::meta_graph::ResolvedHandoffRef;
15use crate::graph::ops::next_iteration::NEXT_ITERATION;
16use crate::graph::ops::{FloType, Persistence, PortListSpec, RangeTrait};
17use crate::graph::{
18    DfirGraph, GraphEdgeId, GraphLoopId, GraphNode, GraphNodeId, HandoffKind, PortIndexValue,
19    graph_algorithms,
20};
21use crate::parse::{DfirCode, DfirStatement, Operator, Pipeline};
22use crate::pretty_span::PrettySpan;
23
24#[derive(Clone, Debug)]
25struct Ends {
26    inn: Option<(PortIndexValue, GraphDet)>,
27    out: Option<(PortIndexValue, GraphDet)>,
28}
29
30#[derive(Clone, Debug)]
31enum GraphDet {
32    Determined(GraphNodeId),
33    Undetermined(Ident),
34}
35
36/// Variable name info for each ident, see [`FlatGraphBuilder::varname_ends`].
37#[derive(Debug)]
38struct VarnameInfo {
39    /// What the variable name resolves to.
40    pub ends: Ends,
41    /// Set to true if the varname reference creates an illegal self-referential cycle.
42    pub illegal_cycle: bool,
43    /// Set to true once the in port is used. Used to track unused ports.
44    pub inn_used: bool,
45    /// Set to true once the out port is used. Used to track unused ports.
46    pub out_used: bool,
47}
48impl VarnameInfo {
49    pub fn new(ends: Ends) -> Self {
50        Self {
51            ends,
52            illegal_cycle: false,
53            inn_used: false,
54            out_used: false,
55        }
56    }
57}
58
59/// Wraper around [`DfirGraph`] to build a flat graph from AST code.
60#[derive(Debug, Default)]
61pub struct FlatGraphBuilder {
62    /// Spanned error/warning/etc diagnostics to emit.
63    diagnostics: Diagnostics,
64
65    /// [`DfirGraph`] being built.
66    flat_graph: DfirGraph,
67    /// Variable names, used as [`HfStatement::Named`] are added.
68    varname_ends: BTreeMap<Ident, VarnameInfo>,
69    /// Each (out -> inn) link inputted.
70    links: Vec<Ends>,
71
72    /// Use statements.
73    uses: Vec<ItemUse>,
74
75    /// If the flat graph is being loaded as a module, then two initial ModuleBoundary nodes are inserted into the graph. One
76    /// for the input into the module and one for the output out of the module.
77    module_boundary_nodes: Option<(GraphNodeId, GraphNodeId)>,
78}
79
80/// Output of [`FlatGraphBuilder::build`].
81pub struct FlatGraphBuilderOutput {
82    /// The flat DFIR graph.
83    pub flat_graph: DfirGraph,
84    /// Any `use` statements.
85    pub uses: Vec<ItemUse>,
86    /// Any emitted diagnostics (warnings, errors).
87    pub diagnostics: Diagnostics,
88}
89
90impl FlatGraphBuilder {
91    /// Create a new empty graph builder.
92    pub fn new() -> Self {
93        Default::default()
94    }
95
96    /// Convert the DFIR code AST into a graph builder.
97    pub fn from_dfir(input: DfirCode) -> Self {
98        let mut builder = Self::default();
99        builder.add_dfir(input, None, None);
100        builder
101    }
102
103    /// Build into an unpartitioned [`DfirGraph`], returning a struct containing the flat graph, any diagnostics, and
104    /// other outputs.
105    ///
106    /// If any diagnostics are errors, `Err` is returned and the underlying graph is lost.
107    pub fn build(mut self) -> Result<FlatGraphBuilderOutput, Diagnostics> {
108        self.finalize_connect_operator_links();
109        self.process_operator_errors();
110
111        if self.diagnostics.has_error() {
112            Err(self.diagnostics)
113        } else {
114            Ok(FlatGraphBuilderOutput {
115                flat_graph: self.flat_graph,
116                uses: self.uses,
117                diagnostics: self.diagnostics,
118            })
119        }
120    }
121
122    /// Adds all [`DfirStatement`]s within the [`DfirCode`] to this [`DfirGraph`].
123    ///
124    /// Optional configuration:
125    /// * In the given loop context `current_loop`.
126    /// * With the given operator tag `operator_tag`.
127    pub fn add_dfir(
128        &mut self,
129        dfir: DfirCode,
130        current_loop: Option<GraphLoopId>,
131        operator_tag: Option<&str>,
132    ) {
133        for stmt in dfir.statements {
134            self.add_statement_internal(stmt, current_loop, operator_tag);
135        }
136    }
137
138    /// Add a single [`DfirStatement`] line to this [`DfirGraph`] in the root context.
139    pub fn add_statement(&mut self, stmt: DfirStatement) {
140        self.add_statement_internal(stmt, None, None);
141    }
142
143    /// Add a single [`DfirStatement`] line to this [`DfirGraph`] with given configuration.
144    ///
145    /// Optional configuration:
146    /// * In the given loop context `current_loop`.
147    /// * With the given operator tag `operator_tag`.
148    fn add_statement_internal(
149        &mut self,
150        stmt: DfirStatement,
151        current_loop: Option<GraphLoopId>,
152        operator_tag: Option<&str>,
153    ) {
154        match stmt {
155            DfirStatement::Use(yuse) => {
156                self.uses.push(yuse);
157            }
158            DfirStatement::Named(named) => {
159                let stmt_span = named.span();
160                let ends = self.add_pipeline(
161                    named.pipeline,
162                    Some(&named.name),
163                    current_loop,
164                    operator_tag,
165                );
166                self.assign_varname_checked(named.name, stmt_span, ends);
167            }
168            DfirStatement::Pipeline(pipeline_stmt) => {
169                let ends =
170                    self.add_pipeline(pipeline_stmt.pipeline, None, current_loop, operator_tag);
171                Self::helper_check_unused_port(&mut self.diagnostics, &ends, true);
172                Self::helper_check_unused_port(&mut self.diagnostics, &ends, false);
173            }
174            DfirStatement::Loop(loop_statement) => {
175                let inner_loop = self.flat_graph.insert_loop(current_loop);
176                for stmt in loop_statement.statements {
177                    self.add_statement_internal(stmt, Some(inner_loop), operator_tag);
178                }
179            }
180        }
181    }
182
183    /// Programatically add an pipeline, optionally adding `pred_name` as a single predecessor and
184    /// assigning it all to `asgn_name`.
185    ///
186    /// In DFIR syntax, equivalent to [`Self::add_statement`] of (if all names are supplied):
187    /// ```text
188    /// #asgn_name = #pred_name -> #pipeline;
189    /// ```
190    ///
191    /// But with, optionally:
192    /// * A `current_loop` to put the operator in.
193    /// * An `operator_tag` to tag the operator with, for debugging/tracing.
194    pub fn append_assign_pipeline(
195        &mut self,
196        asgn_name: Option<&Ident>,
197        pred_name: Option<&Ident>,
198        pipeline: Pipeline,
199        current_loop: Option<GraphLoopId>,
200        operator_tag: Option<&str>,
201    ) {
202        let span = pipeline.span();
203        let mut ends = self.add_pipeline(pipeline, asgn_name, current_loop, operator_tag);
204
205        // Connect `pred_name` if supplied.
206        if let Some(pred_name) = pred_name {
207            if let Some(pred_varname_info) = self.varname_ends.get(pred_name) {
208                // Update ends for `asgn_name`.
209                ends = self.connect_ends(pred_varname_info.ends.clone(), ends);
210            } else {
211                self.diagnostics.push(Diagnostic::spanned(
212                    pred_name.span(),
213                    Level::Error,
214                    format!(
215                        "Cannot find referenced name `{}`; name was never assigned.",
216                        pred_name
217                    ),
218                ));
219            }
220        }
221
222        // Assign `asgn_name` if supplied.
223        if let Some(asgn_name) = asgn_name {
224            self.assign_varname_checked(asgn_name.clone(), span, ends);
225        }
226    }
227}
228
229/// Internal methods.
230impl FlatGraphBuilder {
231    /// Assign a variable name to a pipeline, checking for conflicts.
232    fn assign_varname_checked(&mut self, name: Ident, stmt_span: Span, ends: Ends) {
233        match self.varname_ends.entry(name) {
234            Entry::Vacant(vacant_entry) => {
235                vacant_entry.insert(VarnameInfo::new(ends));
236            }
237            Entry::Occupied(occupied_entry) => {
238                let prev_conflict = occupied_entry.key();
239                self.diagnostics.push(Diagnostic::spanned(
240                    prev_conflict.span(),
241                    Level::Error,
242                    format!(
243                        "Existing assignment to `{}` conflicts with later assignment: {} (1/2)",
244                        prev_conflict,
245                        PrettySpan(stmt_span),
246                    ),
247                ));
248                self.diagnostics.push(Diagnostic::spanned(
249                    stmt_span,
250                    Level::Error,
251                    format!(
252                        "Name assignment to `{}` conflicts with existing assignment: {} (2/2)",
253                        prev_conflict,
254                        PrettySpan(prev_conflict.span())
255                    ),
256                ));
257            }
258        }
259    }
260
261    /// Helper: Add a pipeline, i.e. `a -> b -> c`. Return the input and output [`Ends`] for it.
262    fn add_pipeline(
263        &mut self,
264        pipeline: Pipeline,
265        current_varname: Option<&Ident>,
266        current_loop: Option<GraphLoopId>,
267        operator_tag: Option<&str>,
268    ) -> Ends {
269        match pipeline {
270            Pipeline::Paren(ported_pipeline_paren) => {
271                let (inn_port, pipeline_paren, out_port) =
272                    PortIndexValue::from_ported(ported_pipeline_paren);
273                let og_ends = self.add_pipeline(
274                    *pipeline_paren.pipeline,
275                    current_varname,
276                    current_loop,
277                    operator_tag,
278                );
279                Self::helper_combine_ends(&mut self.diagnostics, og_ends, inn_port, out_port)
280            }
281            Pipeline::Name(pipeline_name) => {
282                let (inn_port, ident, out_port) = PortIndexValue::from_ported(pipeline_name);
283
284                // Mingwei: We could lookup non-forward references immediately, but easier to just
285                // have one consistent code path: `GraphDet::Undetermined`.
286                Ends {
287                    inn: Some((inn_port, GraphDet::Undetermined(ident.clone()))),
288                    out: Some((out_port, GraphDet::Undetermined(ident))),
289                }
290            }
291            Pipeline::ModuleBoundary(pipeline_name) => {
292                let Some((input_node, output_node)) = self.module_boundary_nodes else {
293                    self.diagnostics.push(
294                        Error::new(
295                            pipeline_name.span(),
296                            "`mod` is only usable inside of a module.",
297                        )
298                        .into(),
299                    );
300
301                    return Ends {
302                        inn: None,
303                        out: None,
304                    };
305                };
306
307                let (inn_port, _, out_port) = PortIndexValue::from_ported(pipeline_name);
308
309                Ends {
310                    inn: Some((inn_port, GraphDet::Determined(output_node))),
311                    out: Some((out_port, GraphDet::Determined(input_node))),
312                }
313            }
314            Pipeline::Link(pipeline_link) => {
315                // Add the nested LHS and RHS of this link.
316                let lhs_ends = self.add_pipeline(
317                    *pipeline_link.lhs,
318                    current_varname,
319                    current_loop,
320                    operator_tag,
321                );
322                let rhs_ends = self.add_pipeline(
323                    *pipeline_link.rhs,
324                    current_varname,
325                    current_loop,
326                    operator_tag,
327                );
328
329                self.connect_ends(lhs_ends, rhs_ends)
330            }
331            Pipeline::Operator(operator) => {
332                let op_span = Some(operator.span());
333                let (node_id, ends) =
334                    self.add_operator(current_varname, current_loop, operator, op_span);
335                if let Some(operator_tag) = operator_tag {
336                    self.flat_graph
337                        .set_operator_tag(node_id, operator_tag.to_owned());
338                }
339                ends
340            }
341        }
342    }
343
344    /// Connects two [`Ends`] together. Returns the outer [`Ends`] for the connection.
345    ///
346    /// Links the inner ends together by adding it to `self.links`.
347    fn connect_ends(&mut self, lhs_ends: Ends, rhs_ends: Ends) -> Ends {
348        // Outer (first and last) ends.
349        let outer_ends = Ends {
350            inn: lhs_ends.inn,
351            out: rhs_ends.out,
352        };
353        // Inner (link) ends.
354        let link_ends = Ends {
355            out: lhs_ends.out,
356            inn: rhs_ends.inn,
357        };
358        self.links.push(link_ends);
359        outer_ends
360    }
361
362    /// Adds an operator to the graph, returning its [`GraphNodeId`] the input and output [`Ends`] for it.
363    fn add_operator(
364        &mut self,
365        current_varname: Option<&Ident>,
366        current_loop: Option<GraphLoopId>,
367        operator: Operator,
368        op_span: Option<Span>,
369    ) -> (GraphNodeId, Ends) {
370        let node_id = self.flat_graph.insert_node(
371            GraphNode::Operator(operator),
372            current_varname.cloned(),
373            current_loop,
374        );
375        let ends = Ends {
376            inn: Some((
377                PortIndexValue::Elided(op_span),
378                GraphDet::Determined(node_id),
379            )),
380            out: Some((
381                PortIndexValue::Elided(op_span),
382                GraphDet::Determined(node_id),
383            )),
384        };
385        (node_id, ends)
386    }
387
388    /// Connects operator links as a final building step. Processes all the links stored in
389    /// `self.links` and actually puts them into the graph.
390    fn finalize_connect_operator_links(&mut self) {
391        // `->` edges
392        for Ends { out, inn } in std::mem::take(&mut self.links) {
393            let out_opt = Self::helper_resolve_name(
394                &mut self.varname_ends,
395                out,
396                false,
397                &mut self.diagnostics,
398            );
399            let inn_opt =
400                Self::helper_resolve_name(&mut self.varname_ends, inn, true, &mut self.diagnostics);
401            // `None` already have errors in `self.diagnostics`.
402            if let (Some((out_port, out_node)), Some((inn_port, inn_node))) = (out_opt, inn_opt) {
403                let _ = self.finalize_connect_operators(out_port, out_node, inn_port, inn_node);
404            }
405        }
406
407        // Resolve the singleton references for each node.
408        for node_id in self.flat_graph.node_ids().collect::<Vec<_>>() {
409            if let GraphNode::Operator(operator) = self.flat_graph.node(node_id) {
410                let singletons_referenced = operator
411                    .singletons_referenced
412                    .iter()
413                    .map(|singleton_ref| {
414                        let port_det = self
415                            .varname_ends
416                            .get(&singleton_ref.ident)
417                            .filter(|varname_info| !varname_info.illegal_cycle)
418                            .map(|varname_info| &varname_info.ends)
419                            .and_then(|ends| ends.out.as_ref())
420                            .cloned();
421                        let resolved_node_id = if let Some((_port, node_id)) =
422                            Self::helper_resolve_name(
423                                &mut self.varname_ends,
424                                port_det,
425                                false,
426                                &mut self.diagnostics,
427                            ) {
428                            Some(node_id)
429                        } else {
430                            self.diagnostics.push(Diagnostic::spanned(
431                                singleton_ref.span(),
432                                Level::Error,
433                                format!(
434                                    "Cannot find referenced name `{}`; name was never assigned.",
435                                    singleton_ref.ident
436                                ),
437                            ));
438                            None
439                        };
440                        ResolvedHandoffRef {
441                            node_id: resolved_node_id,
442                            is_mut: singleton_ref.token_mut.is_some(),
443                            access_group: singleton_ref.access_group.as_ref().and_then(
444                                |(_, lit_int)| match lit_int.base10_parse::<u32>() {
445                                    Ok(n) => Some(n),
446                                    Err(e) => {
447                                        self.diagnostics.push(Diagnostic::spanned(
448                                            lit_int.span(),
449                                            Level::Error,
450                                            format!("Access group is not a valid `u32`: {}", e),
451                                        ));
452                                        None
453                                    }
454                                },
455                            ),
456                        }
457                    })
458                    .collect();
459
460                self.flat_graph
461                    .set_node_handoff_references(node_id, singletons_referenced);
462            }
463        }
464    }
465
466    /// Recursively resolve a variable name. For handling forward (and backward) name references
467    /// after all names have been assigned.
468    /// Returns `None` if the name is not resolvable, either because it was never assigned or
469    /// because it contains a self-referential cycle.
470    ///
471    /// `is_in` set to `true` means the _input_ side will be returned. `false` means the _output_ side will be returned.
472    fn helper_resolve_name(
473        varname_ends: &mut BTreeMap<Ident, VarnameInfo>,
474        mut port_det: Option<(PortIndexValue, GraphDet)>,
475        is_in: bool,
476        diagnostics: &mut Diagnostics,
477    ) -> Option<(PortIndexValue, GraphNodeId)> {
478        const BACKUP_RECURSION_LIMIT: usize = 1024;
479
480        let mut names = Vec::new();
481        for _ in 0..BACKUP_RECURSION_LIMIT {
482            match port_det? {
483                (port, GraphDet::Determined(node_id)) => {
484                    return Some((port, node_id));
485                }
486                (port, GraphDet::Undetermined(ident)) => {
487                    let Some(varname_info) = varname_ends.get_mut(&ident) else {
488                        diagnostics.push(Diagnostic::spanned(
489                            ident.span(),
490                            Level::Error,
491                            format!("Cannot find name `{}`; name was never assigned.", ident),
492                        ));
493                        return None;
494                    };
495                    // Check for a self-referential cycle.
496                    let cycle_found = names.contains(&ident);
497                    if !cycle_found {
498                        names.push(ident);
499                    };
500                    if cycle_found || varname_info.illegal_cycle {
501                        let len = names.len();
502                        for (i, name) in names.into_iter().enumerate() {
503                            diagnostics.push(Diagnostic::spanned(
504                                name.span(),
505                                Level::Error,
506                                format!(
507                                    "Name `{}` forms or references an illegal self-referential cycle ({}/{}).",
508                                    name,
509                                    i + 1,
510                                    len
511                                ),
512                            ));
513                            // Set value as `Err(())` to trigger `name_ends_result.is_err()`
514                            // diagnostics above if the name is referenced in the future.
515                            varname_ends.get_mut(&name).unwrap().illegal_cycle = true;
516                        }
517                        return None;
518                    }
519
520                    // No self-cycle.
521                    let prev = if is_in {
522                        varname_info.inn_used = true;
523                        &varname_info.ends.inn
524                    } else {
525                        varname_info.out_used = true;
526                        &varname_info.ends.out
527                    };
528                    port_det = Self::helper_combine_end(
529                        diagnostics,
530                        prev.clone(),
531                        port,
532                        if is_in { "input" } else { "output" },
533                    );
534                }
535            }
536        }
537        diagnostics.push(Diagnostic::spanned(
538            Span::call_site(),
539            Level::Error,
540            format!(
541                "Reached the recursion limit {} while resolving names. This is either a dfir bug or you have an absurdly long chain of names: `{}`.",
542                BACKUP_RECURSION_LIMIT,
543                names.iter().map(ToString::to_string).collect::<Vec<_>>().join("` -> `"),
544            )
545        ));
546        None
547    }
548
549    /// Connect two operators on the given port indexes.
550    fn finalize_connect_operators(
551        &mut self,
552        src_port: PortIndexValue,
553        src: GraphNodeId,
554        dst_port: PortIndexValue,
555        dst: GraphNodeId,
556    ) -> GraphEdgeId {
557        {
558            /// Helper to emit conflicts when a port is used twice.
559            fn emit_conflict(
560                inout: &str,
561                old: &PortIndexValue,
562                new: &PortIndexValue,
563                diagnostics: &mut Diagnostics,
564            ) {
565                // TODO(mingwei): Use `MultiSpan` once `proc_macro2` supports it.
566                diagnostics.push(Diagnostic::spanned(
567                    old.span(),
568                    Level::Error,
569                    format!(
570                        "{} connection conflicts with below ({}) (1/2)",
571                        inout,
572                        PrettySpan(new.span()),
573                    ),
574                ));
575                diagnostics.push(Diagnostic::spanned(
576                    new.span(),
577                    Level::Error,
578                    format!(
579                        "{} connection conflicts with above ({}) (2/2)",
580                        inout,
581                        PrettySpan(old.span()),
582                    ),
583                ));
584            }
585
586            // Handle src's successor port conflicts:
587            if src_port.is_specified() {
588                for conflicting_port in self
589                    .flat_graph
590                    .node_successor_edges(src)
591                    .map(|edge_id| self.flat_graph.edge_ports(edge_id).0)
592                    .filter(|&port| port == &src_port)
593                {
594                    emit_conflict("Output", conflicting_port, &src_port, &mut self.diagnostics);
595                }
596            }
597
598            // Handle dst's predecessor port conflicts:
599            if dst_port.is_specified() {
600                for conflicting_port in self
601                    .flat_graph
602                    .node_predecessor_edges(dst)
603                    .map(|edge_id| self.flat_graph.edge_ports(edge_id).1)
604                    .filter(|&port| port == &dst_port)
605                {
606                    emit_conflict("Input", conflicting_port, &dst_port, &mut self.diagnostics);
607                }
608            }
609        }
610        self.flat_graph.insert_edge(src, src_port, dst, dst_port)
611    }
612
613    /// Process operators and emit operator errors.
614    fn process_operator_errors(&mut self) {
615        self.make_operator_instances();
616        self.check_operator_errors();
617        self.warn_unused_port_indexing();
618        self.check_loop_errors();
619    }
620
621    /// Make `OperatorInstance`s for each operator node.
622    fn make_operator_instances(&mut self) {
623        self.flat_graph
624            .insert_node_op_insts_all(&mut self.diagnostics);
625    }
626
627    /// Validates that operators have valid number of inputs, outputs, & arguments.
628    /// Adds errors (and warnings) to `self.diagnostics`.
629    fn check_operator_errors(&mut self) {
630        /// Returns true if an error was found.
631        fn emit_arity_error(
632            op_span: Span,
633            op_name: &str,
634            is_in: bool,
635            is_hard: bool,
636            degree: usize,
637            range: &dyn RangeTrait<usize>,
638            diagnostics: &mut Diagnostics,
639        ) -> bool {
640            let message = format!(
641                "`{}` {} have {} {}, actually has {}.",
642                op_name,
643                if is_hard { "must" } else { "should" },
644                range.human_string(),
645                if is_in { "input(s)" } else { "output(s)" },
646                degree,
647            );
648            let out_of_range = !range.contains(&degree);
649            if out_of_range {
650                diagnostics.push(Diagnostic::spanned(
651                    op_span,
652                    if is_hard {
653                        Level::Error
654                    } else {
655                        Level::Warning
656                    },
657                    message,
658                ));
659            }
660            out_of_range
661        }
662
663        for (node_id, node) in self.flat_graph.nodes() {
664            match node {
665                GraphNode::Operator(operator) => {
666                    let Some(op_inst) = self.flat_graph.node_op_inst(node_id) else {
667                        // Error already emitted by `insert_node_op_insts_all`.
668                        continue;
669                    };
670                    let op_constraints = op_inst.op_constraints;
671                    let op_name = operator.name_string();
672
673                    // Check number of args
674                    if op_constraints.num_args != operator.args.len() {
675                        self.diagnostics.push(Diagnostic::spanned(
676                            operator.span(),
677                            Level::Error,
678                            format!(
679                                "`{}` expects {} argument(s), received {}.",
680                                op_name,
681                                op_constraints.num_args,
682                                operator.args.len()
683                            ),
684                        ));
685                    }
686
687                    // Check input/output (port) arity
688                    let inn_degree = self.flat_graph.node_degree_in(node_id);
689                    let _ = emit_arity_error(
690                        operator.span(),
691                        &op_name,
692                        true,
693                        true,
694                        inn_degree,
695                        op_constraints.hard_range_inn,
696                        &mut self.diagnostics,
697                    ) || emit_arity_error(
698                        operator.span(),
699                        &op_name,
700                        true,
701                        false,
702                        inn_degree,
703                        op_constraints.soft_range_inn,
704                        &mut self.diagnostics,
705                    );
706
707                    let out_degree = self.flat_graph.node_degree_out(node_id);
708                    let _ = emit_arity_error(
709                        operator.span(),
710                        &op_name,
711                        false,
712                        true,
713                        out_degree,
714                        op_constraints.hard_range_out,
715                        &mut self.diagnostics,
716                    ) || emit_arity_error(
717                        operator.span(),
718                        &op_name,
719                        false,
720                        false,
721                        out_degree,
722                        op_constraints.soft_range_out,
723                        &mut self.diagnostics,
724                    );
725
726                    fn emit_port_error<'a>(
727                        op_span: Span,
728                        op_name: &str,
729                        expected_ports_fn: Option<fn() -> PortListSpec>,
730                        actual_ports_iter: impl Iterator<Item = &'a PortIndexValue>,
731                        input_output: &'static str,
732                        diagnostics: &mut Diagnostics,
733                    ) {
734                        let Some(expected_ports_fn) = expected_ports_fn else {
735                            return;
736                        };
737                        let PortListSpec::Fixed(expected_ports) = (expected_ports_fn)() else {
738                            // Separate check inside of `demux` special case.
739                            return;
740                        };
741                        let expected_ports: Vec<_> = expected_ports.into_iter().collect();
742
743                        // Reject unexpected ports.
744                        let ports: BTreeSet<_> = actual_ports_iter
745                            // Use `inspect` before collecting into `BTreeSet` to ensure we get
746                            // both error messages on duplicated port names.
747                            .inspect(|actual_port_iv| {
748                                // For each actually used port `port_index_value`, check if it is expected.
749                                let is_expected = expected_ports.iter().any(|port_index| {
750                                    actual_port_iv == &&port_index.clone().into()
751                                });
752                                // If it is not expected, emit a diagnostic error.
753                                if !is_expected {
754                                    diagnostics.push(Diagnostic::spanned(
755                                        actual_port_iv.span(),
756                                        Level::Error,
757                                        format!(
758                                            "`{}` received unexpected {} port: {}. Expected one of: `{}`",
759                                            op_name,
760                                            input_output,
761                                            actual_port_iv.as_error_message_string(),
762                                            Itertools::intersperse(
763                                                expected_ports
764                                                    .iter()
765                                                    .map(|port| port.to_token_stream().to_string())
766                                                    .map(Cow::Owned),
767                                                Cow::Borrowed("`, `"),
768                                            ).collect::<String>()
769                                        ),
770                                    ))
771                                }
772                            })
773                            .collect();
774
775                        // List missing expected ports.
776                        let missing: Vec<_> = expected_ports
777                            .into_iter()
778                            .filter_map(|expected_port| {
779                                let tokens = expected_port.to_token_stream();
780                                if !ports.contains(&&expected_port.into()) {
781                                    Some(tokens)
782                                } else {
783                                    None
784                                }
785                            })
786                            .collect();
787                        if !missing.is_empty() {
788                            diagnostics.push(Diagnostic::spanned(
789                                op_span,
790                                Level::Error,
791                                format!(
792                                    "`{}` missing expected {} port(s): `{}`.",
793                                    op_name,
794                                    input_output,
795                                    Itertools::intersperse(
796                                        missing.into_iter().map(|port| Cow::Owned(
797                                            port.to_token_stream().to_string()
798                                        )),
799                                        Cow::Borrowed("`, `")
800                                    )
801                                    .collect::<String>()
802                                ),
803                            ));
804                        }
805                    }
806
807                    emit_port_error(
808                        operator.span(),
809                        &op_name,
810                        op_constraints.ports_inn,
811                        self.flat_graph
812                            .node_predecessor_edges(node_id)
813                            .map(|edge_id| self.flat_graph.edge_ports(edge_id).1),
814                        "input",
815                        &mut self.diagnostics,
816                    );
817                    emit_port_error(
818                        operator.span(),
819                        &op_name,
820                        op_constraints.ports_out,
821                        self.flat_graph
822                            .node_successor_edges(node_id)
823                            .map(|edge_id| self.flat_graph.edge_ports(edge_id).0),
824                        "output",
825                        &mut self.diagnostics,
826                    );
827
828                    // Check that singleton references actually reference valid targets.
829                    {
830                        let singletons_resolved = self.flat_graph.node_handoff_references(node_id);
831                        for (resolved_ref, singleton_ref_token) in singletons_resolved
832                            .iter()
833                            .zip_eq(&*operator.singletons_referenced)
834                        {
835                            let Some(singleton_node_id) = resolved_ref.node_id else {
836                                // Error already emitted by `connect_operator_links`, "Cannot find referenced name...".
837                                continue;
838                            };
839                            // Handoff nodes are valid reference targets.
840                            if matches!(
841                                self.flat_graph.node(singleton_node_id),
842                                GraphNode::Handoff { .. },
843                            ) {
844                                continue;
845                            }
846                            let Some(ref_op_inst) = self.flat_graph.node_op_inst(singleton_node_id)
847                            else {
848                                // Error already emitted by `insert_node_op_insts_all`.
849                                continue;
850                            };
851                            let ref_op_constraints = ref_op_inst.op_constraints;
852                            self.diagnostics.push(Diagnostic::spanned(
853                                singleton_ref_token.span(),
854                                Level::Error,
855                                format!(
856                                    "Cannot reference operator `{}`. Use `singleton()`, `optional()`, or `handoff()` to create a referenceable name.",
857                                    ref_op_constraints.name,
858                                ),
859                            ));
860                        }
861                    }
862                }
863                GraphNode::Handoff { kind, src_span, .. } => {
864                    // Validate arity: handoff must have exactly 1 input and 1 output.
865                    let op_name = match kind {
866                        HandoffKind::Vec => "handoff",
867                        HandoffKind::Singleton => "singleton",
868                        HandoffKind::Optional => "optional",
869                    };
870                    let inn_degree = self.flat_graph.node_degree_in(node_id);
871                    emit_arity_error(
872                        *src_span,
873                        op_name,
874                        true,
875                        true,
876                        inn_degree,
877                        &(1..=1),
878                        &mut self.diagnostics,
879                    );
880                    let out_degree = self.flat_graph.node_degree_out(node_id);
881                    emit_arity_error(
882                        *src_span,
883                        op_name,
884                        false,
885                        true,
886                        out_degree,
887                        &(0..=1), // Handoffs may be no-output, for use only by ref.
888                        &mut self.diagnostics,
889                    );
890                }
891                GraphNode::ModuleBoundary { .. } => {
892                    // Module boundaries don't require any checking.
893                }
894            }
895        }
896
897        // Validate singleton references.
898        // All singleton references must have unambiguous group orderings.
899        // Rules:
900        // 1. If any singleton reference has an explicit group number, they all must have one.
901        // 2. Every `#mut` must be in its own group.
902        {
903            let refs_by_target = self.flat_graph.node_handoff_reference_groups();
904            // For each singleton, check the groups.
905            for (_singleton, groups) in refs_by_target {
906                // Rule 1. If any singleton reference has an explicit group number, they all must have one.
907                if 1 < groups.len()
908                    && let Some(ungrouped) = groups.get(&None)
909                {
910                    for &(_src_node, r, span) in ungrouped {
911                        self.diagnostics.push(Diagnostic::spanned(
912                            span,
913                            Level::Error,
914                            format!(
915                                "Must use an explicit group `#{{N}}{}` to reference a singleton when other references use explicit groups.",
916                                if r.is_mut { " mut" } else { "" },
917                            ),
918                        ));
919                    }
920                }
921                // Rule 2. Every `#mut` must be in its own group.
922                for (group_idx, group) in groups {
923                    if 1 < group.len() && group.iter().any(|(_, r, _)| r.is_mut) {
924                        let group_str = if let Some(n) = group_idx {
925                            format!("`#{{{}}}`", n)
926                        } else {
927                            "<default>".to_owned()
928                        };
929                        for (_src_node, _mut_r, span) in
930                            group.into_iter().filter(|(_, r, _)| r.is_mut)
931                        {
932                            self.diagnostics.push(Diagnostic::spanned(
933                                span,
934                                Level::Error,
935                                format!("Mutable singleton references must be the only one in their access group, but group {} has multiple.", group_str),
936                            ));
937                        }
938                    }
939                }
940            }
941        }
942    }
943
944    /// Warns about unused port indexing referenced in [`Self::varname_ends`].
945    /// https://github.com/hydro-project/hydro/issues/1108
946    fn warn_unused_port_indexing(&mut self) {
947        for (_ident, varname_info) in self.varname_ends.iter() {
948            if !varname_info.inn_used {
949                Self::helper_check_unused_port(&mut self.diagnostics, &varname_info.ends, true);
950            }
951            if !varname_info.out_used {
952                Self::helper_check_unused_port(&mut self.diagnostics, &varname_info.ends, false);
953            }
954        }
955    }
956
957    /// Emit a warning to `diagnostics` for an unused port (i.e. if the port is specified for
958    /// reason).
959    fn helper_check_unused_port(diagnostics: &mut Diagnostics, ends: &Ends, is_in: bool) {
960        let port = if is_in { &ends.inn } else { &ends.out };
961        if let Some((port, _)) = port
962            && port.is_specified()
963        {
964            diagnostics.push(Diagnostic::spanned(
965                port.span(),
966                Level::Error,
967                format!(
968                    "{} port index is unused. (Is the port on the correct side?)",
969                    if is_in { "Input" } else { "Output" },
970                ),
971            ));
972        }
973    }
974
975    /// Helper function.
976    /// Combine the port indexing information for indexing wrapped around a name.
977    /// Because the name may already have indexing, this may introduce double indexing (i.e. `[0][0]my_var[0][0]`)
978    /// which would be an error.
979    fn helper_combine_ends(
980        diagnostics: &mut Diagnostics,
981        og_ends: Ends,
982        inn_port: PortIndexValue,
983        out_port: PortIndexValue,
984    ) -> Ends {
985        Ends {
986            inn: Self::helper_combine_end(diagnostics, og_ends.inn, inn_port, "input"),
987            out: Self::helper_combine_end(diagnostics, og_ends.out, out_port, "output"),
988        }
989    }
990
991    /// Helper function.
992    /// Combine the port indexing info for one input or output.
993    fn helper_combine_end(
994        diagnostics: &mut Diagnostics,
995        og: Option<(PortIndexValue, GraphDet)>,
996        other: PortIndexValue,
997        input_output: &'static str,
998    ) -> Option<(PortIndexValue, GraphDet)> {
999        // TODO(mingwei): minification pass over this code?
1000
1001        let other_span = other.span();
1002
1003        let (og_port, og_node) = og?;
1004        match og_port.combine(other) {
1005            Ok(combined_port) => Some((combined_port, og_node)),
1006            Err(og_port) => {
1007                // TODO(mingwei): Use `MultiSpan` once `proc_macro2` supports it.
1008                diagnostics.push(Diagnostic::spanned(
1009                    og_port.span(),
1010                    Level::Error,
1011                    format!(
1012                        "Indexing on {} is overwritten below ({}) (1/2).",
1013                        input_output,
1014                        PrettySpan(other_span),
1015                    ),
1016                ));
1017                diagnostics.push(Diagnostic::spanned(
1018                    other_span,
1019                    Level::Error,
1020                    format!(
1021                        "Cannot index on already-indexed {}, previously indexed above ({}) (2/2).",
1022                        input_output,
1023                        PrettySpan(og_port.span()),
1024                    ),
1025                ));
1026                // When errored, just use original and ignore OTHER port to minimize
1027                // noisy/extra diagnostics.
1028                Some((og_port, og_node))
1029            }
1030        }
1031    }
1032
1033    /// Check for loop context-related errors.
1034    fn check_loop_errors(&mut self) {
1035        for (node_id, node) in self.flat_graph.nodes() {
1036            let Some(op_inst) = self.flat_graph.node_op_inst(node_id) else {
1037                continue;
1038            };
1039            let loop_opt = self.flat_graph.node_loop(node_id);
1040
1041            // Ensure no `'tick` or `'static` persistences are used WITHIN a loop context.
1042            // Ensure no `'loop` persistences are used OUTSIDE a loop context.
1043            for persistence in &op_inst.generics.persistence_args {
1044                let span = op_inst.generics.generic_args.span();
1045                match (loop_opt, persistence) {
1046                    (Some(_loop_id), p @ (Persistence::Tick | Persistence::Static)) => {
1047                        self.diagnostics.push(Diagnostic::spanned(
1048                            span,
1049                            Level::Error,
1050                            format!(
1051                                "Operator uses `'{}` persistence, which is not allowed within a `loop {{ ... }}` context.",
1052                                p.to_str_lowercase(),
1053                            ),
1054                        ));
1055                    }
1056                    (None, p @ (Persistence::None | Persistence::Loop)) => {
1057                        self.diagnostics.push(Diagnostic::spanned(
1058                            span,
1059                            Level::Error,
1060                            format!(
1061                                "Operator uses `'{}` persistence, but is not within a `loop {{ ... }}` context.",
1062                                p.to_str_lowercase(),
1063                            ),
1064                        ));
1065                    }
1066                    _ => {}
1067                }
1068            }
1069
1070            // All inputs must be declared in the root block.
1071            if let (Some(_loop_id), Some(FloType::Source)) =
1072                (loop_opt, op_inst.op_constraints.flo_type)
1073            {
1074                self.diagnostics.push(Diagnostic::spanned(
1075                    node.span(),
1076                    Level::Error,
1077                    format!(
1078                        "Source operator `{}(...)` must be at the root level, not within any `loop {{ ... }}` contexts.",
1079                        op_inst.op_constraints.name
1080                    )
1081                ));
1082            }
1083        }
1084
1085        // Check windowing and un-windowing operators, for loop inputs and outputs respectively.
1086        for (_edge_id, (pred_id, node_id)) in self.flat_graph.edges() {
1087            let Some(op_inst) = self.flat_graph.node_op_inst(node_id) else {
1088                continue;
1089            };
1090            let flo_type = &op_inst.op_constraints.flo_type;
1091
1092            let pred_loop_id = self.flat_graph.node_loop(pred_id);
1093            let loop_id = self.flat_graph.node_loop(node_id);
1094
1095            let span = self.flat_graph.node(node_id).span();
1096
1097            let (is_input, is_output) = {
1098                let parent_pred_loop_id =
1099                    pred_loop_id.and_then(|lid| self.flat_graph.loop_parent(lid));
1100                let parent_loop_id = loop_id.and_then(|lid| self.flat_graph.loop_parent(lid));
1101                let is_same = pred_loop_id == loop_id;
1102                let is_input = !is_same && parent_loop_id == pred_loop_id;
1103                let is_output = !is_same && parent_pred_loop_id == loop_id;
1104                if !(is_input || is_output || is_same) {
1105                    self.diagnostics.push(Diagnostic::spanned(
1106                        span,
1107                        Level::Error,
1108                        "Operator input edge may not cross multiple loop contexts.",
1109                    ));
1110                    continue;
1111                }
1112                (is_input, is_output)
1113            };
1114
1115            match flo_type {
1116                None => {
1117                    if is_input {
1118                        self.diagnostics.push(Diagnostic::spanned(
1119                            span,
1120                            Level::Error,
1121                            format!(
1122                                "Operator `{}(...)` entering a loop context must be a windowing operator, but is not.",
1123                                op_inst.op_constraints.name
1124                            )
1125                        ));
1126                    }
1127                    if is_output {
1128                        self.diagnostics.push(Diagnostic::spanned(
1129                            span,
1130                            Level::Error,
1131                            format!(
1132                                "Operator `{}(...)` exiting a loop context must be an un-windowing operator, but is not.",
1133                                op_inst.op_constraints.name
1134                            )
1135                        ));
1136                    }
1137                }
1138                Some(FloType::Windowing) => {
1139                    if !is_input {
1140                        self.diagnostics.push(Diagnostic::spanned(
1141                            span,
1142                            Level::Error,
1143                            format!(
1144                                "Windowing operator `{}(...)` must be the first input operator into a `loop {{ ... }} context.",
1145                                op_inst.op_constraints.name
1146                            )
1147                        ));
1148                    }
1149                }
1150                Some(FloType::Unwindowing) => {
1151                    if !is_output {
1152                        self.diagnostics.push(Diagnostic::spanned(
1153                            span,
1154                            Level::Error,
1155                            format!(
1156                                "Un-windowing operator `{}(...)` must be the first output operator after exiting a `loop {{ ... }} context.",
1157                                op_inst.op_constraints.name
1158                            )
1159                        ));
1160                    }
1161                }
1162                Some(FloType::NextIteration) => {
1163                    // Must be in a loop context.
1164                    if loop_id.is_none() {
1165                        self.diagnostics.push(Diagnostic::spanned(
1166                            span,
1167                            Level::Error,
1168                            format!(
1169                                "Operator `{}(...)` must be within a `loop {{ ... }}` context.",
1170                                op_inst.op_constraints.name
1171                            ),
1172                        ));
1173                    }
1174                }
1175                Some(FloType::Source) => {
1176                    // Handled above.
1177                }
1178            }
1179        }
1180
1181        // Must be a DAG (excluding `next_iteration()` operators).
1182        // TODO(mingwei): Nested loop blocks should count as a single node.
1183        // But this doesn't cause any correctness issues because the nested loops are also DAGs.
1184        for (loop_id, loop_nodes) in self.flat_graph.loops() {
1185            // Filter out `next_iteration()` operators.
1186            let filter_next_iteration = |&node_id: &GraphNodeId| {
1187                self.flat_graph
1188                    .node_op_inst(node_id)
1189                    .map(|op_inst| Some(FloType::NextIteration) != op_inst.op_constraints.flo_type)
1190                    .unwrap_or(true)
1191            };
1192
1193            let topo_sort_result = graph_algorithms::topo_sort(
1194                loop_nodes.iter().copied().filter(filter_next_iteration),
1195                |dst| {
1196                    self.flat_graph
1197                        .node_predecessor_nodes(dst)
1198                        .filter(|&src| Some(loop_id) == self.flat_graph.node_loop(src))
1199                        .filter(filter_next_iteration)
1200                },
1201            );
1202            if let Err(cycle) = topo_sort_result {
1203                let len = cycle.len();
1204                for (i, node_id) in cycle.into_iter().enumerate() {
1205                    let span = self.flat_graph.node(node_id).span();
1206                    self.diagnostics.push(Diagnostic::spanned(
1207                        span,
1208                        Level::Error,
1209                        format!(
1210                            "Operator forms an illegal cycle within a `loop {{ ... }}` block. Use `{}()` to pass data across loop iterations. ({}/{})",
1211                            NEXT_ITERATION.name,
1212                            i + 1,
1213                            len,
1214                        ),
1215                    ));
1216                }
1217            }
1218        }
1219    }
1220}