// Source: saf_core/air.rs

1//! AIR (Analysis Intermediate Representation) — SAF's canonical, frontend-agnostic IR.
2//!
3//! All analysis passes operate on AIR, never on frontend-specific representations.
4//! This ensures that adding a new frontend does not require changes to analysis
5//! algorithms (NFR-EXT-001, NFR-EXT-002).
6//!
7//! See FR-AIR-001 through FR-AIR-007 for full requirements.
8
9use std::collections::BTreeMap;
10use std::fmt;
11
12use serde::{Deserialize, Serialize};
13
14use crate::ids::{BlockId, FunctionId, InstId, ModuleId, ObjId, TypeId, ValueId};
15use crate::span::{SourceFile, Span, Symbol};
16
17// =============================================================================
18// Constants
19// =============================================================================
20
/// Constant values in AIR.
///
/// Represents compile-time constant values that can appear as instruction operands.
///
/// Serialized as an internally tagged enum: the variant name is written in
/// `snake_case` to a `kind` field. Only `PartialEq` is derived (not `Eq`),
/// since the `Float` variant holds an `f64`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum Constant {
    /// Integer constant whose value fits in an `i64`.
    Int {
        /// The integer value (stored as `i64` for JSON compatibility; use the
        /// `BigInt` variant for values outside the `i64` range).
        value: i64,
        /// Bit width (e.g., 8, 16, 32, 64, 128).
        bits: u8,
    },

    /// Large integer constant that doesn't fit in i64 (stored as string).
    BigInt {
        /// The integer value as a decimal string.
        value: String,
        /// Bit width.
        bits: u8,
    },

    /// Floating point constant.
    ///
    /// All floats are stored as `f64` regardless of source precision.
    /// This is lossless for f32 values because every IEEE 754 binary32
    /// value is exactly representable as a binary64 value. The `bits`
    /// field records the original source precision (32 or 64) for
    /// informational purposes, but analyses should treat the `value`
    /// field as the canonical representation and should not round-trip
    /// it back to f32.
    Float {
        /// The floating point value (stored as f64; lossless for f32 sources).
        value: f64,
        /// Original bit width (32 for f32, 64 for f64).
        bits: u8,
    },

    /// String constant (UTF-8).
    String {
        /// The string value.
        value: String,
    },

    /// Null pointer constant.
    Null,

    /// Undefined/poison value.
    Undef,

    /// Zero initializer (for aggregates).
    ZeroInit,

    /// Aggregate constant (struct, array).
    Aggregate {
        /// The aggregate elements, which may themselves be aggregates.
        elements: Vec<Constant>,
    },

    /// Global reference (pointer to a global variable).
    ///
    /// Used for global pointer initializers like `@p = global ptr @target`.
    /// The `ValueId` identifies the target global's address.
    ///
    /// NOTE(review): this enum is internally tagged (`tag = "kind"`). serde
    /// only supports newtype variants in that representation when the payload
    /// serializes as a map/struct — confirm that `ValueId` round-trips here.
    GlobalRef(crate::ids::ValueId),
}
86
87impl Constant {
88    /// Create an integer constant.
89    #[must_use]
90    pub fn int(value: i64, bits: u8) -> Self {
91        Self::Int { value, bits }
92    }
93
94    /// Create a big integer constant (for values that don't fit in i64).
95    #[must_use]
96    pub fn big_int(value: i128, bits: u8) -> Self {
97        Self::BigInt {
98            value: value.to_string(),
99            bits,
100        }
101    }
102
103    /// Create a 32-bit integer constant.
104    #[must_use]
105    pub const fn i32(value: i32) -> Self {
106        Self::Int {
107            value: value as i64,
108            bits: 32,
109        }
110    }
111
112    /// Create a 64-bit integer constant.
113    #[must_use]
114    pub const fn i64(value: i64) -> Self {
115        Self::Int { value, bits: 64 }
116    }
117
118    /// Create a floating point constant.
119    #[must_use]
120    pub const fn float(value: f64, bits: u8) -> Self {
121        Self::Float { value, bits }
122    }
123
124    /// Create a string constant.
125    #[must_use]
126    pub fn string(value: impl Into<String>) -> Self {
127        Self::String {
128            value: value.into(),
129        }
130    }
131}
132
133// =============================================================================
134// Types
135// =============================================================================
136
/// Analysis-oriented type — describes memory layout, not source semantics.
///
/// Every language that compiles to machine code must resolve to concrete
/// memory layouts. This enum captures that lowering. Frontends map source
/// types to `AirType`; analyses use it for precision without coupling to
/// any language.
///
/// Nested types are referenced by `TypeId` rather than embedded, so the enum
/// itself is not recursive. Serialized internally tagged under `kind`.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum AirType {
    /// Pointer to memory (language-agnostic, like LLVM opaque ptr).
    Pointer,

    /// Reference type (for Java, Rust, Kotlin).
    /// Distinct from `Pointer` in that it carries nullability information.
    Reference {
        /// Whether this reference can be null.
        nullable: bool,
    },

    /// SIMD vector type.
    Vector {
        /// Element type.
        element: TypeId,
        /// Number of lanes (elements).
        lanes: u32,
    },

    /// Fixed-width integer.
    Integer {
        /// Bit width (e.g., 1, 8, 16, 32, 64, 128).
        bits: u16,
    },

    /// Floating-point.
    Float {
        /// Bit width (32 for f32, 64 for f64).
        bits: u16,
    },

    /// Array; fixed-size when `count` is `Some`, variable-length otherwise.
    Array {
        /// Element type.
        element: TypeId,
        /// Element count. `None` for variable-length arrays.
        /// Omitted from serialized output when `None`.
        #[serde(default, skip_serializing_if = "Option::is_none")]
        count: Option<u64>,
    },

    /// Struct/record with known field layout.
    Struct {
        /// Fields in declaration order.
        fields: Vec<StructField>,
        /// Total size in bytes (including tail padding).
        total_size: u64,
    },

    /// Function signature.
    Function {
        /// Parameter types.
        params: Vec<TypeId>,
        /// Return type.
        return_type: TypeId,
    },

    /// Void (no value / zero-sized).
    Void,

    /// Type is unknown or cannot be expressed.
    /// Analyses fall back to conservative behavior for `Opaque` types.
    Opaque,
}
208
/// A single field in a struct layout.
///
/// All optional members default to `None` when absent from the input and are
/// omitted from serialized output when `None`.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct StructField {
    /// Field type.
    pub field_type: TypeId,
    /// Byte offset from struct start. `None` if layout unavailable.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub byte_offset: Option<u64>,
    /// Size in bytes. `None` if layout unavailable.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub byte_size: Option<u64>,
    /// Optional field name from debug info or source language metadata.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub name: Option<String>,
}
224
225// =============================================================================
226// Field paths (for GEP)
227// =============================================================================
228
/// A single step in a field path.
///
/// Serialized internally tagged under `kind` (`index` or `field`).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum FieldStep {
    /// Array/pointer index (dynamic). The index value itself is not recorded
    /// in the step.
    Index,
    /// Struct field index (compile-time constant).
    Field {
        /// Index of the selected field.
        index: u32,
    },
}
238
/// Path through nested structures for GEP operations.
///
/// The derived `Default` is the empty path (no steps).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct FieldPath {
    /// Sequence of field steps, applied in order.
    pub steps: Vec<FieldStep>,
}
245
246impl FieldPath {
247    /// Create an empty field path.
248    #[must_use]
249    pub const fn empty() -> Self {
250        Self { steps: Vec::new() }
251    }
252
253    /// Create a field path with a single field index.
254    #[must_use]
255    pub fn field(index: u32) -> Self {
256        Self {
257            steps: vec![FieldStep::Field { index }],
258        }
259    }
260
261    /// Create a field path with a single array index.
262    #[must_use]
263    pub fn index() -> Self {
264        Self {
265            steps: vec![FieldStep::Index],
266        }
267    }
268}
269
270// =============================================================================
271// Cast and binary operation kinds
272// =============================================================================
273
/// Kind of cast operation.
///
/// NOTE(review): variant names are serialized with serde's `snake_case` rule,
/// which starts a new word at every uppercase letter. Acronym-style names such
/// as `FPToUI` are therefore expected to serialize as `f_p_to_u_i` rather than
/// `fp_to_ui` — confirm this is the intended wire format.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum CastKind {
    /// Truncate to smaller integer type.
    Trunc,
    /// Zero-extend to larger integer type.
    ZExt,
    /// Sign-extend to larger integer type.
    SExt,
    /// Float to unsigned integer.
    FPToUI,
    /// Float to signed integer.
    FPToSI,
    /// Unsigned integer to float.
    UIToFP,
    /// Signed integer to float.
    SIToFP,
    /// Float truncation.
    FPTrunc,
    /// Float extension.
    FPExt,
    /// Pointer to integer.
    PtrToInt,
    /// Integer to pointer.
    IntToPtr,
    /// Bitcast (reinterpret bits).
    Bitcast,
    /// Address space cast (pointer conversion).
    AddrSpaceCast,
}
305
/// Kind of binary operation.
///
/// Covers integer and floating point arithmetic, bitwise operations, and
/// integer/float comparisons. Only ordered float comparisons are listed.
///
/// NOTE(review): variant names are serialized with serde's `snake_case` rule,
/// which starts a new word at every uppercase letter, so e.g. `UDiv` is
/// expected to serialize as `u_div` and `ICmpEq` as `i_cmp_eq` — confirm this
/// is the intended wire format.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum BinaryOp {
    // Integer arithmetic
    /// Integer addition.
    Add,
    /// Integer subtraction.
    Sub,
    /// Integer multiplication.
    Mul,
    /// Unsigned integer division.
    UDiv,
    /// Signed integer division.
    SDiv,
    /// Unsigned integer remainder.
    URem,
    /// Signed integer remainder.
    SRem,

    // Floating point arithmetic
    /// Floating point addition.
    FAdd,
    /// Floating point subtraction.
    FSub,
    /// Floating point multiplication.
    FMul,
    /// Floating point division.
    FDiv,
    /// Floating point remainder.
    FRem,

    // Bitwise operations
    /// Bitwise AND.
    And,
    /// Bitwise OR.
    Or,
    /// Bitwise XOR.
    Xor,
    /// Shift left.
    Shl,
    /// Logical shift right.
    LShr,
    /// Arithmetic shift right.
    AShr,

    // Integer comparisons
    /// Integer equality.
    ICmpEq,
    /// Integer inequality.
    ICmpNe,
    /// Unsigned greater than.
    ICmpUgt,
    /// Unsigned greater or equal.
    ICmpUge,
    /// Unsigned less than.
    ICmpUlt,
    /// Unsigned less or equal.
    ICmpUle,
    /// Signed greater than.
    ICmpSgt,
    /// Signed greater or equal.
    ICmpSge,
    /// Signed less than.
    ICmpSlt,
    /// Signed less or equal.
    ICmpSle,

    // Float comparisons (ordered)
    /// Ordered equal.
    FCmpOeq,
    /// Ordered not equal.
    FCmpOne,
    /// Ordered greater than.
    FCmpOgt,
    /// Ordered greater or equal.
    FCmpOge,
    /// Ordered less than.
    FCmpOlt,
    /// Ordered less or equal.
    FCmpOle,
}
388
389// =============================================================================
390// Heap allocation kinds
391// =============================================================================
392
/// Kind of heap allocation operation.
///
/// Well-known allocators get dedicated variants; anything else is kept
/// verbatim in `Other`.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum HeapAllocKind {
    /// `malloc`-family allocation.
    Malloc,
    /// C++ `new` operator.
    New,
    /// `calloc` (zero-initialized).
    Calloc,
    /// `realloc` (resize).
    Realloc,
    /// Other/custom allocator (name preserved as string).
    Other(String),
}
408
409impl HeapAllocKind {
410    /// Get the kind as a string slice.
411    #[must_use]
412    pub fn as_str(&self) -> &str {
413        match self {
414            Self::Malloc => "malloc",
415            Self::New => "new",
416            Self::Calloc => "calloc",
417            Self::Realloc => "realloc",
418            Self::Other(s) => s.as_str(),
419        }
420    }
421}
422
423impl fmt::Display for HeapAllocKind {
424    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
425        f.write_str(self.as_str())
426    }
427}
428
429impl From<&str> for HeapAllocKind {
430    fn from(s: &str) -> Self {
431        match s {
432            "malloc" => Self::Malloc,
433            "new"
434            | "operator_new"
435            | "operator_new_array"
436            | "operator_new_nothrow"
437            | "operator_new_array_nothrow" => Self::New,
438            "calloc" => Self::Calloc,
439            "realloc" => Self::Realloc,
440            other => Self::Other(other.to_string()),
441        }
442    }
443}
444
445// =============================================================================
446// Operations (flat enum, pattern-match friendly)
447// =============================================================================
448
/// AIR operation kind.
///
/// This is a flat enum for easy pattern matching. Additional metadata
/// is carried in optional fields where needed.
///
/// Serialized internally tagged: the variant name is written in `snake_case`
/// to an `op` field, alongside the variant's own fields. Operand positions
/// mentioned below refer to the operand list of the enclosing instruction.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(tag = "op", rename_all = "snake_case")]
pub enum Operation {
    // =========================================================================
    // Allocation operations
    // =========================================================================
    /// Stack allocation. Result is a pointer to allocated memory.
    ///
    /// The `size_bytes` field contains the allocation size when known statically.
    /// For fixed-size allocas like `int x[10]`, this is the total size in bytes.
    /// For variable-length arrays (VLAs), this is `None`.
    Alloca {
        /// Size of allocation in bytes, if known statically.
        /// Omitted from serialized output when `None`.
        #[serde(default, skip_serializing_if = "Option::is_none")]
        size_bytes: Option<u64>,
    },

    /// Global variable/constant reference.
    Global {
        /// The global object being referenced.
        obj: ObjId,
    },

    /// Heap allocation (malloc, new, etc.).
    HeapAlloc {
        /// Kind of heap allocation (e.g., `Malloc`, `New`, `Calloc`).
        kind: HeapAllocKind,
    },

    // =========================================================================
    // Memory access operations
    // =========================================================================
    /// Load from memory. Operand[0] is the pointer.
    Load,

    /// Store to memory. Operand[0] is value, operand[1] is pointer.
    Store,

    /// Get element pointer. Computes address offset.
    /// Operand[0] is base pointer, remaining operands are indices.
    Gep {
        /// Field path for struct/array traversal.
        /// Defaults to the empty path and is omitted from serialized output
        /// when empty.
        #[serde(default, skip_serializing_if = "FieldPath::is_default")]
        field_path: FieldPath,
    },

    /// Memory copy. Operand[0] is dest, operand[1] is src, operand[2] is size.
    Memcpy,

    /// Memory set. Operand[0] is dest, operand[1] is value, operand[2] is size.
    Memset,

    // =========================================================================
    // Control flow operations
    // =========================================================================
    /// Unconditional branch.
    Br {
        /// Target block.
        target: BlockId,
    },

    /// Conditional branch. Operand[0] is the condition.
    CondBr {
        /// Target if condition is true.
        then_target: BlockId,
        /// Target if condition is false.
        else_target: BlockId,
    },

    /// Switch statement. Operand[0] is the discriminant.
    Switch {
        /// Default target if no case matches.
        default: BlockId,
        /// (value, target) pairs; case values are stored as `i64`.
        cases: Vec<(i64, BlockId)>,
    },

    /// Return from function. Operand[0] is return value if present.
    Ret,

    /// Unreachable code marker.
    Unreachable,

    // =========================================================================
    // SSA operations
    // =========================================================================
    /// Phi node for SSA. Merges values from predecessor blocks.
    Phi {
        /// (predecessor block, value from that block) pairs.
        incoming: Vec<(BlockId, ValueId)>,
    },

    /// Select (ternary). Operand[0] is condition, operand[1] is true value, operand[2] is false value.
    Select,

    // =========================================================================
    // Call operations
    // =========================================================================
    /// Direct function call. Operands are arguments.
    CallDirect {
        /// The function being called.
        callee: FunctionId,
    },

    /// Indirect function call through pointer. The last operand is the function
    /// pointer; all preceding operands are arguments (callee-LAST convention).
    CallIndirect {
        /// Expected function signature at this call site, if known.
        /// Used for type-based call graph pruning.
        /// Omitted from serialized output when `None`.
        #[serde(default, skip_serializing_if = "Option::is_none")]
        expected_signature: Option<TypeId>,
    },

    // =========================================================================
    // Transform operations
    // =========================================================================
    /// Type cast. Operand[0] is the value to cast.
    Cast {
        /// Kind of cast.
        kind: CastKind,
        /// Target type bit-width (e.g., 8 for `trunc i64 to i8`).
        /// `None` for backward compatibility with older AIR JSON.
        #[serde(default, skip_serializing_if = "Option::is_none")]
        target_bits: Option<u8>,
    },

    /// Binary operation. Operand[0] and operand[1] are the operands.
    BinaryOp {
        /// Kind of binary operation.
        kind: BinaryOp,
    },

    // =========================================================================
    // Miscellaneous
    // =========================================================================
    /// Copy/move value (identity operation, used for clarity in IR).
    Copy,

    /// Freeze undefined value to determinate but unknown value.
    Freeze,
}
594
595impl FieldPath {
596    /// Check if the field path is the default (empty).
597    fn is_default(&self) -> bool {
598        self.steps.is_empty()
599    }
600}
601
602// =============================================================================
603// Values
604// =============================================================================
605
/// A value in AIR — an instruction result, parameter, global address, or
/// inline constant.
///
/// Serialized internally tagged under `kind`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum Value {
    /// Result of an instruction.
    InstResult {
        /// The instruction that produces this value.
        inst: InstId,
    },

    /// Function parameter.
    Param {
        /// The function this parameter belongs to.
        func: FunctionId,
        /// Parameter index (0-based).
        index: u32,
    },

    /// Global variable/constant address.
    Global {
        /// The global object.
        id: ObjId,
    },

    /// Inline constant value.
    ///
    /// NOTE(review): `Constant` is itself internally tagged with the same
    /// `kind` field as this enum, so the payload's tag and this variant's tag
    /// share one key when serialized — confirm that `Const` values round-trip.
    Const(Constant),
}
633
634// =============================================================================
635// Instructions
636// =============================================================================
637
/// An AIR instruction.
///
/// Optional and empty members are omitted from serialized output and fall
/// back to their defaults when missing from the input.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Instruction {
    /// Unique instruction identifier.
    pub id: InstId,

    /// The operation performed.
    ///
    /// Flattened: the operation's `op` tag and fields are written directly
    /// into the instruction's own map rather than under a nested key.
    #[serde(flatten)]
    pub op: Operation,

    /// Input values (operands).
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub operands: Vec<ValueId>,

    /// Output value (if the instruction produces one).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub dst: Option<ValueId>,

    /// Optional source location.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub span: Option<Span>,

    /// Optional symbol/name information.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub symbol: Option<Symbol>,

    /// Type of the result value, if known.
    /// Populated by frontends with type info (e.g., LLVM).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub result_type: Option<TypeId>,

    /// Frontend-specific extension data, keyed by name.
    /// Analyses that don't understand these extensions should ignore them.
    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
    pub extensions: BTreeMap<String, serde_json::Value>,
}
674
675impl Instruction {
676    /// Create a new instruction.
677    #[must_use]
678    pub fn new(id: InstId, op: Operation) -> Self {
679        Self {
680            id,
681            op,
682            operands: Vec::new(),
683            dst: None,
684            span: None,
685            symbol: None,
686            result_type: None,
687            extensions: BTreeMap::new(),
688        }
689    }
690
691    /// Add operands to the instruction.
692    #[must_use]
693    pub fn with_operands(mut self, operands: Vec<ValueId>) -> Self {
694        self.operands = operands;
695        self
696    }
697
698    /// Set the destination value.
699    #[must_use]
700    pub fn with_dst(mut self, dst: ValueId) -> Self {
701        self.dst = Some(dst);
702        self
703    }
704
705    /// Set the source span.
706    #[must_use]
707    pub fn with_span(mut self, span: Span) -> Self {
708        self.span = Some(span);
709        self
710    }
711
712    /// Set the symbol.
713    #[must_use]
714    pub fn with_symbol(mut self, symbol: Symbol) -> Self {
715        self.symbol = Some(symbol);
716        self
717    }
718
719    /// Check if this is a terminator instruction.
720    #[must_use]
721    pub fn is_terminator(&self) -> bool {
722        matches!(
723            self.op,
724            Operation::Br { .. }
725                | Operation::CondBr { .. }
726                | Operation::Switch { .. }
727                | Operation::Ret
728                | Operation::Unreachable
729        )
730    }
731}
732
733// =============================================================================
734// Basic blocks
735// =============================================================================
736
/// A basic block — a sequence of instructions with single entry/exit.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct AirBlock {
    /// Unique block identifier.
    pub id: BlockId,

    /// Optional block label/name. Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub label: Option<String>,

    /// Instructions in this block (last must be terminator).
    /// This invariant is documented only; the type does not enforce it.
    pub instructions: Vec<Instruction>,
}
750
751impl AirBlock {
752    /// Create a new block.
753    #[must_use]
754    pub fn new(id: BlockId) -> Self {
755        Self {
756            id,
757            label: None,
758            instructions: Vec::new(),
759        }
760    }
761
762    /// Create a block with a label.
763    #[must_use]
764    pub fn with_label(id: BlockId, label: impl Into<String>) -> Self {
765        Self {
766            id,
767            label: Some(label.into()),
768            instructions: Vec::new(),
769        }
770    }
771
772    /// Get the terminator instruction if present.
773    #[must_use]
774    pub fn terminator(&self) -> Option<&Instruction> {
775        self.instructions.last().filter(|i| i.is_terminator())
776    }
777}
778
779// =============================================================================
780// Function parameters
781// =============================================================================
782
/// A function parameter.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct AirParam {
    /// Value ID for this parameter.
    pub id: ValueId,

    /// Optional parameter name. Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub name: Option<String>,

    /// Parameter index (0-based).
    pub index: u32,

    /// Type of this parameter, if known. Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub param_type: Option<TypeId>,
}
800
801impl AirParam {
802    /// Create a new parameter.
803    #[must_use]
804    pub fn new(id: ValueId, index: u32) -> Self {
805        Self {
806            id,
807            name: None,
808            index,
809            param_type: None,
810        }
811    }
812
813    /// Create a named parameter.
814    #[must_use]
815    pub fn named(id: ValueId, index: u32, name: impl Into<String>) -> Self {
816        Self {
817            id,
818            name: Some(name.into()),
819            index,
820            param_type: None,
821        }
822    }
823}
824
825// =============================================================================
826// Functions
827// =============================================================================
828
/// An AIR function.
///
/// `Serialize` is derived, while `Deserialize` and `PartialEq` are implemented
/// by hand so that the derived `block_index` cache is rebuilt on load and
/// ignored in comparisons.
#[derive(Debug, Clone, Serialize)]
pub struct AirFunction {
    /// Unique function identifier.
    pub id: FunctionId,

    /// Function name.
    pub name: String,

    /// Function parameters.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub params: Vec<AirParam>,

    /// Basic blocks (first is entry block unless `entry_block` is specified).
    pub blocks: Vec<AirBlock>,

    /// Entry block ID (defaults to first block if not specified).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub entry_block: Option<BlockId>,

    /// Whether this function is a declaration (no body).
    /// Omitted from serialized output when `false`.
    #[serde(default, skip_serializing_if = "std::ops::Not::not")]
    pub is_declaration: bool,

    /// Optional source location.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub span: Option<Span>,

    /// Optional symbol information.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub symbol: Option<Symbol>,

    /// Pre-computed index: `BlockId` -> position in `blocks` vec.
    /// Skipped during serialization; rebuilt on deserialization.
    ///
    /// Both this map and `blocks` are public, so editing `blocks` directly
    /// leaves the index stale until `rebuild_block_index` is called.
    #[serde(skip)]
    pub block_index: BTreeMap<BlockId, usize>,
}
866
867impl PartialEq for AirFunction {
868    fn eq(&self, other: &Self) -> bool {
869        self.id == other.id
870            && self.name == other.name
871            && self.params == other.params
872            && self.blocks == other.blocks
873            && self.entry_block == other.entry_block
874            && self.is_declaration == other.is_declaration
875            && self.span == other.span
876            && self.symbol == other.symbol
877    }
878}
879
880impl<'de> Deserialize<'de> for AirFunction {
881    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
882    where
883        D: serde::Deserializer<'de>,
884    {
885        /// Helper struct for deserialization (all fields except index).
886        #[derive(Deserialize)]
887        struct AirFunctionData {
888            id: FunctionId,
889            name: String,
890            #[serde(default)]
891            params: Vec<AirParam>,
892            blocks: Vec<AirBlock>,
893            #[serde(default)]
894            entry_block: Option<BlockId>,
895            #[serde(default)]
896            is_declaration: bool,
897            #[serde(default)]
898            span: Option<Span>,
899            #[serde(default)]
900            symbol: Option<Symbol>,
901        }
902
903        let data = AirFunctionData::deserialize(deserializer)?;
904        let block_index = data
905            .blocks
906            .iter()
907            .enumerate()
908            .map(|(i, b)| (b.id, i))
909            .collect();
910        Ok(AirFunction {
911            id: data.id,
912            name: data.name,
913            params: data.params,
914            blocks: data.blocks,
915            entry_block: data.entry_block,
916            is_declaration: data.is_declaration,
917            span: data.span,
918            symbol: data.symbol,
919            block_index,
920        })
921    }
922}
923
924impl AirFunction {
925    /// Create a new function.
926    #[must_use]
927    pub fn new(id: FunctionId, name: impl Into<String>) -> Self {
928        Self {
929            id,
930            name: name.into(),
931            params: Vec::new(),
932            blocks: Vec::new(),
933            entry_block: None,
934            is_declaration: false,
935            span: None,
936            symbol: None,
937            block_index: BTreeMap::new(),
938        }
939    }
940
941    /// Rebuild the `block_index` from the current `blocks` vec.
942    ///
943    /// Call this after bulk-modifying `blocks` directly (e.g., assigning
944    /// a whole `Vec<AirBlock>`). Not needed when using `add_block`.
945    pub fn rebuild_block_index(&mut self) {
946        self.block_index = self
947            .blocks
948            .iter()
949            .enumerate()
950            .map(|(i, b)| (b.id, i))
951            .collect();
952    }
953
954    /// Look up a block by `BlockId` using the pre-computed index (O(log n)).
955    #[must_use]
956    pub fn block(&self, id: BlockId) -> Option<&AirBlock> {
957        self.block_index
958            .get(&id)
959            .and_then(|&idx| self.blocks.get(idx))
960    }
961
962    /// Add a block, updating the index.
963    pub fn add_block(&mut self, block: AirBlock) {
964        let idx = self.blocks.len();
965        self.block_index.insert(block.id, idx);
966        self.blocks.push(block);
967    }
968
969    /// Get the entry block.
970    #[must_use]
971    pub fn entry(&self) -> Option<&AirBlock> {
972        if let Some(entry_id) = self.entry_block {
973            self.block(entry_id)
974        } else {
975            self.blocks.first()
976        }
977    }
978}
979
980// =============================================================================
981// Globals
982// =============================================================================
983
/// A global variable or constant.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct AirGlobal {
    /// Value ID for this global's address.
    pub id: ValueId,

    /// Object ID for the allocation.
    pub obj: ObjId,

    /// Global name.
    pub name: String,

    /// Initial value (if any). Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub init: Option<Constant>,

    /// Whether this is a constant (immutable).
    /// Omitted from serialized output when `false`.
    #[serde(default, skip_serializing_if = "std::ops::Not::not")]
    pub is_constant: bool,

    /// Optional source location.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub span: Option<Span>,

    /// Type of the global's value (the global itself is always a pointer).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub value_type: Option<TypeId>,
}
1012
1013impl AirGlobal {
1014    /// Create a new global variable.
1015    #[must_use]
1016    pub fn new(id: ValueId, obj: ObjId, name: impl Into<String>) -> Self {
1017        Self {
1018            id,
1019            obj,
1020            name: name.into(),
1021            init: None,
1022            is_constant: false,
1023            span: None,
1024            value_type: None,
1025        }
1026    }
1027}
1028
1029// =============================================================================
1030// Type hierarchy (for CHA / virtual dispatch resolution)
1031// =============================================================================
1032
1033/// A virtual method slot in a class's vtable.
1034#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
1035pub struct VirtualMethodSlot {
1036    /// Slot index in the vtable.
1037    pub index: usize,
1038    /// Function ID that occupies this slot, or `None` for pure virtual.
1039    #[serde(default, skip_serializing_if = "Option::is_none")]
1040    pub function: Option<FunctionId>,
1041}
1042
1043/// Type hierarchy entry for a class/struct with virtual methods.
1044///
1045/// Frontend-agnostic: LLVM frontend extracts from `_ZTV*`/`_ZTI*` globals;
1046/// future frontends populate from their own metadata.
1047#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
1048pub struct TypeHierarchyEntry {
1049    /// Demangled class/struct name.
1050    pub type_name: String,
1051    /// Direct base class names.
1052    #[serde(default, skip_serializing_if = "Vec::is_empty")]
1053    pub base_types: Vec<String>,
1054    /// Virtual method slots from the vtable.
1055    #[serde(default, skip_serializing_if = "Vec::is_empty")]
1056    pub virtual_methods: Vec<VirtualMethodSlot>,
1057}
1058
1059// =============================================================================
1060// Module
1061// =============================================================================
1062
/// Pointer width, in bytes, used when none is specified (8 bytes, i.e. 64-bit).
fn default_pointer_width() -> u32 {
    const DEFAULT_POINTER_WIDTH_BYTES: u32 = 8;
    DEFAULT_POINTER_WIDTH_BYTES
}
1067
1068/// An AIR module — the top-level container for a compilation unit.
1069#[derive(Debug, Clone, Serialize)]
1070pub struct AirModule {
1071    /// Unique module identifier.
1072    pub id: ModuleId,
1073
1074    /// Optional module name.
1075    #[serde(default, skip_serializing_if = "Option::is_none")]
1076    pub name: Option<String>,
1077
1078    /// Functions in this module.
1079    pub functions: Vec<AirFunction>,
1080
1081    /// Global variables/constants.
1082    #[serde(default, skip_serializing_if = "Vec::is_empty")]
1083    pub globals: Vec<AirGlobal>,
1084
1085    /// Source files referenced by spans.
1086    #[serde(default, skip_serializing_if = "Vec::is_empty")]
1087    pub source_files: Vec<SourceFile>,
1088
1089    /// Type hierarchy entries for CHA (Class Hierarchy Analysis).
1090    #[serde(default, skip_serializing_if = "Vec::is_empty")]
1091    pub type_hierarchy: Vec<TypeHierarchyEntry>,
1092
1093    /// Inline constants: maps `ValueId` to constant value.
1094    ///
1095    /// When a constant (like `i32 0`) appears as an instruction operand,
1096    /// the frontend records the mapping here so analyses can look up
1097    /// the constant value by `ValueId`.
1098    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
1099    pub constants: BTreeMap<ValueId, Constant>,
1100
1101    /// Type table: maps `TypeId` to `AirType` definition.
1102    ///
1103    /// Frontends intern types here during ingestion. Analyses look up
1104    /// types by `TypeId` for precision improvements. Deterministic
1105    /// ordering via `BTreeMap` ensures reproducible JSON output.
1106    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
1107    pub types: BTreeMap<TypeId, AirType>,
1108
1109    /// Target pointer width in bytes (4 for 32-bit, 8 for 64-bit).
1110    /// Used by layout computation. Defaults to 8.
1111    #[serde(default = "default_pointer_width")]
1112    pub target_pointer_width: u32,
1113
1114    /// Pre-computed index: `FunctionId` -> position in `functions` vec.
1115    /// Skipped during serialization; rebuilt on deserialization.
1116    #[serde(skip)]
1117    pub function_index: BTreeMap<FunctionId, usize>,
1118
1119    /// Pre-computed index: function name -> position in `functions` vec.
1120    /// Skipped during serialization; rebuilt on deserialization.
1121    #[serde(skip)]
1122    pub name_index: BTreeMap<String, usize>,
1123}
1124
1125impl PartialEq for AirModule {
1126    fn eq(&self, other: &Self) -> bool {
1127        self.id == other.id
1128            && self.name == other.name
1129            && self.functions == other.functions
1130            && self.globals == other.globals
1131            && self.source_files == other.source_files
1132            && self.type_hierarchy == other.type_hierarchy
1133            && self.constants == other.constants
1134            && self.types == other.types
1135            && self.target_pointer_width == other.target_pointer_width
1136    }
1137}
1138
1139impl<'de> Deserialize<'de> for AirModule {
1140    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
1141    where
1142        D: serde::Deserializer<'de>,
1143    {
1144        /// Helper struct for deserialization (all fields except index).
1145        #[derive(Deserialize)]
1146        struct AirModuleData {
1147            id: ModuleId,
1148            #[serde(default)]
1149            name: Option<String>,
1150            functions: Vec<AirFunction>,
1151            #[serde(default)]
1152            globals: Vec<AirGlobal>,
1153            #[serde(default)]
1154            source_files: Vec<SourceFile>,
1155            #[serde(default)]
1156            type_hierarchy: Vec<TypeHierarchyEntry>,
1157            #[serde(default)]
1158            constants: BTreeMap<ValueId, Constant>,
1159            #[serde(default)]
1160            types: BTreeMap<TypeId, AirType>,
1161            #[serde(default = "default_pointer_width")]
1162            target_pointer_width: u32,
1163        }
1164
1165        let data = AirModuleData::deserialize(deserializer)?;
1166        let function_index = data
1167            .functions
1168            .iter()
1169            .enumerate()
1170            .map(|(i, f)| (f.id, i))
1171            .collect();
1172        let name_index = data
1173            .functions
1174            .iter()
1175            .enumerate()
1176            .map(|(i, f)| (f.name.clone(), i))
1177            .collect();
1178        Ok(AirModule {
1179            id: data.id,
1180            name: data.name,
1181            functions: data.functions,
1182            globals: data.globals,
1183            source_files: data.source_files,
1184            type_hierarchy: data.type_hierarchy,
1185            constants: data.constants,
1186            types: data.types,
1187            target_pointer_width: data.target_pointer_width,
1188            function_index,
1189            name_index,
1190        })
1191    }
1192}
1193
1194impl AirModule {
1195    /// Create a new module.
1196    #[must_use]
1197    pub fn new(id: ModuleId) -> Self {
1198        Self {
1199            id,
1200            name: None,
1201            functions: Vec::new(),
1202            globals: Vec::new(),
1203            source_files: Vec::new(),
1204            type_hierarchy: Vec::new(),
1205            constants: BTreeMap::new(),
1206            types: BTreeMap::new(),
1207            target_pointer_width: 8,
1208            function_index: BTreeMap::new(),
1209            name_index: BTreeMap::new(),
1210        }
1211    }
1212
1213    /// Rebuild the `function_index` from the current `functions` vec.
1214    ///
1215    /// Call this after bulk-modifying `functions` directly (e.g., assigning
1216    /// a whole `Vec<AirFunction>`). Not needed when using `add_function`.
1217    pub fn rebuild_function_index(&mut self) {
1218        self.function_index = self
1219            .functions
1220            .iter()
1221            .enumerate()
1222            .map(|(i, f)| (f.id, i))
1223            .collect();
1224        self.name_index = self
1225            .functions
1226            .iter()
1227            .enumerate()
1228            .map(|(i, f)| (f.name.clone(), i))
1229            .collect();
1230    }
1231
1232    /// Add a function, updating the index.
1233    pub fn add_function(&mut self, func: AirFunction) {
1234        let idx = self.functions.len();
1235        self.function_index.insert(func.id, idx);
1236        self.name_index.insert(func.name.clone(), idx);
1237        self.functions.push(func);
1238    }
1239
1240    /// Find a function by ID using the pre-computed index (O(log n)).
1241    ///
1242    /// Falls back to linear scan if the index is empty (e.g., when the
1243    /// module was constructed via struct literal without rebuilding).
1244    #[must_use]
1245    pub fn function(&self, id: FunctionId) -> Option<&AirFunction> {
1246        if self.function_index.is_empty() {
1247            self.functions.iter().find(|f| f.id == id)
1248        } else {
1249            self.function_index
1250                .get(&id)
1251                .and_then(|&idx| self.functions.get(idx))
1252        }
1253    }
1254
1255    /// Find a function by ID (mutable) using the pre-computed index (O(log n)).
1256    ///
1257    /// Falls back to linear scan if the index is empty.
1258    #[must_use]
1259    pub fn function_mut(&mut self, id: FunctionId) -> Option<&mut AirFunction> {
1260        if self.function_index.is_empty() {
1261            self.functions.iter_mut().find(|f| f.id == id)
1262        } else {
1263            self.function_index
1264                .get(&id)
1265                .copied()
1266                .and_then(|idx| self.functions.get_mut(idx))
1267        }
1268    }
1269
1270    /// Find a function by name using the pre-computed index (O(log n)).
1271    ///
1272    /// Falls back to linear scan if the index is empty (e.g., when the
1273    /// module was constructed via struct literal without rebuilding).
1274    #[must_use]
1275    pub fn function_by_name(&self, name: &str) -> Option<&AirFunction> {
1276        if self.name_index.is_empty() {
1277            self.functions.iter().find(|f| f.name == name)
1278        } else {
1279            self.name_index
1280                .get(name)
1281                .and_then(|&idx| self.functions.get(idx))
1282        }
1283    }
1284
1285    /// Find a global by name.
1286    #[must_use]
1287    pub fn global_by_name(&self, name: &str) -> Option<&AirGlobal> {
1288        self.globals.iter().find(|g| g.name == name)
1289    }
1290
1291    /// Look up a type by `TypeId`.
1292    #[must_use]
1293    pub fn get_type(&self, id: TypeId) -> Option<&AirType> {
1294        self.types.get(&id)
1295    }
1296
1297    /// Check if a `TypeId` resolves to a pointer-like type (`Pointer` or `Reference`).
1298    #[must_use]
1299    pub fn is_pointer_type(&self, id: TypeId) -> bool {
1300        matches!(
1301            self.types.get(&id),
1302            Some(AirType::Pointer | AirType::Reference { .. })
1303        )
1304    }
1305
1306    /// Get the type of an instruction's result, if available.
1307    #[must_use]
1308    pub fn instruction_type(&self, inst: &Instruction) -> Option<&AirType> {
1309        inst.result_type.and_then(|id| self.types.get(&id))
1310    }
1311
1312    /// Count all values with pointer type in the module.
1313    ///
1314    /// Uses the type table to determine pointer types. If no type entry exists
1315    /// for a value, it is conservatively not counted as a pointer.
1316    #[must_use]
1317    pub fn pointer_value_count(&self) -> usize {
1318        let mut count = self.globals.len(); // globals are always pointers
1319        for func in &self.functions {
1320            count += func
1321                .params
1322                .iter()
1323                .filter(|p| p.param_type.is_some_and(|id| self.is_pointer_type(id)))
1324                .count();
1325            for block in &func.blocks {
1326                count += block
1327                    .instructions
1328                    .iter()
1329                    .filter(|i| {
1330                        i.dst.is_some() && i.result_type.is_some_and(|id| self.is_pointer_type(id))
1331                    })
1332                    .count();
1333            }
1334        }
1335        count
1336    }
1337}
1338
1339// =============================================================================
1340// Bundle
1341// =============================================================================
1342
1343/// The bundle produced by a frontend's `ingest()` call.
1344///
1345/// Contains the full AIR module plus metadata needed for caching and
1346/// schema discoverability.
1347#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
1348pub struct AirBundle {
1349    /// The frontend that produced this bundle (e.g., `"llvm"`, `"air-json"`).
1350    pub frontend_id: String,
1351
1352    /// Schema version for forward/backward compatibility checks.
1353    pub schema_version: String,
1354
1355    /// The AIR module.
1356    pub module: AirModule,
1357}
1358
1359impl AirBundle {
1360    /// Current schema version.
1361    pub const SCHEMA_VERSION: &'static str = "0.1.0";
1362
1363    /// Create a new bundle.
1364    #[must_use]
1365    pub fn new(frontend_id: impl Into<String>, module: AirModule) -> Self {
1366        Self {
1367            frontend_id: frontend_id.into(),
1368            schema_version: Self::SCHEMA_VERSION.to_string(),
1369            module,
1370        }
1371    }
1372}
1373
1374// =============================================================================
1375// Tests
1376// =============================================================================
1377
#[cfg(test)]
mod tests {
    use super::*;

    /// Serialize `value` to compact JSON, parse it back, and assert that the
    /// parsed value equals the input.
    fn assert_json_roundtrip<T>(value: &T)
    where
        T: Serialize + serde::de::DeserializeOwned + PartialEq + std::fmt::Debug,
    {
        let encoded = serde_json::to_string(value).expect("serialize");
        let decoded: T = serde_json::from_str(&encoded).expect("deserialize");
        assert_eq!(*value, decoded);
    }

    /// Serialize `bundle` to pretty-printed JSON and parse the result back.
    fn reparse_pretty(bundle: &AirBundle) -> AirBundle {
        let encoded = serde_json::to_string_pretty(bundle).expect("serialize");
        serde_json::from_str(&encoded).expect("deserialize")
    }

    #[test]
    fn constant_serialization_roundtrip() {
        let samples = [
            Constant::i32(42),
            Constant::i64(-1),
            Constant::big_int(i128::MAX, 128),
            Constant::float(3.15, 64),
            Constant::string("hello"),
            Constant::Null,
            Constant::Undef,
            Constant::ZeroInit,
            Constant::Aggregate {
                elements: vec![Constant::i32(1), Constant::i32(2)],
            },
        ];

        for sample in &samples {
            assert_json_roundtrip(sample);
        }
    }

    #[test]
    fn operation_serialization_roundtrip() {
        let samples = [
            Operation::Alloca { size_bytes: None },
            Operation::Load,
            Operation::Store,
            Operation::Ret,
            Operation::Br {
                target: BlockId::new(1),
            },
            Operation::CondBr {
                then_target: BlockId::new(1),
                else_target: BlockId::new(2),
            },
            Operation::CallDirect {
                callee: FunctionId::new(42),
            },
            Operation::BinaryOp {
                kind: BinaryOp::Add,
            },
            Operation::Cast {
                kind: CastKind::Bitcast,
                target_bits: None,
            },
            Operation::HeapAlloc {
                kind: HeapAllocKind::Malloc,
            },
            Operation::HeapAlloc {
                kind: HeapAllocKind::Other("custom_alloc".to_string()),
            },
        ];

        for sample in &samples {
            assert_json_roundtrip(sample);
        }
    }

    #[test]
    fn heap_alloc_kind_from_str() {
        // (input string, expected kind)
        let cases = [
            ("malloc", HeapAllocKind::Malloc),
            ("new", HeapAllocKind::New),
            ("operator_new", HeapAllocKind::New),
            ("operator_new_array", HeapAllocKind::New),
            ("calloc", HeapAllocKind::Calloc),
            ("realloc", HeapAllocKind::Realloc),
            (
                "my_allocator",
                HeapAllocKind::Other("my_allocator".to_string()),
            ),
        ];

        for (input, expected) in &cases {
            assert_eq!(HeapAllocKind::from(*input), *expected);
        }
    }

    #[test]
    fn heap_alloc_kind_as_str() {
        // (kind, expected string form)
        let cases = [
            (HeapAllocKind::Malloc, "malloc"),
            (HeapAllocKind::New, "new"),
            (HeapAllocKind::Calloc, "calloc"),
            (HeapAllocKind::Realloc, "realloc"),
            (HeapAllocKind::Other("custom".to_string()), "custom"),
        ];

        for (kind, expected) in &cases {
            assert_eq!(kind.as_str(), *expected);
        }
    }

    #[test]
    fn heap_alloc_kind_display() {
        let known = HeapAllocKind::Malloc;
        assert_eq!(known.to_string(), "malloc");

        let custom = HeapAllocKind::Other("zmalloc".to_string());
        assert_eq!(custom.to_string(), "zmalloc");
    }

    #[test]
    fn heap_alloc_kind_serialization_roundtrip() {
        let samples = [
            HeapAllocKind::Malloc,
            HeapAllocKind::New,
            HeapAllocKind::Calloc,
            HeapAllocKind::Realloc,
            HeapAllocKind::Other("custom_alloc".to_string()),
        ];
        for sample in &samples {
            assert_json_roundtrip(sample);
        }

        // Known variants must serialize to these exact (backward-compatible) strings.
        let malloc_json = serde_json::to_string(&HeapAllocKind::Malloc).unwrap();
        assert_eq!(malloc_json, r#""malloc""#);
        let new_json = serde_json::to_string(&HeapAllocKind::New).unwrap();
        assert_eq!(new_json, r#""new""#);
    }

    #[test]
    fn instruction_is_terminator() {
        let branch = Instruction::new(
            InstId::new(1),
            Operation::Br {
                target: BlockId::new(2),
            },
        );
        assert!(branch.is_terminator());

        let non_terminator = Instruction::new(InstId::new(2), Operation::Load);
        assert!(!non_terminator.is_terminator());
    }

    #[test]
    fn air_bundle_creation() {
        let bundle = AirBundle::new("test-frontend", AirModule::new(ModuleId::derive(b"test")));

        assert_eq!(bundle.frontend_id, "test-frontend");
        assert_eq!(bundle.schema_version, AirBundle::SCHEMA_VERSION);
    }

    #[test]
    fn air_bundle_serialization_roundtrip() {
        let mut module = AirModule::new(ModuleId::derive(b"test"));
        module.name = Some("test_module".to_string());

        // A single function `main` with one block that just returns.
        let mut main_fn = AirFunction::new(FunctionId::derive(b"main"), "main");
        let mut entry = AirBlock::new(BlockId::derive(b"entry"));
        let ret = Instruction::new(InstId::derive(b"ret"), Operation::Ret);
        entry.instructions.push(ret);
        main_fn.blocks.push(entry);
        module.functions.push(main_fn);

        let bundle = AirBundle::new("air-json", module);

        let parsed = reparse_pretty(&bundle);
        assert_eq!(bundle, parsed);
    }

    #[test]
    fn type_hierarchy_serialization_roundtrip() {
        let derived = TypeHierarchyEntry {
            type_name: "Derived".to_string(),
            base_types: vec!["Base".to_string()],
            virtual_methods: vec![
                VirtualMethodSlot {
                    index: 0,
                    function: Some(FunctionId::derive(b"Derived::process")),
                },
                // Pure virtual slot: no function assigned.
                VirtualMethodSlot {
                    index: 1,
                    function: None,
                },
            ],
        };

        let mut module = AirModule::new(ModuleId::derive(b"cha_test"));
        module.type_hierarchy.push(derived);

        let bundle = AirBundle::new("test", module);
        let parsed = reparse_pretty(&bundle);
        assert_eq!(bundle, parsed);
    }

    #[test]
    fn type_hierarchy_empty_is_omitted_in_json() {
        let bundle = AirBundle::new("test", AirModule::new(ModuleId::derive(b"empty_cha")));
        let encoded = serde_json::to_string(&bundle).expect("serialize");
        assert!(
            !encoded.contains("type_hierarchy"),
            "empty type_hierarchy should be omitted"
        );
    }

    #[test]
    fn type_hierarchy_deserializes_without_field() {
        // Input written by an older version that had no `type_hierarchy` field.
        let legacy = r#"{"frontend_id":"test","schema_version":"0.1.0","module":{"id":"0x00000000000000000000000000000001","functions":[],"globals":[]}}"#;
        let bundle: AirBundle = serde_json::from_str(legacy).expect("deserialize");
        assert!(bundle.module.type_hierarchy.is_empty());
    }

    #[test]
    fn air_type_serialization_roundtrip() {
        let samples = [
            AirType::Pointer,
            AirType::Integer { bits: 32 },
            AirType::Float { bits: 64 },
            AirType::Void,
            AirType::Opaque,
            AirType::Array {
                element: TypeId::derive(b"integer:32"),
                count: Some(10),
            },
            AirType::Struct {
                fields: vec![StructField {
                    field_type: TypeId::derive(b"pointer"),
                    byte_offset: Some(0),
                    byte_size: Some(8),
                    name: None,
                }],
                total_size: 8,
            },
            AirType::Function {
                params: vec![TypeId::derive(b"pointer")],
                return_type: TypeId::derive(b"void"),
            },
        ];

        for sample in &samples {
            assert_json_roundtrip(sample);
        }
    }

    #[test]
    fn air_type_opaque_is_default_friendly() {
        let encoded = serde_json::to_string(&AirType::Opaque).unwrap();
        assert!(encoded.contains("opaque"));

        let decoded: AirType = serde_json::from_str(&encoded).unwrap();
        assert_eq!(decoded, AirType::Opaque);
    }

    #[test]
    fn struct_field_optional_layout() {
        let bare_field = StructField {
            field_type: TypeId::derive(b"integer:32"),
            byte_offset: None,
            byte_size: None,
            name: None,
        };

        let encoded = serde_json::to_string(&bare_field).unwrap();
        for absent_key in ["byte_offset", "byte_size"] {
            assert!(!encoded.contains(absent_key));
        }

        let decoded: StructField = serde_json::from_str(&encoded).unwrap();
        assert_eq!(decoded, bare_field);
    }

    #[test]
    fn struct_field_with_name_roundtrip() {
        let named = StructField {
            field_type: TypeId::derive(b"integer:32"),
            byte_offset: Some(0),
            byte_size: Some(4),
            name: Some("x".to_string()),
        };
        let named_json = serde_json::to_string(&named).unwrap();
        assert!(named_json.contains(r#""name":"x""#));
        let decoded: StructField = serde_json::from_str(&named_json).unwrap();
        assert_eq!(decoded.name, Some("x".to_string()));

        // A field with no name must not emit a `name` key at all.
        let unnamed = StructField {
            field_type: TypeId::derive(b"integer:32"),
            byte_offset: None,
            byte_size: None,
            name: None,
        };
        let unnamed_json = serde_json::to_string(&unnamed).unwrap();
        assert!(!unnamed_json.contains("name"));
    }

    #[test]
    fn module_type_table_roundtrip() {
        let ptr_type_id = TypeId::derive(b"pointer");
        let i32_type_id = TypeId::derive(b"integer:32");

        let mut module = AirModule::new(ModuleId::derive(b"type_test"));
        module.types.insert(ptr_type_id, AirType::Pointer);
        module
            .types
            .insert(i32_type_id, AirType::Integer { bits: 32 });

        assert!(module.is_pointer_type(ptr_type_id));
        assert!(!module.is_pointer_type(i32_type_id));
        assert!(module.get_type(ptr_type_id).is_some());

        let parsed = reparse_pretty(&AirBundle::new("test", module));
        assert_eq!(parsed.module.types.len(), 2);
        assert!(parsed.module.is_pointer_type(ptr_type_id));
    }

    #[test]
    fn module_type_table_empty_omitted_in_json() {
        let bundle = AirBundle::new("test", AirModule::new(ModuleId::derive(b"empty_types")));
        let encoded = serde_json::to_string(&bundle).expect("serialize");
        assert!(
            !encoded.contains("\"types\""),
            "empty types should be omitted"
        );
    }

    #[test]
    fn instruction_result_type() {
        let ptr_type = TypeId::derive(b"pointer");

        let mut module = AirModule::new(ModuleId::derive(b"inst_type_test"));
        module.types.insert(ptr_type, AirType::Pointer);

        let mut load = Instruction::new(InstId::derive(b"load1"), Operation::Load);
        load.result_type = Some(ptr_type);

        let resolved = module.instruction_type(&load);
        assert!(matches!(resolved, Some(AirType::Pointer)));
    }

    #[test]
    fn pointer_value_count_uses_type_table() {
        let ptr_type = TypeId::derive(b"pointer");
        let i32_type = TypeId::derive(b"integer:32");

        let mut module = AirModule::new(ModuleId::derive(b"count_test"));
        module.types.insert(ptr_type, AirType::Pointer);
        module.types.insert(i32_type, AirType::Integer { bits: 32 });

        // Parameters: one pointer-typed, one integer-typed.
        let mut ptr_param = AirParam::new(ValueId::derive(b"p0"), 0);
        ptr_param.param_type = Some(ptr_type);
        let mut int_param = AirParam::new(ValueId::derive(b"p1"), 1);
        int_param.param_type = Some(i32_type);

        // Instructions: a pointer-producing load, an integer add, and a ret.
        let mut load = Instruction::new(InstId::derive(b"load"), Operation::Load);
        load.dst = Some(ValueId::derive(b"v1"));
        load.result_type = Some(ptr_type);

        let mut add = Instruction::new(
            InstId::derive(b"add"),
            Operation::BinaryOp {
                kind: BinaryOp::Add,
            },
        );
        add.dst = Some(ValueId::derive(b"v2"));
        add.result_type = Some(i32_type);

        let ret = Instruction::new(InstId::derive(b"ret"), Operation::Ret);

        let mut entry = AirBlock::new(BlockId::derive(b"entry"));
        entry.instructions.push(load);
        entry.instructions.push(add);
        entry.instructions.push(ret);

        let mut main_fn = AirFunction::new(FunctionId::derive(b"main"), "main");
        main_fn.params = vec![ptr_param, int_param];
        main_fn.blocks.push(entry);
        module.functions.push(main_fn);

        // 0 globals + 1 pointer param + 1 pointer instruction = 2
        assert_eq!(module.pointer_value_count(), 2);
    }

    #[test]
    fn call_indirect_with_signature() {
        let call = Operation::CallIndirect {
            expected_signature: Some(TypeId::derive(b"fn(ptr)->void")),
        };

        let encoded = serde_json::to_string(&call).expect("serialize");
        assert!(encoded.contains("expected_signature"));

        let decoded: Operation = serde_json::from_str(&encoded).expect("deserialize");
        assert_eq!(call, decoded);
    }

    #[test]
    fn call_indirect_no_signature_omitted() {
        let call = Operation::CallIndirect {
            expected_signature: None,
        };
        let encoded = serde_json::to_string(&call).expect("serialize");
        assert!(!encoded.contains("expected_signature"));
    }

    #[test]
    fn instruction_extensions_roundtrip() {
        let mut with_ext = Instruction::new(InstId::new(1), Operation::Ret);
        with_ext.extensions.insert(
            "llvm.landingpad".to_string(),
            serde_json::json!({"cleanup": true}),
        );

        let encoded = serde_json::to_string(&with_ext).unwrap();
        let decoded: Instruction = serde_json::from_str(&encoded).unwrap();
        assert_eq!(decoded.extensions.len(), 1);
        assert_eq!(
            decoded.extensions["llvm.landingpad"],
            serde_json::json!({"cleanup": true})
        );

        // An instruction with no extensions must not emit the key at all.
        let without_ext = Instruction::new(InstId::new(2), Operation::Ret);
        let encoded_plain = serde_json::to_string(&without_ext).unwrap();
        assert!(!encoded_plain.contains("extensions"));
    }

    #[test]
    fn air_type_reference_serialization_roundtrip() {
        for nullable in [false, true] {
            assert_json_roundtrip(&AirType::Reference { nullable });
        }
    }

    #[test]
    fn air_type_vector_serialization_roundtrip() {
        let vector = AirType::Vector {
            element: TypeId::derive(b"float:32"),
            lanes: 4,
        };
        assert_json_roundtrip(&vector);
    }

    #[test]
    fn target_pointer_width_default() {
        let module = AirModule::new(ModuleId::derive(b"test"));
        assert_eq!(module.target_pointer_width, 8);
    }

    #[test]
    fn target_pointer_width_serialization_roundtrip() {
        let mut module = AirModule::new(ModuleId::derive(b"test32"));
        module.target_pointer_width = 4;

        let encoded =
            serde_json::to_string(&AirBundle::new("test", module)).expect("serialize");
        let decoded: AirBundle = serde_json::from_str(&encoded).expect("deserialize");
        assert_eq!(decoded.module.target_pointer_width, 4);
    }

    #[test]
    fn target_pointer_width_default_when_absent() {
        // Input lacking `target_pointer_width` must fall back to 8.
        let without_width = r#"{"frontend_id":"test","schema_version":"0.1.0","module":{"id":"0x00000000000000000000000000000001","functions":[]}}"#;
        let bundle: AirBundle = serde_json::from_str(without_width).expect("deserialize");
        assert_eq!(bundle.module.target_pointer_width, 8);
    }
}