Source code for chia.vlsi.sram_cacti.cacti_runner

"""Methods to run CACTI 7, and return characterization results."""

import logging
import math
import os
import re
import subprocess
import tempfile
from dataclasses import dataclass, field

# parse_mems_conf / rename_ports moved to chia.chipyard.macrocompiler (they parse
# the Chipyard .top.mems.conf, not CACTI); re-exported here for existing callers.
from chia.chipyard.macrocompiler import (  # noqa: F401
    SRAMSpec,
    parse_mems_conf,
    rename_ports,
)
from chia.base.ChiaFunction import ChiaFunction

logger = logging.getLogger(__name__)



[docs]
@dataclass
class CACTIResult:
    """Characterization figures for a single SRAM macro.

    Instances are produced either by ``run_cacti`` (values scraped from
    CACTI's text report) or by ``analytical_area_estimate`` (a closed-form
    fallback). Units are encoded in each field name.
    """

    # Access latency in nanoseconds ("Access time (ns)" in the CACTI report).
    access_time_ns: float
    # Cycle time in nanoseconds; run_cacti stores 0.0 when the report has no
    # "Cycle time (ns)" line.
    cycle_time_ns: float
    # Dynamic read energy per access in nanojoules; 0.0 when not reported.
    read_energy_nj: float
    # Leakage power in milliwatts; 0.0 when not reported.
    leakage_power_mw: float
    # Macro height in millimetres.
    height_mm: float
    # Macro width in millimetres.
    width_mm: float
    # Macro area in square micrometres (run_cacti derives it as
    # height_mm * width_mm * 1e6).
    area_um2: float
    # Raw CACTI stdout, or a placeholder message for the analytical fallback.
    full_out: str



def _block_size_bytes(width_bits: int) -> int:
    """Return the CACTI block (line) size in bytes for a ``width_bits`` word.

    The word is rounded up to a whole number of bytes and the result is never
    smaller than one byte.

    The value is deliberately NOT padded up to a power of two. CACTI's only
    constraint is ``block >= ceil(width/8)``; power-of-two padding made narrow
    SRAMs come out wider than they are, and compensating afterwards by scaling
    only ``area_um2`` (the Liberty ``area``) left ``height_mm``/``width_mm``
    (the LEF ``SIZE``) untouched, so Liberty and LEF disagreed.
    """
    whole_bytes = math.ceil(width_bits / 8)
    # Clamp so that a degenerate (zero-width) word still occupies one byte.
    return whole_bytes if whole_bytes > 1 else 1


def _generate_cacti_cfg(spec: SRAMSpec, technology_um: float = 0.130) -> str:
    """Render the CACTI 7 input deck for ``spec`` and return it as one string.

    Only a handful of lines depend on the inputs: total size, block size, the
    three port counts, the technology node, and the bus width. Every other
    line is a fixed part of the template.
    """
    line_bytes = _block_size_bytes(spec.width)
    # One block per word, so the array capacity is depth * block size.
    capacity_bytes = spec.depth * line_bytes

    cfg_text = f"""\
-size (bytes) {capacity_bytes}
-block size (bytes) {line_bytes}
-associativity 1
-read-write port {spec.num_rw_ports}
-exclusive read port {spec.num_read_ports}
-exclusive write port {spec.num_write_ports}
-single ended read ports 0
-UCA bank count 1
-technology (u) {technology_um}
-page size (bits) 8192
-burst length 8
-internal prefetch width 8
-Data array cell type - "itrs-hp"
-Data array peripheral type - "itrs-hp"
-Tag array cell type - "itrs-hp"
-Tag array peripheral type - "itrs-hp"
-output/input bus width {spec.width}
-operating temperature (K) 360
-cache type "ram"
-tag size (b) "default"
-access mode (normal, sequential, fast) - "normal"
-design objective (weight delay, dynamic power, leakage power, cycle time, area) 0:0:0:0:100
-deviate (delay, dynamic power, leakage power, cycle time, area) 100000:100000:100000:100000:100000
-Optimize ED or ED^2 (ED, ED^2, NONE): "NONE"
-Cache model (NUCA, UCA)  - "UCA"
-NUCA bank count 0
-Wire signaling (fullswing, lowswing, default) - "Global_30"
-Wire inside mat - "semi-global"
-Wire outside mat - "semi-global"
-Interconnect projection - "conservative"
-Core count 8
-Cache level (L2/L3) - "L3"
-Add ECC - "false"
-Print level (DETAILED, CONCISE) - "CONCISE"
-Print input parameters - "false"
-Force cache config - "false"
-Ndwl 1
-Ndbl 1
-Nspd 0
-Ndcm 1
-Ndsam1 0
-Ndsam2 0
-dram_type "DDR3"
-io state "WRITE"
-addr_timing 1.0
-mem_density 4 Gb
-bus_freq 800 MHz
-duty_cycle 1.0
-activity_dq 1.0
-activity_ca 0.5
-num_dq 72
-num_dqs 18
-num_ca 25
-num_clk 2
-num_mem_dq 2
-mem_data_width 8
-rtt_value 10000
-ron_value 34
-tflight_value
-num_bobs 1
-capacity 80
-num_channels_per_bob 1
-first metric "Cost"
-second metric "Bandwidth"
-third metric "Energy"
-DIMM model "ALL"
-mirror_in_bob "F"
"""
    return cfg_text


# Capture group shared by every pattern below: a loose numeric token made of
# digits, a decimal point, an exponent marker, and sign characters.
_NUMBER_GROUP = r"([\d.eE+\-]+)"

# One pattern per figure scraped from CACTI's text report. Group 1 holds the
# value; the dimensions pattern additionally captures the width in group 2.
_ACCESS_TIME_RE = re.compile(r"Access time \(ns\):\s*" + _NUMBER_GROUP)
_CYCLE_TIME_RE = re.compile(r"Cycle time \(ns\):\s*" + _NUMBER_GROUP)
_READ_ENERGY_RE = re.compile(
    r"Total dynamic read energy per access \(nJ\):\s*" + _NUMBER_GROUP
)
_LEAKAGE_RE = re.compile(
    r"Total leakage power of a bank\s*\(mW\):\s*" + _NUMBER_GROUP
)
_DIMENSIONS_RE = re.compile(
    r"Cache height x width \(mm\):\s*" + _NUMBER_GROUP + r"\s*x\s*" + _NUMBER_GROUP
)



[docs]
def _parse_cacti_stdout(stdout: str) -> CACTIResult | None:
    """Build a CACTIResult from CACTI's text report, or None if it is unusable.

    Access time and the height/width line are mandatory. Cycle time, read
    energy, and leakage fall back to 0.0 when their line is absent.
    """
    access_m = _ACCESS_TIME_RE.search(stdout)
    dims_m = _DIMENSIONS_RE.search(stdout)
    if not access_m or not dims_m:
        return None

    def _optional(pattern: re.Pattern) -> float:
        found = pattern.search(stdout)
        return float(found.group(1)) if found else 0.0

    try:
        height_mm = float(dims_m.group(1))
        width_mm = float(dims_m.group(2))
        return CACTIResult(
            access_time_ns=float(access_m.group(1)),
            cycle_time_ns=_optional(_CYCLE_TIME_RE),
            read_energy_nj=_optional(_READ_ENERGY_RE),
            leakage_power_mw=_optional(_LEAKAGE_RE),
            height_mm=height_mm,
            width_mm=width_mm,
            # Area is derived from the same dimensions that are reported, so
            # area_um2, height_mm, and width_mm stay mutually consistent
            # (mm^2 -> um^2 is a factor of 1e6).
            area_um2=height_mm * width_mm * 1e6,
            full_out=stdout,
        )
    except ValueError:
        # The numeric token pattern is loose (it would accept e.g. "1.2.3"),
        # so a match is not guaranteed to be a valid float.
        return None


@ChiaFunction(resources={"cacti": 1})
def run_cacti(
    spec: SRAMSpec,
    technology_um: float = 0.130,
    cacti_path: str = "cacti",
) -> CACTIResult | None:
    """Run CACTI for a single SRAM spec and return parsed results.

    Args:
        spec: SRAM geometry and port counts to characterize.
        technology_um: Technology node in micrometres, written to the config.
        cacti_path: CACTI executable. A bare name is looked up on PATH; a path
            with a directory part is run from that directory.

    Returns:
        The parsed characterization, or None if CACTI cannot be launched,
        times out (30 s), exits non-zero, or prints a report that cannot be
        parsed. Each failure is logged as a warning.

    The config file lives in a temporary directory that is removed before
    returning; CACTI's stdout is preserved in ``CACTIResult.full_out``.
    """
    cfg_content = _generate_cacti_cfg(spec, technology_um)

    # When cwd is set, a relative executable path is resolved relative to the
    # NEW working directory, so "tools/cacti/cacti" with cwd="tools/cacti"
    # would not be found. Make the path absolute before changing directory.
    # A bare "cacti" has no directory part: keep it as-is for PATH lookup and
    # leave cwd unchanged (subprocess rejects cwd="").
    # NOTE(review): cwd is presumably set so CACTI can find data files that
    # ship next to the binary — confirm.
    if os.path.dirname(cacti_path):
        executable = os.path.abspath(cacti_path)
        run_cwd = os.path.dirname(executable)
    else:
        executable = cacti_path
        run_cwd = None

    # TemporaryDirectory removes the work dir on exit; mkdtemp leaked one
    # directory per call.
    with tempfile.TemporaryDirectory(prefix=f"cacti_{spec.name}_") as work_dir:
        cfg_path = os.path.join(work_dir, "sram.cfg")
        with open(cfg_path, "w", encoding="utf-8") as f:
            f.write(cfg_content)

        try:
            result = subprocess.run(
                [executable, "-infile", cfg_path],
                capture_output=True, text=True, timeout=30,
                cwd=run_cwd,
            )
        except subprocess.TimeoutExpired:
            logger.warning("CACTI timed out for %s", spec.name)
            return None
        except OSError as exc:
            # Missing or non-executable binary: honour the "None on failure"
            # contract instead of propagating the exception.
            logger.warning("Could not launch CACTI for %s: %s", spec.name, exc)
            return None

    if result.returncode != 0:
        logger.warning("CACTI failed for %s (rc=%s): %s",
                       spec.name, result.returncode, result.stderr[:200])
        return None

    parsed = _parse_cacti_stdout(result.stdout)
    if parsed is None:
        logger.warning("Could not parse CACTI output for %s", spec.name)
    return parsed




[docs]
def analytical_area_estimate(spec: SRAMSpec, cell_area_per_bit_um2: float = 1.0) -> CACTIResult:
    """Produce a closed-form stand-in result for when CACTI is unavailable.

    Area is ``depth * width * cell_area_per_bit_um2`` and the macro is assumed
    square. Access time grows with ``log2(depth)``; cycle time is 1.2x the
    access time. Read energy and leakage scale linearly with the bit count.
    ``full_out`` carries a message marking the result as not CACTI-produced.
    """
    bit_count = spec.depth * spec.width
    footprint_um2 = bit_count * cell_area_per_bit_um2
    # Edge of the equivalent square, in micrometres.
    edge_um = math.sqrt(footprint_um2)
    # 0.5 ns base plus 0.01 ns per address bit (log2 of depth, at least 1 bit).
    address_bits = math.log2(max(spec.depth, 2))
    latency_ns = 0.5 + 0.01 * address_bits

    return CACTIResult(
        access_time_ns=latency_ns,
        cycle_time_ns=latency_ns * 1.2,
        # Energy and leakage are normalised to an 8192-bit array.
        read_energy_nj=0.001 * spec.depth * spec.width / 8192,
        leakage_power_mw=0.01 * spec.depth * spec.width / 8192,
        # um -> mm
        height_mm=edge_um / 1000,
        width_mm=edge_um / 1000,
        area_um2=footprint_um2,
        full_out="This is a fake analytical estimate not actually produced by CACTI",
    )