SpectralQuant API

veloxquant_mlx.spectral

SpectralQuantizer

from veloxquant_mlx.spectral.spectral_quant import SpectralQuantizer

Eigenvector-rotated quantizer with separate signal and noise codebooks.

Constructor

SpectralQuantizer(
    rotation: SpectralRotation,
    signal_bits: int = 4,
    noise_bits: int = 1,
    use_water_filling: bool = False,
)

Parameter	Type	Default	Description
`rotation`	`SpectralRotation`	Required	Per-layer rotation from calibration
`signal_bits`	`int`	`4`	Bits for high-variance dimensions
`noise_bits`	`int`	`1`	Bits for low-variance dimensions
`use_water_filling`	`bool`	`False`	Use per-dim water-filling allocation

Methods

def encode(self, keys: mx.array) -> EncodedVector: ...
def decode(self, encoded: EncodedVector) -> mx.array: ...

calibrate_spectral_rotation

from veloxquant_mlx.spectral.calibrate import calibrate_spectral_rotation

def calibrate_spectral_rotation(
    model,
    tokenizer,
    num_samples: int = 64,
    sequence_length: int = 1024,
    device: str = "gpu",
) -> list[SpectralRotation]

Collects key activations and computes the PCA rotation matrix per layer via SVD.

Parameters:

Parameter	Type	Default	Description
`model`	mlx_lm model	Required	Loaded model
`tokenizer`	tokenizer	Required	Loaded tokenizer
`num_samples`	`int`	`64`	Calibration sequences
`sequence_length`	`int`	`1024`	Tokens per sequence
`device`	`str`	`"gpu"`	`"gpu"` or `"cpu"`

Returns: list[SpectralRotation] — one per transformer layer.

SpectralRotation fields:

rotation_matrix: mx.array — shape [head_dim, head_dim]
eigenvalues: mx.array — shape [head_dim], sorted descending
head_dim: int
layer_name: str

calibrate_from_vectors

from veloxquant_mlx.spectral.calibrate import calibrate_from_vectors

def calibrate_from_vectors(
    key_vectors: list[mx.array],
) -> list[SpectralRotation]

Computes rotations from pre-collected key vectors instead of running a forward pass through the model. Useful when key activations are already available.

save_rotations / load_cached_rotations

from veloxquant_mlx.spectral.calibrate import save_rotations, load_cached_rotations

def save_rotations(rotations: list[SpectralRotation], path: str) -> None: ...
def load_cached_rotations(path: str) -> list[SpectralRotation]: ...

Persist rotation matrices to disk and reload them later. The rotations are stored in NumPy `.npy` format; in the example below, `path` is a directory.

save_rotations(rotations, "./artifacts/spectral/")
rotations = load_cached_rotations("./artifacts/spectral/")

compute_participation_ratio

from veloxquant_mlx.spectral.participation_ratio import compute_participation_ratio

def compute_participation_ratio(eigenvalues: mx.array) -> float

Measures the fraction of dimensions that effectively carry the variance (the participation ratio, normalized by the dimension count d):

PR = (Σ λᵢ)² / (d · Σ λᵢ²)

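Returns a value in [1/d, 1.0], where d is the number of eigenvalues. A value close to 1/d means the energy is concentrated in a few dimensions; a value close to 1.0 means the energy is spread uniformly across all dimensions.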

compute_spectral_gap

from veloxquant_mlx.spectral.participation_ratio import compute_spectral_gap

def compute_spectral_gap(eigenvalues: mx.array) -> int

Finds the index of the largest drop in consecutive eigenvalues — the boundary between "signal" and "noise" subspaces.

water_fill_bits

from veloxquant_mlx.spectral.bit_allocator import water_fill_bits

def water_fill_bits(
    eigenvalues: mx.array,
    target_avg_bits: float,
    min_bits: int = 1,
    max_bits: int = 8,
) -> list[int]

Water-filling bit allocation: assigns more bits to dimensions with higher eigenvalues.

Returns: list[int] of length head_dim — bits per dimension.

from veloxquant_mlx.spectral.bit_allocator import water_fill_bits

bits_per_dim = water_fill_bits(
    eigenvalues=rotations[0].eigenvalues,
    target_avg_bits=3.0,
)
print(bits_per_dim[:8])  # e.g. [8, 8, 6, 4, 2, 1, 1, 1]

SpectralQuantizer

Constructor

Methods

calibrate_spectral_rotation

calibrate_from_vectors

save_rotations / load_cached_rotations

compute_participation_ratio

compute_spectral_gap

water_fill_bits

See also