Advanced Features

FormulaCompiler.jl provides sophisticated capabilities for advanced statistical computing, high-performance applications, and complex analytical workflows. This guide covers memory-efficient scenario analysis, derivative computation, and integration patterns for demanding computational environments.

Counterfactual Analysis

FormulaCompiler provides efficient counterfactual analysis through high-level APIs designed for user applications. For most use cases, you should use one of the recommended approaches below rather than internal implementation details.

Internal vs. Public API

CounterfactualVector types (NumericCounterfactualVector, BoolCounterfactualVector, etc.) and their constructors are internal implementation details NOT exported from FormulaCompiler. They power the high-level APIs but should not be used directly in application code.

Use these high-level APIs instead:

  • Simple contrasts: Direct data modification with merge()
  • Batch contrasts: contrastevaluator() + contrast_modelrow!()
  • Population analysis: Simple loops over rows

For simple one-off counterfactual analysis, modify data directly:

using FormulaCompiler, GLM, DataFrames, Tables

# Fit model
df = DataFrame(
    y = randn(1000),
    x = randn(1000),
    treatment = rand([true, false], 1000)
)
model = lm(@formula(y ~ x * treatment), df)
data = Tables.columntable(df)
compiled = compile_formula(model, data)

# Create counterfactual data scenarios
n_rows = length(data.treatment)
data_treated = merge(data, (treatment = fill(true, n_rows),))
data_control = merge(data, (treatment = fill(false, n_rows),))

# Compare outcomes for same individual under different treatments
row_vec = Vector{Float64}(undef, length(compiled))
β = coef(model)

compiled(row_vec, data_treated, 1)  # Individual 1 if treated
effect_treated = dot(β, row_vec)

compiled(row_vec, data_control, 1)  # Individual 1 if control
effect_control = dot(β, row_vec)

treatment_effect = effect_treated - effect_control

For batch categorical contrasts with zero allocations, use contrastevaluator():

# Create zero-allocation evaluator for categorical contrasts
evaluator = contrastevaluator(compiled, data, [:treatment])
contrast_buf = Vector{Float64}(undef, length(compiled))

# Batch contrasts (zero allocations)
for row in 1:100
    contrast_modelrow!(contrast_buf, evaluator, row, :treatment, false, true)
    # Process contrast_buf...
end

See "Categorical Contrasts" section below for comprehensive examples.

Population Analysis with Loop Patterns

FormulaCompiler uses simple loops for population analysis instead of special infrastructure:

using DataFrames, Tables

df = DataFrame(
    x = randn(1000),
    y = randn(1000),
    treatment = rand(Bool, 1000),
    group = rand(["A", "B", "C"], 1000)
)

data = Tables.columntable(df)
compiled = compile_formula(model, data)

# Population average marginal effects (simple loop)
function compute_population_ame(compiled, data, β, vars)
    de_fd = derivativeevaluator(:fd, compiled, data, vars)
    n_rows = length(first(data))
    ame_sum = zeros(length(vars))
    g_temp = Vector{Float64}(undef, length(vars))

    for row in 1:n_rows
        marginal_effects_eta!(g_temp, de_fd, β, row)  # Zero allocations
        ame_sum .+= g_temp
    end

    return ame_sum ./ n_rows  # Average marginal effects
end

# Treatment effect analysis (loop over scenarios)
function treatment_effects(compiled, data, β, treatment_var)
    n_rows = length(first(data))
    effects = Vector{Float64}(undef, n_rows)
    row_vec = Vector{Float64}(undef, length(compiled))

    for row in 1:n_rows
        # Treatment on
        data_on = merge(data, (treatment = fill(true, n_rows),))
        compiled(row_vec, data_on, row)
        effect_on = dot(β, row_vec)

        # Treatment off
        data_off = merge(data, (treatment = fill(false, n_rows),))
        compiled(row_vec, data_off, row)
        effect_off = dot(β, row_vec)

        effects[row] = effect_on - effect_off
    end

    return effects
end

Example: Treatment Effect Analysis

Compare the same individual under different treatment assignments:

model = lm(@formula(y ~ x * treatment + group), df)
data = Tables.columntable(df)
compiled = compile_formula(model, data)
β = coef(model)

# Option 1: Simple data modification (recommended for 1-10 contrasts)
n_rows = length(first(data))
data_treated = merge(data, (treatment = fill(true, n_rows),))
data_untreated = merge(data, (treatment = fill(false, n_rows),))

row_vec = Vector{Float64}(undef, length(compiled))

# Evaluate individual 1 under both scenarios
compiled(row_vec, data_treated, 1)
effect_treated = dot(β, row_vec)

compiled(row_vec, data_untreated, 1)
effect_untreated = dot(β, row_vec)

individual_treatment_effect = effect_treated - effect_untreated

# Option 2: Zero-allocation batch processing (for 100+ contrasts)
evaluator = contrastevaluator(compiled, data, [:treatment])
contrast_buf = Vector{Float64}(undef, length(compiled))

contrast_modelrow!(contrast_buf, evaluator, 1, :treatment, false, true)
individual_treatment_effect = dot(β, contrast_buf)  # Same result, 0 bytes

Grid Analysis with Loops

Use simple loops for comprehensive parameter analysis:

# Create comprehensive policy analysis using loops
function evaluate_policy_grid(compiled, data, grid_params, row_idx)
    treatment_values = [false, true]
    dose_values = [50.0, 100.0, 150.0]
    region_values = ["North", "South", "East", "West"]

    # Pre-allocate results: 2×3×4 = 24 scenarios
    n_scenarios = length(treatment_values) * length(dose_values) * length(region_values)
    results = Matrix{Float64}(undef, n_scenarios, length(compiled))
    row_vec = Vector{Float64}(undef, length(compiled))

    scenario_idx = 1
    for treatment in treatment_values
        for dose in dose_values
            for region in region_values
                # Create modified data for this scenario
                n_rows = length(first(data))
                scenario_data = merge(data, (
                    treatment = fill(treatment, n_rows),
                    dose = fill(dose, n_rows),
                    region = fill(region, n_rows)
                ))

                # Evaluate scenario
                compiled(row_vec, scenario_data, row_idx)
                results[scenario_idx, :] .= row_vec
                scenario_idx += 1
            end
        end
    end

    return results
end

# Usage
results = evaluate_policy_grid(compiled, data, grid_params, 1)

Multi-Variable Counterfactual Analysis

Analyze multiple counterfactual variables simultaneously:

# Option 1: Simple approach with data modification
n_rows = length(first(data))

# Create scenarios with multiple variables modified
baseline = data
scenario_1 = merge(data, (x = fill(2.0, n_rows), treatment = fill(true, n_rows)))
scenario_2 = merge(data, (x = fill(0.5, n_rows), treatment = fill(false, n_rows)))

# Evaluate same individual under different scenarios
row = 5
row_vec = Vector{Float64}(undef, length(compiled))
β = coef(model)

compiled(row_vec, baseline, row)
effect_baseline = dot(β, row_vec)

compiled(row_vec, scenario_1, row)
effect_scenario_1 = dot(β, row_vec)

compiled(row_vec, scenario_2, row)
effect_scenario_2 = dot(β, row_vec)

println("Scenario effects: ", effect_scenario_1 - effect_baseline, ", ", effect_scenario_2 - effect_baseline)

Advanced Compilation Features

Introspection and Performance

Profile compilation performance:

using BenchmarkTools

# Benchmark compilation time
@benchmark compile_formula($model, $data)

Derivative Computation System

FormulaCompiler provides comprehensive automatic differentiation capabilities for computing Jacobians with dual backend support. For marginal effects, standard errors, and statistical inference, use Margins.jl which builds on these computational primitives.

Note: Examples in this section use functions from Margins.jl (marginal_effects_eta!, marginal_effects_mu!, delta_method_se). Install Margins.jl to use these examples: using Pkg; Pkg.add(url="https://github.com/emfeltham/Margins.jl")

Performance Characteristics

  • Core evaluation: Zero allocations (modelrow!, compiled functions)
  • Both derivative backends: Zero allocations (ForwardDiff and finite differences)
  • AD backend preferred: Higher accuracy (machine precision) and faster performance
  • FD backend alternative: Explicit step size control
  • Validation: Cross-validated between backends for robustness
Backend Selection

FormulaCompiler provides dual backends for derivative computation, both achieving zero allocations. Use :ad (ForwardDiff) as the default for higher accuracy and performance. Use :fd (finite differences) only when you need explicit control over step sizes.

Derivative Evaluator Construction

Build reusable derivative evaluators for efficient computation:

using FormulaCompiler, GLM

# Setup model with mixed variable types
df = DataFrame(
    y = randn(1000),
    price = randn(1000),          # Float64 continuous
    quantity = rand(1:100, 1000), # Int64 continuous (auto-converted)
    available = rand(Bool, 1000), # Bool continuous (true→1.0, false→0.0)
    region = categorical(rand(["North", "South"], 1000))  # Categorical
)

model = lm(@formula(y ~ price * region + log(quantity + 1) + available), df)
data = Tables.columntable(df)
compiled = compile_formula(model, data)

# Boolean variables are treated as continuous (matching StatsModels behavior)
# available: true → 1.0, false → 0.0 in model matrix
# This produces identical results to StatsModels

# Identify continuous variables automatically
continuous_vars = continuous_variables(compiled, data)  # [:price, :quantity]

# Build derivative evaluator (AD backend preferred: zero allocations, higher accuracy)
de_ad = derivativeevaluator(:ad, compiled, data, continuous_vars)
# OR build FD evaluator (alternative: zero allocations, explicit step control)
de_fd = derivativeevaluator(:fd, compiled, data, continuous_vars)

Jacobian Computation

Compute partial derivatives of model matrix rows:

# Method 1: Automatic differentiation (zero allocations, higher accuracy - preferred)
de_ad = derivativeevaluator(:ad, compiled, data, continuous_vars)
J_ad = Matrix{Float64}(undef, length(compiled), length(continuous_vars))
derivative_modelrow!(J_ad, de_ad, 1)

# Method 2: Finite differences (zero allocations, explicit step control - alternative)
de_fd = derivativeevaluator(:fd, compiled, data, continuous_vars)
J_fd = Matrix{Float64}(undef, length(compiled), length(continuous_vars))
derivative_modelrow!(J_fd, de_fd, 1)

# All methods produce equivalent results
@assert isapprox(J_ad, J_fd; rtol=1e-6) "AD and FD should match"

Marginal Effects Computation

Compute effects on linear predictor and response scales:

using Margins  # Provides marginal_effects_eta!, marginal_effects_mu!

β = coef(model)

# Effects on linear predictor η = Xβ
g_eta = Vector{Float64}(undef, length(continuous_vars))

# AD backend: Zero allocations, higher accuracy (preferred)
de_ad = derivativeevaluator(:ad, compiled, data, continuous_vars)
marginal_effects_eta!(g_eta, de_ad, β, 1)

# FD backend: Zero allocations, explicit step control (alternative)
de_fd = derivativeevaluator(:fd, compiled, data, continuous_vars)
marginal_effects_eta!(g_eta, de_fd, β, 1)

# Effects on response scale μ (for GLM models)
if model isa GLM.GeneralizedLinearModel
    link_function = GLM.Link(model)
    g_mu = Vector{Float64}(undef, length(continuous_vars))

    # Using AD evaluator for response scale effects
    marginal_effects_mu!(g_mu, de_ad, β, 1, link_function)

    println("Marginal effects on linear predictor: $g_eta")
    println("Marginal effects on response scale: $g_mu")
end

Categorical Contrasts

FormulaCompiler provides two approaches for computing counterfactual discrete differences: comparing the same observation under different categorical levels, holding all other covariates constant.

Counterfactual vs Cross-Sectional

These methods compute counterfactual effects (same person, different treatment), NOT cross-sectional comparisons (different people with different treatments). This ensures we isolate the treatment effect without confounding from other variables.

Simple Approach: Direct Data Modification

For exploratory analysis or one-off contrasts, modify data directly:

# Create data with different categorical levels
# (keeps all other variables at their original values)
n_rows = length(data.region)
data_north = merge(data, (region = fill("North", n_rows),))
data_south = merge(data, (region = fill("South", n_rows),))

# Evaluate SAME row with different region assignments
row = 1  # Same person/observation
X_north = modelrow(compiled, data_north, row)  # If they were in North
X_south = modelrow(compiled, data_south, row)  # If they were in South
Δ = X_south .- X_north  # Counterfactual difference

# Scalar effect with uncertainty quantification
β = coef(model)
effect = dot(β, Δ)  # Effect of South vs North for person at row 1

# Standard error via delta method
∇β = Δ  # Parameter gradient (for linear scale)
vcov_matrix = vcov(model)
se = sqrt(dot(∇β, vcov_matrix, ∇β))
ci_lower = effect - 1.96 * se
ci_upper = effect + 1.96 * se

println("Effect: $effect ± $se, 95% CI: [$ci_lower, $ci_upper]")

When to use: Quick checks, 1-10 contrasts, exploratory analysis.

Zero-Allocation Approach: ContrastEvaluator

For batch processing or performance-critical code, use the zero-allocation evaluator:

# Create zero-allocation contrast evaluator
evaluator = contrastevaluator(compiled, data, [:region])
contrast_buf = Vector{Float64}(undef, length(compiled))

# Compare categorical levels for specific row
contrast_modelrow!(contrast_buf, evaluator, 1, :region, "North", "South")
contrast_north_south = copy(contrast_buf)  # Save result if needed

# Batch contrasts across multiple rows (zero allocations)
rows_to_analyze = [1, 50, 100, 500]
contrasts = Matrix{Float64}(undef, length(rows_to_analyze), length(compiled))

for (i, row) in enumerate(rows_to_analyze)
    contrast_modelrow!(contrast_buf, evaluator, row, :region, "North", "South")
    contrasts[i, :] .= contrast_buf
end

# Parameter gradients for uncertainty quantification
β = coef(model)
∇β = Vector{Float64}(undef, length(compiled))
contrast_gradient!(∇β, evaluator, 1, :region, "North", "South", β)

# Delta method standard error
vcov_matrix = vcov(model)
se = delta_method_se(evaluator, 1, :region, "North", "South", β, vcov_matrix)

When to use: 100+ contrasts, production pipelines, memory-constrained environments.

How Categorical Contrasts Work

For a detailed explanation of both approaches, including the internal mechanism of ContrastEvaluator and CounterfactualVectors, see How Categorical Contrasts Work.

Advanced Configuration

Optimize derivative computation for specific use cases:

# Variable selection strategies
all_continuous = continuous_variables(compiled, data)
economic_vars = [:price, :quantity]  # Domain-specific subset
interaction_vars = [:price]          # Focus on key interactions

# Backend selection based on requirements
function compute_derivatives_with_backend_choice(compiled, data, vars, β, row, backend=:ad)
    de = derivativeevaluator(backend, compiled, data, vars)
    g = Vector{Float64}(undef, length(vars))
    marginal_effects_eta!(g, de, β, row)
    return g
end

Mixed Models (Fixed Effects)

Derivatives target the fixed-effects design (random effects are intentionally excluded):

using MixedModels

df = DataFrame(y = randn(500), x = randn(500), z = abs.(randn(500)) .+ 0.1,
               group = categorical(rand(1:20, 500)))
mm = fit(MixedModel, @formula(y ~ 1 + x + z + (1|group)), df; progress=false)

data = Tables.columntable(df)
compiled = compile_formula(mm, data)  # fixed-effects only
vars = [:x, :z]
de_fd = derivativeevaluator(:fd, compiled, data, vars)

J = Matrix{Float64}(undef, length(compiled), length(vars))
derivative_modelrow!(J, de_fd, 1)

Architecture and Optimization

The derivative system achieves near-zero allocations through:

  • Preallocated buffers: Jacobian matrices, gradient vectors, and temporary arrays stored in derivativeevaluator
  • Typed closures: Compile-time specialization eliminates runtime dispatch
  • Prebuilt data structures: Override vectors and merged data reused across calls
  • Optimized memory layout: All allocations front-loaded during evaluator construction

Performance Benchmarking

using BenchmarkTools

# Build evaluators once (one-time cost)
de_ad = derivativeevaluator(:ad, compiled, data, [:x, :z])  # Zero allocations, higher accuracy (preferred)
de_fd = derivativeevaluator(:fd, compiled, data, [:x, :z])  # Zero allocations, explicit step control (alternative)
J = Matrix{Float64}(undef, length(compiled), length(de_ad.vars))

# Benchmark derivatives
@benchmark derivative_modelrow!($J, $de_ad, 25)  # AD (faster)
@benchmark derivative_modelrow!($J, $de_fd, 25)  # FD

# Benchmark marginal effects
β = coef(model)
g = Vector{Float64}(undef, length(de_ad.vars))
@benchmark marginal_effects_eta!($g, $de_ad, $β, 25)  # AD (faster, more accurate)
@benchmark marginal_effects_eta!($g, $de_fd, $β, 25)  # FD

Complex Formula Support

Nested Functions

FormulaCompiler.jl handles complex nested functions:

@formula(y ~ log(sqrt(abs(x))) + exp(sin(z)) * group)

Boolean Logic

Sophisticated boolean expressions:

@formula(y ~ (x > 0) * (z < mean(z)) + (group == "A") * log(w))

Custom Functions

Define custom functions for use in formulas:

# Define custom function
custom_transform(x) = x > 0 ? log(1 + x) : -log(1 - x)

# Use in formula (requires function to be defined in scope)
@formula(y ~ custom_transform(x) + group)

Categorical Mixtures

FormulaCompiler.jl supports categorical mixtures for marginal effects computation:

# Create data with weighted categorical specifications
df = DataFrame(
    x = [1.0, 2.0, 3.0],
    group = [mix("A" => 0.3, "B" => 0.7),   # 30% A, 70% B
             mix("A" => 0.3, "B" => 0.7),
             mix("A" => 0.3, "B" => 0.7)]
)

# Compile and evaluate with zero allocations
compiled = compile_formula(model, Tables.columntable(df))
compiled(output, Tables.columntable(df), 1)  # Zero allocations; time varies by hardware

For comprehensive coverage of categorical mixtures including validation, helper functions, and marginal effects integration, see the Categorical Mixtures guide.

High-Performance Computing Patterns

Zero-Allocation Computational Loops

Design computational patterns that maintain zero-allocation performance:

function monte_carlo_predictions(model, data, n_simulations=10000)
    # Pre-compile and pre-allocate all necessary buffers
    compiled = compile_formula(model, data)
    row_vec = Vector{Float64}(undef, length(compiled))
    β = coef(model)
    
    # Pre-allocate result storage
    predictions = Vector{Float64}(undef, n_simulations)
    data_size = length(first(data))
    
    # Zero-allocation simulation loop
    for sim in 1:n_simulations
        # Random row selection
        row_idx = rand(1:data_size)
        
        # Zero-allocation model matrix evaluation
        compiled(row_vec, data, row_idx)
        
        # Zero-allocation prediction computation
        predictions[sim] = dot(β, row_vec)
    end
    
    return predictions
end

# Usage with performance validation
predictions = monte_carlo_predictions(model, data, 100000)

Advanced Memory Management

Optimize memory usage for large-scale applications:

function memory_efficient_batch_processing(model, large_dataset, batch_size=1000)
    compiled = compile_formula(model, large_dataset)
    n_total = length(first(large_dataset))
    n_batches = cld(n_total, batch_size)
    
    # Pre-allocate reusable buffers
    row_vec = Vector{Float64}(undef, length(compiled))
    batch_results = Matrix{Float64}(undef, batch_size, length(compiled))
    
    all_results = Vector{Matrix{Float64}}()
    
    for batch in 1:n_batches
        start_idx = (batch - 1) * batch_size + 1
        end_idx = min(batch * batch_size, n_total)
        actual_batch_size = end_idx - start_idx + 1
        
        # Resize for last batch if needed
        if actual_batch_size != batch_size
            batch_results = Matrix{Float64}(undef, actual_batch_size, length(compiled))
        end
        
        # Zero-allocation batch evaluation
        for (local_idx, global_idx) in enumerate(start_idx:end_idx)
            compiled(view(batch_results, local_idx, :), large_dataset, global_idx)
        end
        
        # Store results (could write to disk here for very large datasets)
        push!(all_results, copy(batch_results))
    end
    
    return all_results
end

Batch Processing Large Datasets

function process_large_dataset(model, data, batch_size=1000)
    compiled = compile_formula(model, data)
    n_rows = Tables.rowcount(data)
    n_cols = length(compiled)
    
    results = Vector{Matrix{Float64}}()
    
    for start_idx in 1:batch_size:n_rows
        end_idx = min(start_idx + batch_size - 1, n_rows)
        batch_size_actual = end_idx - start_idx + 1

        batch_result = Matrix{Float64}(undef, batch_size_actual, n_cols)

        # Evaluate each row in the batch
        for (batch_idx, data_idx) in enumerate(start_idx:end_idx)
            compiled(view(batch_result, batch_idx, :), data, data_idx)
        end

        push!(results, batch_result)
    end
    
    return results
end

Parallel Processing

Combine with Julia's parallel processing capabilities:

using Distributed

@everywhere using FormulaCompiler, DataFrames, Tables

function parallel_evaluation(model, data, row_indices)
    compiled = compile_formula(model, data)
    
    results = @distributed (vcat) for row_idx in row_indices
        row_vec = Vector{Float64}(undef, length(compiled))
        compiled(row_vec, data, row_idx)
        row_vec'  # Return as row matrix
    end
    
    return results
end

Integration with Optimization

Gradient-Based Optimization

FormulaCompiler.jl works well with automatic differentiation:

using ForwardDiff

function objective_function(params, compiled_formula, data, target)
    # Update data with new parameters
    modified_data = (; data..., x = params[1], z = params[2])
    
    row_vec = Vector{Float64}(undef, length(compiled_formula))
    compiled_formula(row_vec, modified_data, 1)
    
    # Compute loss
    return sum((row_vec .- target).^2)
end

# Use with ForwardDiff for gradients
compiled = compile_formula(model, data)
target = [1.0, 2.0, 3.0, 4.0]  # Target model matrix row

gradient = ForwardDiff.gradient(
    params -> objective_function(params, compiled, data, target),
    [0.0, 1.0]  # Initial parameters
)

Bayesian Analysis Integration

Efficient model evaluation in MCMC samplers:

using MCMCChains

function log_likelihood(params, compiled_formula, data, y_observed)
    # Extract parameters
    β = params[1:length(compiled_formula)]
    σ = exp(params[end])  # Log-scale for positivity
    
    n_obs = length(y_observed)
    row_vec = Vector{Float64}(undef, length(compiled_formula))
    
    ll = 0.0
    for i in 1:n_obs
        compiled_formula(row_vec, data, i)
        μ = dot(β, row_vec)
        ll += logpdf(Normal(μ, σ), y_observed[i])
    end
    
    return ll
end

# Use in MCMC sampler (pseudocode)
# sampler = MCMCSampler(log_likelihood, compiled, data, y)

Memory and Performance Monitoring

Allocation Tracking

Monitor allocation performance:

using BenchmarkTools

function check_allocations(compiled, data, n_tests=1000)
    row_vec = Vector{Float64}(undef, length(compiled))
    
    # Warm up
    compiled(row_vec, data, 1)
    
    # Benchmark
    result = @benchmark begin
        for i in 1:$n_tests
            $compiled($row_vec, $data, i % nrow($data) + 1)
        end
    end
    
    return result
end

# Should show 0 allocations
benchmark_result = check_allocations(compiled, data)

Memory Usage Analysis

using Profile

function profile_memory_usage(model, data, n_evaluations=10000)
    compiled = compile_formula(model, data)
    row_vec = Vector{Float64}(undef, length(compiled))
    
    # Profile memory
    Profile.clear_malloc_data()
    
    for i in 1:n_evaluations
        compiled(row_vec, data, i % length(first(data)) + 1)
    end
    
    # Analyze results
    # (Use ProfileView.jl or similar for visualization)
end

Real-World Application Patterns

Economic Policy Analysis

Combine scenario analysis with derivative computation for comprehensive policy evaluation:

function policy_impact_analysis(baseline_model, policy_data, policy_parameters)
    # Compile baseline model
    compiled = compile_formula(baseline_model, policy_data)
    β = coef(baseline_model)
    
    # Identify continuous policy levers
    policy_vars = intersect(keys(policy_parameters), continuous_variables(compiled, policy_data))
    de_fd = derivativeevaluator(:fd, compiled, policy_data, collect(policy_vars))
    
    # Create policy scenarios using data modification
    scenarios = [
        ("status_quo", policy_data),
        ("moderate_policy", merge(policy_data, policy_parameters)),
        ("aggressive_policy", merge(policy_data,
                       [k => fill(v * 1.5, length(first(policy_data))) for (k, v) in policy_parameters]))
    ]
    
    # Evaluate policy impacts
    n_individuals = min(1000, length(first(policy_data)))  # Sample for analysis
    
    results = Dict()
    for (scenario_name, scenario_data) in scenarios
        scenario_predictions = Vector{Float64}(undef, n_individuals)
        scenario_marginals = Matrix{Float64}(undef, n_individuals, length(policy_vars))

        # Evaluate predictions and marginal effects for each individual
        row_vec = Vector{Float64}(undef, length(compiled))
        marginal_vec = Vector{Float64}(undef, length(policy_vars))

        for i in 1:n_individuals
            # Prediction
            compiled(row_vec, scenario_data, i)
            scenario_predictions[i] = dot(β, row_vec)

            # Marginal effects
            marginal_effects_eta!(marginal_vec, de_fd, β, i)  # Zero allocations
            scenario_marginals[i, :] .= marginal_vec
        end

        results[scenario_name] = (
            predictions = scenario_predictions,
            marginal_effects = scenario_marginals,
            mean_prediction = mean(scenario_predictions),
            policy_sensitivity = mean(scenario_marginals, dims=1)
        )
    end
    
    return results
end

# Example usage
policy_params = Dict(:minimum_wage => 15.0, :tax_rate => 0.25)
analysis_results = policy_impact_analysis(economic_model, economic_data, policy_params)

# Compare scenarios
status_quo_mean = analysis_results["status_quo"].mean_prediction
moderate_mean = analysis_results["moderate_policy"].mean_prediction
policy_effect = moderate_mean - status_quo_mean

println("Policy effect: $(round(policy_effect, digits=3))")

Biostatistical Applications

High-throughput analysis for medical and biological research:

function biomarker_analysis(survival_model, patient_data, biomarker_ranges)
    compiled = compile_formula(survival_model, patient_data)
    β = coef(survival_model)
    
    # Identify biomarker variables for sensitivity analysis
    biomarker_vars = Symbol.(keys(biomarker_ranges))
    continuous_biomarkers = intersect(biomarker_vars, continuous_variables(compiled, patient_data))

    if !isempty(continuous_biomarkers)
        de_fd = derivativeevaluator(:fd, compiled, patient_data, continuous_biomarkers)
    end
    
    # Create biomarker scenarios using loops
    biomarker_combinations = Iterators.product(values(biomarker_ranges)...)
    biomarker_scenarios = [("scenario_$i", merge(patient_data,
        Dict(zip(keys(biomarker_ranges), [fill(val, length(first(patient_data))) for val in combo])))
    ) for (i, combo) in enumerate(biomarker_combinations)]
    
    # Patient risk stratification
    n_patients = length(first(patient_data))
    risk_matrix = Matrix{Float64}(undef, length(biomarker_scenarios), n_patients)
    
    # Pre-allocate evaluation buffers
    row_vec = Vector{Float64}(undef, length(compiled))
    
    # Evaluate all scenario-patient combinations
    for (scenario_idx, (scenario_name, scenario_data)) in enumerate(biomarker_scenarios)
        for patient_idx in 1:n_patients
            compiled(row_vec, scenario_data, patient_idx)
            
            # Compute risk score (example: linear predictor)
            risk_score = dot(β, row_vec)
            risk_matrix[scenario_idx, patient_idx] = risk_score
        end
    end
    
    # Compute marginal effects for sensitivity analysis
    if !isempty(continuous_biomarkers)
        marginal_matrix = Matrix{Float64}(undef, n_patients, length(continuous_biomarkers))
        marginal_vec = Vector{Float64}(undef, length(continuous_biomarkers))
        
        for patient_idx in 1:n_patients
            marginal_effects_eta!(marginal_vec, de_fd, β, patient_idx)
            marginal_matrix[patient_idx, :] .= marginal_vec
        end
        
        return (
            risk_scores = risk_matrix,
            marginal_effects = marginal_matrix,
            scenarios = [name for (name, _) in biomarker_scenarios],
            biomarker_vars = continuous_biomarkers
        )
    else
        return (
            risk_scores = risk_matrix,
            scenarios = biomarker_scenarios
        )
    end
end

# Example usage
biomarker_ranges = Dict(
    :tumor_size => [1.0, 2.0, 3.0, 4.0],     # cm
    :psa_level => [4.0, 10.0, 20.0],         # ng/mL
    :age => [50, 65, 80]                     # years
)

bio_results = biomarker_analysis(oncology_model, patient_data, biomarker_ranges)

Financial Risk Modeling

Scenario analysis for financial applications:

function portfolio_risk_analysis(risk_model, market_data, stress_scenarios)
    compiled = compile_formula(risk_model, market_data)
    β = coef(risk_model)
    
    # Risk factor sensitivity
    risk_factors = continuous_variables(compiled, market_data)
    if !isempty(risk_factors)
        de_fd = derivativeevaluator(:fd, compiled, market_data, risk_factors)
    end
    
    # Create market stress scenarios using data modification
    stress_scenario_objects = [
        (name, merge(market_data,
            Dict(k => fill(v, length(first(market_data))) for (k, v) in parameters)))
        for (name, parameters) in stress_scenarios
    ]
    
    # Portfolio evaluation across scenarios
    n_assets = length(first(market_data))
    scenario_valuations = Dict{String, Vector{Float64}}()
    
    row_vec = Vector{Float64}(undef, length(compiled))
    
    for (scenario_name, scenario_data) in stress_scenario_objects
        asset_valuations = Vector{Float64}(undef, n_assets)

        for asset_idx in 1:n_assets
            compiled(row_vec, scenario_data, asset_idx)
            # Risk-adjusted valuation
            asset_valuations[asset_idx] = dot(β, row_vec)
        end

        scenario_valuations[scenario_name] = asset_valuations
    end
    
    # Risk sensitivity analysis
    if !isempty(risk_factors)
        sensitivity_matrix = Matrix{Float64}(undef, n_assets, length(risk_factors))
        sensitivity_vec = Vector{Float64}(undef, length(risk_factors))
        
        for asset_idx in 1:n_assets
            marginal_effects_eta!(sensitivity_vec, de_fd, β, asset_idx)
            sensitivity_matrix[asset_idx, :] .= sensitivity_vec
        end
        
        return (
            scenario_valuations = scenario_valuations,
            risk_sensitivities = sensitivity_matrix,
            risk_factors = risk_factors
        )
    else
        return (scenario_valuations = scenario_valuations,)
    end
end

# Example usage
stress_scenarios = [
    ("market_crash", Dict(:market_volatility => 0.4, :interest_rate => 0.02)),
    ("inflation_shock", Dict(:inflation_rate => 0.08, :commodity_index => 1.5)),
    ("recession", Dict(:gdp_growth => -0.03, :unemployment => 0.12))
]

risk_analysis = portfolio_risk_analysis(financial_model, market_data, stress_scenarios)

# Analyze results
baseline_value = sum(risk_analysis.scenario_valuations["baseline"])
crash_value = sum(risk_analysis.scenario_valuations["market_crash"])
portfolio_risk = (crash_value - baseline_value) / baseline_value

println("Portfolio stress loss: $(round(portfolio_risk * 100, digits=2))%")

Further Reading