Advanced Features
FormulaCompiler.jl provides sophisticated capabilities for advanced statistical computing, high-performance applications, and complex analytical workflows. This guide covers memory-efficient scenario analysis, derivative computation, and integration patterns for demanding computational environments.
Counterfactual Analysis
FormulaCompiler provides efficient counterfactual analysis through high-level APIs designed for user applications. For most use cases, you should use one of the recommended approaches below rather than internal implementation details.
CounterfactualVector types (NumericCounterfactualVector, BoolCounterfactualVector, etc.) and their constructors are internal implementation details NOT exported from FormulaCompiler. They power the high-level APIs but should not be used directly in application code.
Use these high-level APIs instead:
- Simple contrasts: Direct data modification with
merge() - Batch contrasts:
contrastevaluator()+contrast_modelrow!() - Population analysis: Simple loops over rows
Recommended Approach #1: Direct Data Modification
For simple one-off counterfactual analysis, modify data directly:
using FormulaCompiler, GLM, DataFrames, Tables
# Fit model
df = DataFrame(
y = randn(1000),
x = randn(1000),
treatment = rand([true, false], 1000)
)
model = lm(@formula(y ~ x * treatment), df)
data = Tables.columntable(df)
compiled = compile_formula(model, data)
# Create counterfactual data scenarios
n_rows = length(data.treatment)
data_treated = merge(data, (treatment = fill(true, n_rows),))
data_control = merge(data, (treatment = fill(false, n_rows),))
# Compare outcomes for same individual under different treatments
row_vec = Vector{Float64}(undef, length(compiled))
β = coef(model)
compiled(row_vec, data_treated, 1) # Individual 1 if treated
effect_treated = dot(β, row_vec)
compiled(row_vec, data_control, 1) # Individual 1 if control
effect_control = dot(β, row_vec)
treatment_effect = effect_treated - effect_controlRecommended Approach #2: ContrastEvaluator (Batch Processing)
For batch categorical contrasts with zero allocations, use contrastevaluator():
# Create zero-allocation evaluator for categorical contrasts
evaluator = contrastevaluator(compiled, data, [:treatment])
contrast_buf = Vector{Float64}(undef, length(compiled))
# Batch contrasts (zero allocations)
for row in 1:100
contrast_modelrow!(contrast_buf, evaluator, row, :treatment, false, true)
# Process contrast_buf...
endSee "Categorical Contrasts" section below for comprehensive examples.
Population Analysis with Loop Patterns
FormulaCompiler uses simple loops for population analysis instead of special infrastructure:
using DataFrames, Tables
df = DataFrame(
x = randn(1000),
y = randn(1000),
treatment = rand(Bool, 1000),
group = rand(["A", "B", "C"], 1000)
)
data = Tables.columntable(df)
compiled = compile_formula(model, data)
# Population average marginal effects (simple loop)
function compute_population_ame(compiled, data, β, vars)
de_fd = derivativeevaluator(:fd, compiled, data, vars)
n_rows = length(first(data))
ame_sum = zeros(length(vars))
g_temp = Vector{Float64}(undef, length(vars))
for row in 1:n_rows
marginal_effects_eta!(g_temp, de_fd, β, row) # Zero allocations
ame_sum .+= g_temp
end
return ame_sum ./ n_rows # Average marginal effects
end
# Treatment effect analysis (loop over scenarios)
function treatment_effects(compiled, data, β, treatment_var)
n_rows = length(first(data))
effects = Vector{Float64}(undef, n_rows)
row_vec = Vector{Float64}(undef, length(compiled))
for row in 1:n_rows
# Treatment on
data_on = merge(data, (treatment = fill(true, n_rows),))
compiled(row_vec, data_on, row)
effect_on = dot(β, row_vec)
# Treatment off
data_off = merge(data, (treatment = fill(false, n_rows),))
compiled(row_vec, data_off, row)
effect_off = dot(β, row_vec)
effects[row] = effect_on - effect_off
end
return effects
endExample: Treatment Effect Analysis
Compare the same individual under different treatment assignments:
model = lm(@formula(y ~ x * treatment + group), df)
data = Tables.columntable(df)
compiled = compile_formula(model, data)
β = coef(model)
# Option 1: Simple data modification (recommended for 1-10 contrasts)
n_rows = length(first(data))
data_treated = merge(data, (treatment = fill(true, n_rows),))
data_untreated = merge(data, (treatment = fill(false, n_rows),))
row_vec = Vector{Float64}(undef, length(compiled))
# Evaluate individual 1 under both scenarios
compiled(row_vec, data_treated, 1)
effect_treated = dot(β, row_vec)
compiled(row_vec, data_untreated, 1)
effect_untreated = dot(β, row_vec)
individual_treatment_effect = effect_treated - effect_untreated
# Option 2: Zero-allocation batch processing (for 100+ contrasts)
evaluator = contrastevaluator(compiled, data, [:treatment])
contrast_buf = Vector{Float64}(undef, length(compiled))
contrast_modelrow!(contrast_buf, evaluator, 1, :treatment, false, true)
individual_treatment_effect = dot(β, contrast_buf) # Same result, 0 bytesGrid Analysis with Loops
Use simple loops for comprehensive parameter analysis:
# Create comprehensive policy analysis using loops
function evaluate_policy_grid(compiled, data, grid_params, row_idx)
treatment_values = [false, true]
dose_values = [50.0, 100.0, 150.0]
region_values = ["North", "South", "East", "West"]
# Pre-allocate results: 2×3×4 = 24 scenarios
n_scenarios = length(treatment_values) * length(dose_values) * length(region_values)
results = Matrix{Float64}(undef, n_scenarios, length(compiled))
row_vec = Vector{Float64}(undef, length(compiled))
scenario_idx = 1
for treatment in treatment_values
for dose in dose_values
for region in region_values
# Create modified data for this scenario
n_rows = length(first(data))
scenario_data = merge(data, (
treatment = fill(treatment, n_rows),
dose = fill(dose, n_rows),
region = fill(region, n_rows)
))
# Evaluate scenario
compiled(row_vec, scenario_data, row_idx)
results[scenario_idx, :] .= row_vec
scenario_idx += 1
end
end
end
return results
end
# Usage
results = evaluate_policy_grid(compiled, data, grid_params, 1)Multi-Variable Counterfactual Analysis
Analyze multiple counterfactual variables simultaneously:
# Option 1: Simple approach with data modification
n_rows = length(first(data))
# Create scenarios with multiple variables modified
baseline = data
scenario_1 = merge(data, (x = fill(2.0, n_rows), treatment = fill(true, n_rows)))
scenario_2 = merge(data, (x = fill(0.5, n_rows), treatment = fill(false, n_rows)))
# Evaluate same individual under different scenarios
row = 5
row_vec = Vector{Float64}(undef, length(compiled))
β = coef(model)
compiled(row_vec, baseline, row)
effect_baseline = dot(β, row_vec)
compiled(row_vec, scenario_1, row)
effect_scenario_1 = dot(β, row_vec)
compiled(row_vec, scenario_2, row)
effect_scenario_2 = dot(β, row_vec)
println("Scenario effects: ", effect_scenario_1 - effect_baseline, ", ", effect_scenario_2 - effect_baseline)Advanced Compilation Features
Introspection and Performance
Profile compilation performance:
using BenchmarkTools
# Benchmark compilation time
@benchmark compile_formula($model, $data)Derivative Computation System
FormulaCompiler provides comprehensive automatic differentiation capabilities for computing Jacobians with dual backend support. For marginal effects, standard errors, and statistical inference, use Margins.jl which builds on these computational primitives.
Note: Examples in this section use functions from Margins.jl (
marginal_effects_eta!,marginal_effects_mu!,delta_method_se). Install Margins.jl to use these examples:using Pkg; Pkg.add(url="https://github.com/emfeltham/Margins.jl")
Performance Characteristics
- Core evaluation: Zero allocations (modelrow!, compiled functions)
- Both derivative backends: Zero allocations (ForwardDiff and finite differences)
- AD backend preferred: Higher accuracy (machine precision) and faster performance
- FD backend alternative: Explicit step size control
- Validation: Cross-validated between backends for robustness
FormulaCompiler provides dual backends for derivative computation, both achieving zero allocations. Use :ad (ForwardDiff) as the default for higher accuracy and performance. Use :fd (finite differences) only when you need explicit control over step sizes.
Derivative Evaluator Construction
Build reusable derivative evaluators for efficient computation:
using FormulaCompiler, GLM
# Setup model with mixed variable types
df = DataFrame(
y = randn(1000),
price = randn(1000), # Float64 continuous
quantity = rand(1:100, 1000), # Int64 continuous (auto-converted)
available = rand(Bool, 1000), # Bool continuous (true→1.0, false→0.0)
region = categorical(rand(["North", "South"], 1000)) # Categorical
)
model = lm(@formula(y ~ price * region + log(quantity + 1) + available), df)
data = Tables.columntable(df)
compiled = compile_formula(model, data)
# Boolean variables are treated as continuous (matching StatsModels behavior)
# available: true → 1.0, false → 0.0 in model matrix
# This produces identical results to StatsModels
# Identify continuous variables automatically
continuous_vars = continuous_variables(compiled, data) # [:price, :quantity]
# Build derivative evaluator (AD backend preferred: zero allocations, higher accuracy)
de_ad = derivativeevaluator(:ad, compiled, data, continuous_vars)
# OR build FD evaluator (alternative: zero allocations, explicit step control)
de_fd = derivativeevaluator(:fd, compiled, data, continuous_vars)Jacobian Computation
Compute partial derivatives of model matrix rows:
# Method 1: Automatic differentiation (zero allocations, higher accuracy - preferred)
de_ad = derivativeevaluator(:ad, compiled, data, continuous_vars)
J_ad = Matrix{Float64}(undef, length(compiled), length(continuous_vars))
derivative_modelrow!(J_ad, de_ad, 1)
# Method 2: Finite differences (zero allocations, explicit step control - alternative)
de_fd = derivativeevaluator(:fd, compiled, data, continuous_vars)
J_fd = Matrix{Float64}(undef, length(compiled), length(continuous_vars))
derivative_modelrow!(J_fd, de_fd, 1)
# All methods produce equivalent results
@assert isapprox(J_ad, J_fd; rtol=1e-6) "AD and FD should match"Marginal Effects Computation
Compute effects on linear predictor and response scales:
using Margins # Provides marginal_effects_eta!, marginal_effects_mu!
β = coef(model)
# Effects on linear predictor η = Xβ
g_eta = Vector{Float64}(undef, length(continuous_vars))
# AD backend: Zero allocations, higher accuracy (preferred)
de_ad = derivativeevaluator(:ad, compiled, data, continuous_vars)
marginal_effects_eta!(g_eta, de_ad, β, 1)
# FD backend: Zero allocations, explicit step control (alternative)
de_fd = derivativeevaluator(:fd, compiled, data, continuous_vars)
marginal_effects_eta!(g_eta, de_fd, β, 1)
# Effects on response scale μ (for GLM models)
if model isa GLM.GeneralizedLinearModel
link_function = GLM.Link(model)
g_mu = Vector{Float64}(undef, length(continuous_vars))
# Using AD evaluator for response scale effects
marginal_effects_mu!(g_mu, de_ad, β, 1, link_function)
println("Marginal effects on linear predictor: $g_eta")
println("Marginal effects on response scale: $g_mu")
endCategorical Contrasts
FormulaCompiler provides two approaches for computing counterfactual discrete differences: comparing the same observation under different categorical levels, holding all other covariates constant.
These methods compute counterfactual effects (same person, different treatment), NOT cross-sectional comparisons (different people with different treatments). This ensures we isolate the treatment effect without confounding from other variables.
Simple Approach: Direct Data Modification
For exploratory analysis or one-off contrasts, modify data directly:
# Create data with different categorical levels
# (keeps all other variables at their original values)
n_rows = length(data.region)
data_north = merge(data, (region = fill("North", n_rows),))
data_south = merge(data, (region = fill("South", n_rows),))
# Evaluate SAME row with different region assignments
row = 1 # Same person/observation
X_north = modelrow(compiled, data_north, row) # If they were in North
X_south = modelrow(compiled, data_south, row) # If they were in South
Δ = X_south .- X_north # Counterfactual difference
# Scalar effect with uncertainty quantification
β = coef(model)
effect = dot(β, Δ) # Effect of South vs North for person at row 1
# Standard error via delta method
∇β = Δ # Parameter gradient (for linear scale)
vcov_matrix = vcov(model)
se = sqrt(dot(∇β, vcov_matrix, ∇β))
ci_lower = effect - 1.96 * se
ci_upper = effect + 1.96 * se
println("Effect: $effect ± $se, 95% CI: [$ci_lower, $ci_upper]")When to use: Quick checks, 1-10 contrasts, exploratory analysis.
Zero-Allocation Approach: ContrastEvaluator
For batch processing or performance-critical code, use the zero-allocation evaluator:
# Create zero-allocation contrast evaluator
evaluator = contrastevaluator(compiled, data, [:region])
contrast_buf = Vector{Float64}(undef, length(compiled))
# Compare categorical levels for specific row
contrast_modelrow!(contrast_buf, evaluator, 1, :region, "North", "South")
contrast_north_south = copy(contrast_buf) # Save result if needed
# Batch contrasts across multiple rows (zero allocations)
rows_to_analyze = [1, 50, 100, 500]
contrasts = Matrix{Float64}(undef, length(rows_to_analyze), length(compiled))
for (i, row) in enumerate(rows_to_analyze)
contrast_modelrow!(contrast_buf, evaluator, row, :region, "North", "South")
contrasts[i, :] .= contrast_buf
end
# Parameter gradients for uncertainty quantification
β = coef(model)
∇β = Vector{Float64}(undef, length(compiled))
contrast_gradient!(∇β, evaluator, 1, :region, "North", "South", β)
# Delta method standard error
vcov_matrix = vcov(model)
se = delta_method_se(evaluator, 1, :region, "North", "South", β, vcov_matrix)When to use: 100+ contrasts, production pipelines, memory-constrained environments.
For a detailed explanation of both approaches, including the internal mechanism of ContrastEvaluator and CounterfactualVectors, see How Categorical Contrasts Work.
Advanced Configuration
Optimize derivative computation for specific use cases:
# Variable selection strategies
all_continuous = continuous_variables(compiled, data)
economic_vars = [:price, :quantity] # Domain-specific subset
interaction_vars = [:price] # Focus on key interactions
# Backend selection based on requirements
function compute_derivatives_with_backend_choice(compiled, data, vars, β, row, backend=:ad)
de = derivativeevaluator(backend, compiled, data, vars)
g = Vector{Float64}(undef, length(vars))
marginal_effects_eta!(g, de, β, row)
return g
endMixed Models (Fixed Effects)
Derivatives target the fixed-effects design (random effects are intentionally excluded):
using MixedModels
df = DataFrame(y = randn(500), x = randn(500), z = abs.(randn(500)) .+ 0.1,
group = categorical(rand(1:20, 500)))
mm = fit(MixedModel, @formula(y ~ 1 + x + z + (1|group)), df; progress=false)
data = Tables.columntable(df)
compiled = compile_formula(mm, data) # fixed-effects only
vars = [:x, :z]
de_fd = derivativeevaluator(:fd, compiled, data, vars)
J = Matrix{Float64}(undef, length(compiled), length(vars))
derivative_modelrow!(J, de_fd, 1)Architecture and Optimization
The derivative system achieves near-zero allocations through:
- Preallocated buffers: Jacobian matrices, gradient vectors, and temporary arrays stored in
derivativeevaluator - Typed closures: Compile-time specialization eliminates runtime dispatch
- Prebuilt data structures: Override vectors and merged data reused across calls
- Optimized memory layout: All allocations front-loaded during evaluator construction
Performance Benchmarking
using BenchmarkTools
# Build evaluators once (one-time cost)
de_ad = derivativeevaluator(:ad, compiled, data, [:x, :z]) # Zero allocations, higher accuracy (preferred)
de_fd = derivativeevaluator(:fd, compiled, data, [:x, :z]) # Zero allocations, explicit step control (alternative)
J = Matrix{Float64}(undef, length(compiled), length(de_ad.vars))
# Benchmark derivatives
@benchmark derivative_modelrow!($J, $de_ad, 25) # AD (faster)
@benchmark derivative_modelrow!($J, $de_fd, 25) # FD
# Benchmark marginal effects
β = coef(model)
g = Vector{Float64}(undef, length(de_ad.vars))
@benchmark marginal_effects_eta!($g, $de_ad, $β, 25) # AD (faster, more accurate)
@benchmark marginal_effects_eta!($g, $de_fd, $β, 25) # FDComplex Formula Support
Nested Functions
FormulaCompiler.jl handles complex nested functions:
@formula(y ~ log(sqrt(abs(x))) + exp(sin(z)) * group)Boolean Logic
Sophisticated boolean expressions:
@formula(y ~ (x > 0) * (z < mean(z)) + (group == "A") * log(w))Custom Functions
Define custom functions for use in formulas:
# Define custom function
custom_transform(x) = x > 0 ? log(1 + x) : -log(1 - x)
# Use in formula (requires function to be defined in scope)
@formula(y ~ custom_transform(x) + group)Categorical Mixtures
FormulaCompiler.jl supports categorical mixtures for marginal effects computation:
# Create data with weighted categorical specifications
df = DataFrame(
x = [1.0, 2.0, 3.0],
group = [mix("A" => 0.3, "B" => 0.7), # 30% A, 70% B
mix("A" => 0.3, "B" => 0.7),
mix("A" => 0.3, "B" => 0.7)]
)
# Compile and evaluate with zero allocations
compiled = compile_formula(model, Tables.columntable(df))
compiled(output, Tables.columntable(df), 1) # Zero allocations; time varies by hardwareFor comprehensive coverage of categorical mixtures including validation, helper functions, and marginal effects integration, see the Categorical Mixtures guide.
High-Performance Computing Patterns
Zero-Allocation Computational Loops
Design computational patterns that maintain zero-allocation performance:
function monte_carlo_predictions(model, data, n_simulations=10000)
# Pre-compile and pre-allocate all necessary buffers
compiled = compile_formula(model, data)
row_vec = Vector{Float64}(undef, length(compiled))
β = coef(model)
# Pre-allocate result storage
predictions = Vector{Float64}(undef, n_simulations)
data_size = length(first(data))
# Zero-allocation simulation loop
for sim in 1:n_simulations
# Random row selection
row_idx = rand(1:data_size)
# Zero-allocation model matrix evaluation
compiled(row_vec, data, row_idx)
# Zero-allocation prediction computation
predictions[sim] = dot(β, row_vec)
end
return predictions
end
# Usage with performance validation
predictions = monte_carlo_predictions(model, data, 100000)Advanced Memory Management
Optimize memory usage for large-scale applications:
function memory_efficient_batch_processing(model, large_dataset, batch_size=1000)
compiled = compile_formula(model, large_dataset)
n_total = length(first(large_dataset))
n_batches = cld(n_total, batch_size)
# Pre-allocate reusable buffers
row_vec = Vector{Float64}(undef, length(compiled))
batch_results = Matrix{Float64}(undef, batch_size, length(compiled))
all_results = Vector{Matrix{Float64}}()
for batch in 1:n_batches
start_idx = (batch - 1) * batch_size + 1
end_idx = min(batch * batch_size, n_total)
actual_batch_size = end_idx - start_idx + 1
# Resize for last batch if needed
if actual_batch_size != batch_size
batch_results = Matrix{Float64}(undef, actual_batch_size, length(compiled))
end
# Zero-allocation batch evaluation
for (local_idx, global_idx) in enumerate(start_idx:end_idx)
compiled(view(batch_results, local_idx, :), large_dataset, global_idx)
end
# Store results (could write to disk here for very large datasets)
push!(all_results, copy(batch_results))
end
return all_results
endBatch Processing Large Datasets
function process_large_dataset(model, data, batch_size=1000)
compiled = compile_formula(model, data)
n_rows = Tables.rowcount(data)
n_cols = length(compiled)
results = Vector{Matrix{Float64}}()
for start_idx in 1:batch_size:n_rows
end_idx = min(start_idx + batch_size - 1, n_rows)
batch_size_actual = end_idx - start_idx + 1
batch_result = Matrix{Float64}(undef, batch_size_actual, n_cols)
# Evaluate each row in the batch
for (batch_idx, data_idx) in enumerate(start_idx:end_idx)
compiled(view(batch_result, batch_idx, :), data, data_idx)
end
push!(results, batch_result)
end
return results
endParallel Processing
Combine with Julia's parallel processing capabilities:
using Distributed
@everywhere using FormulaCompiler, DataFrames, Tables
function parallel_evaluation(model, data, row_indices)
compiled = compile_formula(model, data)
results = @distributed (vcat) for row_idx in row_indices
row_vec = Vector{Float64}(undef, length(compiled))
compiled(row_vec, data, row_idx)
row_vec' # Return as row matrix
end
return results
endIntegration with Optimization
Gradient-Based Optimization
FormulaCompiler.jl works well with automatic differentiation:
using ForwardDiff
function objective_function(params, compiled_formula, data, target)
# Update data with new parameters
modified_data = (; data..., x = params[1], z = params[2])
row_vec = Vector{Float64}(undef, length(compiled_formula))
compiled_formula(row_vec, modified_data, 1)
# Compute loss
return sum((row_vec .- target).^2)
end
# Use with ForwardDiff for gradients
compiled = compile_formula(model, data)
target = [1.0, 2.0, 3.0, 4.0] # Target model matrix row
gradient = ForwardDiff.gradient(
params -> objective_function(params, compiled, data, target),
[0.0, 1.0] # Initial parameters
)Bayesian Analysis Integration
Efficient model evaluation in MCMC samplers:
using MCMCChains
function log_likelihood(params, compiled_formula, data, y_observed)
# Extract parameters
β = params[1:length(compiled_formula)]
σ = exp(params[end]) # Log-scale for positivity
n_obs = length(y_observed)
row_vec = Vector{Float64}(undef, length(compiled_formula))
ll = 0.0
for i in 1:n_obs
compiled_formula(row_vec, data, i)
μ = dot(β, row_vec)
ll += logpdf(Normal(μ, σ), y_observed[i])
end
return ll
end
# Use in MCMC sampler (pseudocode)
# sampler = MCMCSampler(log_likelihood, compiled, data, y)Memory and Performance Monitoring
Allocation Tracking
Monitor allocation performance:
using BenchmarkTools
function check_allocations(compiled, data, n_tests=1000)
row_vec = Vector{Float64}(undef, length(compiled))
# Warm up
compiled(row_vec, data, 1)
# Benchmark
result = @benchmark begin
for i in 1:$n_tests
$compiled($row_vec, $data, i % nrow($data) + 1)
end
end
return result
end
# Should show 0 allocations
benchmark_result = check_allocations(compiled, data)Memory Usage Analysis
using Profile
function profile_memory_usage(model, data, n_evaluations=10000)
compiled = compile_formula(model, data)
row_vec = Vector{Float64}(undef, length(compiled))
# Profile memory
Profile.clear_malloc_data()
for i in 1:n_evaluations
compiled(row_vec, data, i % length(first(data)) + 1)
end
# Analyze results
# (Use ProfileView.jl or similar for visualization)
endReal-World Application Patterns
Economic Policy Analysis
Combine scenario analysis with derivative computation for comprehensive policy evaluation:
function policy_impact_analysis(baseline_model, policy_data, policy_parameters)
# Compile baseline model
compiled = compile_formula(baseline_model, policy_data)
β = coef(baseline_model)
# Identify continuous policy levers
policy_vars = intersect(keys(policy_parameters), continuous_variables(compiled, policy_data))
de_fd = derivativeevaluator(:fd, compiled, policy_data, collect(policy_vars))
# Create policy scenarios using data modification
scenarios = [
("status_quo", policy_data),
("moderate_policy", merge(policy_data, policy_parameters)),
("aggressive_policy", merge(policy_data,
[k => fill(v * 1.5, length(first(policy_data))) for (k, v) in policy_parameters]))
]
# Evaluate policy impacts
n_individuals = min(1000, length(first(policy_data))) # Sample for analysis
results = Dict()
for (scenario_name, scenario_data) in scenarios
scenario_predictions = Vector{Float64}(undef, n_individuals)
scenario_marginals = Matrix{Float64}(undef, n_individuals, length(policy_vars))
# Evaluate predictions and marginal effects for each individual
row_vec = Vector{Float64}(undef, length(compiled))
marginal_vec = Vector{Float64}(undef, length(policy_vars))
for i in 1:n_individuals
# Prediction
compiled(row_vec, scenario_data, i)
scenario_predictions[i] = dot(β, row_vec)
# Marginal effects
marginal_effects_eta!(marginal_vec, de_fd, β, i) # Zero allocations
scenario_marginals[i, :] .= marginal_vec
end
results[scenario_name] = (
predictions = scenario_predictions,
marginal_effects = scenario_marginals,
mean_prediction = mean(scenario_predictions),
policy_sensitivity = mean(scenario_marginals, dims=1)
)
end
return results
end
# Example usage
policy_params = Dict(:minimum_wage => 15.0, :tax_rate => 0.25)
analysis_results = policy_impact_analysis(economic_model, economic_data, policy_params)
# Compare scenarios
status_quo_mean = analysis_results["status_quo"].mean_prediction
moderate_mean = analysis_results["moderate_policy"].mean_prediction
policy_effect = moderate_mean - status_quo_mean
println("Policy effect: $(round(policy_effect, digits=3))")Biostatistical Applications
High-throughput analysis for medical and biological research:
function biomarker_analysis(survival_model, patient_data, biomarker_ranges)
compiled = compile_formula(survival_model, patient_data)
β = coef(survival_model)
# Identify biomarker variables for sensitivity analysis
biomarker_vars = Symbol.(keys(biomarker_ranges))
continuous_biomarkers = intersect(biomarker_vars, continuous_variables(compiled, patient_data))
if !isempty(continuous_biomarkers)
de_fd = derivativeevaluator(:fd, compiled, patient_data, continuous_biomarkers)
end
# Create biomarker scenarios using loops
biomarker_combinations = Iterators.product(values(biomarker_ranges)...)
biomarker_scenarios = [("scenario_$i", merge(patient_data,
Dict(zip(keys(biomarker_ranges), [fill(val, length(first(patient_data))) for val in combo])))
) for (i, combo) in enumerate(biomarker_combinations)]
# Patient risk stratification
n_patients = length(first(patient_data))
risk_matrix = Matrix{Float64}(undef, length(biomarker_scenarios), n_patients)
# Pre-allocate evaluation buffers
row_vec = Vector{Float64}(undef, length(compiled))
# Evaluate all scenario-patient combinations
for (scenario_idx, (scenario_name, scenario_data)) in enumerate(biomarker_scenarios)
for patient_idx in 1:n_patients
compiled(row_vec, scenario_data, patient_idx)
# Compute risk score (example: linear predictor)
risk_score = dot(β, row_vec)
risk_matrix[scenario_idx, patient_idx] = risk_score
end
end
# Compute marginal effects for sensitivity analysis
if !isempty(continuous_biomarkers)
marginal_matrix = Matrix{Float64}(undef, n_patients, length(continuous_biomarkers))
marginal_vec = Vector{Float64}(undef, length(continuous_biomarkers))
for patient_idx in 1:n_patients
marginal_effects_eta!(marginal_vec, de_fd, β, patient_idx)
marginal_matrix[patient_idx, :] .= marginal_vec
end
return (
risk_scores = risk_matrix,
marginal_effects = marginal_matrix,
scenarios = [name for (name, _) in biomarker_scenarios],
biomarker_vars = continuous_biomarkers
)
else
return (
risk_scores = risk_matrix,
scenarios = biomarker_scenarios
)
end
end
# Example usage
biomarker_ranges = Dict(
:tumor_size => [1.0, 2.0, 3.0, 4.0], # cm
:psa_level => [4.0, 10.0, 20.0], # ng/mL
:age => [50, 65, 80] # years
)
bio_results = biomarker_analysis(oncology_model, patient_data, biomarker_ranges)Financial Risk Modeling
Scenario analysis for financial applications:
function portfolio_risk_analysis(risk_model, market_data, stress_scenarios)
compiled = compile_formula(risk_model, market_data)
β = coef(risk_model)
# Risk factor sensitivity
risk_factors = continuous_variables(compiled, market_data)
if !isempty(risk_factors)
de_fd = derivativeevaluator(:fd, compiled, market_data, risk_factors)
end
# Create market stress scenarios using data modification
stress_scenario_objects = [
(name, merge(market_data,
Dict(k => fill(v, length(first(market_data))) for (k, v) in parameters)))
for (name, parameters) in stress_scenarios
]
# Portfolio evaluation across scenarios
n_assets = length(first(market_data))
scenario_valuations = Dict{String, Vector{Float64}}()
row_vec = Vector{Float64}(undef, length(compiled))
for (scenario_name, scenario_data) in stress_scenario_objects
asset_valuations = Vector{Float64}(undef, n_assets)
for asset_idx in 1:n_assets
compiled(row_vec, scenario_data, asset_idx)
# Risk-adjusted valuation
asset_valuations[asset_idx] = dot(β, row_vec)
end
scenario_valuations[scenario_name] = asset_valuations
end
# Risk sensitivity analysis
if !isempty(risk_factors)
sensitivity_matrix = Matrix{Float64}(undef, n_assets, length(risk_factors))
sensitivity_vec = Vector{Float64}(undef, length(risk_factors))
for asset_idx in 1:n_assets
marginal_effects_eta!(sensitivity_vec, de_fd, β, asset_idx)
sensitivity_matrix[asset_idx, :] .= sensitivity_vec
end
return (
scenario_valuations = scenario_valuations,
risk_sensitivities = sensitivity_matrix,
risk_factors = risk_factors
)
else
return (scenario_valuations = scenario_valuations,)
end
end
# Example usage
stress_scenarios = [
("market_crash", Dict(:market_volatility => 0.4, :interest_rate => 0.02)),
("inflation_shock", Dict(:inflation_rate => 0.08, :commodity_index => 1.5)),
("recession", Dict(:gdp_growth => -0.03, :unemployment => 0.12))
]
risk_analysis = portfolio_risk_analysis(financial_model, market_data, stress_scenarios)
# Analyze results
baseline_value = sum(risk_analysis.scenario_valuations["baseline"])
crash_value = sum(risk_analysis.scenario_valuations["market_crash"])
portfolio_risk = (crash_value - baseline_value) / baseline_value
println("Portfolio stress loss: $(round(portfolio_risk * 100, digits=2))%")Further Reading
- Categorical Contrasts Guide - Detailed coverage of categorical contrasts and mixtures
- Performance Guide - Detailed optimization strategies and benchmarking
- API Reference - Complete function documentation