MixedModels.jl Integration
FormulaCompiler.jl integrates with MixedModels.jl by automatically extracting fixed effects from mixed-effects models, enabling zero-allocation evaluation of the fixed-effects portion.
Overview
Mixed models contain both fixed and random effects. FormulaCompiler.jl focuses on the fixed effects portion, which is often needed for:
- Marginal effects calculation
- Prediction with population-level effects
- Bootstrap inference on fixed effects
- Policy analysis scenarios
Basic Integration
using MixedModels, FormulaCompiler, DataFrames, Tables
# Example dataset
df = DataFrame(
y = randn(1000),
x = randn(1000),
treatment = rand(Bool, 1000),
group = rand(1:10, 1000),
cluster = rand(1:50, 1000)
)
# Fit mixed model
mixed_model = fit(MixedModel, @formula(y ~ x + treatment + (1|group) + (1+x|cluster)), df)
# FormulaCompiler automatically extracts fixed effects: y ~ x + treatment
compiled = compile_formula(mixed_model, Tables.columntable(df))Fixed Effects Extraction
Automatic Extraction
# Mixed model with various random effects structures
mixed_model = fit(MixedModel, @formula(y ~ x * treatment + age + (1|group) + (x|cluster)), df)
# Fixed effects formula: y ~ x * treatment + age
fixed_formula = fixed_effects_form(mixed_model)
println("Fixed effects: ", fixed_formula)
# Compile fixed effects only
compiled = compile_formula(mixed_model, data)Manual Fixed Effects
# If you need more control, extract manually
fixed_form = mixed_model.formula.rhs.terms[1] # Gets fixed effects terms
manual_model = lm(FormulaTerm(mixed_model.formula.lhs, fixed_form), df)
compiled_manual = compile_formula(manual_model, data)Use Cases
Population-Level Predictions
function population_predictions(mixed_model, data, scenarios)
# Compile fixed effects
compiled = compile_formula(mixed_model, data)
row_vec = Vector{Float64}(undef, length(compiled))
results = Dict{String, Vector{Float64}}()
for (scenario_name, scenario) in scenarios
n_obs = Tables.rowcount(scenario.data)
predictions = Vector{Float64}(undef, n_obs)
for i in 1:n_obs
compiled(row_vec, scenario.data, i)
# This gives the fixed effects linear predictor
predictions[i] = dot(fixef(mixed_model), row_vec)
end
results[scenario_name] = predictions
end
return results
end
# Example usage with data modification
n_rows = length(data.treatment)
# Baseline scenario
data_control = merge(data, (treatment = fill(false, n_rows),)) # No treatment
baseline_predictions = population_predictions_single(mixed_model, data_control, "baseline")
# Treatment scenario
data_treated = merge(data, (treatment = fill(true, n_rows),)) # Treatment
treatment_predictions = population_predictions_single(mixed_model, data_treated, "treatment")Marginal Effects for Fixed Effects
function marginal_effects_mixed(mixed_model, data, variable)
compiled = compile_formula(mixed_model, data)
fixed_coefs = fixef(mixed_model)
# Create perturbed data
delta = 0.01
original_values = data[variable]
perturbed_values = original_values .+ delta
perturbed_data = (; data..., variable => perturbed_values)
row_vec_orig = Vector{Float64}(undef, length(compiled))
row_vec_pert = Vector{Float64}(undef, length(compiled))
n_obs = Tables.rowcount(data)
marginal_effects = Vector{Float64}(undef, n_obs)
for i in 1:n_obs
compiled(row_vec_orig, data, i)
compiled(row_vec_pert, perturbed_data, i)
pred_orig = dot(fixed_coefs, row_vec_orig)
pred_pert = dot(fixed_coefs, row_vec_pert)
marginal_effects[i] = (pred_pert - pred_orig) / delta
end
return marginal_effects
endPerformance Comparison
Comparison with modelmatrix
using BenchmarkTools
# Setup mixed model
mixed_model = fit(MixedModel, @formula(y ~ x + treatment + (1|group)), df)
data = Tables.columntable(df)
# Traditional approach: extract full model matrix
function traditional_mixed_row(model, row_idx)
# Get fixed effects design matrix
X = modelmatrix(model.optsum.lm) # Linear model component
return X[row_idx, :]
end
# FormulaCompiler approach
compiled = compile_formula(mixed_model, data)
row_vec = Vector{Float64}(undef, length(compiled))
function fc_mixed_row(compiled, data, row_vec, row_idx)
compiled(row_vec, data, row_idx)
return row_vec
end
# Benchmark
println("Traditional mixed model row extraction:")
@benchmark traditional_mixed_row($mixed_model, 1)
println("FormulaCompiler mixed model row extraction:")
@benchmark fc_mixed_row($compiled, $data, $row_vec, 1)Advanced Examples
Bootstrap Fixed Effects
function bootstrap_fixed_effects(mixed_model, data, n_bootstrap=1000)
compiled = compile_formula(mixed_model, data)
n_obs = Tables.rowcount(data)
n_coefs = length(compiled)
# Get response variable
y_var = Symbol(mixed_model.formula.lhs)
y = data[y_var]
bootstrap_coefs = Matrix{Float64}(undef, n_bootstrap, n_coefs)
row_vec = Vector{Float64}(undef, n_coefs)
for boot in 1:n_bootstrap
# Bootstrap sample
sample_idx = rand(1:n_obs, n_obs)
# Build design matrix for bootstrap sample
X_boot = Matrix{Float64}(undef, n_obs, n_coefs)
y_boot = Vector{Float64}(undef, n_obs)
for (i, idx) in enumerate(sample_idx)
compiled(row_vec, data, idx)
X_boot[i, :] .= row_vec
y_boot[i] = y[idx]
end
# Estimate fixed effects (OLS approximation)
bootstrap_coefs[boot, :] = X_boot \ y_boot
end
return bootstrap_coefs
end
# Usage
boot_coefs = bootstrap_fixed_effects(mixed_model, data, 1000)
# Confidence intervals
using Statistics
conf_intervals = [
(quantile(boot_coefs[:, j], 0.025), quantile(boot_coefs[:, j], 0.975))
for j in 1:size(boot_coefs, 2)
]Policy Scenario Analysis
function analyze_policy_scenarios(mixed_model, base_data)
compiled = compile_formula(mixed_model, base_data)
fixed_coefs = fixef(mixed_model)
# Define policy scenarios using data modification
n_obs = Tables.rowcount(base_data)
policy_configs = [
("baseline", false, nothing, nothing), # No treatment, original x and support
("universal_treatment", true, nothing, nothing), # Treatment, original x and support
("targeted_treatment", true, quantile(base_data.x, 0.75), nothing), # Treatment + top 25% x
("enhanced_policy", true, mean(base_data.x), 1.0) # Treatment + mean x + support
]
results = Dict{String, NamedTuple}()
row_vec = Vector{Float64}(undef, length(compiled))
for (name, treatment_val, x_val, support_val) in policy_configs
# Create modified data for this policy
modified_data = base_data # Start with baseline
if name != "baseline"
# Build modification dictionary
mods = Dict{Symbol, Any}()
mods[:treatment] = fill(treatment_val, n_obs)
if x_val !== nothing
mods[:x] = fill(x_val, n_obs)
end
if support_val !== nothing
mods[:additional_support] = fill(support_val, n_obs)
end
modified_data = merge(base_data, NamedTuple(mods))
end
predictions = Vector{Float64}(undef, n_obs)
for i in 1:n_obs
compiled(row_vec, modified_data, i)
predictions[i] = dot(fixed_coefs, row_vec)
end
results[name] = (
mean_outcome = mean(predictions),
std_outcome = std(predictions),
quantiles = [quantile(predictions, q) for q in [0.25, 0.5, 0.75]]
)
end
return results
end
# Run analysis
policy_results = analyze_policy_scenarios(mixed_model, data)
# Display results
for (policy, stats) in policy_results
println("Policy: $policy")
println(" Mean outcome: $(round(stats.mean_outcome, digits=3))")
println(" Std outcome: $(round(stats.std_outcome, digits=3))")
println(" Quartiles: $(round.(stats.quantiles, digits=3))")
println()
endIntegration Notes
What's Included
- Fixed effects terms only
- Interaction terms involving fixed effects
- Functions applied to fixed effects predictors
What's Excluded
- Random effects terms
(1|group),(x|group) - Random intercepts and slopes
- Cross-level interactions involving random effects
Validation
function validate_mixed_model_integration(mixed_model, data)
compiled = compile_formula(mixed_model, data)
# Extract fixed effects design matrix from MixedModels.jl
mm_fixed = modelmatrix(mixed_model.optsum.lm)
# Compare with FormulaCompiler
row_vec = Vector{Float64}(undef, length(compiled))
for i in 1:min(10, size(mm_fixed, 1))
compiled(row_vec, data, i)
original_row = mm_fixed[i, :]
if !isapprox(row_vec, original_row, rtol=1e-12)
@warn "Mismatch in row $i"
return false
end
end
println("✓ FormulaCompiler matches MixedModels.jl fixed effects matrix")
return true
endBest Practices
When to Use FormulaCompiler with Mixed Models
Supported patterns:
- Population-level predictions
- Fixed effects marginal effects
- Policy scenario analysis
- Bootstrap inference on fixed effects
Not suitable for:
- Predictions requiring random effects (BLUPs)
- Individual-level predictions in clustered data
- Random effects inference
- Cross-level interaction effects
Performance Considerations
# For repeated evaluations, compile once
mixed_model = fit(MixedModel, @formula(y ~ x + (1|group)), df)
compiled = compile_formula(mixed_model, data) # Do this once
# Then evaluate many times
row_vec = Vector{Float64}(undef, length(compiled))
for scenario in many_scenarios
for individual in many_individuals
compiled(row_vec, scenario.data, individual)
# Process fixed effects prediction...
end
endMemory Efficiency
# Mixed models can be large - use data modification for memory efficiency
large_mixed_model = fit(MixedModel, complex_formula, large_df)
base_data = Tables.columntable(large_df)
# Instead of creating many copies of large_df
# Use merge() to override just the variables of interest
n_rows = length(base_data.key_variable)
modified_data = merge(base_data, (key_variable = fill(new_value, n_rows),))
# Evaluate with minimal memory overhead
compiled = compile_formula(large_mixed_model, base_data)
# ... use compiled with modified_data