1 Required Libraries

library(afcommon)

2 Building Regression Formulas

This document demonstrates and validates the capabilities of the af_build_formula function. For each case, we first state the expected formula, then show the function call and its actual output.

3 Basic Use

3.1 Simple Linear Regression

Basic formula with a single response and two predictors. Should produce a simple linear model formula: y ~ x1 + x2

formula <- af_build_formula(
    response = "y",
    predictors = c("x1", "x2"),
    as_string = TRUE
)
formula
[1] "y ~ x1 + x2"

3.2 Multiple Response Variables

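Formula with multiple response variables. Should produce: cbind( y1, y2 ) ~ x1 + x2 (the function pads the cbind() arguments with spaces; this is equivalent to cbind(y1, y2) ~ x1 + x2)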

formula <- af_build_formula(
    response = c("y1", "y2"),
    predictors = c("x1", "x2"),
    as_string = TRUE
)
formula
[1] "cbind( y1, y2 ) ~ x1 + x2"

4 Variable Transformations

4.1 Response Transformations

Formula with transformed response. Expected result: log(y) ~ x1 + x2

formula <- af_build_formula(
    response = "y",
    predictors = c("x1", "x2"),
    resp_transform = "log",
    as_string = TRUE
)
formula
[1] "log(y) ~ x1 + x2"

4.2 Predictor Transformations

Formula with transformed predictors. Should produce: y ~ log(x1) + sqrt(x2)

formula <- af_build_formula(
    response = "y",
    predictors = c("x1", "x2"),
    var_transform = list(x1 = "log", x2 = "sqrt"),
    as_string = TRUE
)
formula
[1] "y ~ log(x1) + sqrt(x2)"

5 Polynomial Terms

Formula with polynomial terms. Expected result: y ~ x1 + x2 + poly(x1, 2) + poly(x2, 3)

formula <- af_build_formula(
    response = "y",
    predictors = c("x1", "x2"),
    poly = list(x1 = 2, x2 = 3),
    as_string = TRUE
)
formula
[1] "y ~ x1 + x2 + poly(x1, 2) + poly(x2, 3)"

6 Interactions

6.1 Simple Interactions using “:”

Should produce: y ~ x1 + x2 + x3 + x1:x2

formula <- af_build_formula(
    response = "y",
    predictors = c("x1", "x2", "x3"),
    interactions = list(c("x1", "x2")),
    interaction_type = ":",
    as_string = TRUE
)
formula
[1] "y ~ x1 + x2 + x3 + x1:x2"

6.2 Factorial Interactions using “*”

Expected result: y ~ x1 + x2 + x3 + x1*x2

formula <- af_build_formula(
    response = "y",
    predictors = c("x1", "x2", "x3"),
    interactions = list(c("x1", "x2")),
    interaction_type = "*",
    as_string = TRUE
)
formula
[1] "y ~ x1 + x2 + x3 + x1*x2"

6.3 Mixed Interaction Types

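Here each interaction is given as a list with its own vars and type, so no global interaction_type is passed. Should produce: y ~ x1 + x2 + x3 + x1:x2 + x2*x3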

formula <- af_build_formula(
    response = "y",
    predictors = c("x1", "x2", "x3"),
    interactions = list(
        list(vars = c("x1", "x2"), type = ":"),
        list(vars = c("x2", "x3"), type = "*")
    ),
    as_string = TRUE
)
formula
[1] "y ~ x1 + x2 + x3 + x1:x2 + x2*x3"

7 Mixed Effects Models

7.1 Random Intercepts

Expected result: y ~ x1 + x2 + (1 | subject) + (1 | item)

formula <- af_build_formula(
    response = "y",
    predictors = c("x1", "x2"),
    random = list("subject", "item"),
    as_string = TRUE
)
formula
[1] "y ~ x1 + x2 + (1 | subject) + (1 | item)"

7.2 Random Slopes

Should produce: y ~ x1 + x2 + (x1 + x2 | subject)

formula <- af_build_formula(
    response = "y",
    predictors = c("x1", "x2"),
    random = list(
        list(terms = c("x1", "x2"), group = "subject")
    ),
    as_string = TRUE
)
formula
[1] "y ~ x1 + x2 + (x1 + x2 | subject)"

8 Nested Terms

Expected result: y ~ x1 + x2 + treatment/subgroup

formula <- af_build_formula(
    response = "y",
    predictors = c("x1", "x2"),
    nested = list(c("treatment", "subgroup")),
    as_string = TRUE
)
formula
[1] "y ~ x1 + x2 + treatment/subgroup"

9 GAMs and Smooth Terms

Should produce: y ~ x1 + s(time, k=10, bs='cr') + s(x2, k=20, by=group)

formula <- af_build_formula(
    response = "y",
    predictors = "x1",
    smooth = list(
        time = list(k = 10, bs = "cr"),
        x2 = list(k = 20, by = "group")
    ),
    as_string = TRUE
)
formula
[1] "y ~ x1 + s(time, k=10, bs='cr') + s(x2, k=20, by=group)"

10 Complex Model Example

Should produce: y ~ -1 + x1 + x2 + x3 + poly(x1, 2) + x1:x2 + x2*x3 + treatment/subgroup + (x1 | subject) + (1 | item) + offset(exposure)

formula <- af_build_formula(
    response = "y",
    predictors = c("x1", "x2", "x3"),
    poly = list(x1 = 2),
    interactions = list(
        list(vars = c("x1", "x2"), type = ":"),
        list(vars = c("x2", "x3"), type = "*")
    ),
    random = list(
        list(terms = c("x1"), group = "subject"),
        "item"
    ),
    nested = list(c("treatment", "subgroup")),
    offset = "exposure",
    intercept = FALSE,
    as_string = TRUE
)
formula
[1] "y ~ -1 + x1 + x2 + x3 + poly(x1, 2) + x1:x2 + x2*x3 + treatment/subgroup + (x1 | subject) + (1 | item) + offset(exposure)"

11 Validation Checklist

Key aspects to verify in formula construction:

  1. Response Variable Handling
    • Single response variables appear as is
    • Multiple responses use cbind()
    • Response transformations are properly applied
  2. Predictor Variable Handling
    • Main effects are included correctly
    • Transformations are properly applied
    • Polynomial terms use poly()
    • Offset terms use offset()
  3. Interaction Handling
    • “:” produces specific interactions only
    • “*” produces full factorial interactions
    • Mixed interactions work correctly together
  4. Random Effects Syntax
    • Random intercepts use (1 | group)
    • Random slopes include variables before the |
    • Multiple random effects are combined properly
  5. Special Terms
    • Nested terms use “/”
    • GAM smooth terms use s()
    • Weights are handled separately from the formula
  6. Intercept Handling
    • Present by default
    • Removed with -1 when specified