Lecture 22 - Introduction to Non-linear Models - OPTIONAL

Author

Penelope Pooler Eisenbies

Published

April 3, 2025

Setup

  • Run the following chunk of R code to install and load the packages needed for this assignment.

  • Click green triangle in the upper right corner of the setup chunk to run the setup code.

  • Note that this may be slow if there are packages that have not been installed yet.

Code
```{r setup, include=T}
# this line specifies options for default options for all R Chunks
knitr::opts_chunk$set(echo=T, highlight=T)

# suppress scientific notation
options(scipen=100)

# install helper package that loads and installs other packages, if needed
if (!require("pacman")) install.packages("pacman", repos = "http://lib.stat.cmu.edu/R/CRAN/")
```
Loading required package: pacman
Code
```{r setup, include=T}
# install and load required packages
pacman::p_load(pacman,tidyverse, magrittr, olsrr, gridExtra, knitr, viridis)

# verify packages
p_loaded()
```
 [1] "viridis"     "viridisLite" "knitr"       "gridExtra"   "olsrr"      
 [6] "magrittr"    "lubridate"   "forcats"     "stringr"     "dplyr"      
[11] "purrr"       "readr"       "tidyr"       "tibble"      "ggplot2"    
[16] "tidyverse"   "pacman"     

User Defined Functions in R

  • In R you can develop functions to do tasks for you.

  • Below I have written four functions

    • One to plot the data

    • One to calculate Adjusted R2 for each model

    • One to add model estimates to the data

      • Linear

      • Exponential

      • Logarithmic

      • Polynomial

      • Power

    • One to create a plot for each model

Using R Functions

  1. Run function to save it to Global Environment

  2. Use it as you would any R command

Function to Plot Data

Code
```{r}
#|label: Scatterplot Function

sctr_plot <- function(Data_set, X_var, Y_var, X_label, Y_label){
  
  ggplot(data=Data_set, aes(x=X_var,y=Y_var)) +
    geom_point(size=2, color="blue") +
    theme_classic() +
    labs(x=X_label, y=Y_label) +
    theme(legend.position="none")

}
```

Function to compare models

Code
```{r}
#|label: Function to compare non-linear models

mod_quick_compare <- function(X_var,Y_var){
  
  # specify models:
  linmod <- lm(Y_var ~ X_var)
  expmod <- lm(log(Y_var) ~ X_var)
  logmod <- lm(Y_var ~ log(X_var))
  polymod <- lm(Y_var ~ poly(X_var,2))
  powmod <- lm(log(Y_var) ~ log(X_var))
  
  # create vector of model names
  Model <- c("Linear", "Exponential", "Logarithmic", "Polynomial", "Power")

  # store R2 for each model
  R_Squared <- round(c(summary(linmod)$r.squared, 
                       summary(expmod)$r.squared, 
                       summary(logmod)$r.squared,
                       summary(polymod)$r.squared, 
                       summary(powmod)$r.squared),4)
  
  # store Adj R2 for each model
  Adj.R_Squared <- round(c(summary(linmod)$adj.r.squared, 
                           summary(expmod)$adj.r.squared, 
                           summary(logmod)$adj.r.squared,
                           summary(polymod)$adj.r.squared, 
                           summary(powmod)$adj.r.squared),4)
  
  # output summary
  data.frame(Model, R_Squared, Adj.R_Squared) 
}
```

Function to calculate model estimates

Code
```{r}
#|label: Function to add model estimates to data

model_estimates <- function(Data_set, X_var, Y_var){
  Lin <- lm(Y_var ~ X_var, data=Data_set)
  Exp <- lm(log(Y_var) ~ X_var, data=Data_set) 
  Log <- lm(Y_var ~ log(X_var), data=Data_set)
  Poly <- lm(Y_var ~ poly(X_var,2), data=Data_set)
  Pow <- lm(log(Y_var) ~ log(X_var), data=Data_set)
  
  Data_set |>
    mutate(Linear = predict(Lin),
           Exponential = exp(predict(Exp)),
           Logarithmic = predict(Log),
           Power = exp(predict(Pow)),
           Polynomial = predict(Poly))
  
}
```

Function to Plot Four non-linear models

Code
```{r}
#|label: Function to create quad plot

model_plot_quad <- function(Data_set, X_var, Y_var, X_label, Y_label){
  
  Exp <- lm(log(Y_var) ~ X_var, data=Data_set) 
  exp_ar <- round(summary(Exp)$adj.r.squared,4)
  
  Log <- lm(Y_var ~ log(X_var), data=Data_set)
  log_ar <- round(summary(Log)$adj.r.squared,4)
  
  Poly <- lm(Y_var ~ poly(X_var,2), data=Data_set)
  poly_ar <- round(summary(Poly)$adj.r.squared,4)
  
  Pow <- lm(log(Y_var) ~ log(X_var), data=Data_set)
  pow_ar <- round(summary(Pow)$adj.r.squared,4)
  
  
  Data_set <- Data_set |> 
    mutate(Exponential = exp(predict(Exp)),
           Logarithmic = predict(Log),
           Power = exp(predict(Pow)),
           Polynomial = predict(Poly))
  
  
  exp_plot <- Data_set |> 
    ggplot(aes(x=X_var,y=Y_var)) +
    geom_point(size=2, color="blue") +
    geom_line(aes(y=Exponential), size=1) +
    theme_classic() +
    labs(x=X_label, y=Y_label, 
        title="Exponential Model",
        subtitle = expr(paste("Adj.", R^{2}, "=", !!exp_ar)))
  
  
  log_plot <- Data_set |> 
    ggplot(aes(x=X_var,y=Y_var)) +
    geom_point(size=2, color="blue") +
    geom_line(aes(y=Logarithmic), size=1) +
    theme_classic() +
    labs(x=X_label, y=Y_label, 
        title="Logarithmic Model",
        subtitle = expr(paste("Adj.", R^{2}, "=", !!log_ar)))
    
  
  poly_plot <- Data_set |> 
    ggplot(aes(x=X_var,y=Y_var)) +
    geom_point(size=2, color="blue") +
    geom_line(aes(y=Polynomial), size=1) +
    theme_classic() +
    labs(x=X_label, y=Y_label, 
        title="Polynomial Model",
        subtitle = expr(paste("Adj.", R^{2}, "=", !!poly_ar)))
  
  
  pow_plot <- Data_set |> 
    ggplot(aes(x=X_var,y=Y_var)) +
    geom_point(size=2, color="blue") +
    geom_line(aes(y=Power), size=1) +
    theme_classic() +
    labs(x=X_label, y=Y_label, 
        title="Power Model",
        subtitle = expr(paste("Adj.", R^{2}, "=", !!pow_ar)))
  
  grid.arrange(exp_plot, log_plot, poly_plot, pow_plot, ncol=2)
  
}
```

Old Faithful Demo of Functions

Scatter Plot Function Demo

Code
```{r}
#|label: Import and Plot Old Faithful data

old_faithful <- read_csv("data/Old_Faithful.csv", show_col_types = F) |>
  glimpse()

(of_plot <- sctr_plot(Data_set = old_faithful,
                      X_var = old_faithful$Duration_of_eruption,
                      Y_var = old_faithful$Time_to_next_eruption,
                      X_label = "Duration of Eruption",
                      Y_label = "Time to Next Eruption"))
```
Rows: 265
Columns: 2
$ Duration_of_eruption  <dbl> 1.600, 1.667, 1.667, 1.700, 1.733, 1.750, 1.750,…
$ Time_to_next_eruption <dbl> 52, 64, 47, 69, 54, 62, 58, 54, 48, 47, 46, 54, …

Model Comparison Function Demo

Code
```{r}
#|label: Compare Models for Old Faithful Data

# save model fit statistics
of_mod_compare <- mod_quick_compare(old_faithful$Duration_of_eruption,
                                     Y_var = old_faithful$Time_to_next_eruption) |>
  
  write_csv("data/OF_Model_Comparisons.csv")


# output to screen
kable(of_mod_compare)
```
Model R_Squared Adj.R_Squared
Linear 0.7947 0.7939
Exponential 0.7940 0.7932
Logarithmic 0.8014 0.8006
Polynomial 0.8050 0.8035
Power 0.8048 0.8041

Model Estimates Function Demo

Code
```{r}
#|label: Model Estimates for Old Faithful Data

old_faithful_M <- model_estimates(Data_set = old_faithful,
                      X_var = old_faithful$Duration_of_eruption,
                      Y_var = old_faithful$Time_to_next_eruption) |>
  
  write_csv("data/Old_Faithful_Model_Estimates.csv")
```

Model Plot Quad Demo

Code
```{r}
#|label: Non-linear Plots for Old Faithful Data

model_plot_quad(Data_set = old_faithful,
                      X_var = old_faithful$Duration_of_eruption,
                      Y_var = old_faithful$Time_to_next_eruption,
                      X_label = "Duration of Eruption",
                      Y_label = "Time to Next Eruption")
```
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.

BMW Demo of Functions

Import Data and Compare Models

Code
```{r}
#|label: Model Comparisons for BMW data

bmw <- read_csv("data/BMW.csv", show_col_types=F)

bmw_mod_comp <- mod_quick_compare(bmw$Speed, bmw$MPG) |>
  write_csv("data/bmw_model_comparisons.csv")

kable(bmw_mod_comp)
```
Model R_Squared Adj.R_Squared
Linear 0.5557 0.4816
Exponential 0.5627 0.4898
Logarithmic 0.7711 0.7330
Polynomial 0.9724 0.9614
Power 0.8092 0.7775

BMW Model Estimates and Plots

Code
```{r}
#|label: Model Estimates and Plots for BMW data

bmw_M <- model_estimates(Data_set = bmw,
                      X_var = bmw$Speed,
                      Y_var = bmw$MPG) |>
  
  write_csv("data/BMW_with_Estimates.csv")


model_plot_quad(Data_set = bmw,
                      X_var = bmw$Speed,
                      Y_var = bmw$MPG,
                      X_label = "Speed",
                      Y_label = "MPG")
```

When you are done…

Save your changes to this file.(Ctrl + S or Cmd + S)

OPTIONAL: Click Render button to update html file with your changes.

Close R/Studio or Close Posit Cloud Browser.