sp = read.csv("dataset/splines_data.csv") %>%
tibble %>%
column_to_rownames("X.1")
colnames(sp) = c("x","y")
sp
gg = ggplot(sp,aes(y = y, x = x)) +
geom_point()
gg
modNs = lm(y ~ ns(x,df = 8, knots = c(-8,-7,-5,-2,-1,2.5)), data = sp)
modNs %>% summary
##
## Call:
## lm(formula = y ~ ns(x, df = 8, knots = c(-8, -7, -5, -2, -1,
## 2.5)), data = sp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.61567 -0.11654 -0.00176 0.12482 0.58152
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) -3.06472 0.09403 -32.593
## ns(x, df = 8, knots = c(-8, -7, -5, -2, -1, 2.5))1 0.64847 0.11619 5.581
## ns(x, df = 8, knots = c(-8, -7, -5, -2, -1, 2.5))2 0.48634 0.15899 3.059
## ns(x, df = 8, knots = c(-8, -7, -5, -2, -1, 2.5))3 3.49745 0.12254 28.541
## ns(x, df = 8, knots = c(-8, -7, -5, -2, -1, 2.5))4 2.83419 0.11617 24.396
## ns(x, df = 8, knots = c(-8, -7, -5, -2, -1, 2.5))5 2.27996 0.08708 26.182
## ns(x, df = 8, knots = c(-8, -7, -5, -2, -1, 2.5))6 6.04537 0.23321 25.922
## ns(x, df = 8, knots = c(-8, -7, -5, -2, -1, 2.5))7 3.09122 0.07219 42.820
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## ns(x, df = 8, knots = c(-8, -7, -5, -2, -1, 2.5))1 8.02e-08 ***
## ns(x, df = 8, knots = c(-8, -7, -5, -2, -1, 2.5))2 0.00254 **
## ns(x, df = 8, knots = c(-8, -7, -5, -2, -1, 2.5))3 < 2e-16 ***
## ns(x, df = 8, knots = c(-8, -7, -5, -2, -1, 2.5))4 < 2e-16 ***
## ns(x, df = 8, knots = c(-8, -7, -5, -2, -1, 2.5))5 < 2e-16 ***
## ns(x, df = 8, knots = c(-8, -7, -5, -2, -1, 2.5))6 < 2e-16 ***
## ns(x, df = 8, knots = c(-8, -7, -5, -2, -1, 2.5))7 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2056 on 193 degrees of freedom
## Multiple R-squared: 0.9691, Adjusted R-squared: 0.9679
## F-statistic: 863.8 on 7 and 193 DF, p-value: < 2.2e-16
Se con un semplice modello di regressione lineare si ottiene un \(R^2_{adj}\) del 0.83%, mentre utilizzando una spline naturale dove sono stati indicati i nodi si ottiene il 0.96%.
Nel grafico seguente viene raffigurata la spline stimata nel chunk precedente, e in aggiunta una spline cubica con 14 nodi e di conseguenza 18 gradi di libertà.
smooth.spline(y = sp$y,x = sp$x,df = 8,nknots = 5)
## Call:
## smooth.spline(x = sp$x, y = sp$y, df = 8, nknots = 5)
##
## Smoothing Parameter spar= -1.499937 lambda= 5.015439e-15 (33 iterations)
## Equivalent Degrees of Freedom (Df): 7
## Penalized Criterion (RSS): 30.30561
## GCV: 0.1618511
gg +
geom_smooth(method = "lm",
formula = modNs$call[[2]],
aes(color = "Splines naturale"),
fill = "paleturquoise") +
geom_spline(df = 18,
nknots = 14,
aes(color = "18 GdL e 14 nodi"),
size = 1,
alpha = 0.7) +
labs(color = "Tipologia: ") +
scale_color_manual(values = c("Splines naturale" = "paleturquoise",
"18 GdL e 14 nodi" = "orchid"))
Creo una funzione che avrà il compito di generare dei dati da una spline con determinati input.
genSpline <- function(x, knots, degree, theta) {
basis <- bs(x = x,
knots = knots,
degree = degree,
Boundary.knots = c(0,1),
intercept = T)
ySpline <- basis %*% theta
ySpline = ySpline + rnorm(length(x),0,(sd(ySpline)*.1))
dt <- data.table(x, ySpline = as.vector(ySpline))
return(list(dt = dt, basis = basis, knots = knots))
}
Ora genero la spline cubica, prima genero i valori di \(x \sim Unif(-1,1)\) e poi da essi genero i valori di y applicando la funzione sopra definita dando in input i valori x appena generati, i \(\beta\), i nodi fissati e il grado della spline.
x = runif(150,-1,1)
knots = c(-0.7, 0, 0.7)
theta = c(0.7, -0.1, 5, -1.5, 0.1, -0.3, 0.1)
sdata = genSpline(x, knots, 3, theta)
## Warning in bs(x = x, knots = knots, degree = degree, Boundary.knots =
## c(0, : alcuni valori 'x' oltre i nodi di confine potrebbero causare basi mal
## condizionate
gg = ggplot(sdata$dt, aes(y = ySpline, x = x)) +
geom_point()
gg
Di seguito vengono stimate due spline cubiche, nella prima sono fissati i nodi, mentre nella seconda sono fissati i gradi di libertà. Come si può osservare l’\(R^2_{adj}\) è molto elevato in quanto i dati sono stati generati da una spline cubica con le stesse caratteristiche.
modSplKnots = lm(ySpline ~ bs(x,knots = knots), data = sdata$dt)
modSplKnots %>% summary
##
## Call:
## lm(formula = ySpline ~ bs(x, knots = knots), data = sdata$dt)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.6703 -1.9327 0.0902 1.4643 7.7634
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 105.052 2.047 51.31 < 2e-16 ***
## bs(x, knots = knots)1 -19.381 2.919 -6.64 6.06e-10 ***
## bs(x, knots = knots)2 -73.197 2.100 -34.85 < 2e-16 ***
## bs(x, knots = knots)3 -108.058 2.775 -38.94 < 2e-16 ***
## bs(x, knots = knots)4 -104.001 2.397 -43.39 < 2e-16 ***
## bs(x, knots = knots)5 -106.322 2.488 -42.74 < 2e-16 ***
## bs(x, knots = knots)6 -105.707 2.347 -45.05 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.731 on 143 degrees of freedom
## Multiple R-squared: 0.9912, Adjusted R-squared: 0.9908
## F-statistic: 2689 on 6 and 143 DF, p-value: < 2.2e-16
modSplDf = lm(ySpline ~ bs(x,df = (length(knots)+3),intercept = T), data = sdata$dt)
modSplDf %>% summary
##
## Call:
## lm(formula = ySpline ~ bs(x, df = (length(knots) + 3), intercept = T),
## data = sdata$dt)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.5359 -1.8807 0.1463 1.5994 7.8465
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value
## (Intercept) -1.324 1.009 -1.312
## bs(x, df = (length(knots) + 3), intercept = T)1 106.901 1.821 58.715
## bs(x, df = (length(knots) + 3), intercept = T)2 66.558 1.580 42.133
## bs(x, df = (length(knots) + 3), intercept = T)3 14.821 1.907 7.774
## bs(x, df = (length(knots) + 3), intercept = T)4 -2.775 1.547 -1.793
## bs(x, df = (length(knots) + 3), intercept = T)5 2.556 2.070 1.235
## bs(x, df = (length(knots) + 3), intercept = T)6 NA NA NA
## Pr(>|t|)
## (Intercept) 0.192
## bs(x, df = (length(knots) + 3), intercept = T)1 < 2e-16 ***
## bs(x, df = (length(knots) + 3), intercept = T)2 < 2e-16 ***
## bs(x, df = (length(knots) + 3), intercept = T)3 1.33e-12 ***
## bs(x, df = (length(knots) + 3), intercept = T)4 0.075 .
## bs(x, df = (length(knots) + 3), intercept = T)5 0.219
## bs(x, df = (length(knots) + 3), intercept = T)6 NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.728 on 144 degrees of freedom
## Multiple R-squared: 0.9912, Adjusted R-squared: 0.9909
## F-statistic: 3233 on 5 and 144 DF, p-value: < 2.2e-16
Le due spline sono molto simili ma se rappresentate nel grafico con i
dati, esse si discostano molto con i dati. Questo errore potrebbe essere
dato da un utlizzo errato dell’intercetta all’interno della funzione
bs(). Nel successivo capitolo viene stimata nuovamente la
spline con una nuova funzione, più specifica e meno generale.
mutate_formula4ggplot = function(grafico, modello)
{
xModificato = str_replace(as.character(modello$call[[2]]),
as.character(grafico$mapping$x[[2]]),
"y")
yModificato = str_replace(xModificato,
as.character(grafico$mapping$y[[2]]),
"x")
newFormula = sprintf("%s ~ %s", yModificato[2], yModificato[3]) %>%
as.formula
return(newFormula)
}
gg +
geom_smooth(method = "lm",
aes(color = "Nodi fissati"),
fill = "paleturquoise",
alpha = 0.15,
formula = modSplKnots$call[[2]]) +
geom_smooth(method = "lm",
aes(color = "GdL fissati"),
fill = "orchid",
alpha = 0.15,
formula = modSplDf$call[[2]]) +
labs(color = "Tipologia: ") +
scale_color_manual(values = c("Nodi fissati" = "paleturquoise",
"GdL fissati" = "orchid"))
## Warning: Computation failed in `stat_smooth()`:
## oggetto 'ySpline' non trovato
## Computation failed in `stat_smooth()`:
## oggetto 'ySpline' non trovato
Utilizzando una spline naturale cubica il problema persiste. Invece, utilizzando le spline di lisaciamento, esse si adattano graficamente ai dati.
modNsCub = lm(ySpline ~ ns(x,df = 3), data = sdata$dt)
modNsCub %>% summary
##
## Call:
## lm(formula = ySpline ~ ns(x, df = 3), data = sdata$dt)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.0317 -1.9379 0.0502 1.8036 7.3876
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 100.3209 0.8799 114.01 <2e-16 ***
## ns(x, df = 3)1 -74.1872 1.0031 -73.96 <2e-16 ***
## ns(x, df = 3)2 -178.3537 2.1628 -82.47 <2e-16 ***
## ns(x, df = 3)3 -50.3581 0.7778 -64.74 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.888 on 146 degrees of freedom
## Multiple R-squared: 0.99, Adjusted R-squared: 0.9898
## F-statistic: 4803 on 3 and 146 DF, p-value: < 2.2e-16
modSmoothSpline = lapply(c(0,0.5,1),
function(l) smooth.spline(sdata$dt$x,
sdata$dt$ySpline,
lambda = l))
gg = gg +
geom_smooth(method = "lm",
aes(color = "Spline naturale cubica"),
formula = modNsCub$call[[2]]) +
geom_smooth(aes(y = modSmoothSpline[[1]]$y,
x = modSmoothSpline[[1]]$x,
color = "Lambda = 0"),
se = T) +
geom_smooth(aes(y = modSmoothSpline[[2]]$y,
x = modSmoothSpline[[2]]$x,
color = "Lambda = 0.5"),
se = T) +
geom_smooth(aes(y = modSmoothSpline[[3]]$y,
x = modSmoothSpline[[3]]$x,
color = "Lambda = 1"),
se = T) +
labs(color = "Tipologia: ") +
scale_color_manual(values = c("Spline naturale cubica" = "paleturquoise",
"Lambda 0" = "orchid",
"Lambda 0.5" = "sky blue",
"Lambda 1" = "orange"))
gg
## Warning: Computation failed in `stat_smooth()`:
## oggetto 'ySpline' non trovato
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
La nuova funzione lavora diversamente dalla precedente. In questo caso applica alle \(x\) la formula, utilizzando le basi e i nodi dati in input. La funzione è fatta per lavorare solo su spline cubiche ma con un qualsiasi numero di nodi. Successivamente viene aggiunta anche la componente aleatoria \(\epsilon\).
generazioneSpline = function(x, knots, theta)
{
gdl = min((length(knots)+4), length(theta))
y = theta[1] +
theta[2] * x +
theta [3] * x**2 +
theta [4] * x** 3 +
transmute(sapply(c(1:length(knots)),
function(i) map_dbl(.x = x,
.f = ~ theta [4+i] * (. - knots[i])** 3)) %>% tibble %>% rowwise,
y = sum(c_across()))
y = y$y + rnorm(length(x),0,(sd(y$y)*.1))
return(data.frame(x,y))
}
df = generazioneSpline(x,knots,theta)
gg = ggplot(df, aes(x = x, y = y)) +
geom_point()
gg
Di seguito vengono stimate due spline cubiche, nella prima sono fissati i nodi, mentre nella seconda sono fissati i gradi di libertà. Come si può osservare l’\(R^2_{adj}\) è molto elevato in quanto i dati sono stati generati da una spline cubica con le stesse caratteristiche.
modSplKnots = lm(y ~ bs(x,knots = knots), data = df)
modSplKnots %>% summary
##
## Call:
## lm(formula = y ~ bs(x, knots = knots), data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5744 -0.1075 0.0052 0.1197 0.4524
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.1416 0.1253 56.993 < 2e-16 ***
## bs(x, knots = knots)1 -1.4458 0.1787 -8.093 2.29e-13 ***
## bs(x, knots = knots)2 -5.4387 0.1286 -42.306 < 2e-16 ***
## bs(x, knots = knots)3 -7.1747 0.1699 -42.238 < 2e-16 ***
## bs(x, knots = knots)4 -5.2954 0.1467 -36.093 < 2e-16 ***
## bs(x, knots = knots)5 -3.3450 0.1523 -21.968 < 2e-16 ***
## bs(x, knots = knots)6 -2.8452 0.1436 -19.810 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1671 on 143 degrees of freedom
## Multiple R-squared: 0.989, Adjusted R-squared: 0.9886
## F-statistic: 2147 on 6 and 143 DF, p-value: < 2.2e-16
modSplDf = lm(y ~ bs(x,df = (length(knots)+3),intercept = T), data = df)
modSplDf %>% summary
##
## Call:
## lm(formula = y ~ bs(x, df = (length(knots) + 3), intercept = T),
## data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.58236 -0.10553 0.00336 0.11894 0.46087
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value
## (Intercept) 4.32700 0.06178 70.033
## bs(x, df = (length(knots) + 3), intercept = T)1 2.81825 0.11148 25.281
## bs(x, df = (length(knots) + 3), intercept = T)2 -0.14591 0.09672 -1.509
## bs(x, df = (length(knots) + 3), intercept = T)3 -3.83143 0.11674 -32.820
## bs(x, df = (length(knots) + 3), intercept = T)4 -3.78791 0.09475 -39.979
## bs(x, df = (length(knots) + 3), intercept = T)5 -1.43974 0.12677 -11.358
## bs(x, df = (length(knots) + 3), intercept = T)6 NA NA NA
## Pr(>|t|)
## (Intercept) <2e-16 ***
## bs(x, df = (length(knots) + 3), intercept = T)1 <2e-16 ***
## bs(x, df = (length(knots) + 3), intercept = T)2 0.134
## bs(x, df = (length(knots) + 3), intercept = T)3 <2e-16 ***
## bs(x, df = (length(knots) + 3), intercept = T)4 <2e-16 ***
## bs(x, df = (length(knots) + 3), intercept = T)5 <2e-16 ***
## bs(x, df = (length(knots) + 3), intercept = T)6 NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.167 on 144 degrees of freedom
## Multiple R-squared: 0.989, Adjusted R-squared: 0.9886
## F-statistic: 2579 on 5 and 144 DF, p-value: < 2.2e-16
Con la nuova funzione per generare i dati, le spline si adattano perfettamente ai dati.
gg +
geom_smooth(method = "lm",
formula = modSplKnots$call[[2]]) +
geom_smooth(method = "lm",
formula = modSplDf$call[[2]])
## Warning in predict.lm(model, newdata = new_data_frame(list(x = xseq)), se.fit =
## se, : previsione da una stima non a rango pieno può essere non corretta
modNsCub = lm(y ~ ns(x,df = 3), data = df)
modNsCub %>% summary
##
## Call:
## lm(formula = y ~ ns(x, df = 3), data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.61196 -0.12893 0.01094 0.12601 0.60647
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.69747 0.05745 116.58 <2e-16 ***
## ns(x, df = 3)1 -4.18028 0.06549 -63.83 <2e-16 ***
## ns(x, df = 3)2 -9.18195 0.14120 -65.03 <2e-16 ***
## ns(x, df = 3)3 1.11854 0.05078 22.03 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1886 on 146 degrees of freedom
## Multiple R-squared: 0.9857, Adjusted R-squared: 0.9854
## F-statistic: 3363 on 3 and 146 DF, p-value: < 2.2e-16
modSmoothSpline = lapply(c(0,0.5,1),
function(l) smooth.spline(df$x,
df$y,
lambda = l))
gg = gg +
geom_smooth(method = "lm",
formula = modNsCub$call[[2]]) +
geom_smooth(aes(y = modSmoothSpline[[1]]$y, x = modSmoothSpline[[1]]$x),
method = "lm",
se = T) +
geom_smooth(method = "lm",
aes(y = modSmoothSpline[[2]]$y, x = modSmoothSpline[[2]]$x),
se = T) +
geom_smooth(method = "lm",
aes(y = modSmoothSpline[[3]]$y, x = modSmoothSpline[[3]]$x),
se = T)
gg
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
gg = ggplot(trees, aes(y = Volume))
gg + geom_point(aes(x = Girth))
gg + geom_point(aes(x = Height))
Provo a fare un semplice modello di regressione lineare multipla e osservo che già così ottengo un buon risultato con \(R_{adj}^2\) del 94%.
modGam = list()
modGam[[1]] = gam(Volume ~ Girth + Height, data = trees)
modGam[[1]] %>% summary
##
## Family: gaussian
## Link function: identity
##
## Formula:
## Volume ~ Girth + Height
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -57.9877 8.6382 -6.713 2.75e-07 ***
## Girth 4.7082 0.2643 17.816 < 2e-16 ***
## Height 0.3393 0.1302 2.607 0.0145 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## R-sq.(adj) = 0.944 Deviance explained = 94.8%
## GCV = 16.683 Scale est. = 15.069 n = 31
ggplot(trees,(aes(y = Volume, x = Girth,z = Height))) +
geom_point() +
geom_smooth(method ="gam",
formula = y ~ ns(x,df = 3))
Osservando il correlogramma si può notare la forte correlazione tra
Volume e Girth, ma allo stesso tempo una forte
correlazione tra Girth e Height.
ggcorrplot::ggcorrplot(corr = cor(trees))
Provo, allora, a includere la correlazione delle variabili indipendenti.
In alternativa si sarebbe potuto ridurre le dimensioni tramite analisi
delle componenti principali. Il modello è migliorato ulteriormente a
quello precedente.
modGam[[2]] = gam(Volume ~ ns(Girth,knots = 1) + ns(Height,df = 3) + ti(Girth,Height) , data = trees)
modGam[[2]] %>% summary
##
## Family: gaussian
## Link function: identity
##
## Formula:
## Volume ~ ns(Girth, knots = 1) + ns(Height, df = 3) + ti(Girth,
## Height)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 21.8922 1.8398 11.899 8.61e-12 ***
## ns(Girth, knots = 1)1 7.7421 0.8499 9.109 2.03e-09 ***
## ns(Girth, knots = 1)2 40.5582 1.8520 21.900 < 2e-16 ***
## ns(Height, df = 3)1 9.3073 2.2712 4.098 0.000385 ***
## ns(Height, df = 3)2 13.3017 4.6322 2.872 0.008203 **
## ns(Height, df = 3)3 7.9131 2.1323 3.711 0.001036 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## ti(Girth,Height) 1 1 24.87 3.98e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Rank: 21/22
## R-sq.(adj) = 0.972 Deviance explained = 97.7%
## GCV = 9.2299 Scale est. = 7.4435 n = 31
# plot.gam(modGam[[2]])
# ggplot(predict.gam(modGam[[2]]), aes(y = f1, x = x2, z = fit)) + geom_raster(fill = fit)
Nella tabella seguente sono rappresentati gli \(R_{adj}^2\) di diversi modelli. Le righe
corrispondono ai gradi di libertà per la spline della variabile
Height, mentre per le colonne la variabile
Girth.
gradoMax = 6
modelli = lapply(X = c(1:gradoMax), function(grGir) lapply(X = c(1:gradoMax),
function(grHei) gam(Volume ~ ns(Girth,df = grGir) + ns(Height,df = grHei) + ti(Girth,Height), data = trees)))
rDf = map_dfc(c(1:gradoMax),function(grGir)
sapply(c(1:gradoMax),function(grHei)
summary(modelli[[grGir]][[grHei]])$r.sq))
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
names(rDf) = paste(c(1:gradoMax),"GdL of Girth")
rDf
do.call(anova.gam,c(modelli[[1]],test = "F"))
Nella tabella seguente è stata applicata l’analisi della varianza
(ANOVA), utilizzando la statistica F, ai modelli precedentemente
generati. Il modello migliore utilizzando l’ANOVA risulta essere il
modello con 1 GdL per la spline della variabile Girth e 3
GdL per la variabile Volume.
anovaDf = map_dfc(c(1:gradoMax),function(var) do.call(anova,c(modelli[[var]], test = "F"))$F)
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
names(anovaDf) = paste(c(1:gradoMax),"GdL of Girth")
anovaDf