library(car)
## Loading required package: carData
# Move to the author's project folder only when it exists, so the script no
# longer aborts with "cannot change working directory" on other machines.
# Nothing in this section reads or writes local files.
project_dir <- "C:/Users/lhomm/OneDrive/Documents/R"
if (dir.exists(project_dir)) {
  setwd(project_dir)
}

# Read the four-column data set from the course website and name its columns.
data_url <- "https://users.stat.ufl.edu/~rrandles/sta4210/Rclassnotes/data/textdatasets/KutnerData/Chapter%2022%20Data%20Sets/CH22PR11.txt"
Dat <- read.table(
  data_url,
  col.names = c("Days", "Fitness", "Count", "Age")
)

# Expose the model variables as ordinary objects instead of attach()-ing the
# data frame: attach(Dat) left an integer `Fitness` on the search path that
# was silently masked by the factor created here. `Count` and `Age` are no
# longer on the search path; refer to them as Dat$Count / Dat$Age.
Days <- Dat$Days
Fitness <- factor(
  Dat$Fitness,
  labels = c("Below Average", "About Average", "Above Average")
)

# Age centred at its sample mean.
Res.Age <- Dat$Age - mean(Dat$Age)

# Additive model: common Res.Age slope, separate intercept per Fitness level.
Lm1 <- lm(Days ~ Res.Age + Fitness)

# Show the residuals of the additive model, one named value per observation.
print(residuals(Lm1))
##             1             2             3             4             5 
##  2.069714e-01 -4.502794e-01 -3.647770e-01 -2.324352e-01  8.999065e-01 
##             6             7             8             9            10 
##  1.177507e-01 -5.439582e-01  3.668211e-01  1.361341e-01  7.397358e-05 
##            11            12            13            14            15 
## -6.690716e-01 -9.300368e-01  3.190336e-01 -3.813382e-01  4.201491e-01 
##            16            17            18            19            20 
##  5.837172e-01 -1.634941e-01  6.848327e-01  7.938041e-01 -2.099143e-01 
##            21            22            23            24 
##  2.294924e-01  2.800501e-01 -1.038910e+00 -5.452268e-02
# Split residuals and fitted values by the Fitness group stored in the model
# frame rather than by hard-coded row ranges (1:8, 9:18, 19:24), which are
# only right while the rows stay sorted by group with exactly those sizes.
Lm1Group <- Lm1$model$Fitness
Lm1ResByGroup <- split(residuals(Lm1), Lm1Group)
Lm1FitByGroup <- split(fitted(Lm1), Lm1Group)

# Per-group vectors, in factor-level order (kept under their original names).
Lm1Res1 <- Lm1ResByGroup[[1]]
Lm1Res2 <- Lm1ResByGroup[[2]]
Lm1Res3 <- Lm1ResByGroup[[3]]

Lm1Fit1 <- Lm1FitByGroup[[1]]
Lm1Fit2 <- Lm1FitByGroup[[2]]
Lm1Fit3 <- Lm1FitByGroup[[3]]

# One residuals-vs-fitted plot per group: fitted values on the x-axis and
# residuals on the y-axis (the axes were previously swapped), with a zero
# reference line. plot() returns NULL, so the former PlotRF1-3 assignments
# stored nothing and are dropped.
for (group in names(Lm1ResByGroup)) {
  plot(
    Lm1FitByGroup[[group]], Lm1ResByGroup[[group]],
    xlab = "Fitted values",
    ylab = "Residuals",
    main = paste("Residuals vs. fitted:", group)
  )
  abline(h = 0, lty = 2)
}

# Standardized residuals of the additive model.
StandardLM1 <- rstandard(model = Lm1)

# Normal probability plot with the data on the x-axis, plus its reference line.
qqnorm(y = StandardLM1, datax = TRUE)
qqline(y = StandardLM1, datax = TRUE)

# Shapiro-Wilk test of normality on the standardized residuals.
print(shapiro.test(StandardLM1))
## 
##  Shapiro-Wilk normality test
## 
## data:  StandardLM1
## W = 0.98277, p-value = 0.9409
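### The Shapiro-Wilk statistic is W = 0.98277 (close to 1, analogous to a high correlation between the ordered residuals and their expected values under normality) with p-value = 0.9409, so we fail to reject normality and conclude that the residuals are consistent with a normal distribution. ###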
### Y_{ij} = \mu + \tau_1 I_{ij1} + \tau_2 I_{ij2} + \beta_1 X^*_{ij} + \beta_2 I_{ij1} X^*_{ij} + \beta_3 I_{ij2} X^*_{ij} + \epsilon_{ij} ###
### H_0: \beta_2 = \beta_3 = 0 \quad vs. \quad H_1: \exists\, i \in \{2, 3\} : \beta_i \neq 0 ###

# Interaction model: Res.Age slope allowed to differ across Fitness levels.
Lm2 <- lm(formula = Days ~ Res.Age * Fitness)
print(Lm2)
## 
## Call:
## lm(formula = Days ~ Res.Age * Fitness)
## 
## Coefficients:
##                  (Intercept)                       Res.Age  
##                     34.87779                       1.19510  
##         FitnessAbout Average          FitnessAbove Average  
##                     -1.79582                      -8.70822  
## Res.Age:FitnessAbout Average  Res.Age:FitnessAbove Average  
##                     -0.05017                      -0.05821
# Type II ANOVA table for the additive model.
print(car::Anova(Lm1, type = "II"))
## Anova Table (Type II tests)
## 
## Response: Days
##           Sum Sq Df F value    Pr(>F)    
## Res.Age   409.83  1 1329.39 < 2.2e-16 ***
## Fitness   246.08  2  399.11 < 2.2e-16 ***
## Residuals   6.17 20                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Type II ANOVA table for the interaction model; the Res.Age:Fitness row
# carries the F test for the interaction terms.
print(car::Anova(Lm2, type = "II"))
## Anova Table (Type II tests)
## 
## Response: Days
##                 Sum Sq Df   F value    Pr(>F)    
## Res.Age         409.83  1 1241.1044 < 2.2e-16 ***
## Fitness         246.08  2  372.6086 2.257e-15 ***
## Res.Age:Fitness   0.22  2    0.3359    0.7191    
## Residuals         5.94 18                        
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Decision rule: upper 5% critical value of the F distribution for comparing
# the additive model (Lm1) with the interaction model (Lm2). The degrees of
# freedom come from the fitted models instead of being typed in as 2 and 18,
# so they stay correct if the data or the models change.
# (The misspelled name is kept so existing references keep working.)
Decsion_Rule <- qf(
  0.05,
  df1 = df.residual(Lm1) - df.residual(Lm2),
  df2 = df.residual(Lm2),
  lower.tail = FALSE
)
Decsion_Rule
## [1] 3.554557
### Since F* = 0.3359 is smaller than the critical value 3.5546 (p-value = 0.7191 > 0.05), we fail to reject H0: the interaction terms are not needed. ###
### No, the general linear model assumes a linear relationship and does not work for non-linear models. ###
# Coefficient table and fit statistics for the interaction model.
print(summary(object = Lm2))
## 
## Call:
## lm(formula = Days ~ Res.Age * Fitness)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.0379 -0.3681  0.0652  0.3090  0.8252 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  34.87779    0.23816 146.445  < 2e-16 ***
## Res.Age                       1.19510    0.04757  25.123 1.82e-15 ***
## FitnessAbout Average         -1.79582    0.30563  -5.876 1.45e-05 ***
## FitnessAbove Average         -8.70822    0.35766 -24.348 3.15e-15 ***
## Res.Age:FitnessAbout Average -0.05017    0.07980  -0.629    0.537    
## Res.Age:FitnessAbove Average -0.05821    0.08186  -0.711    0.486    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5746 on 18 degrees of freedom
## Multiple R-squared:  0.9945, Adjusted R-squared:  0.993 
## F-statistic: 655.4 on 5 and 18 DF,  p-value: < 2.2e-16
### The interaction terms are not significant, so we will refit the model without them. All of the other terms are significant. ###

# Coefficient table and fit statistics for the additive model.
print(summary(object = Lm1))
## 
## Call:
## lm(formula = Days ~ Res.Age + Fitness)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.03891 -0.36892  0.05891  0.33098  0.89991 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          34.95046    0.21338 163.794  < 2e-16 ***
## Res.Age               1.16729    0.03201  36.461  < 2e-16 ***
## FitnessAbout Average -1.84738    0.28694  -6.438  2.8e-06 ***
## FitnessAbove Average -8.72289    0.33296 -26.198  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5552 on 20 degrees of freedom
## Multiple R-squared:  0.9943, Adjusted R-squared:  0.9935 
## F-statistic:  1170 on 3 and 20 DF,  p-value: < 2.2e-16
### All of the terms are significant. ###