setwd("C:/Users/lhomm/OneDrive/Documents/R")
library(plyr)
library(ggplot2)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble 3.1.6 v dplyr 1.0.7
## v tidyr 1.1.4 v stringr 1.4.0
## v readr 2.1.1 v forcats 0.5.1
## v purrr 0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::arrange() masks plyr::arrange()
## x purrr::compact() masks plyr::compact()
## x dplyr::count() masks plyr::count()
## x dplyr::failwith() masks plyr::failwith()
## x dplyr::filter() masks stats::filter()
## x dplyr::id() masks plyr::id()
## x dplyr::lag() masks stats::lag()
## x dplyr::mutate() masks plyr::mutate()
## x dplyr::rename() masks plyr::rename()
## x dplyr::summarise() masks plyr::summarise()
## x dplyr::summarize() masks plyr::summarize()
library(PupillometryR)
## Loading required package: rlang
##
## Attaching package: 'rlang'
## The following objects are masked from 'package:purrr':
##
## %@%, as_function, flatten, flatten_chr, flatten_dbl, flatten_int,
## flatten_lgl, flatten_raw, invoke, list_along, modify, prepend,
## splice
library(Rmisc)
## Loading required package: lattice
Dat<-read.table("http://users.stat.ufl.edu/~rrandles/sta4210/Rclassnotes/data/textdatasets/KutnerData/Chapter%2019%20Data%20Sets/CH19PR10.txt")
colnames(Dat) <- c("Offer", "Age", "Gender", "Count")
Dat$Age<- factor(Dat$Age)
Dat$Gender<- factor(Dat$Gender)
Interaction<- aov(Offer~ Age*Gender, data=Dat)
summary(Interaction)
## Df Sum Sq Mean Sq F value Pr(>F)
## Age 2 316.7 158.36 66.291 9.79e-12 ***
## Gender 1 5.4 5.44 2.279 0.142
## Age:Gender 2 5.1 2.53 1.058 0.360
## Residuals 30 71.7 2.39
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Interaction$fitted.values
## 1 2 3 4 5 6 7 8
## 21.66667 21.66667 21.66667 21.66667 21.66667 21.66667 21.33333 21.33333
## 9 10 11 12 13 14 15 16
## 21.33333 21.33333 21.33333 21.33333 27.83333 27.83333 27.83333 27.83333
## 17 18 19 20 21 22 23 24
## 27.83333 27.83333 27.66667 27.66667 27.66667 27.66667 27.66667 27.66667
## 25 26 27 28 29 30 31 32
## 22.33333 22.33333 22.33333 22.33333 22.33333 22.33333 20.50000 20.50000
## 33 34 35 36
## 20.50000 20.50000 20.50000 20.50000
Interaction$residuals
## 1 2 3 4 5 6 7
## -0.6666667 1.3333333 -2.6666667 0.3333333 0.3333333 1.3333333 -0.3333333
## 8 9 10 11 12 13 14
## 0.6666667 -1.3333333 -0.3333333 -2.3333333 3.6666667 2.1666667 1.1666667
## 15 16 17 18 19 20 21
## -1.8333333 0.1666667 -0.8333333 -0.8333333 -1.6666667 1.3333333 -0.6666667
## 22 23 24 25 26 27 28
## 0.3333333 -0.6666667 1.3333333 2.6666667 -0.3333333 0.6666667 -1.3333333
## 29 30 31 32 33 34 35
## -0.3333333 -1.3333333 2.5000000 -1.5000000 -0.5000000 0.5000000 -0.5000000
## 36
## -0.5000000
sum(Interaction$residuals)
## [1] 2.498002e-16
### The residuals All sum to 2.498002X10^116 which is essentially zero. ###
sum(Interaction$residuals[1:6])
## [1] -2.553513e-15
sum(Interaction$residuals[7:12])
## [1] 3.996803e-15
sum(Interaction$residuals[13:18])
## [1] 2.914335e-15
sum(Interaction$residuals[19:24])
## [1] -2.88658e-15
sum(Interaction$residuals[25:30])
## [1] -4.440892e-16
sum(Interaction$residuals[31:36])
## [1] -7.771561e-16
### The residuals for each treatment again essentially sum to zero ###
with(Dat,interaction.plot(Gender,Age,Offer))

with(Dat,interaction.plot(Age,Gender,Offer))

sumdat <- summarySE(Dat, measurevar = "Offer",
groupvars=c("Age", "Gender"))
sumdat
## Age Gender N Offer sd se ci
## 1 1 1 6 21.66667 1.505545 0.6146363 1.579973
## 2 1 2 6 21.33333 2.065591 0.8432740 2.167705
## 3 2 1 6 27.83333 1.471960 0.6009252 1.544727
## 4 2 2 6 27.66667 1.211060 0.4944132 1.270930
## 5 3 1 6 22.33333 1.505545 0.6146363 1.579973
## 6 3 2 6 20.50000 1.378405 0.5627314 1.446547
ggplot(Dat, aes(x = Age, y = Offer, fill = Gender)) +
geom_flat_violin(position = position_nudge(x = .1, y = 0), adjust = 0.75, trim = FALSE, alpha = .5, colour = NA) +
geom_point(aes(x = as.numeric(Age)-.1, y = Offer, colour = Gender),position = position_jitter(width = .05), size = 1, shape = 1) +
geom_boxplot(outlier.shape = NA, alpha = .5, width = .1, colour = "black")+
geom_line(data = sumdat, aes(x = as.numeric(Age)+.1, y = Offer, group = Gender, colour = Gender), linetype = 2)+
geom_point(data = sumdat, aes(x = as.numeric(Age)+.1, y = Offer, group = Gender, colour = Gender), shape = 18) +
geom_errorbar(data = sumdat, aes(x = as.numeric(Age)+.1, y = Offer, group = Gender, colour = Gender, ymin = Offer-se, ymax = Offer+se), width = .05)+
scale_colour_brewer(palette = "Dark2")+
scale_fill_brewer(palette = "Dark2")+
ggtitle("Aligned and Inreaction Plot")

source("https://raw.githubusercontent.com/athienit/STA4210material/main/check.R")
check(Interaction,tests=TRUE)
## Loading required package: lawstat
## Loading required package: car
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:lawstat':
##
## levene.test
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:purrr':
##
## some

## $Independence
## $Independence[[1]]
##
## Runs Test - Two sided
##
## data: re
## Standardized Runs Statistic = 1.3528, p-value = 0.1761
##
##
## $Independence[[2]]
## lag Autocorrelation D-W Statistic p-value
## 1 -0.1713178 2.332946 0.844
## Alternative hypothesis: rho != 0
##
##
## $Normality
##
## Shapiro-Wilk normality test
##
## data: re
## W = 0.97216, p-value = 0.4875
##
##
## [[3]]
## [1] "Constant Variance only valid if data are in groups"
##
## $ConstantVar
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 2 0.2081 0.8132
## 33
StandardDat <- rstandard(Interaction)
Cortest <- qqnorm(StandardDat, plot = FALSE)
Corelation<- cor(Cortest$x, Cortest$y)
Corelation
## [1] 0.9861968
### The results from the Shapiro Wilk test and the Q-Q plot suggest that the assumption of normality has not been violated. ###
source("https://raw.githubusercontent.com/athienit/STA4210material/main/check.R")
check(Interaction,tests=TRUE)

## $Independence
## $Independence[[1]]
##
## Runs Test - Two sided
##
## data: re
## Standardized Runs Statistic = 1.3528, p-value = 0.1761
##
##
## $Independence[[2]]
## lag Autocorrelation D-W Statistic p-value
## 1 -0.1713178 2.332946 0.872
## Alternative hypothesis: rho != 0
##
##
## $Normality
##
## Shapiro-Wilk normality test
##
## data: re
## W = 0.97216, p-value = 0.4875
##
##
## [[3]]
## [1] "Constant Variance only valid if data are in groups"
##
## $ConstantVar
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 2 0.2081 0.8132
## 33
### The results from the Runs test, Durbin Watson test and the residual sequence plots suggest that the assumption of independence was not violated. ###
with(Dat, interaction.plot(Dat$Age, Dat$Gender, Dat$Offer, xlab = "Age",
ylab = "Average Offer in 100 Dollars", main = "Estimated Treatment Means Plot"))

Interaction<- aov(Offer~ Age*Gender, data=Dat)
summary(Interaction)
## Df Sum Sq Mean Sq F value Pr(>F)
## Age 2 316.7 158.36 66.291 9.79e-12 ***
## Gender 1 5.4 5.44 2.279 0.142
## Age:Gender 2 5.1 2.53 1.058 0.360
## Residuals 30 71.7 2.39
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Interaction2<-anova(lm(Offer ~ Age + Gender + Age*Gender, data = Dat))
Interaction2
## Analysis of Variance Table
##
## Response: Offer
## Df Sum Sq Mean Sq F value Pr(>F)
## Age 2 316.72 158.361 66.2907 9.789e-12 ***
## Gender 1 5.44 5.444 2.2791 0.1416
## Age:Gender 2 5.06 2.528 1.0581 0.3597
## Residuals 30 71.67 2.389
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
AgeSStrt <- 316.72
AgeSStrt
## [1] 316.72
GenderSStrt <- 5.44
GenderSStrt
## [1] 5.44
AgeXGenderSStrt <- 5.06
AgeXGenderSStrt
## [1] 5.06
SSE <- 71.67
SSE
## [1] 71.67
SST <- AgeSStrt + GenderSStrt + AgeXGenderSStrt + SSE
SST
## [1] 398.89
AGERsqd <- AgeSStrt / SST
AGERsqd
## [1] 0.7940034
GenderRsqd <- GenderSStrt / SST
GenderRsqd
## [1] 0.01363785
AgeXGenderRsqd <- AgeXGenderSStrt / SST
AgeXGenderRsqd
## [1] 0.0126852
### It appears that age explains the most amount of the Varience as it has the R^2 value at .794 while Gender and Age*Gender have R^2 values of just .01363785 and .0126852 respectivly. In other words the age group someone is in (Young, Middle, Elderly ) explains 79.4% of the variability of the cash offer that said person receives while Gender and Age*Gender each only explain about 1% of the varience in cash offer a person receives for their car. This is also suggest by the fact that Age has the larges SStrt, Mstrt, F-Value and is the only variable with a P-val less than .1/.05/.01 and is thus the only variable that has a significant effect on the Cash offer that a person received for their Car. ###
Decsion_Rule <- qf(0.05, 2, 15, lower.tail=FALSE) ### Decision Rule ###
Decsion_Rule
## [1] 3.68232
### Ho: There is no interaction between Age and Gender in the model. ####
### H1: There is interaction between Age and Gender in the model. ###
Interaction
## Call:
## aov(formula = Offer ~ Age * Gender, data = Dat)
##
## Terms:
## Age Gender Age:Gender Residuals
## Sum of Squares 316.7222 5.4444 5.0556 71.6667
## Deg. of Freedom 2 1 2 30
##
## Residual standard error: 1.545603
## Estimated effects may be unbalanced
Interaction2
## Analysis of Variance Table
##
## Response: Offer
## Df Sum Sq Mean Sq F value Pr(>F)
## Age 2 316.72 158.361 66.2907 9.789e-12 ***
## Gender 1 5.44 5.444 2.2791 0.1416
## Age:Gender 2 5.06 2.528 1.0581 0.3597
## Residuals 30 71.67 2.389
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
### Because only Age is significant we can conclude there is no interaction. We will then try an additive model. ###
Addative1 <- aov(Offer ~ Age + Gender, data = Dat)
Addative1
## Call:
## aov(formula = Offer ~ Age + Gender, data = Dat)
##
## Terms:
## Age Gender Residuals
## Sum of Squares 316.7222 5.4444 76.7222
## Deg. of Freedom 2 1 32
##
## Residual standard error: 1.548409
## Estimated effects may be unbalanced
Addative2 <-anova(lm(Offer ~ Age + Gender, data = Dat))
Addative2
## Analysis of Variance Table
##
## Response: Offer
## Df Sum Sq Mean Sq F value Pr(>F)
## Age 2 316.72 158.361 66.0507 4.371e-12 ***
## Gender 1 5.44 5.444 2.2708 0.1416
## Residuals 32 76.72 2.398
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
### By removing the interaction term and switching to an additive model still Gender is not significant. So we will try using just Age. ###
Just_Age1 <- aov(Offer ~ Age, data = Dat)
Just_Age1
## Call:
## aov(formula = Offer ~ Age, data = Dat)
##
## Terms:
## Age Residuals
## Sum of Squares 316.7222 82.1667
## Deg. of Freedom 2 33
##
## Residual standard error: 1.577941
## Estimated effects may be unbalanced
Just_Age2 <- anova(lm(Offer ~ Age, data = Dat))
Just_Age2
## Analysis of Variance Table
##
## Response: Offer
## Df Sum Sq Mean Sq F value Pr(>F)
## Age 2 316.72 158.36 63.601 4.769e-12 ***
## Residuals 33 82.17 2.49
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
### It appears that Age is the only variable that is significant. ###
### For the sake of completeness we will also try just Gender. ###
Just_Gender1 <- aov(Offer ~ Gender, data = Dat)
Just_Gender1
## Call:
## aov(formula = Offer ~ Gender, data = Dat)
##
## Terms:
## Gender Residuals
## Sum of Squares 5.4444 393.4444
## Deg. of Freedom 1 34
##
## Residual standard error: 3.401749
## Estimated effects may be unbalanced
Just_Gender2 <-anova(lm(Offer ~ Gender, data = Dat))
Just_Gender2
## Analysis of Variance Table
##
## Response: Offer
## Df Sum Sq Mean Sq F value Pr(>F)
## Gender 1 5.44 5.4444 0.4705 0.4974
## Residuals 34 393.44 11.5719
### The results show that Gender is not significant which is consistent with the previous results. ###
### For the sake of completeness we will also check pair-wise comparisons. ####
Interaction1_Age_Comparison <- TukeyHSD(Interaction, which = c("Age"))
Interaction1_Age_Comparison
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Offer ~ Age * Gender, data = Dat)
##
## $Age
## diff lwr upr p adj
## 2-1 6.25000000 4.694439 7.805561 0.0000000
## 3-1 -0.08333333 -1.638894 1.472228 0.9904333
## 3-2 -6.33333333 -7.888894 -4.777772 0.0000000
### The Comparisons for the interaction model for Age show that difference between groups 2 and 1 and 3 and 2 are significant but 3 and 1 are not. ###
Interaction1_Gender_Comparison <- TukeyHSD(Interaction, which = c("Gender"))
Interaction1_Gender_Comparison
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Offer ~ Age * Gender, data = Dat)
##
## $Gender
## diff lwr upr p adj
## 2-1 -0.7777778 -1.829959 0.2744031 0.141592
### The Comparisons for the interaction model show that difference between groups 1 and 2 are not signifigant. ###
Interaction1_AgeXGender_Comparison <- TukeyHSD(Interaction, which = c("Age:Gender"))
Interaction1_AgeXGender_Comparison
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Offer ~ Age * Gender, data = Dat)
##
## $`Age:Gender`
## diff lwr upr p adj
## 2:1-1:1 6.1666667 3.452487 8.8808465 0.0000016
## 3:1-1:1 0.6666667 -2.047513 3.3808465 0.9741078
## 1:2-1:1 -0.3333333 -3.047513 2.3808465 0.9989505
## 2:2-1:1 6.0000000 3.285820 8.7141799 0.0000027
## 3:2-1:1 -1.1666667 -3.880847 1.5475132 0.7787022
## 3:1-2:1 -5.5000000 -8.214180 -2.7858201 0.0000123
## 1:2-2:1 -6.5000000 -9.214180 -3.7858201 0.0000006
## 2:2-2:1 -0.1666667 -2.880847 2.5475132 0.9999649
## 3:2-2:1 -7.3333333 -10.047513 -4.6191535 0.0000001
## 1:2-3:1 -1.0000000 -3.714180 1.7141799 0.8689643
## 2:2-3:1 5.3333333 2.619153 8.0475132 0.0000206
## 3:2-3:1 -1.8333333 -4.547513 0.8808465 0.3372352
## 2:2-1:2 6.3333333 3.619153 9.0475132 0.0000010
## 3:2-1:2 -0.8333333 -3.547513 1.8808465 0.9344418
## 3:2-2:2 -7.1666667 -9.880847 -4.4524868 0.0000001
### The Comparisons for the interaction model for Age*Gender are complicated. For in short some comparisons for some interactions are signifigant while other are not. ###
Interaction1_All_Comparison <- TukeyHSD(Interaction)
Interaction1_All_Comparison
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Offer ~ Age * Gender, data = Dat)
##
## $Age
## diff lwr upr p adj
## 2-1 6.25000000 4.694439 7.805561 0.0000000
## 3-1 -0.08333333 -1.638894 1.472228 0.9904333
## 3-2 -6.33333333 -7.888894 -4.777772 0.0000000
##
## $Gender
## diff lwr upr p adj
## 2-1 -0.7777778 -1.829959 0.2744031 0.141592
##
## $`Age:Gender`
## diff lwr upr p adj
## 2:1-1:1 6.1666667 3.452487 8.8808465 0.0000016
## 3:1-1:1 0.6666667 -2.047513 3.3808465 0.9741078
## 1:2-1:1 -0.3333333 -3.047513 2.3808465 0.9989505
## 2:2-1:1 6.0000000 3.285820 8.7141799 0.0000027
## 3:2-1:1 -1.1666667 -3.880847 1.5475132 0.7787022
## 3:1-2:1 -5.5000000 -8.214180 -2.7858201 0.0000123
## 1:2-2:1 -6.5000000 -9.214180 -3.7858201 0.0000006
## 2:2-2:1 -0.1666667 -2.880847 2.5475132 0.9999649
## 3:2-2:1 -7.3333333 -10.047513 -4.6191535 0.0000001
## 1:2-3:1 -1.0000000 -3.714180 1.7141799 0.8689643
## 2:2-3:1 5.3333333 2.619153 8.0475132 0.0000206
## 3:2-3:1 -1.8333333 -4.547513 0.8808465 0.3372352
## 2:2-1:2 6.3333333 3.619153 9.0475132 0.0000010
## 3:2-1:2 -0.8333333 -3.547513 1.8808465 0.9344418
## 3:2-2:2 -7.1666667 -9.880847 -4.4524868 0.0000001
### Summarizes previous results. ####
Addative1_Age_Comparison <- TukeyHSD(Addative1, which = c("Age"))
Addative1_Age_Comparison
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Offer ~ Age + Gender, data = Dat)
##
## $Age
## diff lwr upr p adj
## 2-1 6.25000000 4.696609 7.803391 0.0000000
## 3-1 -0.08333333 -1.636724 1.470058 0.9904676
## 3-2 -6.33333333 -7.886724 -4.779942 0.0000000
### The Comparisons for the addative model for Age show that difference between groups 2 and 1 and 3 and 2 are signifigant but 3 and 1 are not. ###
Addative1_Gender_Comparison <- TukeyHSD(Addative1, which = c("Gender"))
Addative1_Gender_Comparison
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Offer ~ Age + Gender, data = Dat)
##
## $Gender
## diff lwr upr p adj
## 2-1 -0.7777778 -1.829113 0.2735573 0.1416397
### The Comparisons for the addative model show that difference between groups 1 and 2 are not signifigant. ###
Addative1_All_Comparison <- TukeyHSD(Addative1)
Addative1_All_Comparison
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Offer ~ Age + Gender, data = Dat)
##
## $Age
## diff lwr upr p adj
## 2-1 6.25000000 4.696609 7.803391 0.0000000
## 3-1 -0.08333333 -1.636724 1.470058 0.9904676
## 3-2 -6.33333333 -7.886724 -4.779942 0.0000000
##
## $Gender
## diff lwr upr p adj
## 2-1 -0.7777778 -1.829113 0.2735573 0.1416397
### Summarizes previous results. ####
Just_Age1_Age_Comparison <- TukeyHSD(Just_Age1, which = c("Age"))
Just_Age1_Age_Comparison
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Offer ~ Age, data = Dat)
##
## $Age
## diff lwr upr p adj
## 2-1 6.25000000 4.669286 7.830714 0.0000000
## 3-1 -0.08333333 -1.664048 1.497381 0.9908192
## 3-2 -6.33333333 -7.914048 -4.752619 0.0000000
### The Comparisons for the model with Age show that difference between groups 1 and 3 are not signifigant. ###
Just_Age1_All_Comparison <- TukeyHSD(Just_Age1)
Just_Age1_All_Comparison
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Offer ~ Age, data = Dat)
##
## $Age
## diff lwr upr p adj
## 2-1 6.25000000 4.669286 7.830714 0.0000000
## 3-1 -0.08333333 -1.664048 1.497381 0.9908192
## 3-2 -6.33333333 -7.914048 -4.752619 0.0000000
### Summarizes previous results. ####
Just_Gender1_Gender_Comparison <- TukeyHSD(Just_Gender1, which = c("Gender"))
Just_Gender1_Gender_Comparison
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Offer ~ Gender, data = Dat)
##
## $Gender
## diff lwr upr p adj
## 2-1 -0.7777778 -3.082173 1.526617 0.4974135
### The Comparisons for the model with just Gender show that difference between groups 1 and 2 are not signifigant. ###
Just_Gender1_All_Comparison <- TukeyHSD(Just_Gender1)
Just_Gender1_All_Comparison
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Offer ~ Gender, data = Dat)
##
## $Gender
## diff lwr upr p adj
## 2-1 -0.7777778 -3.082173 1.526617 0.4974135
### Summarizes previous results. ####
### The results from the ANOVa test suggest that there is no interaction as the interaction term has a P-Val of about .4 and an F-value of 1.0581 which is well below our decision rule of 3.68232. Howver a few of the multiple comparisons show that there may be some interaction. But It could also could just be a result of the effects seen from Age. ###
Interaction<- aov(Offer~ Age*Gender, data=Dat)
summary(Interaction)
## Df Sum Sq Mean Sq F value Pr(>F)
## Age 2 316.7 158.36 66.291 9.79e-12 ***
## Gender 1 5.4 5.44 2.279 0.142
## Age:Gender 2 5.1 2.53 1.058 0.360
## Residuals 30 71.7 2.39
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Interaction2<-anova(lm(Offer ~ Age + Gender + Age*Gender, data = Dat))
Interaction2
## Analysis of Variance Table
##
## Response: Offer
## Df Sum Sq Mean Sq F value Pr(>F)
## Age 2 316.72 158.361 66.2907 9.789e-12 ***
## Gender 1 5.44 5.444 2.2791 0.1416
## Age:Gender 2 5.06 2.528 1.0581 0.3597
## Residuals 30 71.67 2.389
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
AgeSStrt <- 316.72
AgeSStrt
## [1] 316.72
GenderSStrt <- 5.44
GenderSStrt
## [1] 5.44
AgeXGenderSStrt <- 5.06
AgeXGenderSStrt
## [1] 5.06
SSE <- 71.67
SSE
## [1] 71.67
SST <- AgeSStrt + GenderSStrt + AgeXGenderSStrt + SSE
SST
## [1] 398.89
AGERsqd <- AgeSStrt / SST
AGERsqd
## [1] 0.7940034
GenderRsqd <- GenderSStrt / SST
GenderRsqd
## [1] 0.01363785
AgeXGenderRsqd <- AgeXGenderSStrt / SST
AgeXGenderRsqd
## [1] 0.0126852
### It appears that age explains the most amount of the Variance as it has the R^2 value at .794 while Gender and Age*Gender have R^2 values of just .01363785 and .0126852 respectively. In other words the age group someone is in (Young, Middle, Elderly ) explains 79.4% of the variability of the cash offer that said person receives while Gender and Age*Gender each only explain about 1% of the variance in cash offer a person receives for their car. This is also suggest by the fact that Age has the larges SStrt, Mstrt, F-Value and is the only variable with a P-val less than .1/.05/.01 and is thus the only variable that has a significant effect on the Cash offer that a person received for their Car. Largest MStrt, the largest F-value and the only group with a p-val < .1/.05/.01. ###
Decsion_Rule <- qf(0.05, 1, 15, lower.tail=FALSE) ### Decision Rule ###
Decsion_Rule
## [1] 4.543077
### Ho: There is no interaction between Age and Gender in the model. ####
### H1: There is interaction between Age and Gender in the model. ###
Interaction
## Call:
## aov(formula = Offer ~ Age * Gender, data = Dat)
##
## Terms:
## Age Gender Age:Gender Residuals
## Sum of Squares 316.7222 5.4444 5.0556 71.6667
## Deg. of Freedom 2 1 2 30
##
## Residual standard error: 1.545603
## Estimated effects may be unbalanced
Interaction2
## Analysis of Variance Table
##
## Response: Offer
## Df Sum Sq Mean Sq F value Pr(>F)
## Age 2 316.72 158.361 66.2907 9.789e-12 ***
## Gender 1 5.44 5.444 2.2791 0.1416
## Age:Gender 2 5.06 2.528 1.0581 0.3597
## Residuals 30 71.67 2.389
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
### Because only Age is signifigant we can conclude there is no interaction. We will then try an addative model. ###
Addative1 <- aov(Offer ~ Age + Gender, data = Dat)
Addative1
## Call:
## aov(formula = Offer ~ Age + Gender, data = Dat)
##
## Terms:
## Age Gender Residuals
## Sum of Squares 316.7222 5.4444 76.7222
## Deg. of Freedom 2 1 32
##
## Residual standard error: 1.548409
## Estimated effects may be unbalanced
Addative2 <-anova(lm(Offer ~ Age + Gender, data = Dat))
Addative2
## Analysis of Variance Table
##
## Response: Offer
## Df Sum Sq Mean Sq F value Pr(>F)
## Age 2 316.72 158.361 66.0507 4.371e-12 ***
## Gender 1 5.44 5.444 2.2708 0.1416
## Residuals 32 76.72 2.398
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
### By removing the interaction term and switching to an addative model still Gender is not signifigant. So we will try using just Age. ###
Just_Age1 <- aov(Offer ~ Age, data = Dat)
Just_Age1
## Call:
## aov(formula = Offer ~ Age, data = Dat)
##
## Terms:
## Age Residuals
## Sum of Squares 316.7222 82.1667
## Deg. of Freedom 2 33
##
## Residual standard error: 1.577941
## Estimated effects may be unbalanced
Just_Age2 <- anova(lm(Offer ~ Age, data = Dat))
Just_Age2
## Analysis of Variance Table
##
## Response: Offer
## Df Sum Sq Mean Sq F value Pr(>F)
## Age 2 316.72 158.36 63.601 4.769e-12 ***
## Residuals 33 82.17 2.49
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
### For the sake of completeness we will also try just Gender. ###
Just_Gender1 <- aov(Offer ~ Gender, data = Dat)
Just_Gender1
## Call:
## aov(formula = Offer ~ Gender, data = Dat)
##
## Terms:
## Gender Residuals
## Sum of Squares 5.4444 393.4444
## Deg. of Freedom 1 34
##
## Residual standard error: 3.401749
## Estimated effects may be unbalanced
Just_Gender2 <-anova(lm(Offer ~ Gender, data = Dat))
Just_Gender2
## Analysis of Variance Table
##
## Response: Offer
## Df Sum Sq Mean Sq F value Pr(>F)
## Gender 1 5.44 5.4444 0.4705 0.4974
## Residuals 34 393.44 11.5719
Interaction1_Age_Comparison <- TukeyHSD(Interaction, which = c("Age"))
Interaction1_Age_Comparison
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Offer ~ Age * Gender, data = Dat)
##
## $Age
## diff lwr upr p adj
## 2-1 6.25000000 4.694439 7.805561 0.0000000
## 3-1 -0.08333333 -1.638894 1.472228 0.9904333
## 3-2 -6.33333333 -7.888894 -4.777772 0.0000000
### The Comparisons for the interaction model for Age show that difference between groups 2 and 1 and 3 and 2 are signifigant but 3 and 1 are not. ###
Interaction1_Gender_Comparison <- TukeyHSD(Interaction, which = c("Gender"))
Interaction1_Gender_Comparison
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Offer ~ Age * Gender, data = Dat)
##
## $Gender
## diff lwr upr p adj
## 2-1 -0.7777778 -1.829959 0.2744031 0.141592
### The Comparisons for the interaction model show that difference between groups 1 and 2 are not signifigant. ###
Interaction1_AgeXGender_Comparison <- TukeyHSD(Interaction, which = c("Age:Gender"))
Interaction1_AgeXGender_Comparison
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Offer ~ Age * Gender, data = Dat)
##
## $`Age:Gender`
## diff lwr upr p adj
## 2:1-1:1 6.1666667 3.452487 8.8808465 0.0000016
## 3:1-1:1 0.6666667 -2.047513 3.3808465 0.9741078
## 1:2-1:1 -0.3333333 -3.047513 2.3808465 0.9989505
## 2:2-1:1 6.0000000 3.285820 8.7141799 0.0000027
## 3:2-1:1 -1.1666667 -3.880847 1.5475132 0.7787022
## 3:1-2:1 -5.5000000 -8.214180 -2.7858201 0.0000123
## 1:2-2:1 -6.5000000 -9.214180 -3.7858201 0.0000006
## 2:2-2:1 -0.1666667 -2.880847 2.5475132 0.9999649
## 3:2-2:1 -7.3333333 -10.047513 -4.6191535 0.0000001
## 1:2-3:1 -1.0000000 -3.714180 1.7141799 0.8689643
## 2:2-3:1 5.3333333 2.619153 8.0475132 0.0000206
## 3:2-3:1 -1.8333333 -4.547513 0.8808465 0.3372352
## 2:2-1:2 6.3333333 3.619153 9.0475132 0.0000010
## 3:2-1:2 -0.8333333 -3.547513 1.8808465 0.9344418
## 3:2-2:2 -7.1666667 -9.880847 -4.4524868 0.0000001
### The Comparisons for the interaction model for Age*Gender are complicated. For in short some comparisons for some interactions are signifigant while other are not. ###
Interaction1_All_Comparison <- TukeyHSD(Interaction)
Interaction1_All_Comparison
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Offer ~ Age * Gender, data = Dat)
##
## $Age
## diff lwr upr p adj
## 2-1 6.25000000 4.694439 7.805561 0.0000000
## 3-1 -0.08333333 -1.638894 1.472228 0.9904333
## 3-2 -6.33333333 -7.888894 -4.777772 0.0000000
##
## $Gender
## diff lwr upr p adj
## 2-1 -0.7777778 -1.829959 0.2744031 0.141592
##
## $`Age:Gender`
## diff lwr upr p adj
## 2:1-1:1 6.1666667 3.452487 8.8808465 0.0000016
## 3:1-1:1 0.6666667 -2.047513 3.3808465 0.9741078
## 1:2-1:1 -0.3333333 -3.047513 2.3808465 0.9989505
## 2:2-1:1 6.0000000 3.285820 8.7141799 0.0000027
## 3:2-1:1 -1.1666667 -3.880847 1.5475132 0.7787022
## 3:1-2:1 -5.5000000 -8.214180 -2.7858201 0.0000123
## 1:2-2:1 -6.5000000 -9.214180 -3.7858201 0.0000006
## 2:2-2:1 -0.1666667 -2.880847 2.5475132 0.9999649
## 3:2-2:1 -7.3333333 -10.047513 -4.6191535 0.0000001
## 1:2-3:1 -1.0000000 -3.714180 1.7141799 0.8689643
## 2:2-3:1 5.3333333 2.619153 8.0475132 0.0000206
## 3:2-3:1 -1.8333333 -4.547513 0.8808465 0.3372352
## 2:2-1:2 6.3333333 3.619153 9.0475132 0.0000010
## 3:2-1:2 -0.8333333 -3.547513 1.8808465 0.9344418
## 3:2-2:2 -7.1666667 -9.880847 -4.4524868 0.0000001
### Summarizes previous results. ####
Addative1_Age_Comparison <- TukeyHSD(Addative1, which = c("Age"))
Addative1_Age_Comparison
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Offer ~ Age + Gender, data = Dat)
##
## $Age
## diff lwr upr p adj
## 2-1 6.25000000 4.696609 7.803391 0.0000000
## 3-1 -0.08333333 -1.636724 1.470058 0.9904676
## 3-2 -6.33333333 -7.886724 -4.779942 0.0000000
### The Comparisons for the addative model for Age show that difference between groups 2 and 1 and 3 and 2 are signifigant but 3 and 1 are not. ###
Addative1_Gender_Comparison <- TukeyHSD(Addative1, which = c("Gender"))
Addative1_Gender_Comparison
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Offer ~ Age + Gender, data = Dat)
##
## $Gender
## diff lwr upr p adj
## 2-1 -0.7777778 -1.829113 0.2735573 0.1416397
### The Comparisons for the addative model show that difference between groups 1 and 2 are not signifigant. ###
Addative1_All_Comparison <- TukeyHSD(Addative1)
Addative1_All_Comparison
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Offer ~ Age + Gender, data = Dat)
##
## $Age
## diff lwr upr p adj
## 2-1 6.25000000 4.696609 7.803391 0.0000000
## 3-1 -0.08333333 -1.636724 1.470058 0.9904676
## 3-2 -6.33333333 -7.886724 -4.779942 0.0000000
##
## $Gender
## diff lwr upr p adj
## 2-1 -0.7777778 -1.829113 0.2735573 0.1416397
### Summarizes previous results. ####
Just_Age1_Age_Comparison <- TukeyHSD(Just_Age1, which = c("Age"))
Just_Age1_Age_Comparison
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Offer ~ Age, data = Dat)
##
## $Age
## diff lwr upr p adj
## 2-1 6.25000000 4.669286 7.830714 0.0000000
## 3-1 -0.08333333 -1.664048 1.497381 0.9908192
## 3-2 -6.33333333 -7.914048 -4.752619 0.0000000
### The Comparisons for the model with Age show that difference between groups 1 and 3 are not signifigant. ###
Just_Age1_All_Comparison <- TukeyHSD(Just_Age1)
Just_Age1_All_Comparison
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Offer ~ Age, data = Dat)
##
## $Age
## diff lwr upr p adj
## 2-1 6.25000000 4.669286 7.830714 0.0000000
## 3-1 -0.08333333 -1.664048 1.497381 0.9908192
## 3-2 -6.33333333 -7.914048 -4.752619 0.0000000
### Summarizes previous results. ####
Just_Gender1_Gender_Comparison <- TukeyHSD(Just_Gender1, which = c("Gender"))
Just_Gender1_Gender_Comparison
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Offer ~ Gender, data = Dat)
##
## $Gender
## diff lwr upr p adj
## 2-1 -0.7777778 -3.082173 1.526617 0.4974135
### The Comparisons for the model with just Gender show that difference between groups 1 and 2 are not signifigant. ###
Just_Gender1_All_Comparison <- TukeyHSD(Just_Gender1)
Just_Gender1_All_Comparison
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Offer ~ Gender, data = Dat)
##
## $Gender
## diff lwr upr p adj
## 2-1 -0.7777778 -3.082173 1.526617 0.4974135
### Summarizes previous results. ####
### The results from the ANOVa table suggest that there are no main effects from Gender but there are main effects present from Age. As Age but not Gender has a P-val below .05. Similarly Age's F-value of 66.057 is much larger than our decision rule of 4.53077 while Gender has an F-value of2.2708 which is smaller than our decision rule. Additionally the pair wise comparisons for the additive and the Gender only model suggest that those comparisons are not significant while the comparisons for Age with both the additive and Age only models were significant with regards to the comparisons for group 1 and 2 and 2 and 3 but not for 1 and 3. It is meaningful to test main effects to see which variable in the model is producing the effects and therefore be able to produce a better more efficient model by removing weak predictors such as in this case Gender. ###