# Load the required packages.
library("gmodels")
library("car")
## Loading required package: carData
library("ggplot2")
library("qqplotr")
## Warning: package 'qqplotr' was built under R version 4.0.5
##
## Attaching package: 'qqplotr'
## The following objects are masked from 'package:ggplot2':
##
## stat_qq_line, StatQqLine
library("dplyr")
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library("readxl")
## Warning: package 'readxl' was built under R version 4.0.4
# Import the data set from the Excel workbook.
# NOTE(review): the absolute path below is machine-specific; point it at your
# own copy of Tdata.xlsx before running.
Tdata <- read_excel("C:/Users/adolp/Desktop/Tdata.xlsx")
# View() opens a data viewer, which is only useful in an interactive session,
# so it is skipped when the script is run in batch mode or knitted.
if (interactive()) {
  View(Tdata)
}
# Convert the independent variable Party into a labelled factor:
# code 0 is labelled "Rep" and code 1 is labelled "Dem".
Tdata$Party <- factor(
  Tdata$Party,
  levels = c(0, 1),
  labels = c("Rep", "Dem")
)
# Descriptive statistics for Lies within each Party group.
# `n` counts only the non-missing Lies values so that it matches the
# observations actually used by mean() and sd() (both called with
# na.rm = TRUE). The standard error and the t-based 95% confidence limits
# (LCL, UCL) are derived from that same n.
Tdata %>%
  select(Lies, Party) %>%
  group_by(Party) %>%
  summarise(
    n = sum(!is.na(Lies)),
    mean = mean(Lies, na.rm = TRUE),
    sd = sd(Lies, na.rm = TRUE),
    stderr = sd / sqrt(n),
    LCL = mean - qt(1 - (0.05 / 2), n - 1) * stderr,
    UCL = mean + qt(1 - (0.05 / 2), n - 1) * stderr,
    median = median(Lies, na.rm = TRUE),
    min = min(Lies, na.rm = TRUE),
    max = max(Lies, na.rm = TRUE),
    IQR = IQR(Lies, na.rm = TRUE)
  )
## # A tibble: 2 x 11
## Party n mean sd stderr LCL UCL median min max IQR
## * <fct> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Rep 15 4.70 0.848 0.219 4.23 5.17 4.68 3.46 6.28 1.15
## 2 Dem 15 6.12 1.13 0.292 5.49 6.74 6.11 4.30 8.52 0.681
# Assumption 1: normality of Lies within each Party group, checked with the
# Shapiro-Wilk test. Reports the W statistic and its p-value per group.
Tdata %>%
  group_by(Party) %>%
  summarise(
    `W Statistic` = shapiro.test(Lies)[["statistic"]],
    `p-value` = shapiro.test(Lies)[["p.value"]]
  )
## # A tibble: 2 x 3
## Party `W Statistic` `p-value`
## * <fct> <dbl> <dbl>
## 1 Rep 0.960 0.688
## 2 Dem 0.917 0.171
# Assumption 1b: normality check with the Kolmogorov-Smirnov test.
# Each group's Lies values are compared against a normal distribution whose
# mean and sd are estimated from that same group. (The two-sample form
# ks.test(Party, Lies) compares the group labels with the scores and therefore
# does not test normality.)
# NOTE: because the normal parameters are estimated from the sample itself,
# the reported p-values tend to be conservative.
Tdata %>%
  group_by(Party) %>%
  summarise(
    `KS Statistic` = ks.test(
      Lies, "pnorm",
      mean = mean(Lies, na.rm = TRUE),
      sd = sd(Lies, na.rm = TRUE)
    )$statistic,
    `p-value` = ks.test(
      Lies, "pnorm",
      mean = mean(Lies, na.rm = TRUE),
      sd = sd(Lies, na.rm = TRUE)
    )$p.value
  )
## NOTE: the output previously recorded here (KS Statistic = 1 and
## p-value = 0.000000612 for both groups, with "ties" warnings) was produced
## by the incorrect ks.test(Party, Lies) call. Re-run this chunk to regenerate
## the output.
# Assumption 1c: Q-Q plots of Lies, drawn separately for each Party.
# Layers, in drawing order: a 95% confidence band (bandType = "ts"), the
# identity reference line, and the sample points in black.
ggplot(
  data = Tdata,
  mapping = aes(sample = Lies, color = Party, fill = Party)
) +
  stat_qq_band(alpha = 0.5, conf = 0.95, qtype = 1, bandType = "ts") +
  stat_qq_line(identity = TRUE) +
  stat_qq_point(col = "black") +
  facet_wrap(~ Party, scales = "free") +
  labs(x = "Theoretical Quantiles", y = "Sample Quantiles") +
  theme_bw()

# Assumption 2: boxplots of Lies by Party, used to screen for outliers.
# Error-bar whiskers are drawn first, then the light-blue boxes, then each
# group's mean as a point marker; the legend is hidden.
ggplot(Tdata, aes(x = Party, y = Lies, fill = Party)) +
  stat_boxplot(geom = "errorbar", width = 0.5) +
  geom_boxplot(fill = "light blue") +
  stat_summary(
    fun = mean,
    geom = "point",
    shape = 10,
    size = 3.5,
    color = "black"
  ) +
  ggtitle("Boxplots of Lies by Party") +
  theme_bw() +
  theme(legend.position = "none")

# Assumption 3: homogeneity of variance checked with Levene's test.
# lev1 centers each group on its mean (the classic Levene test); lev2 centers
# on the median (the more robust Brown-Forsythe variant), so the two results
# can be compared instead of running the same test twice.
lev1 <- leveneTest(Lies ~ Party, data = Tdata, center = "mean")
lev2 <- leveneTest(Lies ~ Party, data = Tdata, center = "median")
print(lev1)
## Levene's Test for Homogeneity of Variance (center = "mean")
## Df F value Pr(>F)
## group 1 0.1824 0.6726
## 28
print(lev2)
## NOTE: the output previously recorded here repeated the center = "mean"
## result (F = 0.1824, p = 0.6726) because lev2 duplicated lev1. Re-run this
## chunk to obtain the median-centered result.
# Independent-samples t-test of Lies by Party assuming equal variances
# (pooled-variance Student's t-test).
# t.test() has no na.rm argument (it was silently absorbed by `...`), so it is
# omitted; the formula interface handles missing values through `na.action`.
m1 <- t.test(Lies ~ Party, data = Tdata, var.equal = TRUE)
print(m1)
##
## Two Sample t-test
##
## data: Lies by Party
## t = -3.8796, df = 28, p-value = 0.0005799
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -2.1637735 -0.6683891
## sample estimates:
## mean in group Rep mean in group Dem
## 4.701504 6.117585
# Independent-samples t-test of Lies by Party without assuming equal variances
# (Welch's t-test).
# t.test() has no na.rm argument (it was silently absorbed by `...`), so it is
# omitted; the formula interface handles missing values through `na.action`.
m2 <- t.test(Lies ~ Party, data = Tdata, var.equal = FALSE)
print(m2)
##
## Welch Two Sample t-test
##
## data: Lies by Party
## t = -3.8796, df = 25.951, p-value = 0.0006412
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -2.1664427 -0.6657199
## sample estimates:
## mean in group Rep mean in group Dem
## 4.701504 6.117585