Independent t-test

#Now, open the packages.
library("gmodels")
library("car")

## Loading required package: carData

library("ggplot2")
library("qqplotr")

## Warning: package 'qqplotr' was built under R version 4.0.5

## 
## Attaching package: 'qqplotr'

## The following objects are masked from 'package:ggplot2':
## 
##     stat_qq_line, StatQqLine

library("dplyr")

## 
## Attaching package: 'dplyr'

## The following object is masked from 'package:car':
## 
##     recode

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library("readxl")

## Warning: package 'readxl' was built under R version 4.0.4

#Bring in your data.
# NOTE: the path below is specific to one machine; point it at your own copy
# of Tdata.xlsx before running.
Tdata <- read_excel("C:/Users/adolp/Desktop/Tdata.xlsx")
# View() opens an interactive data viewer, so only call it when a user is
# actually at the console; in batch runs (Rscript, knitting) it is skipped.
if (interactive()) {
  View(Tdata)
}

#Make your IV a factor and add labels.
# Party is coded 0/1 in the spreadsheet; 0 is labelled "Rep" and 1 is "Dem".
Tdata[["Party"]] <- factor(
  Tdata[["Party"]],
  levels = c(0, 1),
  labels = c("Rep", "Dem")
)

#Run descriptive statistics
# Per-party summary of Lies: sample size, mean, sd, standard error, 95% t-based
# confidence limits for the mean, median, range and IQR.
# n counts only the non-missing Lies values so that it matches the observations
# actually used by mean() and sd() (both computed with na.rm = TRUE). Counting
# rows with n() would overstate n, and therefore understate the standard error
# and confidence-interval width, whenever Lies contains missing values.
Tdata %>%
  select(Lies, Party) %>%
  group_by(Party) %>%
  summarise(
    n = sum(!is.na(Lies)),
    mean = mean(Lies, na.rm = TRUE),
    sd = sd(Lies, na.rm = TRUE),
    # From here on, mean, sd and n refer to the summary columns created above.
    stderr = sd / sqrt(n),
    LCL = mean - qt(1 - (0.05 / 2), n - 1) * stderr,
    UCL = mean + qt(1 - (0.05 / 2), n - 1) * stderr,
    median = median(Lies, na.rm = TRUE),
    min = min(Lies, na.rm = TRUE),
    max = max(Lies, na.rm = TRUE),
    IQR = IQR(Lies, na.rm = TRUE)
  )

## # A tibble: 2 x 11
##   Party     n  mean    sd stderr   LCL   UCL median   min   max   IQR
## * <fct> <int> <dbl> <dbl>  <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 Rep      15  4.70 0.848  0.219  4.23  5.17   4.68  3.46  6.28 1.15 
## 2 Dem      15  6.12 1.13   0.292  5.49  6.74   6.11  4.30  8.52 0.681

#Assumption 1: Normality check with SW
# Shapiro-Wilk test of Lies run separately within each party; the W statistic
# and its p-value are reported side by side, one row per party.
summarise(
  group_by(Tdata, Party),
  `W Statistic` = shapiro.test(Lies)[["statistic"]],
  `p-value` = shapiro.test(Lies)[["p.value"]]
)

## # A tibble: 2 x 3
##   Party `W Statistic` `p-value`
## * <fct>         <dbl>     <dbl>
## 1 Rep           0.960     0.688
## 2 Dem           0.917     0.171

#Assumption 1b: Normality check with KS
# One-sample Kolmogorov-Smirnov test of each party's Lies values against a
# normal distribution with that group's own mean and sd.
# The earlier call, ks.test(Party, Lies), was a two-sample test comparing the
# grouping factor with the scores, which says nothing about normality. Any
# output recorded from that call (a KS statistic of 1 and identical p-values
# for both parties) is invalid and must be regenerated.
# Because the normal parameters are estimated from the same data, the p-value
# is only approximate; treat Shapiro-Wilk and the Q-Q plots as the primary
# normality checks.
Tdata %>%
  group_by(Party) %>%
  summarise(
    `KS Statistic` = ks.test(
      Lies, "pnorm",
      mean(Lies, na.rm = TRUE), sd(Lies, na.rm = TRUE)
    )$statistic,
    `p-value` = ks.test(
      Lies, "pnorm",
      mean(Lies, na.rm = TRUE), sd(Lies, na.rm = TRUE)
    )$p.value
  )

## Warning in ks.test(Party, Lies): cannot compute exact p-value with ties

## Warning in ks.test(Party, Lies): cannot compute exact p-value with ties

## Warning in ks.test(Party, Lies): cannot compute exact p-value with ties

## Warning in ks.test(Party, Lies): cannot compute exact p-value with ties

## # A tibble: 2 x 3
##   Party `KS Statistic`   `p-value`
## * <fct>          <dbl>       <dbl>
## 1 Rep                1 0.000000612
## 2 Dem                1 0.000000612

#Assumption 1c: Normality check with Q-Q plots
# One Q-Q panel per party (free axis scales), each drawn with a 95% "ts"
# confidence band, a reference line and the sample points in black.
ggplot(Tdata, aes(sample = Lies, colour = Party, fill = Party)) +
  stat_qq_band(bandType = "ts", conf = 0.95, qtype = 1, alpha = 0.5) +
  stat_qq_line(identity = TRUE) +
  stat_qq_point(colour = "black") +
  facet_wrap(~ Party, scales = "free") +
  labs(x = "Theoretical Quantiles", y = "Sample Quantiles") +
  theme_bw()

#Assumption 2: Use boxplots to identify outliers
# Boxplot of Lies for each party with whisker end caps; the group mean is
# overlaid as a black point marker. The legend is dropped because the x axis
# already names the parties.
ggplot(data = Tdata, mapping = aes(x = Party, y = Lies, fill = Party)) +
  stat_boxplot(geom = "errorbar", width = 0.5) +
  geom_boxplot(fill = "light blue") +
  stat_summary(
    fun = mean,
    geom = "point",
    shape = 10,
    size = 3.5,
    color = "black"
  ) +
  labs(title = "Boxplots of Lies by Party") +
  theme_bw() +
  theme(legend.position = "none")

#Assumption 3: Check homogeneity of variance with Levene's test
# lev1 centers each group on its mean (the classic Levene test).
# lev2 centers each group on its median (the Brown-Forsythe variant, which is
# more robust when the data are not normal).
# lev2 previously repeated center="mean", making it an exact copy of lev1; any
# lev2 output recorded from that version must be regenerated.
lev1 <- leveneTest(Lies ~ Party, data = Tdata, center = "mean")
lev2 <- leveneTest(Lies ~ Party, data = Tdata, center = "median")
print(lev1)

## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  1  0.1824 0.6726
##       28

# Display the second Levene's test result stored in lev2.
print(lev2)

## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value Pr(>F)
## group  1  0.1824 0.6726
##       28

#Conduct t-test with equal variance
# Student's two-sample t-test of Lies by Party using the pooled variance.
# t.test() has no na.rm argument (it was silently ignored); the formula method
# handles missing values through na.action, so that is stated explicitly here.
m1 <- t.test(Lies ~ Party, data = Tdata, var.equal = TRUE, na.action = na.omit)
print(m1)

## 
##  Two Sample t-test
## 
## data:  Lies by Party
## t = -3.8796, df = 28, p-value = 0.0005799
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -2.1637735 -0.6683891
## sample estimates:
## mean in group Rep mean in group Dem 
##          4.701504          6.117585

#Conduct t-test with unequal variance
# Welch's two-sample t-test of Lies by Party (variances not assumed equal).
# t.test() has no na.rm argument (it was silently ignored); the formula method
# handles missing values through na.action, so that is stated explicitly here.
m2 <- t.test(Lies ~ Party, data = Tdata, var.equal = FALSE, na.action = na.omit)
print(m2)

## 
##  Welch Two Sample t-test
## 
## data:  Lies by Party
## t = -3.8796, df = 25.951, p-value = 0.0006412
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -2.1664427 -0.6657199
## sample estimates:
## mean in group Rep mean in group Dem 
##          4.701504          6.117585

Independent t-test

AJ Delgado

4/12/2021