#PLOT data
library(readxl)
# Load every worksheet used in the analysis from the thesis workbook.
# The workbook location is defined once so it can be changed in one place.
thesis_workbook <- "C:/Users/cngon/Documents/no negative data.xlsx"

no_negative_data <- read_excel(thesis_workbook, sheet = "all thesis data no negative")
cleaned_up_thesis_data_5 <- read_excel(thesis_workbook, sheet = "transformed working hours")
gpatotvidhrs <- read_excel(thesis_workbook, sheet = "X1video hours")
gpatotsex <- read_excel(thesis_workbook, sheet = "X1SEX")
gpatotrace <- read_excel(thesis_workbook, sheet = "X1RACE")
GPA_total <- read_excel(thesis_workbook, sheet = "X3TGPATOT")
gpatotfamincbin2011 <- read_excel(thesis_workbook, sheet = "X2FAMINCOMEBIN")
hs_completion <- read_excel(thesis_workbook, sheet = "gradto1hrvideo")

# Grouping variables are converted to factors so that aov(), TukeyHSD(),
# leveneTest() and chisq.test() treat them as categories, not numbers.
gpatotfamincbin2011$X2FAMINCOMEBIN <- factor(gpatotfamincbin2011$X2FAMINCOMEBIN)
gpatotrace$X1RACE <- factor(gpatotrace$X1RACE)
gpatotsex$X1SEX <- factor(gpatotsex$X1SEX)
gpatotvidhrs$S1HRVIDEO <- factor(gpatotvidhrs$S1HRVIDEO)
cleaned_up_thesis_data_5$S3CURJOBFTrnsfrmd <- factor(cleaned_up_thesis_data_5$S3CURJOBFTrnsfrmd)
hs_completion$S1HRVIDEO <- factor(hs_completion$S1HRVIDEO)
hs_completion$X3HSCOMPSTAT <- factor(hs_completion$X3HSCOMPSTAT)

# The combined sheet carries all five grouping variables; convert them together.
combined_factor_cols <- c(
  "S1HRVIDEO", "X1SEX", "X1RACE", "S3CURJOBFTrnsfrmd", "X2FAMINCOMEBIN"
)
no_negative_data[combined_factor_cols] <- lapply(
  no_negative_data[combined_factor_cols],
  factor
)
library(ggplot2)
# Distribution of X3TGPATOT: density curve, then a histogram with 0.5-wide bins.
ggplot(GPA_total, aes(x = X3TGPATOT)) + geom_density()
ggplot(GPA_total, aes(x = X3TGPATOT)) + geom_histogram(binwidth = 0.5)

# Normal Q-Q plot of the GPAs with its reference line.
qqnorm(GPA_total$X3TGPATOT, main = "GPAs")
qqline(GPA_total$X3TGPATOT)

# Kolmogorov-Smirnov test of normality.
# "pnorm" with no further arguments is the STANDARD normal (mean 0, sd 1), so
# raw GPAs would be rejected purely because of their location and scale. The
# sample mean and sd are passed through so the comparison is against a normal
# distribution with the same centre and spread as the data.
# NOTE: any console output pasted below this chunk (e.g. D = 0.87335) was
# produced by the earlier standard-normal comparison and must be regenerated.
# NOTE(review): with parameters estimated from the sample the KS p-value is
# only approximate (Lilliefors situation), and ties in the GPAs still trigger
# the ties warning; treat the Q-Q plot as the primary evidence.
ks.test(
  GPA_total$X3TGPATOT,
  y = "pnorm",
  mean = mean(GPA_total$X3TGPATOT, na.rm = TRUE),
  sd = sd(GPA_total$X3TGPATOT, na.rm = TRUE),
  alternative = "two.sided"
)
Warning in ks.test(GPA_total$X3TGPATOT, y = "pnorm", alternative = "two.sided") :
ties should not be present for the Kolmogorov-Smirnov test
One-sample Kolmogorov-Smirnov test
data: GPA_total$X3TGPATOT
D = 0.87335, p-value < 2.2e-16
alternative hypothesis: two-sided
#not normally distributed
library(broom) # tidy() for cleaner result tables
# One-way ANOVA of X3TGPATOT across the S1HRVIDEO groups.
gpatotvd.anova <- aov(X3TGPATOT ~ S1HRVIDEO, data = gpatotvidhrs)
tidy(gpatotvd.anova)
# Diagnostic plots (used here to eyeball homoscedasticity), drawn on a 2 x 2
# grid; the single-panel layout is restored afterwards.
par(mfrow = c(2, 2))
plot(gpatotvd.anova)
par(mfrow = c(1, 1))
# Tukey HSD pairwise comparisons, groups ordered by mean, 95% family-wise level.
gpatotvd.anova.tukey <- TukeyHSD(gpatotvd.anova, ordered = TRUE, conf.level = 0.95)
tidy(gpatotvd.anova.tukey)
plot(gpatotvd.anova.tukey, las = 1)
# Levene's test for homogeneity of variance.
# leveneTest() is exported by car, not carData (carData only ships data sets),
# so car itself must be attached here; car loads carData as a dependency.
library(car)
leveneTest(X3TGPATOT ~ S1HRVIDEO, data = gpatotvidhrs)
Levene's Test for Homogeneity of Variance (center = median)
Df F value Pr(>F)
group 5 1.8731 0.09539 .
18844
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
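#significant (p < .05) -- presumably this refers to the one-way ANOVA above. Levene's test gives p = 0.095 > 0.05, so there is no evidence that the group variances differ (the equal-variance assumption is reasonable)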
library(broom) # tidy() for cleaner result tables
# One-way ANOVA of X3TGPATOT across the S3CURJOBFTrnsfrmd groups.
gpatotcurjbft.anova <- aov(
  formula = X3TGPATOT ~ S3CURJOBFTrnsfrmd,
  data = cleaned_up_thesis_data_5
)
tidy(gpatotcurjbft.anova)
# Diagnostic plots for the fit on a 2 x 2 grid, then back to a single panel.
par(mfrow = c(2, 2))
plot(gpatotcurjbft.anova)
par(mfrow = c(1, 1))
# Tukey HSD pairwise comparisons with the function's default settings.
gpatotcurjbft.anova.tukey <- TukeyHSD(x = gpatotcurjbft.anova)
tidy(gpatotcurjbft.anova.tukey)
plot(gpatotcurjbft.anova.tukey, las = 1)
# car provides leveneTest(), which is called next.
library(car)
Loading required package: carData
Registered S3 method overwritten by 'data.table':
method from
print.data.table
# Levene's test: are the X3TGPATOT variances equal across S3CURJOBFTrnsfrmd groups?
leveneTest(X3TGPATOT ~ S3CURJOBFTrnsfrmd, data = cleaned_up_thesis_data_5)
Levene's Test for Homogeneity of Variance (center = median)
Df F value Pr(>F)
group 2 70.361 < 2.2e-16 ***
15092
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#significant b/c p<.05 and the variances of the groups are not similar because Levene test p<0.05
# Welch's one-way ANOVA: compares group means without assuming equal variances.
# var.equal = FALSE is also oneway.test()'s default; it is spelled out to make
# the Welch correction explicit.
oneway.test(X3TGPATOT ~ S3CURJOBFTrnsfrmd, data = cleaned_up_thesis_data_5, var.equal=FALSE)
One-way analysis of means (not assuming equal variances)
data: X3TGPATOT and S3CURJOBFTrnsfrmd
F = 132.53, num df = 2.0, denom df = 5628.7, p-value < 2.2e-16
#pvalue is <.05 which means we can reject the null hypothesis that the gpa totals are equal between the current job ft groups
#there is significance
library(broom) # tidy() for cleaner result tables
# One-way ANOVA of X3TGPATOT across the X2FAMINCOMEBIN groups.
gpatotfamincbin2011.anova <- aov(X3TGPATOT ~ X2FAMINCOMEBIN, data = gpatotfamincbin2011)
tidy(gpatotfamincbin2011.anova)
# Diagnostic plots (used here to eyeball homoscedasticity) for THIS model.
# The model object must be the family-income fit; plotting
# gpatotcurjbft.anova here would show the job-status model's residuals instead.
par(mfrow = c(2, 2))
plot(gpatotfamincbin2011.anova)
par(mfrow = c(1, 1))
# Tukey HSD pairwise comparisons, groups ordered by mean, 95% family-wise level.
gpatotfamincbin2011.anova.tukey <- TukeyHSD(
  gpatotfamincbin2011.anova,
  ordered = TRUE,
  conf.level = 0.95
)
tidy(gpatotfamincbin2011.anova.tukey)
plot(gpatotfamincbin2011.anova.tukey, las = 1)
# Levene's test for homogeneity of variance (leveneTest() comes from car).
library(car)
leveneTest(X3TGPATOT ~ X2FAMINCOMEBIN, data = gpatotfamincbin2011)
Levene's Test for Homogeneity of Variance (center = median)
Df F value Pr(>F)
group 9 27.16 < 2.2e-16 ***
17004
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#significant b/c p<.05 and the variances of the groups are not similar because Levene test p<0.05
# Welch's one-way ANOVA: compares group means without assuming equal variances.
# var.equal = FALSE is also oneway.test()'s default; it is spelled out to make
# the Welch correction explicit.
oneway.test(X3TGPATOT ~ X2FAMINCOMEBIN, data = gpatotfamincbin2011, var.equal=FALSE)
One-way analysis of means (not assuming equal variances)
data: X3TGPATOT and X2FAMINCOMEBIN
F = 211.09, num df = 9.0, denom df = 5320.9, p-value < 2.2e-16
#pvalue is <.05 which means we can reject the null hypothesis that the gpa totals are equal between the fam income bin2011 groups
library(broom) # tidy() for cleaner result tables
# One-way ANOVA of X3TGPATOT across the X1RACE groups.
gpatotraces.anova <- aov(
  formula = X3TGPATOT ~ X1RACE,
  data = gpatotrace
)
tidy(gpatotraces.anova)
# Diagnostic plots for the fit on a 2 x 2 grid, then back to a single panel.
par(mfrow = c(2, 2))
plot(gpatotraces.anova)
par(mfrow = c(1, 1))
# Tukey HSD pairwise comparisons with the function's default settings.
gpatotraces.anova.tukey <- TukeyHSD(x = gpatotraces.anova)
tidy(gpatotraces.anova.tukey)
plot(gpatotraces.anova.tukey, las = 1)
# Levene's test for homogeneity of variance (leveneTest() comes from car).
library(car)
leveneTest(
  X3TGPATOT ~ X1RACE,
  data = gpatotrace
)
Levene's Test for Homogeneity of Variance (center = median)
Df F value Pr(>F)
group 7 9.6196 5.361e-12 ***
18842
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#significant b/c p<.05 and the variances of the groups are not similar because Levene test p<0.05
# Welch's one-way ANOVA: compares group means without assuming equal variances.
# var.equal = FALSE is also oneway.test()'s default; it is spelled out to make
# the Welch correction explicit.
oneway.test(X3TGPATOT ~ X1RACE, data = gpatotrace, var.equal=FALSE)
One-way analysis of means (not assuming equal variances)
data: X3TGPATOT and X1RACE
F = 215.26, num df = 7.00, denom df = 800.49, p-value < 2.2e-16
#pvalue is <.05 which means we can reject the null hypothesis that the gpa totals are equal between the race groups
library(broom) # tidy() for cleaner result tables
# One-way ANOVA of X3TGPATOT across the X1SEX groups.
gpatotsex.anova <- aov(
  formula = X3TGPATOT ~ X1SEX,
  data = gpatotsex
)
tidy(gpatotsex.anova)
# Diagnostic plots for the fit on a 2 x 2 grid, then back to a single panel.
par(mfrow = c(2, 2))
plot(gpatotsex.anova)
par(mfrow = c(1, 1))
# Tukey HSD pairwise comparisons with the function's default settings.
gpatotsex.anova.tukey <- TukeyHSD(x = gpatotsex.anova)
tidy(gpatotsex.anova.tukey)
plot(gpatotsex.anova.tukey, las = 1)
# Levene's test for homogeneity of variance (leveneTest() comes from car).
library(car)
leveneTest(
  X3TGPATOT ~ X1SEX,
  data = gpatotsex
)
Levene's Test for Homogeneity of Variance (center = median)
Df F value Pr(>F)
group 1 72.34 < 2.2e-16 ***
18848
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#significant b/c p<.05 and the variances of the groups are not similar because Levene test p<0.05
# Welch's one-way ANOVA: compares group means without assuming equal variances.
# var.equal = FALSE is also oneway.test()'s default; it is spelled out to make
# the Welch correction explicit.
oneway.test(X3TGPATOT ~ X1SEX, data = gpatotsex, var.equal=FALSE)
One-way analysis of means (not assuming equal variances)
data: X3TGPATOT and X1SEX
F = 677.4, num df = 1, denom df = 18839, p-value < 2.2e-16
#pvalue is <.05 which means we can reject the null hypothesis that the gpa totals are equal between the sex groups
# Pearson chi-squared test of independence between S1HRVIDEO and X3HSCOMPSTAT.
# correct = FALSE disables the continuity correction, which chisq.test() only
# ever applies to 2 x 2 tables.
# NOTE(review): the recorded run warned that the chi-squared approximation may
# be incorrect (small expected counts) -- consider simulate.p.value = TRUE.
chisq.test(hs_completion$S1HRVIDEO, hs_completion$X3HSCOMPSTAT, correct=FALSE)
Warning in chisq.test(hs_completion$S1HRVIDEO, hs_completion$X3HSCOMPSTAT, :
Chi-squared approximation may be incorrect
Pearson's Chi-squared test
data: hs_completion$S1HRVIDEO and hs_completion$X3HSCOMPSTAT
X-squared = 141.14, df = 20, p-value < 2.2e-16
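#p < .05, so independence is rejected: S1HRVIDEO and X3HSCOMPSTAT are associated (an association only, not evidence of which one affects the other)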
# Pearson chi-squared test of independence between S1HRVIDEO and X1SEX
# (correct = FALSE: no continuity correction).
chisq.test(no_negative_data$S1HRVIDEO, no_negative_data$X1SEX, correct=FALSE)
Pearson's Chi-squared test
data: no_negative_data$S1HRVIDEO and no_negative_data$X1SEX
X-squared = 2522.2, df = 5, p-value < 2.2e-16
# Pearson chi-squared test of independence between S1HRVIDEO and X1RACE
# (correct = FALSE: no continuity correction).
# NOTE(review): the recorded run warned that the chi-squared approximation may
# be incorrect (small expected counts) -- consider simulate.p.value = TRUE.
chisq.test(no_negative_data$S1HRVIDEO, no_negative_data$X1RACE, correct=FALSE)
Warning in chisq.test(no_negative_data$S1HRVIDEO, no_negative_data$X1RACE, :
Chi-squared approximation may be incorrect
Pearson's Chi-squared test
data: no_negative_data$S1HRVIDEO and no_negative_data$X1RACE
X-squared = 98.423, df = 35, p-value = 6.102e-08
# Pearson chi-squared test of independence between S1HRVIDEO and
# S3CURJOBFTrnsfrmd (correct = FALSE: no continuity correction).
chisq.test(no_negative_data$S1HRVIDEO, no_negative_data$S3CURJOBFTrnsfrmd, correct=FALSE)
Pearson's Chi-squared test
data: no_negative_data$S1HRVIDEO and no_negative_data$S3CURJOBFTrnsfrmd
X-squared = 96.341, df = 10, p-value = 2.935e-16
# Pearson chi-squared test of independence between S1HRVIDEO and
# X2FAMINCOMEBIN (correct = FALSE: no continuity correction).
chisq.test(no_negative_data$S1HRVIDEO, no_negative_data$X2FAMINCOMEBIN, correct=FALSE)
Pearson's Chi-squared test
data: no_negative_data$S1HRVIDEO and no_negative_data$X2FAMINCOMEBIN
X-squared = 146.57, df = 45, p-value = 1.098e-12
library(broom) # tidy() for cleaner result tables
# Full-factorial ANOVA: all five grouping variables plus every interaction
# between them (the `*` operator expands to main effects and interactions).
all.anova <- aov(
  formula = X3TGPATOT ~
    S1HRVIDEO * X1SEX * X1RACE * X2FAMINCOMEBIN * S3CURJOBFTrnsfrmd,
  data = no_negative_data
)
tidy(all.anova)
# Diagnostic plots for the fit on a 2 x 2 grid.
par(mfrow = c(2, 2))
plot(all.anova)
Warning: not plotting observations with leverage one:
39, 115, 150, 206, 210, 270, 271, 385, 641, 696, 739, 770, 849, 1131, 1158, 1528, 2062, 2265, 2411, 2417, 2440, 2716, 2722, 3146, 3234, 3325, 3327, 3357, 3441, 3520, 3552, 3783, 3874, 4003, 4085, 4109, 4117, 4175, 4191, 4199, 4747, 4857, 4993, 5075, 5083, 5093, 5153, 5451, 5476, 5492, 5602, 5644, 5726, 5727, 5736, 5755, 5771, 5777, 5861, 5866, 5877, 5978, 6238, 6294, 6347, 6457, 6526, 6536, 6702, 6705, 6719, 6814, 6826, 6834, 6853, 6936, 6990, 7081, 7091, 7094, 7100, 7106, 7142, 7154, 7159, 7195, 7196, 7206, 7216, 7220, 7249, 7304, 7403, 7526, 7554, 7686, 7687, 7723, 7730, 7745, 7748, 7834, 7877, 7889, 7909, 7918, 7927, 7980, 8010, 8044, 8085, 8106, 8151, 8163, 8257, 8308, 8385, 8438, 8472, 8483, 8527, 8553, 8605, 8623, 8655, 8661, 8677, 8679, 8737, 8766, 8785, 8792, 8798, 8822, 8828, 8871, 8877, 8886, 8888, 8917, 8919, 8935, 8945, 8952, 8960, 8983, 9016, 9043, 9054, 9120, 9142, 9173, 9182, 9207, 9215, 9241, 9322, 9326, 9338, 9404, 9463, 9 [... truncated]
Warning in sqrt(crit * p * (1 - hh)/hh) : NaNs produced
Warning in sqrt(crit * p * (1 - hh)/hh) : NaNs produced
# Back to a single-panel plotting layout.
par(mfrow = c(1, 1))
# Tukey HSD pairwise comparisons for the full-factorial fit (default settings).
all.anova.tukey <- TukeyHSD(x = all.anova)
tidy(all.anova.tukey)
plot(all.anova.tukey, las = 1)
# Levene's test across the cells formed by crossing all five factors
# (leveneTest() comes from car).
library(car)
leveneTest(
  X3TGPATOT ~ S1HRVIDEO * X1SEX * X1RACE * X2FAMINCOMEBIN * S3CURJOBFTrnsfrmd,
  data = no_negative_data
)
Levene's Test for Homogeneity of Variance (center = median)
Df F value Pr(>F)
group 1329 1.2752 3.221e-10 ***
12951
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#significant b/c p<.05 and the variances of the groups are not similar because Levene test p<0.05
library(broom) # tidy() for cleaner result tables
# Additive ANOVA: S1HRVIDEO together with the other four grouping variables
# entered as main effects only (no interaction terms).
GPAanovaWBlocking <- aov(
  formula = X3TGPATOT ~
    S1HRVIDEO + X1SEX + X1RACE + X2FAMINCOMEBIN + S3CURJOBFTrnsfrmd,
  data = no_negative_data
)
tidy(GPAanovaWBlocking)
# Diagnostic plots for the fit on a 2 x 2 grid, then back to a single panel.
par(mfrow = c(2, 2))
plot(GPAanovaWBlocking)
par(mfrow = c(1, 1))
# Tukey HSD pairwise comparisons with the function's default settings.
GPAanovaWBlocking.tukey <- TukeyHSD(x = GPAanovaWBlocking)
tidy(GPAanovaWBlocking.tukey)
plot(GPAanovaWBlocking.tukey, las = 1)
NA
NA