Here’s the 611 schedule of topics from this year, and some code highlights from each week.
# a toy data set for us to work with
n <- 100
gender <- as.factor(c(rep("M", n/2), rep("F", n/2)))
IQ <- rnorm(n, mean=100, sd=15)
degree <- rep(c("HS", "BA", "MS", "PhD"), n/4)
height <- as.numeric(gender)-2 + rnorm(n, mean=5.5, sd=1)
RT1 <- rchisq(n, 4)
RT2 <- rchisq(n, 4)
DV <- 50 - 5*(as.numeric(gender)-1) + .1*IQ + rnorm(n)
df <- data.frame(awesome=DV, gender=gender, IQ=IQ, degree=degree, height=height, RT1=RT1, RT2=RT2)
head(df)
## awesome gender IQ degree height RT1 RT2
## 1 55.28120 M 112.14288 HS 4.646628 0.3633789 2.824327
## 2 54.09872 M 91.23829 BA 5.560339 2.2456835 1.692923
## 3 53.04562 M 83.92578 MS 3.446986 8.0926142 4.441702
## 4 55.60539 M 87.97319 PhD 4.846104 9.3437717 3.974989
## 5 57.23617 M 111.96855 HS 4.773782 3.6712809 2.625829
## 6 55.18866 M 92.76796 BA 6.822313 5.4973758 3.136531
View(df)
summary(df)
## awesome gender IQ degree height
## Min. :50.24 F:50 Min. : 63.51 BA :25 Min. :2.241
## 1st Qu.:54.94 M:50 1st Qu.: 88.89 HS :25 1st Qu.:4.367
## Median :57.61 Median :100.17 MS :25 Median :4.910
## Mean :57.35 Mean : 98.94 PhD:25 Mean :4.970
## 3rd Qu.:59.78 3rd Qu.:109.40 3rd Qu.:5.738
## Max. :62.47 Max. :133.81 Max. :7.160
## RT1 RT2
## Min. : 0.1865 Min. : 0.2513
## 1st Qu.: 2.1216 1st Qu.: 1.8807
## Median : 3.8957 Median : 3.3573
## Mean : 3.9809 Mean : 3.6674
## 3rd Qu.: 5.5078 3rd Qu.: 4.9681
## Max. :12.1995 Max. :13.7104
library(psych)
describe(df)
## vars n mean sd median trimmed mad min max range skew
## awesome 1 100 57.35 2.96 57.61 57.44 3.72 50.24 62.47 12.23 -0.23
## gender* 2 100 NaN NA NA NaN NA Inf -Inf -Inf NA
## IQ 3 100 98.94 14.73 100.17 99.16 15.35 63.51 133.81 70.30 -0.15
## degree* 4 100 NaN NA NA NaN NA Inf -Inf -Inf NA
## height 5 100 4.97 1.10 4.91 4.99 1.08 2.24 7.16 4.92 -0.15
## RT1 6 100 3.98 2.51 3.90 3.77 2.50 0.19 12.20 12.01 0.75
## RT2 7 100 3.67 2.33 3.36 3.43 2.31 0.25 13.71 13.46 1.16
## kurtosis se
## awesome -0.94 0.30
## gender* NA NA
## IQ -0.62 1.47
## degree* NA NA
## height -0.48 0.11
## RT1 0.25 0.25
## RT2 2.12 0.23
describe(df[,c("IQ","awesome")])
## vars n mean sd median trimmed mad min max range skew
## IQ 1 100 98.94 14.73 100.17 99.16 15.35 63.51 133.81 70.30 -0.15
## awesome 2 100 57.35 2.96 57.61 57.44 3.72 50.24 62.47 12.23 -0.23
## kurtosis se
## IQ -0.62 1.47
## awesome -0.94 0.30
plot(df$awesome ~ df$IQ)
plot(df$awesome ~ df$gender)
plot(df$awesome ~ df$degree)
plot(df$height ~ df$gender)
hist(df$IQ)
hist(df$RT1)
cor(df$awesome, df$IQ)
## [1] 0.4685434
cor(df[,c(1,3,5:7)])
## awesome IQ height RT1 RT2
## awesome 1.00000000 0.46854341 -0.33868198 -0.10441679 0.01706919
## IQ 0.46854341 1.00000000 -0.02077314 0.04033320 -0.03292867
## height -0.33868198 -0.02077314 1.00000000 0.06604399 -0.08854055
## RT1 -0.10441679 0.04033320 0.06604399 1.00000000 0.17752155
## RT2 0.01706919 -0.03292867 -0.08854055 0.17752155 1.00000000
var(df$awesome, df$IQ)
## [1] 20.45331
table(df[,c(2,4)])
## degree
## gender BA HS MS PhD
## F 12 12 13 13
## M 13 13 12 12
library(ggplot2)
library(GGally)
ggpairs(df) # the big guns
qqnorm(df$IQ)
qqline(df$IQ, col="red")
qqnorm(df$RT1)
qqline(df$RT1, col="red")
df$RT1_sqrt <- sqrt(df$RT1)
qqnorm(df$RT1_sqrt)
qqline(df$RT1_sqrt, col="red")
# one-sample
t.test(IQ, mu=100, data=df, conf.level=.95)
##
## One Sample t-test
##
## data: IQ
## t = -0.7218, df = 99, p-value = 0.4721
## alternative hypothesis: true mean is not equal to 100
## 95 percent confidence interval:
## 96.0150 101.8591
## sample estimates:
## mean of x
## 98.93704
# IST
t.test(IQ ~ gender, var.equal=TRUE, data=df)
##
## Two Sample t-test
##
## data: IQ by gender
## t = -0.2773, df = 98, p-value = 0.7821
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -6.692785 5.051723
## sample estimates:
## mean in group F mean in group M
## 98.52677 99.34731
t.test(x=df[gender=="F",3], y=df[gender=="M",3], var.equal=TRUE)
##
## Two Sample t-test
##
## data: df[gender == "F", 3] and df[gender == "M", 3]
## t = -0.2773, df = 98, p-value = 0.7821
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -6.692785 5.051723
## sample estimates:
## mean of x mean of y
## 98.52677 99.34731
# DST
t.test(x=RT1, y=RT2, paired=TRUE, data=df)
##
## Paired t-test
##
## data: RT1 and RT2
## t = 1.0094, df = 99, p-value = 0.3153
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.3028698 0.9300579
## sample estimates:
## mean of the differences
## 0.3135941
results <- aov(awesome ~ degree, data=df)
summary(aov(awesome ~ degree, data=df))
## Df Sum Sq Mean Sq F value Pr(>F)
## degree 3 12.1 4.042 0.452 0.716
## Residuals 96 857.8 8.935
summary(results)
## Df Sum Sq Mean Sq F value Pr(>F)
## degree 3 12.1 4.042 0.452 0.716
## Residuals 96 857.8 8.935
Contrasts are a mess in R, especially non-orthogonal contrasts, but you can do them. Some resources:
http://www.ats.ucla.edu/stat/r/library/contrast_coding.htm http://sites.stat.psu.edu/~jls/stat512/lectures/ContrastsInR.pdf http://www.uvm.edu/~dhowell/StatPages/More_Stuff/R/ContrastsInR.html
aov(awesome ~ degree*gender, data=df)
## Call:
## aov(formula = awesome ~ degree * gender, data = df)
##
## Terms:
## degree gender degree:gender Residuals
## Sum of Squares 12.1269 582.6513 19.7314 255.3908
## Deg. of Freedom 3 1 3 92
##
## Residual standard error: 1.666129
## Estimated effects may be unbalanced
summary(aov(awesome ~ degree*gender, data=df))
## Df Sum Sq Mean Sq F value Pr(>F)
## degree 3 12.1 4.0 1.456 0.2317
## gender 1 582.7 582.7 209.890 <2e-16 ***
## degree:gender 3 19.7 6.6 2.369 0.0757 .
## Residuals 92 255.4 2.8
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# regression coefficients output
summary(lm(awesome ~ degree*gender, data=df))
##
## Call:
## lm(formula = awesome ~ degree * gender, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.0557 -0.8270 -0.0566 1.0326 4.0704
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 59.15745 0.48097 122.996 < 2e-16 ***
## degreeHS -0.02985 0.68019 -0.044 0.9651
## degreeMS 1.30585 0.66699 1.958 0.0533 .
## degreePhD 1.09156 0.66699 1.637 0.1051
## genderM -3.85966 0.66699 -5.787 9.83e-08 ***
## degreeHS:genderM -0.25042 0.94326 -0.265 0.7912
## degreeMS:genderM -1.45544 0.94326 -1.543 0.1263
## degreePhD:genderM -2.18151 0.94326 -2.313 0.0230 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.666 on 92 degrees of freedom
## Multiple R-squared: 0.7064, Adjusted R-squared: 0.6841
## F-statistic: 31.62 on 7 and 92 DF, p-value: < 2.2e-16
cor(df$awesome, df$IQ)
## [1] 0.4685434
summary(lm(awesome ~ IQ, data=df))
##
## Call:
## lm(formula = awesome ~ IQ, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.567 -2.563 0.171 2.545 4.067
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 48.01950 1.79662 26.73 < 2e-16 ***
## IQ 0.09431 0.01796 5.25 8.82e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.632 on 98 degrees of freedom
## Multiple R-squared: 0.2195, Adjusted R-squared: 0.2116
## F-statistic: 27.57 on 1 and 98 DF, p-value: 8.822e-07
table(df[,c("gender","degree")])
## degree
## gender BA HS MS PhD
## F 12 12 13 13
## M 13 13 12 12
# goodness of fit
chisq.test(table(df$degree)) # are degrees equally distributed?
##
## Chi-squared test for given probabilities
##
## data: table(df$degree)
## X-squared = 0, df = 3, p-value = 1
chisq.test(table(df$degree), p=c(.4, .3, .2, .1)) # is it 40% BS, 30% HS, 20% MS, and 10% PhD?
##
## Chi-squared test for given probabilities
##
## data: table(df$degree)
## X-squared = 30.2083, df = 3, p-value = 1.248e-06
# test of independence
chisq.test(table(df[,c(2,4)]))
##
## Pearson's Chi-squared test
##
## data: table(df[, c(2, 4)])
## X-squared = 0.16, df = 3, p-value = 0.9838
# Get measures of association
library(vcd)
assocstats(table(df[,c(2,4)]))
## X^2 df P(> X^2)
## Likelihood Ratio 0.16004 3 0.98377
## Pearson 0.16000 3 0.98377
##
## Phi-Coefficient : 0.04
## Contingency Coeff.: 0.04
## Cramer's V : 0.04