611 Syllabus in R

Here’s the 611 schedule of topics from this year, and some code highlights from each week.

# A toy data set for us to work with: n simulated people with a two-level
# gender factor, a four-level degree factor, and several numeric measures.
# The values are random draws, so they differ from run to run; call
# set.seed() before this block if you need a repeatable data set.
n <- 100
# The group sizes below are n / 2 and n / 4, so n must split evenly four ways.
stopifnot(n %% 4 == 0)

# Levels sort alphabetically, so as.numeric(gender) is 1 for "F" and 2 for "M".
gender <- factor(rep(c("M", "F"), each = n / 2))
IQ <- rnorm(n, mean = 100, sd = 15)
# Built as an explicit factor: data.frame() has not converted strings to
# factors by default since R 4.0, and the summaries and plots further down
# treat degree as categorical. Default (alphabetical) levels: BA, HS, MS, PhD.
degree <- factor(rep(c("HS", "BA", "MS", "PhD"), times = n / 4))
# as.numeric(gender) - 2 is -1 for "F" and 0 for "M", so the "F" rows are
# shifted down by one unit relative to the "M" rows.
height <- as.numeric(gender) - 2 + rnorm(n, mean = 5.5, sd = 1)
RT1 <- rchisq(n, 4)
RT2 <- rchisq(n, 4)
# Outcome: 5 points lower for "M" (coded 2) than for "F" (coded 1), plus
# 0.1 per IQ point, plus standard-normal noise.
DV <- 50 - 5 * (as.numeric(gender) - 1) + 0.1 * IQ + rnorm(n)

df <- data.frame(
  awesome = DV,
  gender = gender,
  IQ = IQ,
  degree = degree,
  height = height,
  RT1 = RT1,
  RT2 = RT2
)

Exploratory Data Analysis

# Peek at the first six rows of the data frame.
head(df)
##    awesome gender        IQ degree   height       RT1      RT2
## 1 55.28120      M 112.14288     HS 4.646628 0.3633789 2.824327
## 2 54.09872      M  91.23829     BA 5.560339 2.2456835 1.692923
## 3 53.04562      M  83.92578     MS 3.446986 8.0926142 4.441702
## 4 55.60539      M  87.97319    PhD 4.846104 9.3437717 3.974989
## 5 57.23617      M 111.96855     HS 4.773782 3.6712809 2.625829
## 6 55.18866      M  92.76796     BA 6.822313 5.4973758 3.136531
# View() opens a spreadsheet-style data viewer, which is only useful in an
# interactive session, so skip it when the script is run non-interactively.
if (interactive()) {
  View(df)
}

# Base-R overview of every column: quartiles and mean for the numeric
# columns, counts per level for the factor columns.
summary(df)
##     awesome      gender       IQ         degree       height     
##  Min.   :50.24   F:50   Min.   : 63.51   BA :25   Min.   :2.241  
##  1st Qu.:54.94   M:50   1st Qu.: 88.89   HS :25   1st Qu.:4.367  
##  Median :57.61          Median :100.17   MS :25   Median :4.910  
##  Mean   :57.35          Mean   : 98.94   PhD:25   Mean   :4.970  
##  3rd Qu.:59.78          3rd Qu.:109.40            3rd Qu.:5.738  
##  Max.   :62.47          Max.   :133.81            Max.   :7.160  
##       RT1               RT2         
##  Min.   : 0.1865   Min.   : 0.2513  
##  1st Qu.: 2.1216   1st Qu.: 1.8807  
##  Median : 3.8957   Median : 3.3573  
##  Mean   : 3.9809   Mean   : 3.6674  
##  3rd Qu.: 5.5078   3rd Qu.: 4.9681  
##  Max.   :12.1995   Max.   :13.7104
# describe() from the psych package gives a wider table of descriptives
# (sd, trimmed mean, mad, skew, kurtosis, se, ...). In the output below the
# non-numeric columns (gender, degree) are the ones flagged with "*".
library(psych)
describe(df)
##         vars   n  mean    sd median trimmed   mad   min    max range  skew
## awesome    1 100 57.35  2.96  57.61   57.44  3.72 50.24  62.47 12.23 -0.23
## gender*    2 100   NaN    NA     NA     NaN    NA   Inf   -Inf  -Inf    NA
## IQ         3 100 98.94 14.73 100.17   99.16 15.35 63.51 133.81 70.30 -0.15
## degree*    4 100   NaN    NA     NA     NaN    NA   Inf   -Inf  -Inf    NA
## height     5 100  4.97  1.10   4.91    4.99  1.08  2.24   7.16  4.92 -0.15
## RT1        6 100  3.98  2.51   3.90    3.77  2.50  0.19  12.20 12.01  0.75
## RT2        7 100  3.67  2.33   3.36    3.43  2.31  0.25  13.71 13.46  1.16
##         kurtosis   se
## awesome    -0.94 0.30
## gender*       NA   NA
## IQ         -0.62 1.47
## degree*       NA   NA
## height     -0.48 0.11
## RT1         0.25 0.25
## RT2         2.12 0.23
# Same table restricted to two columns, picked by name.
describe(df[c("IQ", "awesome")])
##         vars   n  mean    sd median trimmed   mad   min    max range  skew
## IQ         1 100 98.94 14.73 100.17   99.16 15.35 63.51 133.81 70.30 -0.15
## awesome    2 100 57.35  2.96  57.61   57.44  3.72 50.24  62.47 12.23 -0.23
##         kurtosis   se
## IQ         -0.62 1.47
## awesome    -0.94 0.30
# Bivariate plots of the outcome against each predictor. Using bare column
# names with `data = df` (instead of df$... inside the formula) keeps the
# axis labels clean: "IQ" rather than "df$IQ".
plot(awesome ~ IQ, data = df)

plot(awesome ~ gender, data = df)

# factor() makes degree categorical here regardless of whether the column is
# stored as a factor or as plain character strings.
plot(awesome ~ factor(degree), data = df, xlab = "degree")

plot(height ~ gender, data = df)

# Univariate distributions of two of the numeric columns.
hist(df$IQ)

hist(df$RT1)

# Correlation between the outcome and IQ (cor() defaults to Pearson).
with(df, cor(awesome, IQ))
## [1] 0.4685434
# Correlation matrix of the five numeric columns, selected by name.
cor(df[, c("awesome", "IQ", "height", "RT1", "RT2")])
##             awesome          IQ      height         RT1         RT2
## awesome  1.00000000  0.46854341 -0.33868198 -0.10441679  0.01706919
## IQ       0.46854341  1.00000000 -0.02077314  0.04033320 -0.03292867
## height  -0.33868198 -0.02077314  1.00000000  0.06604399 -0.08854055
## RT1     -0.10441679  0.04033320  0.06604399  1.00000000  0.17752155
## RT2      0.01706919 -0.03292867 -0.08854055  0.17752155  1.00000000
# Covariance between the outcome and IQ.
with(df, cov(awesome, IQ))
## [1] 20.45331
# Cross-tabulation of the two categorical columns.
table(df[, c("gender", "degree")])
##       degree
## gender BA HS MS PhD
##      F 12 12 13  13
##      M 13 13 12  12
# ggpairs() lives in GGally, which is an extension of ggplot2.
library(ggplot2)

library(GGally)
# The big guns: one matrix of pairwise plots covering every column of df.
ggpairs(data = df)