Homework 1: National Longitudinal Survey of Youth (NLSY)
Load data file
## ─ Attaching packages ────────────────────────── tidyverse 1.3.0 ─
## ✓ ggplot2 3.2.1 ✓ purrr 0.3.3
## ✓ tibble 2.1.3 ✓ dplyr 0.8.4
## ✓ tidyr 1.0.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.4.0
## ─ Conflicts ─────────────────────────── tidyverse_conflicts() ─
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## id sex race time grade year month math read
## 1 2390 Female Majority 1 0 6 67 14.285714 19.047619
## 2 2560 Female Majority 1 0 6 66 20.238095 21.428571
## 3 3740 Female Majority 1 0 6 67 17.857143 21.428571
## 4 4020 Male Majority 1 0 5 60 7.142857 7.142857
## 5 6350 Male Majority 1 1 7 78 29.761905 30.952381
## 6 7030 Male Majority 1 0 5 62 14.285714 17.857143
Plot a scatter plot of math scores across age and race by gender
# Math score plotted against `year` (labelled as age in years), one panel per
# sex. Within each panel the races are overlaid as groups, drawn with a
# background grid ("g"), points ("p") and a regression line ("r").
# NOTE(review): lattice documents `ylim` as a length-2 numeric vector; a
# six-value sequence is supplied here -- confirm the axis renders as intended.
xyplot(
  math ~ year | sex,
  data = dta,
  groups = race,
  type = c("g", "p", "r"),
  jitter.x = TRUE,
  alpha = 0.8,
  xlab = "Age (years)",
  ylim = seq(0, 100, 20),
  auto.key = list(space = "top", columns = 2)
)

Plot a boxplot of math scores across grade and gender by race

Plot a scatter plot of reading scores across age and race by gender
# Reading score plotted against `year` (labelled as age in years), one panel
# per sex. Races are overlaid as groups, each drawn with a background grid,
# points and a regression line; the legend sits above the plot in two columns.
# NOTE(review): lattice documents `ylim` as a length-2 numeric vector; a
# six-value sequence is supplied here -- confirm the axis renders as intended.
xyplot(
  read ~ year | sex,
  data = dta,
  groups = race,
  type = c("g", "p", "r"),
  jitter.x = TRUE,
  alpha = 0.8,
  xlab = "Age (years)",
  ylim = seq(0, 100, 20),
  auto.key = list(space = "top", columns = 2)
)

Plot a boxplot of reading scores across grade and gender by race

Plot a scatter plot of reading scores across math scores and grade by gender
# Reading score against math score, one panel per grade (grade is converted to
# a factor for conditioning). The sexes are overlaid as groups, each drawn
# with a background grid, points and a regression line.
# NOTE(review): lattice documents `ylim` as a length-2 numeric vector; a
# six-value sequence is supplied here -- confirm the axis renders as intended.
xyplot(
  read ~ math | factor(grade),
  data = dta,
  groups = sex,
  type = c("g", "p", "r"),
  jitter.x = TRUE,
  alpha = 0.8,
  xlab = "Math (scores)",
  ylim = seq(0, 100, 20),
  auto.key = list(space = "top", columns = 2)
)

Plot a histogram of math scores across race by gender
# Density-scaled histogram of math scores, one panel per sex laid out in a
# single column of two rows, drawn with the black-and-white theme. Each panel
# is overlaid with a normal curve whose mean and SD come from that panel's data.
# NOTE(review): `groups = race` is passed through unchanged, but
# panel.histogram() does not appear to support grouping -- confirm that race
# is actually reflected in the plot.
histogram(
  ~ math | sex,
  groups = race,
  data = dta,
  type = "density",
  layout = c(1, 2),
  between = list(y = 0.5),
  panel = function(x, ...) {
    panel.histogram(x, ...)
    # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
    panel.mathdensity(
      dmath = dnorm,
      lwd = 1.2,
      args = list(
        mean = mean(x, na.rm = TRUE),
        sd = sd(x, na.rm = TRUE)
      ),
      ...
    )
  },
  par.settings = standard.theme(color = FALSE)
)

Plot a histogram of reading scores across race by gender
# Density-scaled histogram of reading scores, one panel per sex laid out in a
# single column of two rows, drawn with the black-and-white theme. Each panel
# is overlaid with a normal curve whose mean and SD come from that panel's data.
# NOTE(review): `groups = race` is passed through unchanged, but
# panel.histogram() does not appear to support grouping -- confirm that race
# is actually reflected in the plot.
histogram(
  ~ read | sex,
  groups = race,
  data = dta,
  type = "density",
  layout = c(1, 2),
  between = list(y = 0.5),
  panel = function(x, ...) {
    panel.histogram(x, ...)
    # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
    panel.mathdensity(
      dmath = dnorm,
      lwd = 1.2,
      args = list(
        mean = mean(x, na.rm = TRUE),
        sd = sd(x, na.rm = TRUE)
      ),
      ...
    )
  },
  par.settings = standard.theme(color = FALSE)
)

Homework 2 : French Auxiological Survey
Load data file
## Wt Ht Hb Hc Cc Arm Calf Pelvis age id
## 1 1456 1025 602 486 520 157 205 170 4 S1
## 2 1426 998 572 501 520 150 215 169 4 S2
## 3 1335 961 560 494 495 145 214 158 4 S3
## 4 1607 1006 595 497 560 178 218 172 4 S4
## 5 1684 1012 584 490 553 165 220 158 4 S5
## 6 1374 1012 580 492 525 158 202 167 4 S6
Rename the variables
# Move the id and age columns (positions 10 and 9) to the front, keep the
# eight body measurements in their original order, and give every column a
# descriptive name in the same step. Then print the resulting structure.
dta <- setNames(
  dta[, c(10:9, 1:8)],
  c("ID", "Age", "Weight", "Height", "HtB", "Head", "Chest", "Arm", "Calf",
    "Pelvis")
)
str(dta)
## 'data.frame': 360 obs. of 10 variables:
## $ ID : Factor w/ 30 levels "S1","S10","S11",..: 1 12 23 25 26 27 28 29 30 2 ...
## $ Age : int 4 4 4 4 4 4 4 4 4 4 ...
## $ Weight: int 1456 1426 1335 1607 1684 1374 1570 1450 1214 1456 ...
## $ Height: int 1025 998 961 1006 1012 1012 1040 990 968 983 ...
## $ HtB : int 602 572 560 595 584 580 586 561 571 563 ...
## $ Head : int 486 501 494 497 490 492 511 488 481 485 ...
## $ Chest : int 520 520 495 560 553 525 540 520 476 532 ...
## $ Arm : int 157 150 145 178 165 158 153 159 145 158 ...
## $ Calf : int 205 215 214 218 220 202 220 210 198 219 ...
## $ Pelvis: int 170 169 158 172 158 167 180 158 150 154 ...
Plot a scatter plot between height and weight across age

Plot a scatter matrix of the other physical measurements by age
# Scatter-plot matrix of the six measurements other than height and weight,
# conditioned on age (Age is converted to a factor, giving one panel per age).
# Each cell shows points (drawn as dots) plus a regression line.
splom(
  ~ dta[, c("HtB", "Head", "Chest", "Arm", "Calf", "Pelvis")] | factor(Age),
  data = dta,
  pch = ".",
  type = c("p", "r"),
  axis.text.cex = 0.3
)

Homework 3: Sales Data on Several Products
Load data file
## product category customer year month quarter market sales expense region
## 1 Shoes Shoes Acme 2001 1 1 1 300 240 1
## 2 Boots Shoes Acme 2001 1 1 1 2200 1540 1
## 3 Slippers Slippers Acme 2001 1 1 1 900 540 1
## 4 Shoes Shoes Acme 2001 2 1 1 100 80 1
## 5 Boots Shoes Acme 2001 2 1 1 1400 980 1
## 6 Slippers Slippers Acme 2001 2 1 1 0 0 1
## district return constantv quantity
## 1 1 0 1 30
## 2 1 0 1 275
## 3 1 0 1 180
## 4 1 0 1 10
## 5 1 0 1 175
## 6 1 0 1 0
Relabel
# Convert the integer-coded columns to labelled factors.
# NOTE(review): "Nothern" looks like a typo for "Northern". It is kept as-is
# because the label is a factor level that downstream output depends on;
# change it together with anything that references the level.
dta$region <- factor(
  dta$region,
  levels = 1:4,
  labels = c("Nothern", "Southern", "Eastern", "Western")
)
dta$district <- factor(
  dta$district,
  levels = 1:5,
  labels = c("North East", "South East", "South West", "North West",
             "Central West")
)
dta$quarter <- factor(
  dta$quarter,
  levels = 1:4,
  labels = c("1st", "2nd", "3rd", "4th")
)
dta$month <- factor(
  dta$month,
  levels = 1:12,
  labels = c("Jan", "Feb", "Mar", "Apr", "May", "Jun",
             "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
)

# Clamp negative sales to zero. Assigning into the column vector (instead of
# through a row subset of the data frame) also works when no row is negative,
# and which() skips NA comparisons rather than failing on them.
dta$sales[which(dta$sales < 0)] <- 0
head(dta)
## product category customer year month quarter market sales expense region
## 1 Shoes Shoes Acme 2001 Jan 1st 1 300 240 Nothern
## 2 Boots Shoes Acme 2001 Jan 1st 1 2200 1540 Nothern
## 3 Slippers Slippers Acme 2001 Jan 1st 1 900 540 Nothern
## 4 Shoes Shoes Acme 2001 Feb 1st 1 100 80 Nothern
## 5 Boots Shoes Acme 2001 Feb 1st 1 1400 980 Nothern
## 6 Slippers Slippers Acme 2001 Feb 1st 1 0 0 Nothern
## district return constantv quantity
## 1 North East 0 1 30
## 2 North East 0 1 275
## 3 North East 0 1 180
## 4 North East 0 1 10
## 5 North East 0 1 175
## 6 North East 0 1 0
Show the summary list of the data
## product category customer year month quarter
## Boots :24 Shoes :48 Acme :60 Min. :2001 Jan : 6 1st:18
## Shoes :24 Slippers:24 BigX : 6 1st Qu.:2001 Feb : 6 2nd:18
## Slippers:24 TwoFeet: 6 Median :2002 Mar : 6 3rd:18
## Mean :2002 Apr : 6 4th:18
## 3rd Qu.:2002 May : 6
## Max. :2002 Jun : 6
## (Other):36
## market sales expense region district
## Min. :1.000 Min. : 0 Min. :-980 Nothern :60 North East :60
## 1st Qu.:1.000 1st Qu.:1000 1st Qu.: 660 Southern: 6 South East : 0
## Median :2.000 Median :1550 Median :1065 Eastern : 0 South West : 6
## Mean :1.667 Mean :1736 Mean :1172 Western : 6 North West : 0
## 3rd Qu.:2.000 3rd Qu.:2525 3rd Qu.:1860 Central West: 6
## Max. :2.000 Max. :4700 Max. :2960
##
## return constantv quantity
## Min. :0.0000 Min. :1 Min. : 0.0
## 1st Qu.:0.0000 1st Qu.:1 1st Qu.:135.2
## Median :0.0000 Median :1 Median :220.0
## Mean :0.6667 Mean :1 Mean :248.9
## 3rd Qu.:0.0000 3rd Qu.:1 3rd Qu.:287.8
## Max. :5.0000 Max. :1 Max. :940.0
##
Plot a scatter plot of the product sales across month by year

Plot a histogram of the sales across district

Plot a boxplot of the products sales across quarter

Plot a scatter plot of the products sales across quarter

Plot a scatter plot between quantity and sales across region by quarter

Homework 4:
Load data file
## Sex Age Trial1 Trial2 Trial3 Trial4 Trial5
## 1 M 31 0.051 0.023 0.106 0.076 0.013
## 2 M 30 0.074 0.006 0.003 0.020 0.022
## 3 M 30 0.051 0.094 0.084 0.176 0.103
## 4 M 27 0.182 0.166 -0.073 -0.044 0.029
## 5 M 30 0.077 0.001 0.000 -0.027 -0.200
## 6 M 28 0.103 0.065 0.063 0.059 0.059
Show a summary list of the data
## Sex Age Trial1 Trial2
## F:43 Min. : 2.00 Min. :-0.41300 Min. :-0.78200
## M:70 1st Qu.: 9.00 1st Qu.:-0.04700 1st Qu.:-0.04600
## Median :13.00 Median : 0.06500 Median : 0.03900
## Mean :20.58 Mean : 0.06535 Mean : 0.04869
## 3rd Qu.:35.00 3rd Qu.: 0.15100 3rd Qu.: 0.12400
## Max. :52.00 Max. : 0.70900 Max. : 2.00400
## Trial3 Trial4 Trial5
## Min. :-0.49900 Min. :-0.52900 Min. :-0.36000
## 1st Qu.:-0.02500 1st Qu.:-0.02700 1st Qu.:-0.02500
## Median : 0.05200 Median : 0.03400 Median : 0.04400
## Mean : 0.05727 Mean : 0.04911 Mean : 0.04714
## 3rd Qu.: 0.11600 3rd Qu.: 0.12400 3rd Qu.: 0.11100
## Max. : 0.89600 Max. : 1.03700 Max. : 1.14900
Plot the Scatter plot to observe the repeated measures effect

Plot the Scatter plot to observe the age and sex effect

Plot the boxplot to observe the sex effect

Plot the Scatter plot to observe the age effect

Divide the participants into two groups by age
Plot the boxplot to observe the age effect

Because I did not observe any effect of age or sex in my visual inspection, I conducted a two-way ANOVA on RT.
## Warning in aov(RT ~ (Sex * Age_group) + Error(ID/(Sex * Age_group)), data =
## dtaL): Error() model is singular
##
## Error: ID
## Df Sum Sq Mean Sq F value Pr(>F)
## Sex 1 0.077 0.0769 2.107 0.147187
## Age_group 1 0.011 0.0114 0.312 0.576956
## Sex:Age_group 1 0.433 0.4327 11.860 0.000617 ***
## Residuals 561 20.468 0.0365
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
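The results revealed no significant main effects of Sex or Age group. However, the Sex × Age group interaction reached significance. Note that the model produced an "Error() model is singular" warning, so these results should be interpreted with caution.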