讀取資料

# Attach the packages requested for this analysis via pacman.
pacman::p_load(readxl, httr)

# Import the nlsy86long workbook into `dta` and print its structure.
dta <- read_excel(
  path = "C:/Users/PARENTCARECHILDREN/Desktop/2020-04-13-Homework-1-Trellis/nlsy86long.xlsx"
)
str(dta)
## Classes 'tbl_df', 'tbl' and 'data.frame':    664 obs. of  9 variables:
##  $ id   : num  2390 2560 3740 4020 6350 7030 7200 7610 7680 7700 ...
##  $ sex  : chr  "Female" "Female" "Female" "Male" ...
##  $ race : chr  "Majority" "Majority" "Majority" "Majority" ...
##  $ time : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ grade: num  0 0 0 0 1 0 0 0 0 0 ...
##  $ year : num  6 6 6 5 7 5 6 7 6 6 ...
##  $ month: num  67 66 67 60 78 62 66 79 76 67 ...
##  $ math : num  14.29 20.24 17.86 7.14 29.76 ...
##  $ read : num  19.05 21.43 21.43 7.14 30.95 ...

檢視年齡與閱讀、數學分數

library(lattice)

# Convert year to a factor so it is treated as a categorical variable in the
# plots below (panel variable in bwplot, x-axis categories in xyplot).
dta$year <- as.factor(dta$year)

# Box plots of math score by sex, conditioned on year.
# The x-axis shows sex (year is the conditioning variable), so it is labelled
# "Sex" rather than "Year".
bwplot(math ~ sex | year, data = dta, xlab = "Sex")

# Box plots of reading score by sex, conditioned on year.
bwplot(read ~ sex | year, data = dta, xlab = "Sex")

# Math and reading scores against year, grouped by sex, showing points ("p"),
# a reference grid ("g") and a fitted regression line ("r").
# Axis labels now describe the plotted variables (the previous
# "Height"/"Weight" labels did not match the data), and the legend argument
# is spelled out as `columns` instead of relying on partial matching.
xyplot(
  math + read ~ year,
  groups = sex,
  auto.key = list(columns = 2),
  data = dta,
  xlab = "Year",
  ylab = "Score",
  type = c("p", "g", "r"),
  layout = c(1, 2)
)

隨著年齡越大,閱讀、數學分數有越來越好的趨勢

了解數學分數的密度圖

# Density-scaled histograms of math score conditioned on year (3 x 3 layout).
histogram(
  ~ math | year,
  data = dta,
  type = "density",
  layout = c(3, 3)
)

# Density-scaled histograms of math score conditioned on sex (2 x 1 layout).
histogram(
  ~ math | sex,
  data = dta,
  type = "density",
  layout = c(2, 1)
)

# Density plot of math score with one curve per sex and an automatic legend.
densityplot(
  ~ math,
  groups = sex,
  data = dta,
  auto.key = TRUE
)

了解閱讀分數的密度圖

# Density-scaled histograms of reading score conditioned on year (3 x 3 layout).
histogram(
  ~ read | year,
  data = dta,
  type = "density",
  layout = c(3, 3)
)

# Density-scaled histograms of reading score conditioned on sex (2 x 1 layout).
histogram(
  ~ read | sex,
  data = dta,
  type = "density",
  layout = c(2, 1)
)

# Density plot of reading score with one curve per sex and an automatic legend.
densityplot(
  ~ read,
  groups = sex,
  data = dta,
  auto.key = TRUE
)

# Welch two-sample t-test comparing mean math score between the sexes.
t.test(math ~ sex, data = dta)
## 
##  Welch Two Sample t-test
## 
## data:  math by sex
## t = -0.91586, df = 639.41, p-value = 0.3601
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -4.157960  1.513013
## sample estimates:
## mean in group Female   mean in group Male 
##             40.26419             41.58666
# Welch two-sample t-test comparing mean reading score between the sexes.
t.test(read ~ sex, data = dta)
## 
##  Welch Two Sample t-test
## 
## data:  read by sex
## t = 0.43345, df = 638.18, p-value = 0.6648
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -2.491909  3.903593
## sample estimates:
## mean in group Female   mean in group Male 
##             46.18395             45.47811

整體來看,t 檢定顯示男生與女生的數學(p = 0.36)與閱讀(p = 0.66)分數皆沒有顯著差異;搭配前面的圖形來看,分數主要是受到成熟(年齡)的因素影響

檢視種族之間是否有差異

# Density-scaled histograms of math score conditioned on race (2 x 1 layout).
histogram(
  ~ math | race,
  data = dta,
  type = "density",
  layout = c(2, 1)
)

# Density plot of math score with one curve per race group.
densityplot(
  ~ math,
  groups = race,
  data = dta,
  auto.key = TRUE
)

# Density-scaled histograms of reading score conditioned on race (2 x 1 layout).
histogram(
  ~ read | race,
  data = dta,
  type = "density",
  layout = c(2, 1)
)

# Density plot of reading score with one curve per race group.
densityplot(
  ~ read,
  groups = race,
  data = dta,
  auto.key = TRUE
)

# Box plots of math score across year, conditioned on race.
bwplot(
  math ~ year | race,
  data = dta,
  xlab = "Year"
)

t 檢定顯示,多數族群(Majority)的數學與閱讀平均分數皆顯著高於少數族群(Minority)

# Mean math score within each race group.
aggregate(math ~ race, data = dta, FUN = mean)
##       race     math
## 1 Majority 43.51852
## 2 Minority 39.07991
# Welch two-sample t-test comparing mean math score between race groups.
t.test(math ~ race, data = dta)
## 
##  Welch Two Sample t-test
## 
## data:  math by race
## t = 3.0479, df = 601.28, p-value = 0.002406
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  1.578566 7.298643
## sample estimates:
## mean in group Majority mean in group Minority 
##               43.51852               39.07991
# Mean reading score within each race group.
aggregate(read ~ race, data = dta, FUN = mean)
##       race     read
## 1 Majority 47.68932
## 2 Minority 44.33257
# Welch two-sample t-test comparing mean reading score between race groups.
t.test(read ~ race, data = dta)
## 
##  Welch Two Sample t-test
## 
## data:  read by race
## t = 2.0284, df = 588.55, p-value = 0.04297
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.1065048 6.6069858
## sample estimates:
## mean in group Majority mean in group Minority 
##               47.68932               44.33257
# Partition the data into one data frame per year level.
Year <- split(x = dta, f = dta$year)

# Within each year level, run a Welch two-sample t-test of math score by race.
# The result is a list of test objects named by year level.
lapply(
  X = Year,
  FUN = function(x) {
    t.test(x$math ~ x$race)
  }
)
## $`5`
## 
##  Welch Two Sample t-test
## 
## data:  x$math by x$race
## t = 1.1239, df = 45.94, p-value = 0.2669
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.9204092  3.2475314
## sample estimates:
## mean in group Majority mean in group Minority 
##               14.00000               12.83644 
## 
## 
## $`6`
## 
##  Welch Two Sample t-test
## 
## data:  x$math by x$race
## t = 2.3463, df = 69.786, p-value = 0.02181
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.4665941 5.7586043
## sample estimates:
## mean in group Majority mean in group Minority 
##               19.44444               16.33185 
## 
## 
## $`7`
## 
##  Welch Two Sample t-test
## 
## data:  x$math by x$race
## t = 1.9459, df = 17.494, p-value = 0.06792
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.4916112 12.4925732
## sample estimates:
## mean in group Majority mean in group Minority 
##               25.97403               19.97354 
## 
## 
## $`8`
## 
##  Welch Two Sample t-test
## 
## data:  x$math by x$race
## t = 3.292, df = 102.86, p-value = 0.001364
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   2.533357 10.211741
## sample estimates:
## mean in group Majority mean in group Minority 
##               38.91223               32.53968 
## 
## 
## $`9`
## 
##  Welch Two Sample t-test
## 
## data:  x$math by x$race
## t = 2.6366, df = 39.972, p-value = 0.01187
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   1.754694 13.278288
## sample estimates:
## mean in group Majority mean in group Minority 
##               46.31519               38.79870 
## 
## 
## $`10`
## 
##  Welch Two Sample t-test
## 
## data:  x$math by x$race
## t = 3.2991, df = 87.762, p-value = 0.001403
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  2.272868 9.159974
## sample estimates:
## mean in group Majority mean in group Minority 
##               53.42566               47.70924 
## 
## 
## $`11`
## 
##  Welch Two Sample t-test
## 
## data:  x$math by x$race
## t = 0.5819, df = 52.949, p-value = 0.5631
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -3.004122  5.459480
## sample estimates:
## mean in group Majority mean in group Minority 
##               54.31548               53.08780 
## 
## 
## $`12`
## 
##  Welch Two Sample t-test
## 
## data:  x$math by x$race
## t = 3.0403, df = 109.95, p-value = 0.002955
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  1.801390 8.546595
## sample estimates:
## mean in group Majority mean in group Minority 
##               62.61447               57.44048 
## 
## 
## $`13`
## 
##  Welch Two Sample t-test
## 
## data:  x$math by x$race
## t = 1.6581, df = 21.052, p-value = 0.1121
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1.357719 12.046125
## sample estimates:
## mean in group Majority mean in group Minority 
##               62.79762               57.45342

分年齡檢定後,多數族群的數學平均分數在各年齡皆較高,其中 6、8、9、10、12 歲的差異達顯著水準(p < 0.05),5、7、11、13 歲則未達顯著;相較於性別,分數的差異應是受到種族的影響較大