2020-04-27-Homework-1

(a) test if any pairs of the five variables: read, write, math, science, and socst, are different in means.

讀取資料、整理

library(lattice)
dta <- read.table("hs0.txt",sep='\t', head=T)
dta.1 <- stack(dta[,c(7:11)])
str(dta.1)

## 'data.frame':    1000 obs. of  2 variables:
##  $ values: int  57 68 44 63 47 44 50 34 63 57 ...
##  $ ind   : Factor w/ 5 levels "read","write",..: 1 1 1 1 1 1 1 1 1 1 ...

read <- dta.1[c(1:200),]
write <- dta.1[c(201:400),]
math <- dta.1[c(401:600),]
science <- dta.1[c(601:800),]
socst <- dta.1[c(801:1000),]

分組、整合

r_w <- rbind(read, write)
r_w$group <- "r_w"
r_m <- rbind(read, math)
r_m$group <- "r_m"
r_sc <- rbind(read, science)
r_sc$group <- "r_sc"
r_so <- rbind(read, socst)
r_so$group <- "r_so"
w_m <- rbind(write, math)
w_m$group <- "w_m"
w_sc <- rbind(write, science)
w_sc$group <- "w_sc"
w_so <- rbind(write, socst)
w_so$group <- "w_so"
m_sc <- rbind(math, science)
m_sc$group <- "m_sc"
m_so <- rbind(math, socst)
m_so$group <- "m_so"
sc_so <- rbind(science, socst)
sc_so$group <- "sc_so"
dat <- rbind(rbind(rbind(rbind(rbind(rbind(rbind(rbind(rbind(r_w, r_m), r_sc),
                                                 r_so), w_m), w_sc), w_so),
                         m_sc), m_so), sc_so)

先了解各個科目的平均數、盒狀圖

aggregate(values ~ ind, data=dat, FUN=mean)

##       ind   values
## 1    read 52.23000
## 2   write 52.77500
## 3    math 52.64500
## 4 science 51.91795
## 5   socst 52.40500

bwplot(values ~ ind, data=dat, xlab="subject")

t-test

sapply(split(dat, dat$group), function(x) t.test(x$values ~ x$ind))

##             m_sc                      m_so                     
## statistic   0.7535309                 0.2382053                
## parameter   391.0861                  390.8348                 
## p.value     0.4515844                 0.8118467                
## conf.int    Numeric,2                 Numeric,2                
## estimate    Numeric,2                 Numeric,2                
## null.value  0                         0                        
## stderr      0.9648593                 1.007534                 
## alternative "two.sided"               "two.sided"              
## method      "Welch Two Sample t-test" "Welch Two Sample t-test"
## data.name   "x$values by x$ind"       "x$values by x$ind"      
##             r_m                       r_sc                     
## statistic   -0.4225787                0.3093215                
## parameter   394.804                   392.8398                 
## p.value     0.6728327                 0.757241                 
## conf.int    Numeric,2                 Numeric,2                
## estimate    Numeric,2                 Numeric,2                
## null.value  0                         0                        
## stderr      0.9820655                 1.008825                 
## alternative "two.sided"               "two.sided"              
## method      "Welch Two Sample t-test" "Welch Two Sample t-test"
## data.name   "x$values by x$ind"       "x$values by x$ind"      
##             r_so                      r_w                      
## statistic   -0.166712                 -0.5519906               
## parameter   397.1601                  395.5706                 
## p.value     0.8676815                 0.5812665                
## conf.int    Numeric,2                 Numeric,2                
## estimate    Numeric,2                 Numeric,2                
## null.value  0                         0                        
## stderr      1.049714                  0.9873356                
## alternative "two.sided"               "two.sided"              
## method      "Welch Two Sample t-test" "Welch Two Sample t-test"
## data.name   "x$values by x$ind"       "x$values by x$ind"      
##             sc_so                     w_m                      
## statistic   -0.4712025                0.1379504                
## parameter   391.2921                  397.9456                 
## p.value     0.6377587                 0.8903494                
## conf.int    Numeric,2                 Numeric,2                
## estimate    Numeric,2                 Numeric,2                
## null.value  0                         0                        
## stderr      1.033635                  0.9423678                
## alternative "two.sided"               "two.sided"              
## method      "Welch Two Sample t-test" "Welch Two Sample t-test"
## data.name   "x$values by x$ind"       "x$values by x$ind"      
##             w_sc                      w_so                     
## statistic   0.8833551                 0.3653701                
## parameter   391.6689                  391.9818                 
## p.value     0.3775863                 0.7150322                
## conf.int    Numeric,2                 Numeric,2                
## estimate    Numeric,2                 Numeric,2                
## null.value  0                         0                        
## stderr      0.9702228                 1.012672                 
## alternative "two.sided"               "two.sided"              
## method      "Welch Two Sample t-test" "Welch Two Sample t-test"
## data.name   "x$values by x$ind"       "x$values by x$ind"

Ans：No any pair of the five variables are different in means.

(b) test if the 4 different ethnic groups have the same mean scores for each of the 5 variables (individually): read, write, math, science, and socst.

先利用盒狀圖看每個族群的成績

dat$race <- dta$race
dat$ind <- as.factor(dat$ind)
bwplot(values ~ race | ind, data=dat, xlab="race", layout=c(2, 3))

切割後使用anova

lapply(split(dat, dat$ind), function(x) anova(lm(x$values~factor(x$race))))

## $read
## Analysis of Variance Table
## 
## Response: x$values
##                 Df Sum Sq Mean Sq F value    Pr(>F)    
## factor(x$race)   3   6999 2333.08   24.22 5.262e-15 ***
## Residuals      796  76678   96.33                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $write
## Analysis of Variance Table
## 
## Response: x$values
##                 Df Sum Sq Mean Sq F value    Pr(>F)    
## factor(x$race)   3   7657 2552.21  31.813 < 2.2e-16 ***
## Residuals      796  63859   80.22                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $math
## Analysis of Variance Table
## 
## Response: x$values
##                 Df Sum Sq Mean Sq F value    Pr(>F)    
## factor(x$race)   3   7369 2456.19  31.285 < 2.2e-16 ***
## Residuals      796  62495   78.51                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $science
## Analysis of Variance Table
## 
## Response: x$values
##                 Df Sum Sq Mean Sq F value    Pr(>F)    
## factor(x$race)   3  12678  4226.0  53.074 < 2.2e-16 ***
## Residuals      776  61789    79.6                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $socst
## Analysis of Variance Table
## 
## Response: x$values
##                 Df Sum Sq Mean Sq F value    Pr(>F)    
## factor(x$race)   3   3776 1258.51  11.388 2.562e-07 ***
## Residuals      796  87969  110.51                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Ans：Different ethnic groups have the different mean scores on every subject.

(c) Perform all pairwise simple regressions for these variables: read, write, math, science, and socst.

read與write, math, science, socst的回歸

head(dta)

##    id female  race    ses schtyp     prog read write math science socst
## 1  70   male white    low public  general   57    52   41      47    57
## 2 121 female white middle public vocation   68    59   53      63    61
## 3  86   male white   high public  general   44    33   54      58    31
## 4 141   male white   high public vocation   63    44   47      53    56
## 5 172   male white middle public academic   47    52   57      53    61
## 6 113   male white middle public academic   44    52   51      63    61

read <- dta[,"read"]
read.1 <- lapply(dta[, 8:11], function(x) lm(read ~ x))
lapply(read.1[1:4], broom::tidy)

## $write
## # A tibble: 2 x 5
##   term        estimate std.error statistic  p.value
##   <chr>          <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)   18.2      3.31        5.49 1.21e- 7
## 2 x              0.646    0.0617     10.5  1.11e-20
## 
## $math
## # A tibble: 2 x 5
##   term        estimate std.error statistic  p.value
##   <chr>          <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)   14.1      3.12        4.52 1.08e- 5
## 2 x              0.725    0.0583     12.4  1.28e-26
## 
## $science
## # A tibble: 2 x 5
##   term        estimate std.error statistic  p.value
##   <chr>          <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)   18.2      3.10        5.87 1.91e- 8
## 2 x              0.654    0.0586     11.2  1.22e-22
## 
## $socst
## # A tibble: 2 x 5
##   term        estimate std.error statistic  p.value
##   <chr>          <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)   21.1      2.84        7.43 3.22e-12
## 2 x              0.594    0.0532     11.2  9.29e-23

write與math, science, socst的回歸

write <- dta[,"write"]
write.1 <- lapply(dta[, 9:11], function(x) lm(write ~ x))
lapply(read.1[1:3], broom::tidy)

## $write
## # A tibble: 2 x 5
##   term        estimate std.error statistic  p.value
##   <chr>          <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)   18.2      3.31        5.49 1.21e- 7
## 2 x              0.646    0.0617     10.5  1.11e-20
## 
## $math
## # A tibble: 2 x 5
##   term        estimate std.error statistic  p.value
##   <chr>          <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)   14.1      3.12        4.52 1.08e- 5
## 2 x              0.725    0.0583     12.4  1.28e-26
## 
## $science
## # A tibble: 2 x 5
##   term        estimate std.error statistic  p.value
##   <chr>          <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)   18.2      3.10        5.87 1.91e- 8
## 2 x              0.654    0.0586     11.2  1.22e-22

math與science, socst的回歸

math <- dta[,"math"]
math.1 <- lapply(dta[, 9:11], function(x) lm(math ~ x))
lapply(read.1[1:2], broom::tidy)

## $write
## # A tibble: 2 x 5
##   term        estimate std.error statistic  p.value
##   <chr>          <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)   18.2      3.31        5.49 1.21e- 7
## 2 x              0.646    0.0617     10.5  1.11e-20
## 
## $math
## # A tibble: 2 x 5
##   term        estimate std.error statistic  p.value
##   <chr>          <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)   14.1      3.12        4.52 1.08e- 5
## 2 x              0.725    0.0583     12.4  1.28e-26

science與socst的回歸

science <- dta[,"math"]
lm(science ~ socst, dta)

## 
## Call:
## lm(formula = science ~ socst, data = dta)
## 
## Coefficients:
## (Intercept)        socst  
##     30.1197       0.4167

2020-04-27-Homework-1

Chang, C. Y.

2020-05-05