# High schools

library(hrbrthemes)
library(tidyverse)
# Set font_rc (from hrbrthemes) as the default font for text geoms.
# NOTE(review): no plot is drawn in the visible part of this script, so this
# call may be unnecessary here -- confirm before removing.
update_geom_font_defaults(font_rc)

# Read the data from hs0.txt (first row holds the column names).
# Keep only the rows whose race is "asian".
# r0 is the observed correlation between math and socst in that subset.
dta <- read.table("hs0.txt", header = TRUE)
dta.asian <- subset(dta, race == "asian")
r0 <- cor(dta.asian$math, dta.asian$socst)

# One-sided permutation test for the math-socst correlation.
# cnt counts the permutations whose correlation is at least as large as r0.
# nIter is the number of random permutations (1001).
# Each iteration shuffles socst -- the variable paired with math in r0 -- so
# the permuted correlations form the null distribution of the observed
# statistic (shuffling read instead would not correspond to r0).
# The last line is the proportion of permutations with r >= r0.

cnt <- 0
nIter <- 1001
for (i in seq_len(nIter)) {
  shuffled <- sample(dta.asian$socst)
  r <- cor(shuffled, dta.asian$math)
  if (r0 <= r) {
    cnt <- cnt + 1
  }
}
cnt / nIter

## [1] 0.02997003

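# The output of the following code is too long, so the code is left commented
# out and its result is not shown.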
#newread <- replicate(nIter, sample(dta.asian$read))
#newread <- data.frame(unlist(newread))
#with(newread, lapply(names(newread), function(x)
#cor(dta.asian$math, eval(substitute(tmp, list(tmp=as.name(x))))))
#)

# Pearson correlation test between math and socst, restricted to the rows
# where race is "asian" (the same pair of variables used for r0).
cor.test(dta[dta$race=="asian", "math"], dta[dta$race=="asian", "socst"])

## 
##  Pearson's product-moment correlation
## 
## data:  dta[dta$race == "asian", "math"] and dta[dta$race == "asian", "socst"]
## t = 1.9887, df = 9, p-value = 0.07796
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.07083501  0.86552255
## sample estimates:
##       cor 
## 0.5525177

# Permutation test for the math-socst correlation, written with repeat and an
# explicit break instead of a for loop (adapted from online examples).
# The data are reloaded so this section can be run on its own.
dta <- read.table("hs0.txt", header = TRUE)
dta.asian <- subset(dta, race == "asian")
r0 <- cor(dta.asian$math, dta.asian$socst)
cnt <- 0
nIter <- 1001
i <- 0  # number of permutations completed so far

repeat {
  # Shuffle socst, the variable paired with math in r0.
  shuffled <- sample(dta.asian$socst)
  r <- cor(shuffled, dta.asian$math)
  if (r0 <= r) {
    cnt <- cnt + 1
  }
  i <- i + 1
  # Stop after exactly nIter permutations so the denominator below matches
  # the number of draws actually made.
  if (i >= nIter) {
    break
  }
}
cnt / nIter

## [1] 0.03696304

#newread <- replicate(nIter, sample(dta.asian$read))
#newread <- data.frame(unlist(newread))
#with(newread, lapply(names(newread), function(x)
#cor(dta.asian$math, eval(substitute(tmp, list(tmp=as.name(x))))))
#)
# Pearson correlation test between math and socst for rows with race "asian".
cor.test(dta[dta$race=="asian", "math"], dta[dta$race=="asian", "socst"])

## 
##  Pearson's product-moment correlation
## 
## data:  dta[dta$race == "asian", "math"] and dta[dta$race == "asian", "socst"]
## t = 1.9887, df = 9, p-value = 0.07796
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.07083501  0.86552255
## sample estimates:
##       cor 
## 0.5525177