# Load the IQ / behavior-problem data set (whitespace-delimited, header row).
# NOTE(review): the path is absolute and machine-specific; confirm it exists
# before running, or point it at a project-relative copy of IQ_Beh.txt.
# stringsAsFactors = TRUE keeps Dep a factor on R >= 4.0 as well (where the
# default became FALSE); the scatter plot passes Dep as `col`, which needs a
# factor rather than the strings "D"/"N".
dta <- read.table("/Users/User/Desktop/DM_R/hk0316/exercise/ex3/IQ_Beh.txt",
                  header = TRUE, stringsAsFactors = TRUE)

# Data summary: compact overview of each column's type and leading values.
str(object = dta)
## 'data.frame':    94 obs. of  4 variables:
##  $ Mom: int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Dep: Factor w/ 2 levels "D","N": 2 2 2 2 1 2 2 2 2 2 ...
##  $ IQ : int  103 124 124 104 96 92 124 99 92 116 ...
##  $ BP : int  4 12 9 3 3 3 6 4 3 9 ...

# First six rows of the data.
head(dta, n = 6L)
##   Mom Dep  IQ BP
## 1   1   N 103  4
## 2   2   N 124 12
## 3   3   N 124  9
## 4   4   N 104  3
## 5   5   D  96  3
## 6   6   N  92  3

# Check what kind of object the data set is.
class(x = dta)
## [1] "data.frame"

# 資料的筆數與變項數

# Number of rows (observations) followed by number of columns (variables).
c(nrow(dta), ncol(dta))
## [1] 94  4

#94筆(列) 4個變項(欄)

# 變項名稱為何?

# List the column (variable) names of the data frame.
colnames(dta)
## [1] "Mom" "Dep" "IQ"  "BP"

# dta 的 BP 欄位是 vector (向量) 嗎?

# Is the BP column a plain vector (no attributes other than names)?
is.vector(dta[["BP"]])
## [1] TRUE

# 資料的第一列

# First row of the data, kept as a one-row data frame.
dta[1L, , drop = FALSE]
##   Mom Dep  IQ BP
## 1   1   N 103  4

# 取出資料第 1 到 3 列的 "IQ" 變項

# IQ values for rows 1 through 3, returned as a plain vector.
dta[["IQ"]][1:3]
## [1] 103 124 124

# 依 BP 由小到大排序後,查看最後 6 列資料

# Sort rows by BP ascending, then show the last six (the highest BP scores).
tail(dta[order(dta[["BP"]]), ], n = 6L)
##    Mom Dep  IQ BP
## 16  16   N  89 11
## 58  58   N 117 11
## 66  66   N 126 11
## 2    2   N 124 12
## 73  73   D  99 13
## 12  12   D  22 17

# 依 BP 由大到小排序後,查看最後 4 列資料

# Sort rows by BP descending (negated key), then show the last four
# (the lowest BP scores).
tail(dta[order(-dta[["BP"]]), ], n = 4L)
##    Mom Dep  IQ BP
## 77  77   N 124  1
## 80  80   N 121  1
## 24  24   N 106  0
## 75  75   N 122  0

# 畫 IQ 的直方圖,圖標題留白

# Histogram of IQ with an explicit x-axis label and no main title.
hist(dta$IQ, xlab = "IQ", main = "")

# 盒狀圖:數值變項為 BP,依 Dep 分組

# Box plot of behavior problem score, one box per depression group.
boxplot(
  BP ~ Dep,
  data = dta,
  ylab = "Behavior problem score",
  xlab = "Depression"
)

# 散布圖:Y 軸為 IQ,X 軸為 BP;pch 指定點的樣式,col 依 Dep 分組著色

# Scatter plot of IQ (y) against behavior problem score (x), drawn with
# pch = 20 and coloured by depression group.
# as.factor() makes sure the group is mapped to palette indices even when Dep
# was read as character (the R >= 4.0 default); passing the strings "D"/"N"
# straight to `col` fails with an invalid colour name error. When Dep is
# already a factor this is a no-op, so the plot is unchanged.
plot(IQ ~ BP, data = dta, pch = 20, col = as.factor(dta$Dep),
     xlab = "Behavior problem score", ylab = "IQ")
grid()   # overlay reference grid lines

# 依 Dep 分組的回歸線 (BP 對 IQ)

# Regression of BP on IQ, fitted separately for each depression group.
#  - plot(..., type = "n") only sets up the axes; no points are drawn.
#  - text() then places each observation's group code (Dep) at its
#    (IQ, BP) position, shrunk with cex = 0.5.
#  - abline() adds the fitted line for group "D" with the default line type
#    and for group "N" with lty = 2 (dashed).
plot(
  BP ~ IQ,
  data = dta,
  type = "n",
  xlab = "IQ",
  ylab = "Behavior problem score"
)
text(x = dta$IQ, y = dta$BP, labels = dta$Dep, cex = 0.5)
abline(reg = lm(BP ~ IQ, data = dta, subset = Dep == "D"))
abline(reg = lm(BP ~ IQ, data = dta, subset = Dep == "N"), lty = 2)

# line charts

# 補充說明

# https://www.statmethods.net/graphs/line.html

# A caption

# A caption

# end

# (1) Did the two groups of children differ in IQ and/or behavioral problems?

# 兩組小孩的 Behavioral Problems (BP) 是否有差異 (t-test)

# Welch two-sample t-test (unequal variances) comparing mean BP between the
# two depression groups; the defaults are spelled out explicitly.
t.test(BP ~ Dep, data = dta,
       alternative = "two.sided", var.equal = FALSE, conf.level = 0.95)
## 
##  Welch Two Sample t-test
## 
## data:  BP by Dep
## t = 1.4924, df = 17.14, p-value = 0.1538
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.6637017  3.8788916
## sample estimates:
## mean in group D mean in group N 
##        7.000000        5.392405

# p = 0.1538 > 0.05,故兩組小孩在 BP 上沒有顯著差異

# 兩組小孩的 IQ 是否有差異 (t-test)

# Welch two-sample t-test (unequal variances) comparing mean IQ between the
# two depression groups; the defaults are spelled out explicitly.
t.test(IQ ~ Dep, data = dta,
       alternative = "two.sided", var.equal = FALSE, conf.level = 0.95)
## 
##  Welch Two Sample t-test
## 
## data:  IQ by Dep
## t = -1.6374, df = 15.53, p-value = 0.1216
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -26.926586   3.490299
## sample estimates:
## mean in group D mean in group N 
##        101.0667        112.7848

# p = 0.1216 > 0.05,故兩組小孩在 BP 和 IQ 上皆沒有顯著差異

# (2) Was there any evidence of a relationship between IQ and behavioral problems?

# Pearson product-moment correlation test between BP and IQ.
cor.test(x = dta$BP, y = dta$IQ, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  dta$BP and dta$IQ
## t = -3.8088, df = 92, p-value = 0.0002518
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.5319037 -0.1798969
## sample estimates:
##        cor 
## -0.3690615

# 故 IQ 和 BP 呈中等程度負相關 (r = -0.369, p < .001)