#
# An R script for IQ_Beh data set
#

#
# Read the IQ_Beh data; the first column of the file supplies the row names.
# stringsAsFactors = TRUE keeps Dep a factor (as in the recorded str() output
# below) on every R version; since R 4.0.0 the default would read it as
# character, which breaks code that uses Dep as a plotting color.
dta <- read.table("IQ_Beh.txt", header = TRUE, row.names = 1,
                  stringsAsFactors = TRUE)

#
# Compact overview of the data frame: column types and leading values
str(object = dta)
## 'data.frame':    94 obs. of  3 variables:
##  $ Dep: Factor w/ 2 levels "D","N": 2 2 2 2 1 2 2 2 2 2 ...
##  $ IQ : int  103 124 124 104 96 92 124 99 92 116 ...
##  $ BP : int  4 12 9 3 3 3 6 4 3 9 ...
# First six rows
head(dta, n = 6)
##   Dep  IQ BP
## 1   N 103  4
## 2   N 124 12
## 3   N 124  9
## 4   N 104  3
## 5   D  96  3
## 6   N  92  3
# Class of the object
class(dta)
## [1] "data.frame"
# Number of rows and number of columns
c(nrow(dta), ncol(dta))
## [1] 94  3
# Column names
colnames(dta)
## [1] "Dep" "IQ"  "BP"
# Is a single extracted column a plain vector?
is.vector(dta[["BP"]])
## [1] TRUE
# First row, all columns
head(dta, n = 1)
##   Dep  IQ BP
## 1   N 103  4
# IQ values of the first three cases
dta[["IQ"]][1:3]
## [1] 103 124 124
#
# Rows sorted by ascending BP; the last six are the highest scores
tail(dta[order(dta[["BP"]]), ], n = 6)
##    Dep  IQ BP
## 16   N  89 11
## 58   N 117 11
## 66   N 126 11
## 2    N 124 12
## 73   D  99 13
## 12   D  22 17
# Rows sorted by descending BP; the last four are the lowest scores
tail(dta[order(-dta[["BP"]]), ], n = 4)
##    Dep  IQ BP
## 77   N 124  1
## 80   N 121  1
## 24   N 106  0
## 75   N 122  0
#
# Histogram of IQ, drawn without a main title
hist(dta$IQ, main = "", xlab = "IQ")

# Side-by-side boxplots of the behavior problem score, one box per Dep group
boxplot(split(dta$BP, dta$Dep),
        ylab = "Behavior problem score",
        xlab = "Depression")

#
# Scatter plot of IQ against behavior problem score, one color per Dep group.
# Dep is converted to integer factor codes so the colors are valid whether Dep
# was read as a factor or as a character vector (the read.table default since
# R 4.0.0); passing the strings "D"/"N" as colors would be an error.
# In level order, "D" gets palette color 1 and "N" gets palette color 2.
plot(IQ ~ BP, data = dta, pch = 20,
     col = as.integer(as.factor(dta$Dep)),
     xlab = "Behavior problem score", ylab = "IQ")
grid()

#
# Empty plotting frame (type = "n") for behavior problem score against IQ
with(dta, plot(IQ, BP, type = "n",
               xlab = "IQ", ylab = "Behavior problem score"))
# Draw each case as its Dep label instead of a point symbol
with(dta, text(IQ, BP, labels = Dep, cex = 0.5))
# Least-squares line fitted within each Dep group:
# default line type for "D", dashed (lty = 2) for "N"
abline(lm(BP ~ IQ, data = subset(dta, Dep == "D")))
abline(lm(BP ~ IQ, data = subset(dta, Dep == "N")), lty = 2)

## end of the exploratory plots; further analyses follow

# Welch two-sample t-test (unequal variances): mean IQ by Dep group
t.test(IQ ~ Dep, data = dta, var.equal = FALSE, conf.level = 0.95)
## 
##  Welch Two Sample t-test
## 
## data:  IQ by Dep
## t = -1.6374, df = 15.53, p-value = 0.1216
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -26.926586   3.490299
## sample estimates:
## mean in group D mean in group N 
##        101.0667        112.7848
# p > .05: not statistically significant (no evidence that mean IQ differs
# between the two depression groups)

# Welch two-sample t-test: mean behavior problem score by Dep group
t.test(BP ~ Dep, data = dta, var.equal = FALSE, conf.level = 0.95)
## 
##  Welch Two Sample t-test
## 
## data:  BP by Dep
## t = 1.4924, df = 17.14, p-value = 0.1538
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.6637017  3.8788916
## sample estimates:
## mean in group D mean in group N 
##        7.000000        5.392405
# p > .05: not statistically significant (no evidence that the mean behavior
# problem score differs between the two depression groups)

# Pearson correlation between IQ and behavior problem score
cor.test(dta$IQ, dta$BP, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  dta$IQ and dta$BP
## t = -3.8088, df = 92, p-value = 0.0002518
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.5319037 -0.1798969
## sample estimates:
##        cor 
## -0.3690615
# p < .05: statistically significant (IQ and behavior problem score are
# correlated; the estimated correlation is negative)


# Monthly birth counts: one row per month with columns birth and month.
# Argument names are spelled out in full (no partial matching) and TRUE is
# used instead of the reassignable T.
dta <- read.table("C:/tmp/usBirths2015.txt", header = TRUE,
                  stringsAsFactors = FALSE)
str(dta)
## 'data.frame':    12 obs. of  2 variables:
##  $ birth: int  325955 298058 328923 320832 327917 330541 353415 351791 347516 339007 ...
##  $ month: chr  "January" "February" "March" "April" ...
# Replace the month names with season labels, three consecutive rows each.
# The recoding is positional, so guard against a file that does not have
# exactly 12 rows (assumes rows are in calendar order -- TODO confirm).
# NOTE(review): rows 1-3 (January-March) are labelled "spring"; confirm this
# is the intended season definition.
stopifnot(nrow(dta) == 12L)
dta$month <- rep(c("spring", "summer", "autumn", "winter"), each = 3)
dta
##     birth  month
## 1  325955 spring
## 2  298058 spring
## 3  328923 spring
## 4  320832 summer
## 5  327917 summer
## 6  330541 summer
## 7  353415 autumn
## 8  351791 autumn
## 9  347516 autumn
## 10 339007 winter
## 11 318820 winter
## 12 335722 winter
# Mean number of births per season label
aggregate(birth ~ month, data = dta, FUN = mean)
##    month    birth
## 1 autumn 350907.3
## 2 spring 317645.3
## 3 summer 326430.0
## 4 winter 331183.0
# Reading-time data: seven rows; columns Snt, Sp, Wrds, New followed by
# S01-S10 (presumably one reading-time column per subject -- TODO confirm).
# Argument names are spelled out in full (no partial matching) and TRUE is
# used instead of the reassignable T.
dta <- read.table("C:/tmp/readingtimes.txt", header = TRUE,
                  stringsAsFactors = FALSE)
dta
##   Snt Sp Wrds New   S01   S02   S03   S04   S05    S06    S07   S08   S09   S10
## 1   1  1   13   1 3.429 2.795 4.161 3.071 3.625  3.161  3.232 7.161 1.536 4.063
## 2   2  2   16   3 6.482 5.411 4.491 5.063 9.295  5.643  8.357 4.313 2.946 6.652
## 3   3  3    9   2 1.714 2.339 3.018 2.464 6.045  2.455  4.920 3.366 1.375 2.179
## 4   4  4    9   2 3.679 3.714 2.866 2.732 4.205  6.241  3.723 6.330 1.152 3.661
## 5   5  5   10   3 4.000 2.902 2.991 2.670 3.884  3.223  3.143 6.143 2.759 3.330
## 6   6  6   18   4 6.973 8.018 6.625 7.571 8.795 13.188 11.170 6.071 7.964 7.866
## 7   7  7    6   1 2.634 1.750 2.268 2.884 3.491  3.688  2.054 1.696 1.455 3.705
# Mean of each S01-S10 column, stored in its own variable.
# Wrapping an assignment in parentheses also prints the assigned value.
(dta01 <- mean(dta[["S01"]]))
## [1] 4.130143
(dta02 <- mean(dta[["S02"]]))
## [1] 3.847
(dta03 <- mean(dta[["S03"]]))
## [1] 3.774286
(dta04 <- mean(dta[["S04"]]))
## [1] 3.779286
(dta05 <- mean(dta[["S05"]]))
## [1] 5.62
(dta06 <- mean(dta[["S06"]]))
## [1] 5.371286
(dta07 <- mean(dta[["S07"]]))
## [1] 5.228429
(dta08 <- mean(dta[["S08"]]))
## [1] 5.011429
(dta09 <- mean(dta[["S09"]]))
## [1] 2.741
(dta10 <- mean(dta[["S10"]]))
## [1] 4.493714
# Mean of every S01-S10 column as one named numeric vector.
# Columns are selected by name (an "S" followed only by digits) rather than by
# the hard-coded positions 5:14, and vapply() iterates over the data-frame
# columns directly instead of coercing the data frame to a matrix as apply()
# does; each result is checked to be a single number.
subject_cols <- grep("^S[0-9]+$", names(dta), value = TRUE)
dtr <- vapply(dta[subject_cols], mean, numeric(1))
dtr
##      S01      S02      S03      S04      S05      S06      S07      S08 
## 4.130143 3.847000 3.774286 3.779286 5.620000 5.371286 5.228429 5.011429 
##      S09      S10 
## 2.741000 4.493714
# Positions of the means from smallest to largest (position 9 is S09)
order(dtr)
##  [1]  9  3  4  2  1 10  8  7  6  5