#
# a case study - II
#

## Load the NCEA 2007 data: a colon-separated text file with a header row.
## quote = "" turns off quote handling so apostrophes inside school names
## (e.g. "Auckland Girls' Grammar School") are read literally, and
## as.is = TRUE keeps Name as character.
## Arguments are spelled out in full (header, TRUE) rather than relying on
## partial matching (h) and the reassignable shorthand T.
dta2 <- read.table("NCEA2007.txt", sep = ":", quote = "", header = TRUE,
                   as.is = TRUE)

## Dimensions of dta2 (rows, columns)
dim(dta2)
## [1] 88  4
## Structure summary of dta2
str(dta2)
## 'data.frame':    88 obs. of  4 variables:
##  $ Name  : chr  "Al-Madinah School" "Alfriston College" "Ambury Park Centre for Riding Therapy" "Aorere College" ...
##  $ Level1: num  61.5 53.9 33.3 39.5 71.2 22.1 50.8 57.3 89.3 59.8 ...
##  $ Level2: num  75 44.1 20 50.2 78.9 30.8 34.8 49.8 89.7 65.7 ...
##  $ Level3: num  0 0 0 30.6 55.5 26.3 48.9 44.6 88.6 50.4 ...
## First six rows of dta2
head(dta2)
##                                    Name Level1 Level2 Level3
## 1                     Al-Madinah School   61.5   75.0    0.0
## 2                     Alfriston College   53.9   44.1    0.0
## 3 Ambury Park Centre for Riding Therapy   33.3   20.0    0.0
## 4                        Aorere College   39.5   50.2   30.6
## 5        Auckland Girls' Grammar School   71.2   78.9   55.5
## 6                      Auckland Grammar   22.1   30.8   26.3
## Mean of each level column: apply() across columns (MARGIN = 2),
## skipping the first column (Name).
apply(dta2[, -1], MARGIN = 2, FUN = mean)
##   Level1   Level2   Level3 
## 62.26705 61.06818 47.97614
## Alternative: compute each column mean separately and show the three
## values side by side as a one-row data frame.
Level1 <- mean(dta2[["Level1"]])
Level2 <- mean(dta2[["Level2"]])
Level3 <- mean(dta2[["Level3"]])
data.frame(Level1, Level2, Level3)
##     Level1   Level2   Level3
## 1 62.26705 61.06818 47.97614
## lapply(): same means, returned as a list with one element per column
lapply(dta2[-1], mean)
## $Level1
## [1] 62.26705
## 
## $Level2
## [1] 61.06818
## 
## $Level3
## [1] 47.97614
## Alternative: stack the three column means as rows of a one-column matrix
## and label that column "levelmean".
a <- rbind(
  Level1 = mean(dta2[["Level1"]]),
  Level2 = mean(dta2[["Level2"]]),
  Level3 = mean(dta2[["Level3"]])
)
colnames(a) <- "levelmean"
a
##        levelmean
## Level1  62.26705
## Level2  61.06818
## Level3  47.97614
## sapply(): like lapply(), but the result is simplified to a named vector
sapply(dta2[-1], mean)
##   Level1   Level2   Level3 
## 62.26705 61.06818 47.97614
## Range (min, max) of each level column with apply() across columns
apply(dta2[, -1], MARGIN = 2, FUN = range)
##      Level1 Level2 Level3
## [1,]    2.8    0.0    0.0
## [2,]   97.4   95.7   95.7
## Same ranges with lapply(): a list holding one (min, max) pair per column
lapply(dta2[-1], range)
## $Level1
## [1]  2.8 97.4
## 
## $Level2
## [1]  0.0 95.7
## 
## $Level3
## [1]  0.0 95.7
## Same ranges with sapply(): the pairs are simplified into a 2-row matrix
sapply(dta2[-1], range)
##      Level1 Level2 Level3
## [1,]    2.8    0.0    0.0
## [2,]   97.4   95.7   95.7
## Range of each level via aggregate() on a long-format (stacked) copy.
## stack() keeps every vector column, so stacking all of dta2 would include
## the character column Name; that coerces `values` to character and makes
## range() compare strings, where "10.8" sorts before "2.8" and the Level1
## minimum is reported as 10.8 instead of 2.8. Stack only the numeric level
## columns so `values` stays numeric.
b <- stack(dta2[, -1])
## With Name excluded there is no Name group, so no row needs to be dropped
## from the aggregate() results.
aggregate(values ~ ind, data = b, FUN = range)
##      ind values.1 values.2
## 1 Level1      2.8     97.4
## 2 Level2      0.0     95.7
## 3 Level3      0.0     95.7
aggregate(b["values"], by = list(b$ind), FUN = range)
##   Group.1 values.1 values.2
## 1  Level1      2.8     97.4
## 2  Level2      0.0     95.7
## 3  Level3      0.0     95.7
## splitting

## Read the schools data; as.is = 2 leaves the second column unconverted.
dta <- read.csv("nzSchools.csv", as.is = 2)

## Split the Roll values into one vector per Auth group
rollsByAuth <- with(dta, split(Roll, Auth))

## Structure summary of rollsByAuth
str(rollsByAuth)
## List of 4
##  $ Other           : int 51
##  $ Private         : int [1:99] 255 39 154 73 83 25 95 85 94 729 ...
##  $ State           : int [1:2144] 318 200 455 86 577 329 637 395 201 267 ...
##  $ State Integrated: int [1:327] 438 26 191 560 151 114 126 171 211 57 ...
## What class is rollsByAuth?
class(rollsByAuth)
## [1] "list"
## Mean Roll per Auth group, returned as a list
lapply(rollsByAuth, mean)
## $Other
## [1] 51
## 
## $Private
## [1] 308.798
## 
## $State
## [1] 300.6301
## 
## $`State Integrated`
## [1] 258.3792
## Mean Roll per Auth group, simplified to a named vector
sapply(rollsByAuth, mean)
##            Other          Private            State State Integrated 
##          51.0000         308.7980         300.6301         258.3792
## Same group means with aggregate(), grouping Roll by Auth
aggregate(dta["Roll"], by = list(dta$Auth), FUN = mean)
##            Group.1     Roll
## 1            Other  51.0000
## 2          Private 308.7980
## 3            State 300.6301
## 4 State Integrated 258.3792
###