# Function in-class exercise4.5
# Function in-class exercise4
# a case study
## Read the school data. as.is = 2 keeps column 2 (the school names, which
## contain white space) as character instead of converting it to a factor.
dta <- read.csv(
  file = "C:/Users/5A88/Desktop/nzSchools.csv",
  as.is = 2
)
## Display the structure of the data.
str(object = dta)
## 'data.frame': 2571 obs. of 6 variables:
## $ ID : int 1015 1052 1062 1092 1130 1018 1029 1030 1588 1154 ...
## $ Name: chr "Hora Hora School" "Morningside School" "Onerahi School" "Raurimu Avenue School" ...
## $ City: Factor w/ 541 levels "Ahaura","Ahipara",..: 533 533 533 533 533 533 533 533 533 533 ...
## $ Auth: Factor w/ 4 levels "Other","Private",..: 3 3 3 3 3 3 3 3 4 3 ...
## $ Dec : int 2 3 4 2 4 8 5 5 6 1 ...
## $ Roll: int 318 200 455 86 577 329 637 395 438 201 ...
## [1] 2571 6
## binning
## Label a school "Large" when its Roll is above the median Roll,
## otherwise "Small".
dta$Size <- ifelse(dta$Roll > median(dta$Roll), "Large", "Small")
## Assigning NULL removes the Size column from dta again.
dta$Size <- NULL
## Display the first rows of the data.
head(dta)
## Cut dta$Roll into 3 intervals and label them Small, Medium and Large.
## With a single number as `breaks`, cut() divides the range of Roll into
## that many intervals, so the group sizes can be very uneven.
dta$Size <- cut(dta$Roll, breaks = 3, labels = c("Small", "Medium", "Large"))
## Display dta$Size as a table.
table(dta$Size)
##
## Small Medium Large
## 2555 15 1
## sorting
## Store in dta$RollOrd the row permutation that puts dta$Roll in
## decreasing order. TRUE is spelled out because T is an ordinary variable
## that can be reassigned.
dta$RollOrd <- order(dta$Roll, decreasing = TRUE)
## Display the first rows of dta in decreasing order of Roll.
head(dta[dta$RollOrd, ])
## Display the first rows in decreasing order of dta$City and dta$Roll.
head(dta[order(dta$City, dta$Roll, decreasing = TRUE), ])
## Display the last rows in decreasing order of dta$City and dta$Roll.
tail(dta[order(dta$City, dta$Roll, decreasing = TRUE), ])
##
## Other Private State State Integrated
## 1 99 2144 327
##
## Other Private State State Integrated
## 1 99 2144 327
## [1] "table"
## Dec
## Auth 1 2 3 4 5 6 7 8 9 10
## Other 1 0 0 0 0 0 0 0 0 0
## Private 0 0 2 6 2 2 6 11 12 38
## State 259 230 208 219 214 215 188 200 205 205
## State Integrated 12 22 35 28 38 34 45 45 37 31
## [1] 295.4737
## Mean Roll of the schools whose Auth is "Private".
with(dta, mean(Roll[Auth == "Private"]))
## [1] 308.798
## Create the logical variable Rich, TRUE where Dec is greater than 5, and
## display its counts as a table.
dta[["Rich"]] <- dta[["Dec"]] > 5
table(dta[["Rich"]])
##
## FALSE TRUE
## 1276 1274
## Mean Roll within every combination of Auth and Rich.
aggregate(x = dta["Roll"], by = list(dta$Auth, dta$Rich), FUN = mean)
## NOTE(review): the recorded output that follows does not look like
## aggregate() output; presumably it came from a different call - confirm.
## : Other
## [1] 51 51
## ------------------------------------------------------------
## : Private
## [1] 7 1663
## ------------------------------------------------------------
## : State
## [1] 5 5546
## ------------------------------------------------------------
## : State Integrated
## [1] 18 1475
# Function in-class exercise5
#
# a case study - II
#
## Read the colon-separated text file and save it as dta2. The first line
## holds the column names, quote = "" turns off quote handling, and
## as.is = TRUE keeps character columns as character.
## Argument names and TRUE are spelled out in full: `h` only worked through
## partial matching of `header`, and T is a reassignable variable.
dta2 <- read.table(
  "C:/Users/5A88/Desktop/NCEA2007.txt",
  sep = ":",
  quote = "",
  header = TRUE,
  as.is = TRUE
)
## Display the dimensions of dta2.
dim(dta2)
## [1] 88 4
## 'data.frame': 88 obs. of 4 variables:
## $ Name : chr "Al-Madinah School" "Alfriston College" "Ambury Park Centre for Riding Therapy" "Aorere College" ...
## $ Level1: num 61.5 53.9 33.3 39.5 71.2 22.1 50.8 57.3 89.3 59.8 ...
## $ Level2: num 75 44.1 20 50.2 78.9 30.8 34.8 49.8 89.7 65.7 ...
## $ Level3: num 0 0 0 30.6 55.5 26.3 48.9 44.6 88.6 50.4 ...
## Level1 Level2 Level3
## 62.26705 61.06818 47.97614
## $Level1
## [1] 62.26705
##
## $Level2
## [1] 61.06818
##
## $Level3
## [1] 47.97614
## Level1 Level2 Level3
## 62.26705 61.06818 47.97614
## Range (minimum and maximum) of every column except column 1, returned
## as a matrix with one column per variable.
apply(X = dta2[, -1], MARGIN = 2, FUN = range)
## Level1 Level2 Level3
## [1,] 2.8 0.0 0.0
## [2,] 97.4 95.7 95.7
## $Level1
## [1] 2.8 97.4
##
## $Level2
## [1] 0.0 95.7
##
## $Level3
## [1] 0.0 95.7
## Level1 Level2 Level3
## [1,] 2.8 0.0 0.0
## [2,] 97.4 95.7 95.7
## splitting
## Split Roll into one vector per Auth group and save the list as
## rollsByAuth.
rollsByAuth <- with(dta, split(x = Roll, f = Auth))
## Display the structure of rollsByAuth.
str(object = rollsByAuth)
## List of 4
## $ Other : int 51
## $ Private : int [1:99] 255 39 154 73 83 25 95 85 94 729 ...
## $ State : int [1:2144] 318 200 455 86 577 329 637 395 201 267 ...
## $ State Integrated: int [1:327] 438 26 191 560 151 114 126 171 211 57 ...
## [1] "list"
## Split Roll by Auth group and compute the mean of each group; the result
## is a list with one element per group.
lapply(X = split(x = dta$Roll, f = dta$Auth), FUN = mean)
## $Other
## [1] 51
##
## $Private
## [1] 308.798
##
## $State
## [1] 300.6301
##
## $`State Integrated`
## [1] 258.3792
## Do the same thing as lapply but return the result as a named vector.
## vapply() is used instead of sapply() so the result is always a numeric
## vector; it raises an error if any group does not yield a single number.
vapply(split(dta$Roll, dta$Auth), mean, FUN.VALUE = numeric(1))
## Other Private State State Integrated
## 51.0000 308.7980 300.6301 258.3792