# Function: in-class exercise 4.5

# Function: in-class exercise 4

# a case study


## read the NZ schools data; as.is = 2 keeps column 2 (Name, which contains
## white space) as plain character strings instead of converting it to a factor
dta <- read.csv("C:/Users/5A88/Desktop/nzSchools.csv", as.is = 2)

## display the structure of the data
str(dta)

## 'data.frame':    2571 obs. of  6 variables:
##  $ ID  : int  1015 1052 1062 1092 1130 1018 1029 1030 1588 1154 ...
##  $ Name: chr  "Hora Hora School" "Morningside School" "Onerahi School" "Raurimu Avenue School" ...
##  $ City: Factor w/ 541 levels "Ahaura","Ahipara",..: 533 533 533 533 533 533 533 533 533 533 ...
##  $ Auth: Factor w/ 4 levels "Other","Private",..: 3 3 3 3 3 3 3 3 4 3 ...
##  $ Dec : int  2 3 4 2 4 8 5 5 6 1 ...
##  $ Roll: int  318 200 455 86 577 329 637 395 438 201 ...

## display the dimension of the data (rows, columns)
dim(dta)

## [1] 2571    6

## binning

## two-way split: label a school "Large" if its Roll is above the median,
## otherwise "Small"
dta$Size <- ifelse(dta$Roll > median(dta$Roll), "Large", "Small")

## remove the Size column again (assigning NULL drops a data frame column)
dta$Size <- NULL

## display the first rows of the data; Size is no longer present
head(dta)

## cut the range of Roll into 3 equal-width intervals and label them
## Small, Medium and Large
dta$Size <- cut(dta$Roll, breaks = 3, labels = c("Small", "Medium", "Large"))

## display the number of schools in each Size group
table(dta$Size)

##
##  Small Medium  Large
##   2555     15      1

## sorting

## store in dta$RollOrd the row indices that put the data in decreasing order
## of Roll; this is the permutation returned by order(), not a per-row rank,
## so it is only meaningful when used as a row index as below
dta$RollOrd <- order(dta$Roll, decreasing = TRUE)

## display the first rows after sorting by decreasing Roll
head(dta[dta$RollOrd, ])

## display the last rows after sorting by decreasing Roll
tail(dta[dta$RollOrd, ])

## display the first rows sorted by City, then by Roll within City,
## both in decreasing order
head(dta[order(dta$City, dta$Roll, decreasing = TRUE), ])

## display the last rows of the same City/Roll decreasing ordering
tail(dta[order(dta$City, dta$Roll, decreasing = TRUE), ])

## counting

## frequency of each authority type in Auth
table(dta[["Auth"]])

##
##            Other          Private            State State Integrated
##                1               99             2144              327

## keep the frequency table in authtbl, then print it
authtbl <- table(dta[["Auth"]])
authtbl

##
##            Other          Private            State State Integrated
##                1               99             2144              327

## the stored object is of class "table"
class(authtbl)

## [1] "table"

## show the rows whose Auth is "Other"
dta[dta[["Auth"]] == "Other", ]

## cross-tabulate Auth (rows) against Dec (columns)
xtabs(formula = ~ Auth + Dec, data = dta)

##                   Dec
## Auth                 1   2   3   4   5   6   7   8   9  10
##   Other              1   0   0   0   0   0   0   0   0   0
##   Private            0   0   2   6   2   2   6  11  12  38
##   State            259 230 208 219 214 215 188 200 205 205
##   State Integrated  12  22  35  28  38  34  45  45  37  31

## aggregating

## overall mean of Roll
mean(dta[["Roll"]])

## [1] 295.4737

## mean of Roll restricted to the schools whose Auth is "Private"
mean(dta[dta[["Auth"]] == "Private", "Roll"])

## [1] 308.798

## mean of Roll within each Auth group
aggregate(x = dta["Roll"], by = list(dta[["Auth"]]), FUN = mean)

## flag schools whose Dec is greater than 5 in a new logical variable Rich,
## then tabulate the flag
## NOTE(review): the counts below sum to 2550, fewer than the 2571 rows
## reported by dim(); presumably Dec is missing for the remainder -- confirm
dta[["Rich"]] <- dta[["Dec"]] > 5
table(dta[["Rich"]])

##
## FALSE  TRUE
##  1276  1274

## mean of Roll within each combination of Auth and Rich
aggregate(
  x = dta["Roll"],
  by = list(dta[["Auth"]], dta[["Rich"]]),
  FUN = mean
)

## smallest and largest Roll within each Auth group
by(data = dta["Roll"], INDICES = list(dta[["Auth"]]), FUN = range)

## : Other
## [1] 51 51
## ------------------------------------------------------------
## : Private
## [1]    7 1663
## ------------------------------------------------------------
## : State
## [1]    5 5546
## ------------------------------------------------------------
## : State Integrated
## [1]   18 1475

###

# Function: in-class exercise 5

#
# a case study - II
#

## read the colon-separated text file into dta2: the first line holds the
## column names, quoting is disabled, and character columns stay as strings
dta2 <- read.table("C:/Users/5A88/Desktop/NCEA2007.txt", sep = ":", quote = "",
                   header = TRUE, as.is = TRUE)

## display the dimension of dta2 (rows, columns)
dim(dta2)

## [1] 88  4

## display the structure of dta2
str(dta2)

## 'data.frame':    88 obs. of  4 variables:
##  $ Name  : chr  "Al-Madinah School" "Alfriston College" "Ambury Park Centre for Riding Therapy" "Aorere College" ...
##  $ Level1: num  61.5 53.9 33.3 39.5 71.2 22.1 50.8 57.3 89.3 59.8 ...
##  $ Level2: num  75 44.1 20 50.2 78.9 30.8 34.8 49.8 89.7 65.7 ...
##  $ Level3: num  0 0 0 30.6 55.5 26.3 48.9 44.6 88.6 50.4 ...

## display the first rows in dta2
head(dta2)

## compute the mean of every column except the first (Name);
## MARGIN = 2 applies the function column by column
apply(dta2[, -1], MARGIN = 2, FUN = mean)

##   Level1   Level2   Level3
## 62.26705 61.06818 47.97614

## same column means with lapply: the result is a list, one element per column
lapply(dta2[, -1], FUN = mean)

## $Level1
## [1] 62.26705
##
## $Level2
## [1] 61.06818
##
## $Level3
## [1] 47.97614

## same column means with sapply: the list is simplified to a named vector
sapply(dta2[, -1], FUN = mean)

##   Level1   Level2   Level3
## 62.26705 61.06818 47.97614

## find the range (min, max) of every column except the first;
## apply returns the result as a matrix
apply(dta2[, -1], MARGIN = 2, FUN = range)

##      Level1 Level2 Level3
## [1,]    2.8    0.0    0.0
## [2,]   97.4   95.7   95.7

## same column ranges returned as a list
lapply(dta2[, -1], FUN = range)

## $Level1
## [1]  2.8 97.4
##
## $Level2
## [1]  0.0 95.7
##
## $Level3
## [1]  0.0 95.7

## same column ranges with sapply: the list is simplified to a matrix
sapply(dta2[, -1], FUN = range)

##      Level1 Level2 Level3
## [1,]    2.8    0.0    0.0
## [2,]   97.4   95.7   95.7

## splitting

## break Roll into one vector per Auth group and keep the pieces in rollsByAuth
rollsByAuth <- split(x = dta[["Roll"]], f = dta[["Auth"]])

## inspect the structure of rollsByAuth
str(rollsByAuth)

## List of 4
##  $ Other           : int 51
##  $ Private         : int [1:99] 255 39 154 73 83 25 95 85 94 729 ...
##  $ State           : int [1:2144] 318 200 455 86 577 329 637 395 201 267 ...
##  $ State Integrated: int [1:327] 438 26 191 560 151 114 126 171 211 57 ...

## rollsByAuth is an ordinary list
class(rollsByAuth)

## [1] "list"

## mean Roll of each Auth group, returned as a list
lapply(rollsByAuth, mean)

## $Other
## [1] 51
##
## $Private
## [1] 308.798
##
## $State
## [1] 300.6301
##
## $`State Integrated`
## [1] 258.3792

## the same group means via sapply, simplified to a named vector
sapply(rollsByAuth, mean)

##            Other          Private            State State Integrated
##          51.0000         308.7980         300.6301         258.3792

###