Title Week 6 assignment IS607

Seoungyoon Lim

# NYC 3rd-to-8th-grade math test results, 2013-2014

# 1. Choose and load the R dataset
# stringsAsFactors = TRUE keeps the text columns (Borough, Category) as
# factors on R >= 4.0, where the read.csv default became FALSE, so that
# str() and summary() report factor levels as in the recorded output.
math <- read.csv(
  "/Users/seoungyoonlim/Documents/cuny/IS607/week6/MathResults20132014.csv",
  stringsAsFactors = TRUE
)
str(math)  # inspect column types and dimensions right after loading
## 'data.frame':    240 obs. of  6 variables:
##  $ Borough      : Factor w/ 5 levels "BRONX","BROOKLYN",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Grade        : int  3 3 3 3 3 3 3 3 4 4 ...
##  $ Year         : int  2013 2013 2013 2013 2014 2014 2014 2014 2013 2013 ...
##  $ Category     : Factor w/ 4 levels "Asian","Black",..: 1 2 3 4 1 2 3 4 1 2 ...
##  $ Number.Tested: int  586 4474 9961 707 618 4419 10061 721 624 4326 ...
##  $ Mean         : int  313 284 285 304 315 286 288 309 316 283 ...
# Grade and Year are read from the CSV as integers, but they serve as
# grouping labels in this analysis, so both columns are converted to
# factors before any summaries are produced.
math[c("Grade", "Year")] <- lapply(math[c("Grade", "Year")], as.factor)

# 2. Generate summary-level descriptive statistics
# summary() on the data frame gives per-level counts for the factor columns
# and min / quartiles / median / mean / max for the numeric columns.
summary(math)
##           Borough   Grade    Year         Category  Number.Tested  
##  BRONX        :48   3:40   2013:120   Asian   :60   Min.   :  280  
##  BROOKLYN     :48   4:40   2014:120   Black   :60   1st Qu.: 1082  
##  MANHATTAN    :48   5:40              Hispanic:60   Median : 2884  
##  QUEENS       :48   6:40              White   :60   Mean   : 3389  
##  STATEN ISLAND:48   7:40                            3rd Qu.: 4498  
##                     8:40                            Max.   :10061  
##       Mean      
##  Min.   :275.0  
##  1st Qu.:288.0  
##  Median :300.5  
##  Mean   :305.0  
##  3rd Qu.:322.0  
##  Max.   :344.0
# 3. Determine the frequency of one categorical variable:
# number of rows recorded for each ethnic category.
table(math[["Category"]])
## 
##    Asian    Black Hispanic    White 
##       60       60       60       60
# 4. Determine the joint frequency of two categorical variables:
# rows are ethnic categories, columns are boroughs.
table(math[["Category"]], math[["Borough"]])
##           
##            BRONX BROOKLYN MANHATTAN QUEENS STATEN ISLAND
##   Asian       12       12        12     12            12
##   Black       12       12        12     12            12
##   Hispanic    12       12        12     12            12
##   White       12       12        12     12            12
# Second two-way frequency table: rows are test years, columns are boroughs.
table(math[["Year"]], math[["Borough"]])
##       
##        BRONX BROOKLYN MANHATTAN QUEENS STATEN ISLAND
##   2013    24       24        24     24            24
##   2014    24       24        24     24            24
# 5. Create a graph for a single numeric variable
# Histogram of the Mean column over every row of the dataset, i.e. the
# overall distribution of the reported mean math scores.
hist(math$Mean)

plot of chunk unnamed-chunk-1

# Mean math score by ethnic group for 2013, drawn as side-by-side boxplots.
# Each group is filtered on math_2013's own Category column. Row positions
# computed from the full `math` table do not line up with the rows of the
# 2013 subset: positions past its last row come back as all-NA rows.
math_2013 <- math[which(math$Year == 2013), ]
A13 <- math_2013[which(math_2013$Category == "Asian"), ]
B13 <- math_2013[which(math_2013$Category == "Black"), ]
W13 <- math_2013[which(math_2013$Category == "White"), ]
H13 <- math_2013[which(math_2013$Category == "Hispanic"), ]
# The order of `names` must match the order of the data vectors.
boxplot(
  A13$Mean, B13$Mean, W13$Mean, H13$Mean,
  main = "2013 Math test",
  names = c("Asian", "Black", "White", "Hispanic")
)

plot of chunk unnamed-chunk-1

# Mean math score by ethnic group for 2014, drawn as side-by-side boxplots.
# Each group is filtered on math_2014's own Category column. Row positions
# computed from the full `math` table do not line up with the rows of the
# 2014 subset: positions past its last row come back as all-NA rows.
math_2014 <- math[which(math$Year == 2014), ]
A14 <- math_2014[which(math_2014$Category == "Asian"), ]
B14 <- math_2014[which(math_2014$Category == "Black"), ]
W14 <- math_2014[which(math_2014$Category == "White"), ]
H14 <- math_2014[which(math_2014$Category == "Hispanic"), ]
# The order of `names` must match the order of the data vectors.
boxplot(
  A14$Mean, B14$Mean, W14$Mean, H14$Mean,
  main = "2014 Math test",
  names = c("Asian", "Black", "White", "Hispanic")
)

plot of chunk unnamed-chunk-1

# Per-borough subsets of the math results, followed by a histogram of the
# mean scores for Staten Island.
# Borough labels must match the factor levels exactly; the earlier spelling
# 'BROOKLIN' matched no rows and left `bn` empty.
bx <- math[which(math$Borough == "BRONX"), ]
mh <- math[which(math$Borough == "MANHATTAN"), ]
qn <- math[which(math$Borough == "QUEENS"), ]
bn <- math[which(math$Borough == "BROOKLYN"), ]
st <- math[which(math$Borough == "STATEN ISLAND"), ]

hist(st$Mean, main = "histogram of Staten island students Math result")

#ggplot
library(ggplot2)

plot of chunk unnamed-chunk-1

# Staten Island histogram drawn with ggplot2, using 10-point-wide bins.
# Written with ggplot() + geom_histogram() because qplot() is deprecated
# in current ggplot2 releases; the data, bins and title are unchanged.
ggplot(st, aes(x = Mean)) +
  geom_histogram(binwidth = 10) +
  ggtitle("histogram of Staten island students Math result")

plot of chunk unnamed-chunk-1

# 6. Create a graph for two numeric variables
# Scatter plot of the number of students tested (x) against the mean score (y).
# Author's observation: the two variables do not show a correlation.
plot(math$Number.Tested, math$Mean)

plot of chunk unnamed-chunk-1

# ggplot2 version of the scatter plot: number of students tested (x)
# against mean score (y). Written with ggplot() + geom_point() because
# qplot() is deprecated in current ggplot2 releases.
ggplot(math, aes(x = Number.Tested, y = Mean)) +
  geom_point()

plot of chunk unnamed-chunk-1