(1) Create a .CSV file that includes all of the information above.

The CSV file is saved in the Github.

(2) Read the information from your .CSV file into R, and use tidyr and dplyr as needed to tidy and transform your data.

library(tidyr)
## Warning: package 'tidyr' was built under R version 3.2.5
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.2.5
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
require(knitr)
## Loading required package: knitr
## Warning: package 'knitr' was built under R version 3.2.5
mathtest <- read.csv("https://raw.githubusercontent.com/xkong100/IS607/master/Project2/Math_Test_Results_2006-2012_-_Citywide_-_SWD.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = c("", "NA"))
kable(head(mathtest))
Grade Year Demographic Number Tested Mean Scale Score Num Level 1 Pct Level 1 Num Level 2 Pct Level 2 Num Level 3 Pct Level 3 Num Level 4 Pct Level 4 Num Level 3 and 4 Pct Level 3 and 4
3 2006 Not SWD 62092 680 3419 5.5 8535 13.7 32946 53.1 17192 27.7 50138 80.7
3 2007 Not SWD 60525 689 1631 2.7 5917 9.8 32758 54.1 20219 33.4 52977 87.5
3 2008 Not SWD 58364 690 757 1.3 4094 7.0 37672 64.5 15841 27.1 53513 91.7
3 2009 Not SWD 59155 694 301 0.5 2620 4.4 39383 66.6 16851 28.5 56234 95.1
3 2010 Not SWD 58565 694 4619 7.9 18859 32.2 20295 34.7 14792 25.3 35087 59.9
3 2011 Not SWD 59507 687 4596 7.7 19119 32.1 27532 46.3 8260 13.9 35792 60.1

Get rid of useless columns

mathtest <- data.frame(mathtest[,-(c(3,7,9,11,13,15))])
kable(head(mathtest))
Grade Year Number.Tested Mean.Scale.Score Num.Level.1 Num.Level.2 Num.Level.3 Num.Level.4 Num.Level.3.and.4
3 2006 62092 680 3419 8535 32946 17192 50138
3 2007 60525 689 1631 5917 32758 20219 52977
3 2008 58364 690 757 4094 37672 15841 53513
3 2009 59155 694 301 2620 39383 16851 56234
3 2010 58565 694 4619 18859 20295 14792 35087
3 2011 59507 687 4596 19119 27532 8260 35792

Convert the Number of levels’ columns into numerical value to help us calculate later. Also Find the total non-passing students and total passing students.

tmathtest <- within(mathtest, {Num.Level.1 <- as.numeric(as.character(Num.Level.1))
Num.Level.2 <-as.numeric(as.character(Num.Level.2))
Num.Level.3.and.4 <- as.numeric(as.character(Num.Level.3.and.4))})

tmathtest1<-mutate(tmathtest, Total_non_passing= Num.Level.1 + Num.Level.2)
tmathtest2 <- gather(tmathtest1, "No Passing", "Non_Passing_students",5:6 )
tmathtest3 <- mutate(tmathtest2, Total_passing = Num.Level.3.and.4)
tmathtest4 <- gather(tmathtest3, "Passing", "Passing_students", 5:6)
tmathtest5 <- data.frame(tmathtest4[,-5])
kable(head(tmathtest5))
Grade Year Number.Tested Mean.Scale.Score Total_non_passing No.Passing Non_Passing_students Total_passing Passing Passing_students
3 2006 62092 680 11954 Num.Level.1 3419 50138 Num.Level.3 32946
3 2007 60525 689 7548 Num.Level.1 1631 52977 Num.Level.3 32758
3 2008 58364 690 4851 Num.Level.1 757 53513 Num.Level.3 37672
3 2009 59155 694 2921 Num.Level.1 301 56234 Num.Level.3 39383
3 2010 58565 694 23478 Num.Level.1 4619 35087 Num.Level.3 20295
3 2011 59507 687 23715 Num.Level.1 4596 35792 Num.Level.3 27532

Analysis: Find the percentage of non_passing student. We need to convert “Number. Tested” into numerical value first.

tmathtest6 <- within(tmathtest5, {Number.Tested <- as.numeric(as.character(Number.Tested))})
tmathtest7 <- mutate(tmathtest6, percentage_non_passing = Total_non_passing / Number.Tested * 100)
kable(head(tmathtest7))
Grade Year Number.Tested Mean.Scale.Score Total_non_passing No.Passing Non_Passing_students Total_passing Passing Passing_students percentage_non_passing
3 2006 62092 680 11954 Num.Level.1 3419 50138 Num.Level.3 32946 19.252078
3 2007 60525 689 7548 Num.Level.1 1631 52977 Num.Level.3 32758 12.470880
3 2008 58364 690 4851 Num.Level.1 757 53513 Num.Level.3 37672 8.311630
3 2009 59155 694 2921 Num.Level.1 301 56234 Num.Level.3 39383 4.937875
3 2010 58565 694 23478 Num.Level.1 4619 35087 Num.Level.3 20295 40.088790
3 2011 59507 687 23715 Num.Level.1 4596 35792 Num.Level.3 27532 39.852454

Grouping by grade and year, it will clearly show use how many students did not pass the exams at that year and grade.

Non_Passing_rate <- tmathtest7 %>% group_by(Grade,Year) %>% summarise(MeanPercent= round(mean(percentage_non_passing, 0)))
Non_Passing_rate
## Source: local data frame [49 x 3]
## Groups: Grade [?]
## 
##    Grade  Year MeanPercent
##    <chr> <int>       <dbl>
## 1      3  2006          37
## 2      3  2007          29
## 3      3  2008          22
## 4      3  2009          16
## 5      3  2010          56
## 6      3  2011          55
## 7      3  2012          53
## 8      4  2006          42
## 9      4  2007          38
## 10     4  2008          32
## # ... with 39 more rows