Get rid of useless columns
mathtest <- data.frame(mathtest[,-(c(3,7,9,11,13,15))])
kable(head(mathtest))
| 3 |
2006 |
62092 |
680 |
3419 |
8535 |
32946 |
17192 |
50138 |
| 3 |
2007 |
60525 |
689 |
1631 |
5917 |
32758 |
20219 |
52977 |
| 3 |
2008 |
58364 |
690 |
757 |
4094 |
37672 |
15841 |
53513 |
| 3 |
2009 |
59155 |
694 |
301 |
2620 |
39383 |
16851 |
56234 |
| 3 |
2010 |
58565 |
694 |
4619 |
18859 |
20295 |
14792 |
35087 |
| 3 |
2011 |
59507 |
687 |
4596 |
19119 |
27532 |
8260 |
35792 |
Convert the Number of levels’ columns into numerical value to help us calculate later. Also Find the total non-passing students and total passing students.
tmathtest <- within(mathtest, {Num.Level.1 <- as.numeric(as.character(Num.Level.1))
Num.Level.2 <-as.numeric(as.character(Num.Level.2))
Num.Level.3.and.4 <- as.numeric(as.character(Num.Level.3.and.4))})
tmathtest1<-mutate(tmathtest, Total_non_passing= Num.Level.1 + Num.Level.2)
tmathtest2 <- gather(tmathtest1, "No Passing", "Non_Passing_students",5:6 )
tmathtest3 <- mutate(tmathtest2, Total_passing = Num.Level.3.and.4)
tmathtest4 <- gather(tmathtest3, "Passing", "Passing_students", 5:6)
tmathtest5 <- data.frame(tmathtest4[,-5])
kable(head(tmathtest5))
| 3 |
2006 |
62092 |
680 |
11954 |
Num.Level.1 |
3419 |
50138 |
Num.Level.3 |
32946 |
| 3 |
2007 |
60525 |
689 |
7548 |
Num.Level.1 |
1631 |
52977 |
Num.Level.3 |
32758 |
| 3 |
2008 |
58364 |
690 |
4851 |
Num.Level.1 |
757 |
53513 |
Num.Level.3 |
37672 |
| 3 |
2009 |
59155 |
694 |
2921 |
Num.Level.1 |
301 |
56234 |
Num.Level.3 |
39383 |
| 3 |
2010 |
58565 |
694 |
23478 |
Num.Level.1 |
4619 |
35087 |
Num.Level.3 |
20295 |
| 3 |
2011 |
59507 |
687 |
23715 |
Num.Level.1 |
4596 |
35792 |
Num.Level.3 |
27532 |
Analysis: Find the percentage of non_passing student. We need to convert “Number. Tested” into numerical value first.
tmathtest6 <- within(tmathtest5, {Number.Tested <- as.numeric(as.character(Number.Tested))})
tmathtest7 <- mutate(tmathtest6, percentage_non_passing = Total_non_passing / Number.Tested * 100)
kable(head(tmathtest7))
| 3 |
2006 |
62092 |
680 |
11954 |
Num.Level.1 |
3419 |
50138 |
Num.Level.3 |
32946 |
19.252078 |
| 3 |
2007 |
60525 |
689 |
7548 |
Num.Level.1 |
1631 |
52977 |
Num.Level.3 |
32758 |
12.470880 |
| 3 |
2008 |
58364 |
690 |
4851 |
Num.Level.1 |
757 |
53513 |
Num.Level.3 |
37672 |
8.311630 |
| 3 |
2009 |
59155 |
694 |
2921 |
Num.Level.1 |
301 |
56234 |
Num.Level.3 |
39383 |
4.937875 |
| 3 |
2010 |
58565 |
694 |
23478 |
Num.Level.1 |
4619 |
35087 |
Num.Level.3 |
20295 |
40.088790 |
| 3 |
2011 |
59507 |
687 |
23715 |
Num.Level.1 |
4596 |
35792 |
Num.Level.3 |
27532 |
39.852454 |
Grouping by grade and year, it will clearly show use how many students did not pass the exams at that year and grade.
Non_Passing_rate <- tmathtest7 %>% group_by(Grade,Year) %>% summarise(MeanPercent= round(mean(percentage_non_passing, 0)))
Non_Passing_rate
## Source: local data frame [49 x 3]
## Groups: Grade [?]
##
## Grade Year MeanPercent
## <chr> <int> <dbl>
## 1 3 2006 37
## 2 3 2007 29
## 3 3 2008 22
## 4 3 2009 16
## 5 3 2010 56
## 6 3 2011 55
## 7 3 2012 53
## 8 4 2006 42
## 9 4 2007 38
## 10 4 2008 32
## # ... with 39 more rows