The chart below describes the grades of fourteen students from 5th grade of Vincent Houghton Elementary school. It contains the student’s grade from five subjects and the number of days they were present in school.

image

Part 1:

library(curl)
library(tidyr)
## Warning: package 'tidyr' was built under R version 3.2.2
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.2.2
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stringr)
library(ggplot2)
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 3.2.2
library(knitr)

Up

Part 2:

Vincent_Grade_5 <- read.csv(file="https://raw.githubusercontent.com/nabilahossain/Class-IS607/master/Project%202/Vincent%20Houghton%20Elementary%20School/Vincent%20Houghton%20(Grade%205).csv", header=TRUE, sep=",")
Vincent_Grade_5
##           Student English Math Science Social.Studies Technology
## 1     Alicia Cruz      46   47      94             96         84
## 2   Solomon Tunde      64   84      43             60         83
## 3   Daniel Zainea      47   82      52             58         96
## 4    Thomas Brown      96   50      58             71        100
## 5   Lisa Giamonco      40   76      50             85         78
## 6  Cheryl Harldat      75   78      98             41         88
## 7   Winston Cadiz      90   73      90             75         45
## 8    Pablo Romero      91   50      85             68         72
## 9   Jennifer Weil      98   63      43             58         40
## 10 Ronald Rosario      98   40      98             75         81
## 11  Martha Carpio      69   60      52             60         47
## 12 Jasmine Carpio      99   86      49             89         93
## 13    Raul Martin      42   59      54             48         80
## 14    Desio Trump      41   42      69             92         55
##    Days.Present
## 1           169
## 2           121
## 3           146
## 4            95
## 5           118
## 6           159
## 7           175
## 8           101
## 9           107
## 10           94
## 11          171
## 12          140
## 13          174
## 14          167

Up

VES <- Vincent_Grade_5 %>%  separate(Student, c("First_Name", "Last_Name")) %>%  arrange(Last_Name)
VES1 <- VES  %>% gather("Subject", "Scores", 3:7) %>% select(Last_Name, First_Name, Subject, Scores, Days_Present = Days.Present)
VES1$Subject <- str_replace_all(VES1$Subject, pattern = "\\.", replacement = " ") 
head(VES1, 15)
##    Last_Name First_Name Subject Scores Days_Present
## 1      Brown     Thomas English     96           95
## 2      Cadiz    Winston English     90          175
## 3     Carpio     Martha English     69          171
## 4     Carpio    Jasmine English     99          140
## 5       Cruz     Alicia English     46          169
## 6   Giamonco       Lisa English     40          118
## 7    Harldat     Cheryl English     75          159
## 8     Martin       Raul English     42          174
## 9     Romero      Pablo English     91          101
## 10   Rosario     Ronald English     98           94
## 11     Trump      Desio English     41          167
## 12     Tunde    Solomon English     64          121
## 13      Weil   Jennifer English     98          107
## 14    Zainea     Daniel English     47          146
## 15     Brown     Thomas    Math     50           95

Up

VES2 <- VES1 %>% group_by(Subject) %>% summarise(Average_Grade=mean(Scores), Average_Days_Present=mean(Days_Present)) %>% data.frame()
VES2
##          Subject Average_Grade Average_Days_Present
## 1        English      71.14286             138.3571
## 2           Math      63.57143             138.3571
## 3        Science      66.78571             138.3571
## 4 Social Studies      69.71429             138.3571
## 5     Technology      74.42857             138.3571
VES3 <- VES1 %>% group_by(Last_Name, First_Name) %>% summarise(Average_Grade=mean(Scores), Days_Present=mean(Days_Present)) %>% data.frame()
VES3
##    Last_Name First_Name Average_Grade Days_Present
## 1      Brown     Thomas          75.0           95
## 2      Cadiz    Winston          74.6          175
## 3     Carpio    Jasmine          83.2          140
## 4     Carpio     Martha          57.6          171
## 5       Cruz     Alicia          73.4          169
## 6   Giamonco       Lisa          65.8          118
## 7    Harldat     Cheryl          76.0          159
## 8     Martin       Raul          56.6          174
## 9     Romero      Pablo          73.2          101
## 10   Rosario     Ronald          78.4           94
## 11     Trump      Desio          59.8          167
## 12     Tunde    Solomon          66.8          121
## 13      Weil   Jennifer          60.4          107
## 14    Zainea     Daniel          67.0          146

Up

Part 3:

y <- cor(VES1$Scores, VES1$Days_Present)
y
## [1] -0.1263216
ggplot(data = VES1, aes(x = Scores, y = Days_Present, color=First_Name)) +  geom_point(aes(shape = First_Name)) + theme_igray() + geom_smooth(method = "lm", se=FALSE, color="black", formula = y ~ x) + scale_shape_manual(values = c(1:14)) + ggtitle("Graph 1: Grades Vs Days Present (by student)") + ylab("Days Present") + xlab("Grades")

ggplot(data = VES1, aes(x = Scores, y = Days_Present, color=Subject)) +  geom_point() + scale_colour_tableau()  + theme_solarized(light=FALSE) + ggtitle("Graph 2: Grades Vs. Days Present (by subject)") + geom_smooth(method = "lm", se=FALSE, color="yellow", formula = y ~ x) + ylab("Days Present") + xlab("Grades")

Up

z <- VES1 %>% select(Subject, Scores, Days_Present) %>% group_by(Subject) %>% summarize(Correlation = cor(Scores, Days_Present)) %>% data.frame()
VES4 <- VES2 %>% left_join(z, by = "Subject")
kable(VES4, digits = 2, align = "c", caption = "Table 1: Summary of each subject.")
Table 1: Summary of each subject.
Subject Average_Grade Average_Days_Present Correlation
English 71.14 138.36 -0.53
Math 63.57 138.36 0.15
Science 66.79 138.36 0.13
Social Studies 69.71 138.36 -0.02
Technology 74.43 138.36 -0.25
ggplot(data = VES1, aes(x = Scores, y = Days_Present, color=First_Name)) +  geom_point() + geom_smooth(method = "lm", se=FALSE, color="black", formula = y ~ x) + facet_wrap(~Subject) + theme_igray() + ggtitle("Graph 3: Grades Vs. Days Present in Each subject (by students)") + ylab("Days Present") + xlab("Grades")   

Up

kable(VES3, align = "c", caption = "Table 2: Summary of each student.")   
Table 2: Summary of each student.
Last_Name First_Name Average_Grade Days_Present
Brown Thomas 75.0 95
Cadiz Winston 74.6 175
Carpio Jasmine 83.2 140
Carpio Martha 57.6 171
Cruz Alicia 73.4 169
Giamonco Lisa 65.8 118
Harldat Cheryl 76.0 159
Martin Raul 56.6 174
Romero Pablo 73.2 101
Rosario Ronald 78.4 94
Trump Desio 59.8 167
Tunde Solomon 66.8 121
Weil Jennifer 60.4 107
Zainea Daniel 67.0 146
x <- cor(VES3$Average_Grade, VES3$Days_Present)
x
## [1] -0.3066169
ggplot(data = VES3, aes(y = Days_Present, x = Average_Grade, color=First_Name)) +  geom_point() + theme_solarized(light=FALSE) + geom_smooth(method = "lm", se=FALSE, color="red", formula = y ~ x) + ggtitle("Graph 4: Average Grade Vs. Days Present")+ ylab("Days Present") + xlab("Average Grades") 

Up