# Load the student dataset directly from its hosted CSV file
mathlevel_csv_url <- "https://raw.githubusercontent.com/Benson90/BrigeR/main/Mathlevel.csv"
Data_education <- read.csv(
  file = mathlevel_csv_url,
  header = TRUE,
  sep = ","
)
# Per-column overview of what was loaded
summary(Data_education)
## X mathlevel sat language
## Min. : 1 Length:609 Min. :400.0 Length:609
## 1st Qu.:153 Class :character 1st Qu.:590.0 Class :character
## Median :305 Mode :character Median :630.0 Mode :character
## Mean :305 Mean :624.8
## 3rd Qu.:457 3rd Qu.:660.0
## Max. :609 Max. :790.0
## sex major mathcourse physiccourse
## Length:609 Length:609 Min. :0.000 Min. :0.0000
## Class :character Class :character 1st Qu.:1.000 1st Qu.:0.0000
## Mode :character Mode :character Median :1.000 Median :1.0000
## Mean :1.427 Mean :0.7077
## 3rd Qu.:2.000 3rd Qu.:1.0000
## Max. :3.000 Max. :2.0000
## chemistcourse
## Min. :0.000
## 1st Qu.:1.000
## Median :1.000
## Mean :1.053
## 3rd Qu.:1.000
## Max. :2.000
#display mean and median for SAT score
# The statistics are stored under their own names instead of in globals called
# `mean` and `median`, which would mask the base functions for any later code
# that passes them around as values. The printed frame keeps the same layout:
# columns `mean` and `median`, one row labelled "sat".
sat_mean <- mean(Data_education[["sat"]])
sat_median <- median(Data_education[["sat"]])
mmFrame <- data.frame(mean = sat_mean, median = sat_median, row.names = "sat")
print(mmFrame)
## mean median
## sat 624.844 630
# Frequency of each value in the `sex` column, as a data frame
sex_counts <- table(Data_education["sex"])
sexFrame <- data.frame(sex_counts)
print(sexFrame)
## Var1 Freq
## 1 female 236
## 2 male 373
# Frequency of each value in the `mathlevel` column, as a data frame
mathlevel_counts <- table(Data_education["mathlevel"])
mathlevelFrame <- data.frame(mathlevel_counts)
print(mathlevelFrame)
## Var1 Freq
## 1 170 164
## 2 171a 49
## 3 171b 228
## 4 172a 11
## 5 172b 42
## 6 221a 31
## 7 221b 84
# Frequency of each value in the `major` column, as a data frame
major_counts <- table(Data_education["major"])
majorFrame <- data.frame(major_counts)
print(majorFrame)
## Var1 Freq
## 1 eco 209
## 2 hum 41
## 3 ns 126
## 4 oss 103
## 5 other 130
# math level transformation
#insert lib
library("stringr")
#convert math level
# Course codes are translated to descriptive labels through an exact-match
# lookup. The earlier chain of regex substitutions replaced substrings, so the
# result depended on the order of the calls and on no label happening to
# contain a later pattern. Codes missing from the lookup are left unchanged.
mathlevel_labels <- c(
  "170" = "Pre Calculus",
  "171a" = "Calculus 1a",
  "172a" = "Calculus 2a",
  "171b" = "Calculus 1b",
  "172b" = "Calculus 2b",
  "221a" = "Vector Calculus 1",
  "221b" = "Vector Calculus 2"
)
# as.character() is already vectorised; no element-wise sapply() is needed
new_mathlevel <- as.character(Data_education[["mathlevel"]])
unique(new_mathlevel)
## [1] "170" "171a" "172a" "171b" "172b" "221a" "221b"
is_known_mathlevel <- new_mathlevel %in% names(mathlevel_labels)
new_mathlevel[is_known_mathlevel] <-
  unname(mathlevel_labels[new_mathlevel[is_known_mathlevel]])
unique(new_mathlevel)
## [1] "Pre Calculus" "Calculus 1a" "Calculus 2a"
## [4] "Calculus 1b" "Calculus 2b" "Vector Calculus 1"
## [7] "Vector Calculus 2"
#convert language
# The yes/no flag is translated through an exact-match lookup. A regex pattern
# such as "no" would also rewrite part of any other value that merely contains
# those letters. Values missing from the lookup are left unchanged.
language_labels <- c(
  "yes" = "ESL",
  "no" = "Native English"
)
# as.character() is already vectorised; no element-wise sapply() is needed
new_language <- as.character(Data_education[["language"]])
unique(new_language)
## [1] "no" "yes"
is_known_language <- new_language %in% names(language_labels)
new_language[is_known_language] <-
  unname(language_labels[new_language[is_known_language]])
unique(new_language)
## [1] "Native English" "ESL"
#convert Major
# Major codes are translated through an exact-match lookup. With chained regex
# substitutions, a short pattern such as "ns" is applied to labels produced by
# earlier steps and only works while none of them contains it. Codes missing
# from the lookup are left unchanged.
major_labels <- c(
  "eco" = "Economic",
  "oss" = "Social Sciences",
  "ns" = "Natural Sciences",
  "hum" = "Humanities",
  "other" = "Other"
)
# as.character() is already vectorised; no element-wise sapply() is needed
new_major <- as.character(Data_education[["major"]])
unique(new_major)
## [1] "ns" "other" "eco" "oss" "hum"
is_known_major <- new_major %in% names(major_labels)
new_major[is_known_major] <- unname(major_labels[new_major[is_known_major]])
unique(new_major)
## [1] "Natural Sciences" "Other" "Economic" "Social Sciences"
## [5] "Humanities"
library(ggplot2)
# Bar chart: number of students per major, each bar filled by its own major
ggplot(Data_education, aes(x = new_major, fill = new_major)) +
  geom_bar() +
  ggtitle("Student's major")
# Box plot: distribution of SAT math scores within each major
ggplot(data = Data_education) +
  geom_boxplot(mapping = aes(x = new_major, y = sat)) +
  ggtitle("SAT(math) score and Student's major")
# Custom ordering of the math-level labels, used as factor levels on plot axes
level_order <- c(
  "Pre Calculus",
  "Calculus 1a",
  "Calculus 1b",
  "Calculus 2a",
  "Calculus 2b",
  "Vector Calculus 1",
  "Vector Calculus 2"
)
#math level to sat
# SAT score per math class, points coloured by the `mathcourse` column.
# `levels =` is written out in full: the previous `level =` spelling only
# worked through R's partial argument matching. The default x-axis label,
# which echoes the expression, changes accordingly.
ggplot(
  data = Data_education,
  mapping = aes(
    x = factor(new_mathlevel, levels = level_order),
    y = sat,
    color = mathcourse
  )
) +
  geom_point(size = 3) +
  geom_line(color = "red") +
  ggtitle("Relation between SAT(math) score and number of math course in different math classes")
#math level to sat in major
# SAT score per math class, points coloured by the relabelled major.
# `levels =` is written out in full: the previous `level =` spelling only
# worked through R's partial argument matching. The default x-axis label,
# which echoes the expression, changes accordingly.
ggplot(
  data = Data_education,
  mapping = aes(
    x = factor(new_mathlevel, levels = level_order),
    y = sat,
    color = new_major
  )
) +
  geom_point(size = 3) +
  geom_line(color = "red") +
  ggtitle("Relation between SAT(math) score and student's major in different math classes")
# Bar chart: number of students per major, bars split by the `sex` column
ggplot(Data_education, aes(x = new_major, fill = sex)) +
  geom_bar() +
  ggtitle("Student's major by sex")
#math level to sat by sex
# SAT score per math class, points coloured by the `sex` column.
# Two fixes: the title was copied from the major plot and still said
# "student's major" although the colour aesthetic is `sex`; and `levels =` is
# written out in full instead of relying on partial matching of `level =`.
ggplot(
  data = Data_education,
  mapping = aes(
    x = factor(new_mathlevel, levels = level_order),
    y = sat,
    color = sex
  )
) +
  geom_point(size = 3) +
  geom_line(color = "red") +
  ggtitle("Relation between SAT(math) score and student's sex in different math classes")
# Bar chart: number of students per major, bars split by the relabelled
# language flag and drawn with the Brewer "Accent" palette
ggplot(Data_education, aes(x = new_major, fill = new_language)) +
  geom_bar() +
  ggtitle("Student's major by foreign language proficiency") +
  scale_fill_brewer(palette = "Accent")
#math level to sat by language
# SAT score per math class, points coloured by the relabelled language flag.
# `levels =` is written out in full: the previous `level =` spelling only
# worked through R's partial argument matching. The default x-axis label,
# which echoes the expression, changes accordingly.
ggplot(
  data = Data_education,
  mapping = aes(
    x = factor(new_mathlevel, levels = level_order),
    y = sat,
    color = new_language
  )
) +
  geom_point(size = 3) +
  geom_line(color = "red") +
  ggtitle("SAT(math) score and student's foreign language proficiency in different math classes") +
  scale_color_brewer(palette = "Accent")
Looking at the relation between SAT math score and student's major from several angles, there is no substantial evidence that students' choice of major is significantly based on their SAT math score. According to the "SAT(math) score and Student's major" chart, students had almost the same average SAT score across all majors. Likewise, sex and language proficiency do not directly determine a student's major. However, the relation between SAT math score and major across the different math classes suggests that students who are good at certain math classes might choose to major in economics, natural sciences, or social sciences.
In conclusion, students have a high chance of choosing their majors based on their interest in different math classes.