Analysis Roadmap

Purpose

  • To find the relation between math level and major in college.

Data Exploration

# Load the math-level data set from its GitHub-hosted CSV file and show a
# per-column summary. read.csv() already defaults to header = TRUE and
# sep = ",", so those arguments are not repeated here.
Data_education <- read.csv(
  "https://raw.githubusercontent.com/Benson90/BrigeR/main/Mathlevel.csv"
)
summary(Data_education)
##        X        mathlevel              sat          language        
##  Min.   :  1   Length:609         Min.   :400.0   Length:609        
##  1st Qu.:153   Class :character   1st Qu.:590.0   Class :character  
##  Median :305   Mode  :character   Median :630.0   Mode  :character  
##  Mean   :305                      Mean   :624.8                     
##  3rd Qu.:457                      3rd Qu.:660.0                     
##  Max.   :609                      Max.   :790.0                     
##      sex               major             mathcourse     physiccourse   
##  Length:609         Length:609         Min.   :0.000   Min.   :0.0000  
##  Class :character   Class :character   1st Qu.:1.000   1st Qu.:0.0000  
##  Mode  :character   Mode  :character   Median :1.000   Median :1.0000  
##                                        Mean   :1.427   Mean   :0.7077  
##                                        3rd Qu.:2.000   3rd Qu.:1.0000  
##                                        Max.   :3.000   Max.   :2.0000  
##  chemistcourse  
##  Min.   :0.000  
##  1st Qu.:1.000  
##  Median :1.000  
##  Mean   :1.053  
##  3rd Qu.:1.000  
##  Max.   :2.000
# Mean and median of the SAT score, shown as a one-row data frame.
# The intermediate results get their own names so that base::mean() and
# base::median() are not masked by same-named variables in the workspace.
# vapply() pins each result to exactly one number per column.
sat_mean <- vapply(Data_education["sat"], mean, numeric(1))
sat_median <- vapply(Data_education["sat"], median, numeric(1))

# Column names are given explicitly so the printed table keeps its
# "mean" / "median" headers; the row label comes from the column name "sat".
mmFrame <- data.frame(mean = sat_mean, median = sat_median)
print(mmFrame)
##        mean median
## sat 624.844    630
# Frequency of each value in the `sex` column, as a data frame for printing.
sexFrame <- data.frame(
  table(Data_education["sex"])
)
print(sexFrame)
##     Var1 Freq
## 1 female  236
## 2   male  373
# Frequency of each course code in the `mathlevel` column, as a data frame
# for printing.
mathlevelFrame <- data.frame(
  table(Data_education["mathlevel"])
)
print(mathlevelFrame)
##   Var1 Freq
## 1  170  164
## 2 171a   49
## 3 171b  228
## 4 172a   11
## 5 172b   42
## 6 221a   31
## 7 221b   84
# Frequency of each value in the `major` column, as a data frame for printing.
majorFrame <- data.frame(
  table(Data_education["major"])
)
print(majorFrame)
##    Var1 Freq
## 1   eco  209
## 2   hum   41
## 3    ns  126
## 4   oss  103
## 5 other  130

Data Wrangling

# math level transformation

#insert lib
library("stringr")

# Recode the course codes in `mathlevel` into readable course names.
# Every code is looked up as a whole value in a named vector, so a code can
# never be matched as a fragment of a longer value and the result does not
# depend on the order in which the codes are listed.
mathlevel_labels <- c(
  "170"  = "Pre Calculus",
  "171a" = "Calculus 1a",
  "172a" = "Calculus 2a",
  "171b" = "Calculus 1b",
  "172b" = "Calculus 2b",
  "221a" = "Vector Calculus 1",
  "221b" = "Vector Calculus 2"
)
new_mathlevel <- as.character(Data_education[["mathlevel"]])
unique(new_mathlevel)
## [1] "170"  "171a" "172a" "171b" "172b" "221a" "221b"
mathlevel_recoded <- unname(mathlevel_labels[new_mathlevel])
# A code without a label is kept as it is instead of becoming NA.
new_mathlevel <- ifelse(
  is.na(mathlevel_recoded), new_mathlevel, mathlevel_recoded
)
unique(new_mathlevel)
## [1] "Pre Calculus"      "Calculus 1a"       "Calculus 2a"      
## [4] "Calculus 1b"       "Calculus 2b"       "Vector Calculus 1"
## [7] "Vector Calculus 2"
# Recode the yes/no `language` flag into display labels.
# Values are matched as whole strings through a named lookup vector, so "no"
# cannot be matched as a fragment of some other value.
# NOTE(review): the plots further down title this variable "foreign language
# proficiency"; confirm that "yes" really identifies ESL students.
language_labels <- c(yes = "ESL", no = "Native English")
new_language <- as.character(Data_education[["language"]])
unique(new_language)
## [1] "no"  "yes"
language_recoded <- unname(language_labels[new_language])
# A value without a label is kept as it is instead of becoming NA.
new_language <- ifelse(is.na(language_recoded), new_language, language_recoded)

unique(new_language)
## [1] "Native English" "ESL"
# Recode the abbreviated `major` values into full major names.
# Values are matched as whole strings through a named lookup vector. Short
# abbreviations such as "ns" therefore cannot hit a fragment of another value
# or of an already expanded label, and the listing order does not matter.
major_labels <- c(
  eco   = "Economic",
  oss   = "Social Sciences",
  ns    = "Natural Sciences",
  hum   = "Humanities",
  other = "Other"
)
new_major <- as.character(Data_education[["major"]])
unique(new_major)
## [1] "ns"    "other" "eco"   "oss"   "hum"
major_recoded <- unname(major_labels[new_major])
# A value without a label is kept as it is instead of becoming NA.
new_major <- ifelse(is.na(major_recoded), new_major, major_recoded)

unique(new_major)
## [1] "Natural Sciences" "Other"            "Economic"         "Social Sciences" 
## [5] "Humanities"

Visualization: data details

library(ggplot2)

# Bar chart: number of students in each major, one fill colour per major.
ggplot(Data_education, aes(x = new_major, fill = new_major)) +
  geom_bar() +
  ggtitle("Student's major")

# Box plot: distribution of the SAT score within each major.
ggplot(Data_education, aes(x = new_major, y = sat)) +
  geom_boxplot() +
  ggtitle("SAT(math) score and Student's major")

  • The student’s major chart shows that the largest number of students were enrolled in the Economic major.
  • The SAT score chart shows almost the same median SAT score across all majors.

Visualization: relation between math skills and major

# Custom display order for the math classes on the x axis.
level_order <- c(
  "Pre Calculus", "Calculus 1a", "Calculus 1b", "Calculus 2a", "Calculus 2b",
  "Vector Calculus 1", "Vector Calculus 2"
)

# SAT score per math class, coloured by `mathcourse`.
# `levels` is spelled out in full; the abbreviated `level` only works through
# partial argument matching.
ggplot(
  data = Data_education,
  mapping = aes(
    x = factor(new_mathlevel, levels = level_order),
    y = sat,
    color = mathcourse
  )
) +
  geom_point(size = 3) +
  geom_line(color = "red") +
  ggtitle("Relation between SAT(math) score and number of math course in different math classes")

# SAT score per math class, coloured by major.
ggplot(
  data = Data_education,
  mapping = aes(
    x = factor(new_mathlevel, levels = level_order),
    y = sat,
    color = new_major
  )
) +
  geom_point(size = 3) +
  geom_line(color = "red") +
  ggtitle("Relation between SAT(math) score and student's major in different math classes")

  • The chart of SAT score against the number of math courses shows that, at every math class level, students who had taken more math courses tended to have better SAT scores.
  • The chart of SAT score against the student’s major shows that the students with the highest SAT math scores tended to choose Social Sciences.

Visualization: additional relations between math skills and other factors

# Bar chart: number of students in each major, with bars filled by sex.
ggplot(data = Data_education) +
  geom_bar(mapping = aes(x = new_major, fill = sex)) +
  ggtitle("Student's major by sex")

# SAT score per math class, coloured by sex.
# The title names sex, which is the variable this plot actually colours by.
# `levels` is spelled out in full; the abbreviated `level` only works through
# partial argument matching.
ggplot(
  data = Data_education,
  mapping = aes(
    x = factor(new_mathlevel, levels = level_order),
    y = sat,
    color = sex
  )
) +
  geom_point(size = 3) +
  geom_line(color = "red") +
  ggtitle("Relation between SAT(math) score and student's sex in different math classes")

# Bar chart: number of students in each major, with bars filled by the
# recoded language label.
ggplot(data = Data_education) +
  geom_bar(mapping = aes(x = new_major, fill = new_language)) +
  ggtitle("Student's major by foreign language proficiency") +
  scale_fill_brewer(palette = "Accent")

# SAT score per math class, coloured by the recoded language label.
# `levels` is spelled out in full; the abbreviated `level` only works through
# partial argument matching.
ggplot(
  data = Data_education,
  mapping = aes(
    x = factor(new_mathlevel, levels = level_order),
    y = sat,
    color = new_language
  )
) +
  geom_point(size = 3) +
  geom_line(color = "red") +
  ggtitle("SAT(math) score and student's foreign language proficiency in different math classes") +
  scale_color_brewer(palette = "Accent")

  • The Student’s major by sex chart shows that female students were spread more evenly across the majors, whereas male students favored the Economic major.
  • The chart of SAT score and students’ foreign language proficiency in different math classes shows that, in the Calculus 1 and Calculus 2 classes, the highest SAT scores belonged to ESL students rather than native English students.

Conclusion

Having analyzed the relation between SAT math score and student’s major from several angles, no substantial evidence shows that students’ choice of major is significantly based on their SAT math score. Based on the SAT math score and Student’s major chart, students had almost the same average SAT score across all majors. Also, sex and language proficiency do not directly determine students’ majors. However, the relation between SAT math score and student major shows that students who are good at certain math classes might choose to major in Economics, Natural Sciences, or Social Sciences.
In conclusion, students have a high chance of choosing their majors based on their interest in different math classes.