## HW1: Student Performance Data (Logical Indices)
# Due: Feb 3, 2020
# Alec Aversa
## 1. Upload the data StudentPerformance on
## RStudio and find what kind of variables are present there.
# stringsAsFactors = TRUE keeps the text columns as factors. Since R 4.0.0 the
# default is FALSE, which would make the levels() calls further down return
# NULL and the level rename a silent no-op.
StudentsPerformance <- read.csv(
  "StudentsPerformance.csv",
  stringsAsFactors = TRUE
)
# str() reports the class of every column.
str(StudentsPerformance)
# gender = Factor
# race.ethnicity = Factor
# parental.level.of.education = Factor
# lunch = Factor
# test.preparation.course = Factor
# math.score = Number
# reading.score = Number
# writing.score = Number
## 2. Print the categories of "parental level of education".
# Distinct values of the parental education column, in order of first appearance.
unique(StudentsPerformance[["parental.level.of.education"]])
## [1] bachelor's degree some college master's degree associate's degree
## [5] high school some high school
## 6 Levels: associate's degree bachelor's degree high school ... some high school
## 3. In "parental level of education", rename the level "high school" as "HS".
# as.factor() leaves an existing factor untouched and converts the column when
# read.csv() returned plain strings, so levels() is always meaningful here.
StudentsPerformance$parental.level.of.education <-
  as.factor(StudentsPerformance$parental.level.of.education)
levels(StudentsPerformance$parental.level.of.education)
## [1] "associate's degree" "bachelor's degree" "high school"
## [4] "master's degree" "some college" "some high school"
# Rename by level name rather than by position so the edit does not depend on
# the alphabetical ordering of the levels.
edu_levels <- levels(StudentsPerformance$parental.level.of.education)
levels(StudentsPerformance$parental.level.of.education)[
  edu_levels == "high school"
] <- "HS"
levels(StudentsPerformance$parental.level.of.education)
## [1] "associate's degree" "bachelor's degree" "HS"
## [4] "master's degree" "some college" "some high school"
## 4. How many males and females participated in the survey?
# Summing the logical mask counts the matching rows. na.rm = TRUE keeps rows
# with a missing gender out of the count; length(x[mask]) would include them.
sum(StudentsPerformance$gender == "female", na.rm = TRUE)
## [1] 518
sum(StudentsPerformance$gender == "male", na.rm = TRUE)
## [1] 482
## 5. How many females were on the list who attended “high school” or “some high school”?
# The question asks for "high school" or "some high school" (not "some
# college"). Question 3 renames the "high school" level to "HS", so %in%
# accepts both spellings and the count is right whether or not that rename
# has been applied.
sum(
  StudentsPerformance$gender == "female" &
    StudentsPerformance$parental.level.of.education %in%
      c("HS", "high school", "some high school"),
  na.rm = TRUE
)
## NOTE: re-run to record the count. The previously recorded value (118) was
## produced by the old filter, which tested "some college" instead of
## "some high school".
## 6. How many students completed the test preparation course?
# Count rows whose prep-course status is "completed"; na.rm = TRUE keeps rows
# with a missing status out of the count.
sum(StudentsPerformance$test.preparation.course == "completed", na.rm = TRUE)
## [1] 358
## 7. How many students did not complete the test prep course but still scored above 80 in math?
# Count rows with no prep course and a math score strictly above 80; na.rm =
# TRUE keeps rows with missing values out of the count.
sum(
  StudentsPerformance$test.preparation.course == "none" &
    StudentsPerformance$math.score > 80,
  na.rm = TRUE
)
## [1] 92
## 8. Suppose the teachers will choose the candidate(s) who has the highest score (total in
## math, reading and writing). What is the highest score?
# Largest combined math + reading + writing score over all students.
with(StudentsPerformance, max(math.score + reading.score + writing.score))
## [1] 300
## 9. How many highest scorers are there?
# M holds the highest combined score; it is reused by the questions below.
M <- with(StudentsPerformance, max(math.score + reading.score + writing.score))
# Number of students whose combined score equals M.
with(
  StudentsPerformance,
  length(
    parental.level.of.education[
      math.score + reading.score + writing.score == M
    ]
  )
)
## [1] 3
## 10. Answer the following:
#(a) Is there any highest scorer who completed the test prep course?
# Number of top scorers (combined score equal to M) who completed the prep course.
with(
  StudentsPerformance,
  length(
    parental.level.of.education[
      math.score + reading.score + writing.score == M &
        test.preparation.course == "completed"
    ]
  )
)
## [1] 1
#(b) If there is any highest scorer who completed the test prep, find his/her score in
#reading.
# Reading score(s) of the top scorers (combined score equal to M) who completed
# the prep course.
with(
  StudentsPerformance,
  reading.score[
    math.score + reading.score + writing.score == M &
      test.preparation.course == "completed"
  ]
)
## [1] 100