- Import the two Census Files into R
library(tidyverse)
census_term1 <- read_csv("https://raw.githubusercontent.com/bpattizUCM/MidAIR2017/master/Census%20Term1.csv")
census_term2 <- read_csv("https://raw.githubusercontent.com/bpattizUCM/MidAIR2017/master/Census%20Term2.csv")
# Read Majors and Schools Code File
majors_code_file <- read_csv("https://raw.githubusercontent.com/bpattizUCM/MidAIR2017/master/Major%20Codes.csv") %>% select(-Prop)
- Apply the following function to both Census Files to clean up the major codes
CleanUpMajorCodes <- function(dataset){
library(stringr)
df <- dataset %>%
mutate(MAJOR_CODE =
case_when(str_length(MAJOR_CODE) == 1 ~ str_c("00000", MAJOR_CODE),
str_length(MAJOR_CODE) == 2 ~ str_c("0000", MAJOR_CODE),
str_length(MAJOR_CODE) == 3 ~ str_c("000", MAJOR_CODE)))
return(df)
}
census_term1_tidy_df <- CleanUpMajorCodes(census_term1)
census_term2_tidy_df <- CleanUpMajorCodes(census_term2)
- Merge the Majors and Schools Code File to both Census Files
census_term1_tidy_dataframe <-
census_term1_tidy_df %>%
left_join(majors_code_file)
census_term2_tidy_dataframe <-
census_term2_tidy_df %>%
left_join(majors_code_file)
- Calculate the retention rate of all first-time freshmen (where Stype is ‘N’)
census_term1_ftf <-
census_term1_tidy_dataframe %>%
filter(Stype == "N")
mean(census_term1_ftf$ID %in% census_term2_tidy_dataframe$ID)
## [1] 0.7099935
- How many first-time freshmen retained to Term 2 but are part-time? (Hours < 12)
census_term2_tidy_df %>%
filter(Hours < 12 & (ID %in% census_term1_ftf$ID)) %>%
summarise(n())
## # A tibble: 1 x 1
## `n()`
## <int>
## 1 266
- Get the table of students in the first term College of Education students whose ACT scores are higher than 22, HS GPA is above 3.5 and are full time
census_term1_tidy_dataframe %>%
filter(COLLEGE == "Education" & ACT > 22 & HS_GPA > 3.5 & Hours >= 12)
## # A tibble: 57 x 16
## ID Race Age Sex HS_GPA ACT ACT_ENG ACT_MATH ACT_READ
## <int> <chr> <int> <chr> <dbl> <int> <int> <int> <int>
## 1 121 White 32 Female 3.71 24 24 26 23
## 2 133 Black 20 Female 3.90 23 21 22 20
## 3 278 Hispanic 32 Male 3.78 23 25 24 23
## 4 479 Asian 30 Female 3.71 24 24 21 22
## 5 556 White 32 Female 3.69 32 33 34 30
## 6 619 Hispanic 33 Male 3.59 25 25 24 26
## 7 1029 White 18 Male 3.59 28 28 29 31
## 8 1041 Black 18 Male 3.59 29 27 26 30
## 9 1081 Black 35 Male 3.68 23 24 19 22
## 10 1816 White 18 Male 3.57 23 22 24 22
## # ... with 47 more rows, and 7 more variables: ACT_SCI <int>,
## # MAJOR_CODE <chr>, Stype <chr>, Hours <dbl>, MAJOR_TITLE <chr>,
## # COLLEGE <chr>, DEPARTMENT <chr>
- Create a new field in the Majors and Schools file that creates codes for each College. Call this variable “COLLEGE_CODE” and encode it as the following: For the Arts, Humanities, and Social Sciences “CAHSS”, Business and Professional Studies “BPS”, Education “ED”, Provost (“PROVOST”), and Undesignated/Open Option (“UNDECIDED”).
majors_code_file_df <-
majors_code_file %>%
mutate(COLLEGE_CODE =
case_when(COLLEGE == "Arts, Humanities, and Social Sciences" ~ "CAHSS",
COLLEGE == "Business and Professional Studies" ~ "BPS",
COLLEGE == "Education" ~ "ED",
COLLEGE == "Health, Science and Technology" ~ "HST",
COLLEGE == "Provost" ~ "PROVOST",
COLLEGE == "Undesignated/Open Option" ~ "UNDECIDED"))
head(majors_code_file_df$COLLEGE_CODE)
## [1] "UNDECIDED" "UNDECIDED" "HST" "ED" "HST" "HST"
- For first-time freshmen who retained to the second term, find their minimum, maxmum, mean and standard deviation of their ACT and HS GPA
census_term1_ftf %>%
filter(ID %in% census_term2_tidy_df$ID) %>%
summarise(min(ACT, na.rm = TRUE), max(ACT, na.rm = TRUE), mean(ACT, na.rm = TRUE), sd(ACT, na.rm = TRUE),
min(HS_GPA), max(HS_GPA), mean(HS_GPA), sd(HS_GPA))
## # A tibble: 1 x 8
## `min(ACT, na.rm = TRUE)` `max(ACT, na.rm = TRUE)`
## <dbl> <dbl>
## 1 12 34
## # ... with 6 more variables: `mean(ACT, na.rm = TRUE)` <dbl>, `sd(ACT,
## # na.rm = TRUE)` <dbl>, `min(HS_GPA)` <dbl>, `max(HS_GPA)` <dbl>,
## # `mean(HS_GPA)` <dbl>, `sd(HS_GPA)` <dbl>
- From the Census File for Term 2, compute the counts of students by ethnicity and student type
census_term2_tidy_df %>%
group_by(Race, Stype) %>%
summarise(Count = n())
## # A tibble: 36 x 3
## # Groups: Race [?]
## Race Stype Count
## <chr> <chr> <int>
## 1 Asian C 179
## 2 Asian M 51
## 3 Asian N 89
## 4 Asian R 5
## 5 Asian T 60
## 6 Bi-Racial C 52
## 7 Bi-Racial M 15
## 8 Bi-Racial N 38
## 9 Bi-Racial R 3
## 10 Bi-Racial T 32
## # ... with 26 more rows
- Calculate the Perentage Change in Biology Majors from Census 1 to Census 2 ((N2 - N1)/N2
biology_census1 <-
census_term1_tidy_dataframe %>%
filter(MAJOR_TITLE == "Biology")
n_biology_census1 <- nrow(biology_census1)
biology_census2 <-
census_term2_tidy_dataframe %>%
filter(MAJOR_TITLE == "Biology")
n_biology_census2 <- nrow(biology_census2)
(n_biology_census2 - n_biology_census1)/n_biology_census1
## [1] -0.1628571