Setting up.
library(tidyverse)
## -- Attaching packages --------------------------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.1.0 v purrr 0.2.5
## v tibble 1.4.2 v dplyr 0.7.7
## v tidyr 0.8.2 v stringr 1.3.1
## v readr 1.1.1 v forcats 0.3.0
## -- Conflicts ------------------------------------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Read filter.csv into `student`. The first row supplies the column names and
# empty fields are read as NA.
# NOTE(review): the absolute Windows path only resolves on the original
# author's machine -- consider a project-relative path.
student <- read.csv(
  file = "C:/Users/gene4/OneDrive/R Stuff/pca/PCA/filter.csv",
  header = TRUE,
  na.strings = ""
)
How many semesters/quarters did each student complete?
How many students graduated, and with which degree titles (BS, BA, MS, etc.)?
# Keep only the rows whose Record.Found.Y.N flag is "Y" (rows where the flag
# is NA are dropped by filter() as well), then print a per-column summary.
record_found <- filter(student, Record.Found.Y.N == "Y")
summary(record_found)
## First.Name Last.Name Record.Found.Y.N Search.Date
## KASIM : 25 ABDULLAH: 25 N: 0 Min. :20090601
## IVETTE : 22 BROWN : 22 Y:319 1st Qu.:20090601
## KIMBERLY : 21 GUZMAN : 22 Median :20090601
## KEVIN : 20 RAMIREZ : 21 Mean :20090601
## ELIZABETH: 19 RAMOS : 21 3rd Qu.:20090601
## OLIVIA : 19 VO : 19 Max. :20090601
## (Other) :193 (Other) :189
## College.Name College.State
## UNIVERSITY OF CALIFORNIA-DAVIS : 57 AZ: 1
## CALIFORNIA STATE UNIVERSITY - EAST BAY: 43 CA:302
## SAN FRANCISCO STATE UNIVERSITY : 31 KY: 9
## COLLEGE OF ALAMEDA : 26 LA: 1
## MERRITT COLLEGE : 26 TX: 6
## LANEY COLLEGE : 24
## (Other) :112
## X2.year...4.year Public...Private Enrollment.Begin Enrollment.End
## Min. :2.000 Private: 16 Min. :20090817 Min. :20091208
## 1st Qu.:2.000 Public :303 1st Qu.:20103217 1st Qu.:20103495
## Median :4.000 Median :20120827 Median :20121214
## Mean :3.172 Mean :20127235 Mean :20127661
## 3rd Qu.:4.000 3rd Qu.:20140818 3rd Qu.:20141212
## Max. :4.000 Max. :20181009 Max. :20181220
## NA's :17 NA's :17
## Graduated. Degree.Title Degree.Major.1
## N:302 BACHELOR OF ARTS : 4 HEALTH SCIENCES : 2
## Y: 17 ASSOCIATE IN ARTS : 2 ANIMAL SCIENCE : 1
## BACHELOR OF SCIENCE: 2 BIOLOGY-BA : 1
## BS : 2 BUS BUSINESS ADMIN-TR: 1
## MASTER OF SCIENCE : 2 COMMUNICATION : 1
## (Other) : 4 (Other) : 10
## NA's :303 NA's :303
# Count the rows in `record_found` for each first name, largest count first,
# and print the resulting table.
# NOTE(review): rows are keyed on First.Name alone, so two students sharing a
# first name would be merged into one count, and every row is counted as one
# term -- confirm both are intended.
semesters_completed <- count(record_found, First.Name, sort = TRUE)
semesters_completed
## # A tibble: 31 x 2
## First.Name n
## <fct> <int>
## 1 KASIM 25
## 2 IVETTE 22
## 3 KIMBERLY 21
## 4 KEVIN 20
## 5 ELIZABETH 19
## 6 OLIVIA 19
## 7 BEATRIZ 18
## 8 YESENIA 18
## 9 MARICRUZ 17
## 10 BRANDUN 15
## # ... with 21 more rows
# Number of rows for each value of the Graduated. flag.
count(record_found, Graduated.)
## # A tibble: 2 x 2
## Graduated. n
## <fct> <int>
## 1 N 302
## 2 Y 17
# Number of rows for each Degree.Major.1 value; rows with no major recorded
# are tallied under NA.
count(record_found, Degree.Major.1)
## # A tibble: 16 x 2
## Degree.Major.1 n
## <fct> <int>
## 1 ANIMAL SCIENCE 1
## 2 BIOLOGY-BA 1
## 3 BUS BUSINESS ADMIN-TR 1
## 4 COMMUNICATION 1
## 5 COMMUNICATION TRANSF (CSU GE) 1
## 6 COMMUNITY AND REGIONAL DEVELOP 1
## 7 COUN (MARR, FAM & CHILD)-MS 1
## 8 GLOBAL HEALTH SCIENCES 1
## 9 HEALTH SCIENCES 2
## 10 HLTSC HEALTH SCIENCES 1
## 11 LIBERAL ARTS-EMPH ARTS & HUMAN 1
## 12 MEDAS CLINICAL MEDICAL ASSIST 1
## 13 MEDICAL ASSISTING 1
## 14 PSYCHOLOGY 1
## 15 SOCIOLOGY 1
## 16 <NA> 303
# Number of rows for each Degree.Title value; rows with no title recorded are
# tallied under NA.
# NOTE(review): titles are not normalised (e.g. "BS" and "BACHELOR OF SCIENCE"
# are counted separately) -- consider recoding before tallying.
count(record_found, Degree.Title)
## # A tibble: 10 x 2
## Degree.Title n
## <fct> <int>
## 1 AS BUS BUSINESS ADMIN-TR 1
## 2 AS HLTSC HEALTH SCIENCES 1
## 3 ASSOCIATE IN ARTS 2
## 4 BACHELOR OF ARTS 4
## 5 BACHELOR OF SCIENCE 2
## 6 BS 2
## 7 CERTIFICATE 1
## 8 CP MEDAS CLINICAL MEDICAL ASSIST 1
## 9 MASTER OF SCIENCE 2
## 10 <NA> 303