Lower Middle Iowa College Recruitment
Load necessary R libraries.
library(dplyr)
library(reshape2)
library(readxl)
Read the Excel file into R, and create a long dataset where each observation has its own row. NA values are omitted and a frequency table is made to represent the amount of times that a post education goal was mentioned. First, the class starting in the Fall of 2017 is analyzed.
RecruitFall17 <- read_excel("~/Spring 2017/Data Analytics/RecruitFall17.xlsx")
View(RecruitFall17)
long17 <- melt(RecruitFall17, "Applicant ID", c(2:6))
long17$variable <- NULL
cleanlong17 <- na.omit(long17)
Ordered17 <- cleanlong17[order(cleanlong17$`Applicant ID`),]
Ordered.freq17=table(Ordered17$value)
Ordered.freq17 <-cbind(Ordered.freq17)
Make sure that the number of rows in the long data set is equal to the sum of frequencies in the frequency table. This ensures every observation is accounted for before moving forward.
sum(Ordered.freq17)
## [1] 1350
nrow(Ordered17)
## [1] 1350
Make the frequency table into a matrix so it can be analyzed.
DF17<-as.data.frame.matrix(Ordered.freq17)
names <- rownames(DF17)
rownames(DF17) <- NULL
data17 <-cbind(names,DF17)
data17
## names Ordered.freq17
## 1 ACCT 15
## 2 ACSC 63
## 3 ANTH 1
## 4 ART 10
## 5 ATR 2
## 6 BCH 3
## 7 BIOL 8
## 8 BMGT 22
## 9 CHEM 7
## 10 COMM 6
## 11 COSC 516
## 12 ECON 5
## 13 ELED 14
## 14 ENGL 5
## 15 ENGR 47
## 16 ENVST 1
## 17 EXNS 1
## 18 EXPL 101
## 19 EXSC 2
## 20 FREN 3
## 21 HIST 7
## 22 INSYS 100
## 23 IS 1
## 24 MATH 283
## 25 MEVO 1
## 26 MUS 9
## 27 NASC 13
## 28 PHIL 1
## 29 PHYS 9
## 30 PREA 8
## 31 PREE 35
## 32 PREL 5
## 33 PREM 5
## 34 PREN 1
## 35 PREP 4
## 36 PREPH 2
## 37 PREV 1
## 38 PSYC 13
## 39 SECED 11
## 40 SOC 4
## 41 SPAN 3
## 42 THEA 1
## 43 VCOGA 1
Filter out the more popular majors and create a barplot. List the remaining less popular majors in a frequency matrix.
Popular17<-filter(data17,data17$Ordered.freq17>=100)
SomewhatPopular17<-filter(data17, data17$Ordered.freq<100, data17$Ordered.freq17>=20)
LessPopular17 <-filter(data17, data17$Ordered.freq17<20)
Fall 2017
barplot(Popular17$Ordered.freq17 , names.arg=Popular17$names, ylab="Frequency", xlab="Post Education Goals", main="Most Popular Post Education Goals '17")

barplot(SomewhatPopular17$Ordered.freq17, names.arg=SomewhatPopular17$names, ylab="Frequency", xlab="Post Education Goals", main="Somewhat Popular Post Education Goals '17")

LessPopular17
## names Ordered.freq17
## 1 ACCT 15
## 2 ANTH 1
## 3 ART 10
## 4 ATR 2
## 5 BCH 3
## 6 BIOL 8
## 7 CHEM 7
## 8 COMM 6
## 9 ECON 5
## 10 ELED 14
## 11 ENGL 5
## 12 ENVST 1
## 13 EXNS 1
## 14 EXSC 2
## 15 FREN 3
## 16 HIST 7
## 17 IS 1
## 18 MEVO 1
## 19 MUS 9
## 20 NASC 13
## 21 PHIL 1
## 22 PHYS 9
## 23 PREA 8
## 24 PREL 5
## 25 PREM 5
## 26 PREN 1
## 27 PREP 4
## 28 PREPH 2
## 29 PREV 1
## 30 PSYC 13
## 31 SECED 11
## 32 SOC 4
## 33 SPAN 3
## 34 THEA 1
## 35 VCOGA 1
Repeat for Fall of 2018 and Fall of 2019
Fall 2018
RecruitFall18 <- read_excel("~/Spring 2017/Data Analytics/RecruitFall18.xlsx")
View(RecruitFall18)
long18 <- melt(RecruitFall18, "Applicant ID", c(2:8))
long18$variable <- NULL
cleanlong18 <- na.omit(long18)
Ordered18 <- cleanlong18[order(cleanlong18$`Applicant ID`),]
Ordered.freq18=table(Ordered18$value)
Ordered.freq18 <-cbind(Ordered.freq18)
sum(Ordered.freq18)
## [1] 678
nrow(Ordered18)
## [1] 678
DF18<-as.data.frame.matrix(Ordered.freq18)
names <- rownames(DF18)
rownames(DF18) <- NULL
data18 <-cbind(names,DF18)
data18
## names Ordered.freq18
## 1 ACCT 9
## 2 ACSC 15
## 3 ART 4
## 4 ATR 1
## 5 BCH 1
## 6 BIOL 6
## 7 BMGT 16
## 8 CHEM 3
## 9 COMM 1
## 10 COSC 276
## 11 ECON 1
## 12 ELED 4
## 13 ENGL 3
## 14 ENGR 22
## 15 EXPL 43
## 16 EXSC 2
## 17 HIST 5
## 18 INSYS 39
## 19 LING 1
## 20 MATH 169
## 21 MEEL 1
## 22 MUS 4
## 23 NASC 14
## 24 PHYS 2
## 25 PREA 1
## 26 PREE 17
## 27 PREL 2
## 28 PREM 1
## 29 PREP 1
## 30 PSYC 8
## 31 SECED 3
## 32 SOC 3
Popular18<-filter(data18,data18$Ordered.freq18>=100)
SomewhatPopular18<-filter(data18, data18$Ordered.freq18<100, data18$Ordered.freq18>=20)
LessPopular18 <-filter(data18, data18$Ordered.freq18<20)
barplot(Popular18$Ordered.freq18 , names.arg=Popular18$names, ylab="Frequency", xlab="Post Education Goals", main="Most Popular Post Education Goals '18")

barplot(SomewhatPopular18$Ordered.freq18, names.arg=SomewhatPopular18$names, ylab="Frequency", xlab="Post Education Goals", main="Somewhat Popular Post Education Goals '18")

LessPopular18
## names Ordered.freq18
## 1 ACCT 9
## 2 ACSC 15
## 3 ART 4
## 4 ATR 1
## 5 BCH 1
## 6 BIOL 6
## 7 BMGT 16
## 8 CHEM 3
## 9 COMM 1
## 10 ECON 1
## 11 ELED 4
## 12 ENGL 3
## 13 EXSC 2
## 14 HIST 5
## 15 LING 1
## 16 MEEL 1
## 17 MUS 4
## 18 NASC 14
## 19 PHYS 2
## 20 PREA 1
## 21 PREE 17
## 22 PREL 2
## 23 PREM 1
## 24 PREP 1
## 25 PSYC 8
## 26 SECED 3
## 27 SOC 3
Fall 2019
RecruitFall19 <- read_excel("~/Spring 2017/Data Analytics/RecruitFall19.xlsx")
View(RecruitFall19)
long19 <- melt(RecruitFall19, "Applicant ID", c(2:5))
long19$variable <- NULL
cleanlong19 <- na.omit(long19)
Ordered19 <- cleanlong19[order(cleanlong19$`Applicant ID`),]
Ordered.freq19=table(Ordered19$value)
Ordered.freq19 <-cbind(Ordered.freq19)
sum(Ordered.freq19)
## [1] 181
nrow(Ordered19)
## [1] 181
DF19<-as.data.frame.matrix(Ordered.freq19)
names <- rownames(DF19)
rownames(DF19) <- NULL
data19 <-cbind(names,DF19)
data19
## names Ordered.freq19
## 1 ACCT 2
## 2 ACSC 4
## 3 ART 2
## 4 BIOL 2
## 5 BMGT 4
## 6 COSC 53
## 7 ELED 3
## 8 ENGL 2
## 9 ENGR 14
## 10 EXPL 17
## 11 EXSC 2
## 12 HIST 2
## 13 INSYS 4
## 14 MATH 55
## 15 MUS 3
## 16 NASC 3
## 17 PHYS 2
## 18 PREM 1
## 19 PREP 1
## 20 PSYC 1
## 21 SECED 1
## 22 SOC 2
## 23 THEA 1
Popular19<-filter(data19,data19$Ordered.freq19>=14)
LessPopular19 <-filter(data19, data19$Ordered.freq19<14)
Fall 2019
barplot(Popular19$Ordered.freq19 , names.arg=Popular19$names, ylab="Frequency", xlab="Post Education Goals", main="Most Popular Post Education Goals '19")

LessPopular19
## names Ordered.freq19
## 1 ACCT 2
## 2 ACSC 4
## 3 ART 2
## 4 BIOL 2
## 5 BMGT 4
## 6 ELED 3
## 7 ENGL 2
## 8 EXSC 2
## 9 HIST 2
## 10 INSYS 4
## 11 MUS 3
## 12 NASC 3
## 13 PHYS 2
## 14 PREM 1
## 15 PREP 1
## 16 PSYC 1
## 17 SECED 1
## 18 SOC 2
## 19 THEA 1