# Load the graduation-rate table. Every column is read as text so the
# percentage strings can be parsed explicitly below.
# Lines starting with `##` are console output recorded from an earlier run.
grad <- read.csv("grad-rate.csv", colClasses = "character")
str(grad)
## 'data.frame': 1252 obs. of 3 variables:
## $ School : chr "Juilliard School" "Bowdoin College" "College of the Holy Cross" "Amherst College" ...
## $ City..State : chr "New York, NY" "Brunswick, ME" "Worcester, MA" "Amherst, MA" ...
## $ X4.year.graduation.rate: chr "94%" "91%" "91%" "90%" ...
# Drop the percent sign and convert the rate to a number, then plot its
# distribution.
grad[["rate"]] <- as.numeric(
  gsub("%", "", grad[["X4.year.graduation.rate"]], fixed = TRUE)
)
hist(
  grad[["rate"]],
  xlab = "Graduation Rate",
  main = "Histogram of Graduation Rate",
  col = "skyblue",
  border = "white"
)

str(grad)
## 'data.frame': 1252 obs. of 4 variables:
## $ School : chr "Juilliard School" "Bowdoin College" "College of the Holy Cross" "Amherst College" ...
## $ City..State : chr "New York, NY" "Brunswick, ME" "Worcester, MA" "Amherst, MA" ...
## $ X4.year.graduation.rate: chr "94%" "91%" "91%" "90%" ...
## $ rate : num 94 91 91 90 90 90 90 90 90 90 ...
# Length of each "City, ST" string; used to slice the state and city apart.
grad[["n_char"]] <- nchar(grad[["City..State"]])
str(grad)
## 'data.frame': 1252 obs. of 5 variables:
## $ School : chr "Juilliard School" "Bowdoin College" "College of the Holy Cross" "Amherst College" ...
## $ City..State : chr "New York, NY" "Brunswick, ME" "Worcester, MA" "Amherst, MA" ...
## $ X4.year.graduation.rate: chr "94%" "91%" "91%" "90%" ...
## $ rate : num 94 91 91 90 90 90 90 90 90 90 ...
## $ n_char : int 12 13 13 11 15 14 11 15 13 14 ...
# The state is taken as the final two characters of the combined field.
grad[["State"]] <- with(grad, substr(City..State, n_char - 1, n_char))
str(grad)
## 'data.frame': 1252 obs. of 6 variables:
## $ School : chr "Juilliard School" "Bowdoin College" "College of the Holy Cross" "Amherst College" ...
## $ City..State : chr "New York, NY" "Brunswick, ME" "Worcester, MA" "Amherst, MA" ...
## $ X4.year.graduation.rate: chr "94%" "91%" "91%" "90%" ...
## $ rate : num 94 91 91 90 90 90 90 90 90 90 ...
## $ n_char : int 12 13 13 11 15 14 11 15 13 14 ...
## $ State : chr "NY" "ME" "MA" "MA" ...
# The city is everything before the trailing four characters (", ST").
grad[["City"]] <- with(grad, substr(City..State, 1, n_char - 4))
str(grad)
## 'data.frame': 1252 obs. of 7 variables:
## $ School : chr "Juilliard School" "Bowdoin College" "College of the Holy Cross" "Amherst College" ...
## $ City..State : chr "New York, NY" "Brunswick, ME" "Worcester, MA" "Amherst, MA" ...
## $ X4.year.graduation.rate: chr "94%" "91%" "91%" "90%" ...
## $ rate : num 94 91 91 90 90 90 90 90 90 90 ...
## $ n_char : int 12 13 13 11 15 14 11 15 13 14 ...
## $ State : chr "NY" "ME" "MA" "MA" ...
## $ City : chr "New York" "Brunswick" "Worcester" "Amherst" ...
# Keep only the columns needed for ranking: school, numeric rate, state, city.
rankgrad <- grad[, c("School", "rate", "State", "City")]
str(rankgrad)
## 'data.frame': 1252 obs. of 4 variables:
## $ School: chr "Juilliard School" "Bowdoin College" "College of the Holy Cross" "Amherst College" ...
## $ rate : num 94 91 91 90 90 90 90 90 90 90 ...
## $ State : chr "NY" "ME" "MA" "MA" ...
## $ City : chr "New York" "Brunswick" "Worcester" "Amherst" ...
# Sort schools from the highest graduation rate to the lowest.
rate_order <- order(rankgrad[["rate"]], decreasing = TRUE)
rankgrad <- rankgrad[rate_order, ]
head(rankgrad)
## School rate State City
## 1 Juilliard School 94 NY New York
## 2 Bowdoin College 91 ME Brunswick
## 3 College of the Holy Cross 91 MA Worcester
## 4 Amherst College 90 MA Amherst
## 5 Babson College 90 MA Babson Park
## 6 Georgetown University 90 DC Washington
# Print a summary of graduation rates for one state and report the schools
# with the highest and lowest rate.
#
# Arguments:
#   state1  State code to look up; it must occur in the `State` column.
#   data    Data frame whose first column is the school name, whose second
#           column is the numeric rate, and which has a `State` column.
#           Defaults to the script-level `rankgrad` table.
#
# Side effects: prints the first and last rows of the state's ranking, the
# number of schools with complete data, then the best and the worst school.
#
# Returns: the name of the lowest-rated school, invisibly (it is the value of
# the final print() call).
#
# Errors: stops with "invalid state" when `state1` is not present in `data`.
best_in_state <- function(state1, data = rankgrad) {
  schools <- data.frame(data)
  if (!state1 %in% unique(schools$State)) {
    stop("invalid state")
  }

  in_state <- schools[schools$State == state1, ]
  # Drop rows with any missing value, then rank from highest to lowest rate.
  ranked <- in_state[complete.cases(in_state), ]
  ranked <- ranked[order(ranked[, 2], decreasing = TRUE), ]

  print(head(ranked))
  print(tail(ranked))
  ncollege <- nrow(ranked)
  print(ncollege)

  # The positions from which.min()/which.max() refer to `ranked`, so the
  # school name must be read from `ranked` too. Reading it from the
  # unfiltered, unsorted subset picks the wrong row whenever the input is
  # not already sorted or contains incomplete rows.
  min_df <- ranked[which.min(ranked[, 2]), 1]
  max_df <- ranked[which.max(ranked[, 2]), 1]
  print(max_df)
  print(min_df)
}
# Example: summarise New York. The `##` lines are console output recorded
# from a run: the top rows, the bottom rows, the number of schools, then the
# best and the worst school.
best_in_state("NY")
## School rate State City
## 1 Juilliard School 94 NY New York
## 7 Hamilton College 90 NY Clinton
## 19 Colgate University 88 NY Hamilton
## 20 Columbia University 88 NY New York
## 28 Barnard College 87 NY New York
## 44 Cornell University 85 NY Ithaca
## School rate State City
## 1063 CUNY--College of Staten Island 20 NY Staten Island
## 1064 CUNY--Lehman College 20 NY Bronx
## 1182 CUNY--City College 12 NY New York
## 1222 Metropolitan College of New York 8 NY New York
## 1228 CUNY--York College 7 NY Jamaica
## 1236 CUNY--Medgar Evers College 6 NY Brooklyn
## [1] 108
## [1] "Juilliard School"
## [1] "CUNY--Medgar Evers College"
# Example: the same summary for Texas, again followed by its recorded output.
best_in_state("TX")
## School rate State City
## 61 Rice University 83 TX Houston
## 148 Trinity University 72 TX San Antonio
## 162 Southern Methodist University 71 TX Dallas
## 184 Texas Christian University 69 TX Fort Worth
## 209 Southwestern University 67 TX Georgetown
## 229 University of Dallas 65 TX Irving
## School rate State City
## 1153 University of Texas--San Antonio 15 TX San Antonio
## 1176 Prairie View A&M University 13 TX Prairie View
## 1226 Texas Southern University 8 TX Houston
## 1229 Jarvis Christian College 7 TX Hawkins
## 1240 Wayland Baptist University 6 TX Plainview
## 1246 University of Houston--Downtown 3 TX Houston
## [1] 53
## [1] "Rice University"
## [1] "University of Houston--Downtown"
# Return the school holding a given rank within one state, ranked from the
# highest graduation rate to the lowest.
#
# Arguments:
#   state2    State code to look up; it must occur in the `State` column.
#   outcome2  Accepted for compatibility with existing calls but not used:
#             the ranking is always based on the second column of `data`.
#   num       Rank to return: "best" (rank 1), "worst" (last rank), or a
#             number giving the position in the ranking.
#   data      Data frame whose first column is the school name, whose second
#             column is the numeric rate, and which has a `State` column.
#             Defaults to the script-level `rankgrad` table.
#
# Side effects: prints the first rows of the state's ranking and the number
# of schools with complete data.
#
# Returns: the school name at the requested rank, or NA when the rank is
# missing, below 1, beyond the number of schools, or an unrecognised string.
#
# Errors: stops with "invalid state" when `state2` is not present in `data`.
rankrate <- function(state2, outcome2, num = "best", data = rankgrad) {
  schools <- data.frame(data)
  if (!state2 %in% unique(schools$State)) {
    stop("invalid state")
  }

  in_state <- schools[schools$State == state2, ]
  # Drop rows with any missing value, then rank from highest to lowest rate.
  ranked <- in_state[complete.cases(in_state), ]
  ranked <- ranked[order(ranked[, 2], decreasing = TRUE), ]
  mrate <- nrow(ranked)
  print(head(ranked))
  print(mrate)

  # Only translate keywords when `num` is a string. With a numeric EXPR,
  # switch() selects an alternative by position, so a numeric rank of 2
  # would run the "worst" branch instead of meaning "second place".
  if (is.character(num)) {
    num <- switch(num, best = 1L, worst = mrate, NA_integer_)
  }
  if (is.na(num) || num < 1 || num > mrate) {
    return(NA)
  }
  ranked[num, 1]
}
# Example: the top-ranked (rank 1) school in New York. The `##` lines are
# console output recorded from a run: the head of the ranking, the number of
# schools, then the returned school name.
rankrate("NY", "rate", 1)
## School rate State City
## 1 Juilliard School 94 NY New York
## 7 Hamilton College 90 NY Clinton
## 19 Colgate University 88 NY Hamilton
## 20 Columbia University 88 NY New York
## 28 Barnard College 87 NY New York
## 44 Cornell University 85 NY Ithaca
## [1] 108
## [1] "Juilliard School"
# Example: the lowest-ranked school in Texas, requested with the "worst"
# keyword, followed by its recorded output.
rankrate("TX", "rate", "worst")
## School rate State City
## 61 Rice University 83 TX Houston
## 148 Trinity University 72 TX San Antonio
## 162 Southern Methodist University 71 TX Dallas
## 184 Texas Christian University 69 TX Fort Worth
## 209 Southwestern University 67 TX Georgetown
## 229 University of Dallas 65 TX Irving
## [1] 53
## [1] "University of Houston--Downtown"