# Load the graduation-rate table. Every column is read as character so the
# percentage strings arrive untouched and can be converted explicitly below.
grad <- read.csv("grad-rate.csv", colClasses = "character")
str(grad)
## 'data.frame':    1252 obs. of  3 variables:
##  $ School                 : chr  "Juilliard School" "Bowdoin College" "College of the Holy Cross" "Amherst College" ...
##  $ City..State            : chr  "New York, NY" "Brunswick, ME" "Worcester, MA" "Amherst, MA" ...
##  $ X4.year.graduation.rate: chr  "94%" "91%" "91%" "90%" ...

# Drop the percent sign and keep the 4-year graduation rate as a number.
grad[["rate"]] <- as.numeric(gsub("%", "", grad[["X4.year.graduation.rate"]]))
hist(
  grad[["rate"]],
  xlab = "Graduation Rate",
  main = "Histogram of Graduation Rate",
  col = "skyblue",
  border = "white"
)

str(grad)
## 'data.frame':    1252 obs. of  4 variables:
##  $ School                 : chr  "Juilliard School" "Bowdoin College" "College of the Holy Cross" "Amherst College" ...
##  $ City..State            : chr  "New York, NY" "Brunswick, ME" "Worcester, MA" "Amherst, MA" ...
##  $ X4.year.graduation.rate: chr  "94%" "91%" "91%" "90%" ...
##  $ rate                   : num  94 91 91 90 90 90 90 90 90 90 ...

# Length of each "City, ST" string; used as the anchor for the splits below.
grad[["n_char"]] <- nchar(grad[["City..State"]])
str(grad)
## 'data.frame':    1252 obs. of  5 variables:
##  $ School                 : chr  "Juilliard School" "Bowdoin College" "College of the Holy Cross" "Amherst College" ...
##  $ City..State            : chr  "New York, NY" "Brunswick, ME" "Worcester, MA" "Amherst, MA" ...
##  $ X4.year.graduation.rate: chr  "94%" "91%" "91%" "90%" ...
##  $ rate                   : num  94 91 91 90 90 90 90 90 90 90 ...
##  $ n_char                 : int  12 13 13 11 15 14 11 15 13 14 ...

# The state code is taken as the last two characters of the string.
# NOTE(review): assumes every value is formatted "City, ST" -- confirm.
grad[["State"]] <- with(grad, substr(City..State, start = n_char - 1, stop = n_char))
str(grad)
## 'data.frame':    1252 obs. of  6 variables:
##  $ School                 : chr  "Juilliard School" "Bowdoin College" "College of the Holy Cross" "Amherst College" ...
##  $ City..State            : chr  "New York, NY" "Brunswick, ME" "Worcester, MA" "Amherst, MA" ...
##  $ X4.year.graduation.rate: chr  "94%" "91%" "91%" "90%" ...
##  $ rate                   : num  94 91 91 90 90 90 90 90 90 90 ...
##  $ n_char                 : int  12 13 13 11 15 14 11 15 13 14 ...
##  $ State                  : chr  "NY" "ME" "MA" "MA" ...

# The city is everything before the trailing four characters (", ST").
grad[["City"]] <- with(grad, substr(City..State, start = 1, stop = n_char - 4))
str(grad)
## 'data.frame':    1252 obs. of  7 variables:
##  $ School                 : chr  "Juilliard School" "Bowdoin College" "College of the Holy Cross" "Amherst College" ...
##  $ City..State            : chr  "New York, NY" "Brunswick, ME" "Worcester, MA" "Amherst, MA" ...
##  $ X4.year.graduation.rate: chr  "94%" "91%" "91%" "90%" ...
##  $ rate                   : num  94 91 91 90 90 90 90 90 90 90 ...
##  $ n_char                 : int  12 13 13 11 15 14 11 15 13 14 ...
##  $ State                  : chr  "NY" "ME" "MA" "MA" ...
##  $ City                   : chr  "New York" "Brunswick" "Worcester" "Amherst" ...

# Keep only the columns needed for ranking, in the order the functions below
# index them by position: school name first, numeric rate second.
rankgrad <- grad[, c("School", "rate", "State", "City")]
str(rankgrad)
## 'data.frame':    1252 obs. of  4 variables:
##  $ School: chr  "Juilliard School" "Bowdoin College" "College of the Holy Cross" "Amherst College" ...
##  $ rate  : num  94 91 91 90 90 90 90 90 90 90 ...
##  $ State : chr  "NY" "ME" "MA" "MA" ...
##  $ City  : chr  "New York" "Brunswick" "Worcester" "Amherst" ...

# Order the whole table from highest to lowest graduation rate.
rankgrad <- rankgrad[with(rankgrad, order(rate, decreasing = TRUE)), ]
head(rankgrad)
##                      School rate State        City
## 1          Juilliard School   94    NY    New York
## 2           Bowdoin College   91    ME   Brunswick
## 3 College of the Holy Cross   91    MA   Worcester
## 4           Amherst College   90    MA     Amherst
## 5            Babson College   90    MA Babson Park
## 6     Georgetown University   90    DC  Washington
#' Print the best and worst schools by graduation rate within one state.
#'
#' Reads the global data frame `rankgrad`, where column 1 holds the school
#' name and column 2 the numeric rate, and which has a `State` column.
#' Rows for `state1` with any missing value are dropped and the rest are
#' sorted by rate, highest first. The function then prints, in order: the
#' first six rows, the last six rows, the number of schools, the name of the
#' top school, and the name of the bottom school.
#'
#' @param state1 State code to look up; must occur in `rankgrad$State`,
#'   otherwise the function stops with "invalid state".
#' @return The name of the lowest-rate school (the value of the final
#'   `print()` call, returned invisibly).
best_in_state <- function(state1) {
  schools <- data.frame(rankgrad)
  if (!state1 %in% schools$State) {
    stop("invalid state")
  }

  in_state <- schools[schools$State == state1, ]
  ranked <- in_state[complete.cases(in_state), ]
  ranked <- ranked[order(ranked[, 2], decreasing = TRUE), ]

  print(head(ranked))
  print(tail(ranked))
  print(nrow(ranked))

  # The positions from which.max()/which.min() refer to `ranked`, so the name
  # lookup must use `ranked` too. Indexing the unfiltered `in_state` frame
  # picks the wrong row whenever an incomplete row was dropped.
  max_df <- ranked[which.max(ranked[, 2]), 1]
  min_df <- ranked[which.min(ranked[, 2]), 1]
  print(max_df)
  print(min_df)
}
# Summarise New York: top rows, bottom rows, school count, best and worst.
best_in_state(state1 = "NY")
##                 School rate State     City
## 1     Juilliard School   94    NY New York
## 7     Hamilton College   90    NY  Clinton
## 19  Colgate University   88    NY Hamilton
## 20 Columbia University   88    NY New York
## 28     Barnard College   87    NY New York
## 44  Cornell University   85    NY   Ithaca
##                                School rate State          City
## 1063   CUNY--College of Staten Island   20    NY Staten Island
## 1064             CUNY--Lehman College   20    NY         Bronx
## 1182               CUNY--City College   12    NY      New York
## 1222 Metropolitan College of New York    8    NY      New York
## 1228               CUNY--York College    7    NY       Jamaica
## 1236       CUNY--Medgar Evers College    6    NY      Brooklyn
## [1] 108
## [1] "Juilliard School"
## [1] "CUNY--Medgar Evers College"
# Same summary for Texas.
best_in_state(state1 = "TX")
##                            School rate State        City
## 61                Rice University   83    TX     Houston
## 148            Trinity University   72    TX San Antonio
## 162 Southern Methodist University   71    TX      Dallas
## 184    Texas Christian University   69    TX  Fort Worth
## 209       Southwestern University   67    TX  Georgetown
## 229          University of Dallas   65    TX      Irving
##                                School rate State         City
## 1153 University of Texas--San Antonio   15    TX  San Antonio
## 1176      Prairie View A&M University   13    TX Prairie View
## 1226        Texas Southern University    8    TX      Houston
## 1229         Jarvis Christian College    7    TX      Hawkins
## 1240       Wayland Baptist University    6    TX    Plainview
## 1246  University of Houston--Downtown    3    TX      Houston
## [1] 53
## [1] "Rice University"
## [1] "University of Houston--Downtown"
#' Return the school holding a given graduation-rate rank within a state.
#'
#' Reads the global data frame `rankgrad`, where column 1 holds the school
#' name and column 2 the numeric rate, and which has a `State` column.
#' Rows for `state2` with any missing value are dropped and the rest are
#' sorted by rate, highest first. The first six ranked rows and the number
#' of ranked schools are printed before the result is returned.
#'
#' @param state2 State code to look up; must occur in `rankgrad$State`,
#'   otherwise the function stops with "invalid state".
#' @param outcome2 Accepted for interface compatibility but not used by the
#'   current implementation; ranking is always by column 2 of `rankgrad`.
#' @param num Rank to return: a positive number, or one of the keywords
#'   "best" (rank 1) or "worst" (last rank).
#' @return The school name at that rank, or `NA` when the rank lies outside
#'   `1..n` or the keyword is not recognised.
rankrate <- function(state2, outcome2, num = "best") {
  schools <- data.frame(rankgrad)
  if (!state2 %in% schools$State) {
    stop("invalid state")
  }

  in_state <- schools[schools$State == state2, ]
  ranked <- in_state[complete.cases(in_state), ]
  ranked <- ranked[order(ranked[, 2], decreasing = TRUE), ]
  mrate <- nrow(ranked)
  print(head(ranked))
  print(mrate)

  # Translate keywords only when `num` is a string. switch() on a NUMBER
  # selects an alternative by position, so a numeric rank of 2 would be
  # silently rewritten to the "worst" rank.
  if (is.character(num)) {
    num <- switch(num, best = 1L, worst = mrate, NA_integer_)
  }
  if (is.na(num) || num < 1 || num > mrate) {
    return(NA)
  }
  ranked[num, 1]
}
# Look up the rank-1 school in New York.
rankrate(state2 = "NY", outcome2 = "rate", num = 1)
##                 School rate State     City
## 1     Juilliard School   94    NY New York
## 7     Hamilton College   90    NY  Clinton
## 19  Colgate University   88    NY Hamilton
## 20 Columbia University   88    NY New York
## 28     Barnard College   87    NY New York
## 44  Cornell University   85    NY   Ithaca
## [1] 108
## [1] "Juilliard School"
# Look up the last-ranked school in Texas.
rankrate(state2 = "TX", outcome2 = "rate", num = "worst")
##                            School rate State        City
## 61                Rice University   83    TX     Houston
## 148            Trinity University   72    TX San Antonio
## 162 Southern Methodist University   71    TX      Dallas
## 184    Texas Christian University   69    TX  Fort Worth
## 209       Southwestern University   67    TX  Georgetown
## 229          University of Dallas   65    TX      Irving
## [1] 53
## [1] "University of Houston--Downtown"