This is a demonstration of the BCO6007 Lecture 4 (12.08.20) class examples, in HTML format.
Read graduate-programs.csv into the variable "grad" and display the first 2 rows.
library(tidyverse)
# Read the graduate programs data, then preview its first two rows.
grad <- read_csv("graduate-programs.csv")
grad %>%
  head(n = 2)
## # A tibble: 2 x 16 ## subject Inst AvNumPubs AvNumCits PctFacGrants PctCompletion MedianTimetoDeg… ## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> ## 1 econom… ARIZ… 0.9 1.57 31.3 31.7 5.6 ## 2 econom… AUBU… 0.79 0.64 77.6 44.4 3.84 ## # … with 9 more variables: PctMinorityFac <dbl>, PctFemaleFac <dbl>, ## # PctFemaleStud <dbl>, PctIntlStud <dbl>, AvNumPhDs <dbl>, AvGREs <dbl>, ## # TotFac <dbl>, PctAsstProf <dbl>, NumStud <dbl>
Using the group_by() function on grad by subject and on avocado by region, show various functions, including slice().
# Group the graduate programs by subject.
group_grad <- grad %>%
  group_by(subject)

# Read the avocado data; it must be its own statement, separate from the
# assignment above, for the code to parse.
avocado <- read_csv("avocado.csv")
## Warning: Missing column names filled in: 'X1' [1]
# Group the avocado data by region.
group_avocado <- avocado %>%
  group_by(region)

# First two rows of the (ungrouped) graduate programs data.
grad %>%
  slice_head(n = 2)
## # A tibble: 2 x 16 ## subject Inst AvNumPubs AvNumCits PctFacGrants PctCompletion MedianTimetoDeg… ## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> ## 1 econom… ARIZ… 0.9 1.57 31.3 31.7 5.6 ## 2 econom… AUBU… 0.79 0.64 77.6 44.4 3.84 ## # … with 9 more variables: PctMinorityFac <dbl>, PctFemaleFac <dbl>, ## # PctFemaleStud <dbl>, PctIntlStud <dbl>, AvNumPhDs <dbl>, AvGREs <dbl>, ## # TotFac <dbl>, PctAsstProf <dbl>, NumStud <dbl>
# Last two rows of the avocado data.
slice_tail(avocado, n = 2)
## # A tibble: 2 x 14 ## X1 Date AveragePrice `Total Volume` `4046` `4225` `4770` `Total Bags` ## <dbl> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> ## 1 10 2018-01-14 1.93 16205. 1528. 2981. 727. 10970. ## 2 11 2018-01-07 1.62 17490. 2895. 2356. 225. 12014. ## # … with 6 more variables: `Small Bags` <dbl>, `Large Bags` <dbl>, `XLarge ## # Bags` <dbl>, type <chr>, year <dbl>, region <chr>
# First two rows within each subject group.
grad %>%
  group_by(subject) %>%
  slice_head(n = 2)
## # A tibble: 8 x 16 ## # Groups: subject [4] ## subject Inst AvNumPubs AvNumCits PctFacGrants PctCompletion MedianTimetoDeg… ## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> ## 1 astron… CALI… 6.57 6.44 100 39.3 6 ## 2 astron… COLU… 4.48 4.47 100 58.7 6 ## 3 econom… ARIZ… 0.9 1.57 31.3 31.7 5.6 ## 4 econom… AUBU… 0.79 0.64 77.6 44.4 3.84 ## 5 entomo… CLEM… 1.04 0.59 89.9 63.3 5.5 ## 6 entomo… CORN… 1.77 1.37 90.2 37.2 6.4 ## 7 psycho… AMER… 0.63 0.94 25 47.4 6.7 ## 8 psycho… ARIZ… 1.55 2.91 80.5 26.1 7 ## # … with 9 more variables: PctMinorityFac <dbl>, PctFemaleFac <dbl>, ## # PctFemaleStud <dbl>, PctIntlStud <dbl>, AvNumPhDs <dbl>, AvGREs <dbl>, ## # TotFac <dbl>, PctAsstProf <dbl>, NumStud <dbl>
# Last five rows within each region group.
avocado %>%
  group_by(region) %>%
  slice_tail(n = 5)
## # A tibble: 270 x 14 ## # Groups: region [54] ## X1 Date AveragePrice `Total Volume` `4046` `4225` `4770` ## <dbl> <date> <dbl> <dbl> <dbl> <dbl> <dbl> ## 1 7 2018-02-04 1.52 4125. 118. 420. 0 ## 2 8 2018-01-28 1.32 6988. 434. 375. 0 ## 3 9 2018-01-21 1.54 3347. 14.7 253. 0 ## 4 10 2018-01-14 1.47 4141. 7.3 302. 0 ## 5 11 2018-01-07 1.54 4817. 43.5 412. 0 ## 6 7 2018-02-04 1.62 11900. 384. 4043. 0 ## 7 8 2018-01-28 1.67 14446. 390. 5130. 0 ## 8 9 2018-01-21 1.64 18555. 349. 3968. 0 ## 9 10 2018-01-14 1.56 16152. 292. 3583. 0 ## 10 11 2018-01-07 1.53 15714. 405. 4195. 0 ## # … with 260 more rows, and 7 more variables: `Total Bags` <dbl>, `Small ## # Bags` <dbl>, `Large Bags` <dbl>, `XLarge Bags` <dbl>, type <chr>, ## # year <dbl>, region <chr>
# Rows with the three smallest NumStud values. Ties are kept by default,
# so more than three rows may be returned.
slice_min(grad, order_by = NumStud, n = 3)
## # A tibble: 5 x 16 ## subject Inst AvNumPubs AvNumCits PctFacGrants PctCompletion MedianTimetoDeg… ## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> ## 1 psycho… FLOR… 0.23 0.580 0 50 2.5 ## 2 entomo… OREG… 0.44 0.93 100 60.7 5.88 ## 3 entomo… UNIV… 1.48 0.41 82.9 64.3 3.9 ## 4 psycho… UNIV… 1.87 3.47 63.6 34 7.5 ## 5 psycho… UNIV… 0.07 1.43 0 72.3 5 ## # … with 9 more variables: PctMinorityFac <dbl>, PctFemaleFac <dbl>, ## # PctFemaleStud <dbl>, PctIntlStud <dbl>, AvNumPhDs <dbl>, AvGREs <dbl>, ## # TotFac <dbl>, PctAsstProf <dbl>, NumStud <dbl>
# Two smallest programs (by NumStud) per subject; with_ties = FALSE caps
# each group at exactly two rows.
grad %>%
  group_by(subject) %>%
  slice_min(order_by = NumStud, n = 2, with_ties = FALSE)
## # A tibble: 8 x 16 ## # Groups: subject [4] ## subject Inst AvNumPubs AvNumCits PctFacGrants PctCompletion MedianTimetoDeg… ## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> ## 1 astron… UNIV… 5.98 2.01 100 53.3 6 ## 2 astron… MICH… 4.56 3.89 87.4 36.7 4.3 ## 3 econom… UNIV… 0.05 0.39 60.9 0 4.83 ## 4 econom… UNIV… 0.6 0.71 60 53 6 ## 5 entomo… OREG… 0.44 0.93 100 60.7 5.88 ## 6 entomo… UNIV… 1.48 0.41 82.9 64.3 3.9 ## 7 psycho… FLOR… 0.23 0.580 0 50 2.5 ## 8 psycho… UNIV… 1.87 3.47 63.6 34 7.5 ## # … with 9 more variables: PctMinorityFac <dbl>, PctFemaleFac <dbl>, ## # PctFemaleStud <dbl>, PctIntlStud <dbl>, AvNumPhDs <dbl>, AvGREs <dbl>, ## # TotFac <dbl>, PctAsstProf <dbl>, NumStud <dbl>
# Five rows with the highest AveragePrice.
slice_max(avocado, order_by = AveragePrice, n = 5)
## # A tibble: 5 x 14 ## X1 Date AveragePrice `Total Volume` `4046` `4225` `4770` `Total Bags` ## <dbl> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> ## 1 8 2016-10-30 3.25 16701. 2326. 1.11e4 0 3232. ## 2 37 2017-04-16 3.17 3019. 1256. 8.23e1 0 1681. ## 3 7 2016-11-06 3.12 19044. 5898. 1.00e4 0 3106. ## 4 42 2017-03-12 3.05 2068. 1044. 7.74e1 0 947. ## 5 18 2017-08-27 3.04 12656. 419. 4.85e3 145. 7240. ## # … with 6 more variables: `Small Bags` <dbl>, `Large Bags` <dbl>, `XLarge ## # Bags` <dbl>, type <chr>, year <dbl>, region <chr>
# Two largest programs (by NumStud) per subject; with_ties = FALSE caps
# each group at exactly two rows.
grad %>%
  group_by(subject) %>%
  slice_max(order_by = NumStud, n = 2, with_ties = FALSE)
## # A tibble: 8 x 16 ## # Groups: subject [4] ## subject Inst AvNumPubs AvNumCits PctFacGrants PctCompletion MedianTimetoDeg… ## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> ## 1 astron… UNIV… 3.4 3.24 86.8 70.7 5 ## 2 astron… UNIV… 3.33 2.84 95 51.3 5.72 ## 3 econom… UNIV… 0.61 3.44 54.8 62.5 5.5 ## 4 econom… UNIV… 0.79 2.68 71.4 42.6 5.7 ## 5 entomo… UNIV… 1.49 1 85.4 48 5.33 ## 6 entomo… UNIV… 2.69 1.35 95.9 48.7 5 ## 7 psycho… UNIV… 1.39 3.3 57.2 28.1 6.17 ## 8 psycho… UNIV… 1.05 1.72 65.6 34.2 6 ## # … with 9 more variables: PctMinorityFac <dbl>, PctFemaleFac <dbl>, ## # PctFemaleStud <dbl>, PctIntlStud <dbl>, AvNumPhDs <dbl>, AvGREs <dbl>, ## # TotFac <dbl>, PctAsstProf <dbl>, NumStud <dbl>
# Number of rows per subject, showing the first three counts.
grad %>%
  count(subject) %>%
  slice_head(n = 3)
## # A tibble: 3 x 2 ## subject n ## <chr> <int> ## 1 astronomy 32 ## 2 economics 117 ## 3 entomology 27
# Number of rows per subject/institution pair, showing the first three.
grad %>%
  count(subject, Inst) %>%
  slice_head(n = 3)
## # A tibble: 3 x 3 ## subject Inst n ## <chr> <chr> <int> ## 1 astronomy CALIFORNIA INSTITUTE OF TECHNOLOGY 1 ## 2 astronomy COLUMBIA UNIVERSITY IN THE CITY OF NEW YORK 1 ## 3 astronomy CORNELL UNIVERSITY 1
# Attach the per-region row count (column n) to every row, keeping only
# the row id, the region and the count.
avocado_counted <- avocado %>%
  add_count(region) %>%
  select(X1, region, n)

# Preview the first five rows; this must be its own statement, separate
# from the assignment above, for the code to parse.
avocado_counted %>%
  slice_head(n = 5)
## # A tibble: 5 x 3 ## X1 region n ## <dbl> <chr> <int> ## 1 0 Albany 338 ## 2 1 Albany 338 ## 3 2 Albany 338 ## 4 3 Albany 338 ## 5 4 Albany 338
# Add a Student2Staff column (NumStud divided by TotFac) and show the
# first five rows.
grad %>%
  mutate(Student2Staff = NumStud / TotFac) %>%
  slice_head(n = 5)
## # A tibble: 5 x 17 ## subject Inst AvNumPubs AvNumCits PctFacGrants PctCompletion MedianTimetoDeg… ## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> ## 1 econom… ARIZ… 0.9 1.57 31.3 31.7 5.6 ## 2 econom… AUBU… 0.79 0.64 77.6 44.4 3.84 ## 3 econom… BOST… 0.51 1.03 43.5 46.8 5 ## 4 econom… BOST… 0.49 2.66 36.9 34.2 5.5 ## 5 econom… BRAN… 0.3 3.03 36.8 48.7 5.29 ## # … with 10 more variables: PctMinorityFac <dbl>, PctFemaleFac <dbl>, ## # PctFemaleStud <dbl>, PctIntlStud <dbl>, AvNumPhDs <dbl>, AvGREs <dbl>, ## # TotFac <dbl>, PctAsstProf <dbl>, NumStud <dbl>, Student2Staff <dbl>
# Overall mean of NumStud and the total number of rows.
summarize(
  grad,
  mean = mean(NumStud),
  n = n()
)
## # A tibble: 1 x 2 ## mean n ## <dbl> <int> ## 1 54.5 412
# Mean of NumStud and the number of rows for each subject.
grad %>%
  group_by(subject) %>%
  summarize(
    mean = mean(NumStud),
    n = n()
  )
## # A tibble: 4 x 3 ## subject mean n ## <chr> <dbl> <int> ## 1 astronomy 30.0 32 ## 2 economics 60.7 117 ## 3 entomology 20.1 27 ## 4 psychology 58.7 236
# Keep only the rows where NumStud exceeds 100.
filter(grad, NumStud > 100)
## # A tibble: 56 x 16 ## subject Inst AvNumPubs AvNumCits PctFacGrants PctCompletion MedianTimetoDeg… ## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> ## 1 econom… BOST… 0.49 2.66 36.9 34.2 5.5 ## 2 econom… COLU… 0.62 1.52 60.2 43.4 6 ## 3 econom… CORN… 0.71 1.61 59.7 67.9 5.4 ## 4 econom… HARV… 1.17 4.26 68.9 79.7 5 ## 5 econom… MASS… 1.23 3.17 55.7 81.8 4.8 ## 6 econom… NEW … 0.76 2.35 71.3 56.4 5.67 ## 7 econom… NORT… 0.53 2.43 48.6 54.6 5.1 ## 8 econom… OHIO… 0.51 1.09 42 32.3 5.75 ## 9 econom… STAN… 0.52 2.73 45.3 58.7 6 ## 10 econom… TEXA… 0.46 0.73 43.9 44.1 5 ## # … with 46 more rows, and 9 more variables: PctMinorityFac <dbl>, ## # PctFemaleFac <dbl>, PctFemaleStud <dbl>, PctIntlStud <dbl>, ## # AvNumPhDs <dbl>, AvGREs <dbl>, TotFac <dbl>, PctAsstProf <dbl>, ## # NumStud <dbl>
# Within each institution, keep the rows whose NumStud is above that
# institution's own mean NumStud.
grad %>%
  group_by(Inst) %>%
  filter(NumStud > mean(NumStud))
## # A tibble: 170 x 16 ## # Groups: Inst [127] ## subject Inst AvNumPubs AvNumCits PctFacGrants PctCompletion MedianTimetoDeg… ## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> ## 1 econom… BOST… 0.51 1.03 43.5 46.8 5 ## 2 econom… BOST… 0.49 2.66 36.9 34.2 5.5 ## 3 econom… BRAN… 0.3 3.03 36.8 48.7 5.29 ## 4 econom… BROW… 0.84 2.31 27.1 54.6 6 ## 5 econom… CALI… 0.99 2.31 56.4 83.3 4 ## 6 econom… CARN… 0.43 1.67 35.2 45.6 5.05 ## 7 econom… CITY… 0.35 1.06 38.1 27.9 5.2 ## 8 econom… COLU… 0.62 1.52 60.2 43.4 6 ## 9 econom… CORN… 0.71 1.61 59.7 67.9 5.4 ## 10 econom… FORD… 0.27 0.23 16.7 30.1 5.5 ## # … with 160 more rows, and 9 more variables: PctMinorityFac <dbl>, ## # PctFemaleFac <dbl>, PctFemaleStud <dbl>, PctIntlStud <dbl>, ## # AvNumPhDs <dbl>, AvGREs <dbl>, TotFac <dbl>, PctAsstProf <dbl>, ## # NumStud <dbl>
# Keep only the Inst column.
select(grad, Inst)
## # A tibble: 412 x 1 ## Inst ## <chr> ## 1 ARIZONA STATE UNIVERSITY ## 2 AUBURN UNIVERSITY ## 3 BOSTON COLLEGE ## 4 BOSTON UNIVERSITY ## 5 BRANDEIS UNIVERSITY ## 6 BROWN UNIVERSITY ## 7 CALIFORNIA INSTITUTE OF TECHNOLOGY ## 8 CARNEGIE MELLON UNIVERSITY ## 9 CITY UNIVERSITY OF NEW YORK GRAD. CENTER ## 10 CLAREMONT GRADUATE UNIVERSITY ## # … with 402 more rows
# Keep the subject, Inst and NumStud columns, in that order.
select(grad, subject, Inst, NumStud)
## # A tibble: 412 x 3 ## subject Inst NumStud ## <chr> <chr> <dbl> ## 1 economics ARIZONA STATE UNIVERSITY 33 ## 2 economics AUBURN UNIVERSITY 21 ## 3 economics BOSTON COLLEGE 64 ## 4 economics BOSTON UNIVERSITY 148 ## 5 economics BRANDEIS UNIVERSITY 24 ## 6 economics BROWN UNIVERSITY 81 ## 7 economics CALIFORNIA INSTITUTE OF TECHNOLOGY 32 ## 8 economics CARNEGIE MELLON UNIVERSITY 35 ## 9 economics CITY UNIVERSITY OF NEW YORK GRAD. CENTER 96 ## 10 economics CLAREMONT GRADUATE UNIVERSITY 76 ## # … with 402 more rows
# Rows with the five smallest TotFac values.
slice_min(grad, order_by = TotFac, n = 5)
## # A tibble: 5 x 16 ## subject Inst AvNumPubs AvNumCits PctFacGrants PctCompletion MedianTimetoDeg… ## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> ## 1 psycho… STAT… 1.05 2.27 100 50.6 5.5 ## 2 psycho… AUBU… 0.52 0.290 37.7 37.5 5.88 ## 3 psycho… SETO… 0.04 0 0 0 NA ## 4 psycho… STAT… 1.14 2.21 50 38.9 6.25 ## 5 psycho… STAT… 0.45 1.86 0 44 7.75 ## # … with 9 more variables: PctMinorityFac <dbl>, PctFemaleFac <dbl>, ## # PctFemaleStud <dbl>, PctIntlStud <dbl>, AvNumPhDs <dbl>, AvGREs <dbl>, ## # TotFac <dbl>, PctAsstProf <dbl>, NumStud <dbl>
# Rows with the four largest NumStud values.
slice_max(grad, order_by = NumStud, n = 4)
## # A tibble: 4 x 16 ## subject Inst AvNumPubs AvNumCits PctFacGrants PctCompletion MedianTimetoDeg… ## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> ## 1 econom… UNIV… 0.61 3.44 54.8 62.5 5.5 ## 2 psycho… UNIV… 1.39 3.3 57.2 28.1 6.17 ## 3 econom… UNIV… 0.79 2.68 71.4 42.6 5.7 ## 4 psycho… UNIV… 1.05 1.72 65.6 34.2 6 ## # … with 9 more variables: PctMinorityFac <dbl>, PctFemaleFac <dbl>, ## # PctFemaleStud <dbl>, PctIntlStud <dbl>, AvNumPhDs <dbl>, AvGREs <dbl>, ## # TotFac <dbl>, PctAsstProf <dbl>, NumStud <dbl>