This is a demonstration of the class examples from BCO6007 Lecture 4 (12.08.20), rendered in HTML format.

Read graduate-programs.csv into the variable "grad" and display its first 2 rows

library(tidyverse)
## ── Attaching packages ────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.3     ✓ dplyr   1.0.0
## ✓ tidyr   1.1.0     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ───────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Load the graduate-programs CSV into `grad`.
grad <- read_csv(file = "graduate-programs.csv")
## Parsed with column specification:
## cols(
##   subject = col_character(),
##   Inst = col_character(),
##   AvNumPubs = col_double(),
##   AvNumCits = col_double(),
##   PctFacGrants = col_double(),
##   PctCompletion = col_double(),
##   MedianTimetoDegree = col_double(),
##   PctMinorityFac = col_double(),
##   PctFemaleFac = col_double(),
##   PctFemaleStud = col_double(),
##   PctIntlStud = col_double(),
##   AvNumPhDs = col_double(),
##   AvGREs = col_double(),
##   TotFac = col_double(),
##   PctAsstProf = col_double(),
##   NumStud = col_double()
## )
# Preview the first two rows of `grad`.
grad %>%
  head(n = 2)
## # A tibble: 2 x 16
##   subject Inst  AvNumPubs AvNumCits PctFacGrants PctCompletion MedianTimetoDeg…
##   <chr>   <chr>     <dbl>     <dbl>        <dbl>         <dbl>            <dbl>
## 1 econom… ARIZ…      0.9       1.57         31.3          31.7             5.6 
## 2 econom… AUBU…      0.79      0.64         77.6          44.4             3.84
## # … with 9 more variables: PctMinorityFac <dbl>, PctFemaleFac <dbl>,
## #   PctFemaleStud <dbl>, PctIntlStud <dbl>, AvNumPhDs <dbl>, AvGREs <dbl>,
## #   TotFac <dbl>, PctAsstProf <dbl>, NumStud <dbl>

Group grad by subject and avocado by region using the group_by() function, then show various functions, incl. the slice_*() variants

# Store a copy of `grad` grouped by subject.
group_grad <- group_by(grad, subject)

# Load the avocado CSV into `avocado`.
avocado <- read_csv(file = "avocado.csv")
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   Date = col_date(format = ""),
##   AveragePrice = col_double(),
##   `Total Volume` = col_double(),
##   `4046` = col_double(),
##   `4225` = col_double(),
##   `4770` = col_double(),
##   `Total Bags` = col_double(),
##   `Small Bags` = col_double(),
##   `Large Bags` = col_double(),
##   `XLarge Bags` = col_double(),
##   type = col_character(),
##   year = col_double(),
##   region = col_character()
## )
# Store a copy of `avocado` grouped by region.
group_avocado <- group_by(avocado, region)

# First two rows of the ungrouped `grad` table.
slice_head(grad, n = 2)
## # A tibble: 2 x 16
##   subject Inst  AvNumPubs AvNumCits PctFacGrants PctCompletion MedianTimetoDeg…
##   <chr>   <chr>     <dbl>     <dbl>        <dbl>         <dbl>            <dbl>
## 1 econom… ARIZ…      0.9       1.57         31.3          31.7             5.6 
## 2 econom… AUBU…      0.79      0.64         77.6          44.4             3.84
## # … with 9 more variables: PctMinorityFac <dbl>, PctFemaleFac <dbl>,
## #   PctFemaleStud <dbl>, PctIntlStud <dbl>, AvNumPhDs <dbl>, AvGREs <dbl>,
## #   TotFac <dbl>, PctAsstProf <dbl>, NumStud <dbl>
# Last two rows of the ungrouped `avocado` table.
slice_tail(avocado, n = 2)
## # A tibble: 2 x 14
##      X1 Date       AveragePrice `Total Volume` `4046` `4225` `4770` `Total Bags`
##   <dbl> <date>            <dbl>          <dbl>  <dbl>  <dbl>  <dbl>        <dbl>
## 1    10 2018-01-14         1.93         16205.  1528.  2981.   727.       10970.
## 2    11 2018-01-07         1.62         17490.  2895.  2356.   225.       12014.
## # … with 6 more variables: `Small Bags` <dbl>, `Large Bags` <dbl>, `XLarge
## #   Bags` <dbl>, type <chr>, year <dbl>, region <chr>
# First two rows within each subject group.
grad %>%
  group_by(subject) %>%
  slice_head(n = 2)
## # A tibble: 8 x 16
## # Groups:   subject [4]
##   subject Inst  AvNumPubs AvNumCits PctFacGrants PctCompletion MedianTimetoDeg…
##   <chr>   <chr>     <dbl>     <dbl>        <dbl>         <dbl>            <dbl>
## 1 astron… CALI…      6.57      6.44        100            39.3             6   
## 2 astron… COLU…      4.48      4.47        100            58.7             6   
## 3 econom… ARIZ…      0.9       1.57         31.3          31.7             5.6 
## 4 econom… AUBU…      0.79      0.64         77.6          44.4             3.84
## 5 entomo… CLEM…      1.04      0.59         89.9          63.3             5.5 
## 6 entomo… CORN…      1.77      1.37         90.2          37.2             6.4 
## 7 psycho… AMER…      0.63      0.94         25            47.4             6.7 
## 8 psycho… ARIZ…      1.55      2.91         80.5          26.1             7   
## # … with 9 more variables: PctMinorityFac <dbl>, PctFemaleFac <dbl>,
## #   PctFemaleStud <dbl>, PctIntlStud <dbl>, AvNumPhDs <dbl>, AvGREs <dbl>,
## #   TotFac <dbl>, PctAsstProf <dbl>, NumStud <dbl>
# Last five rows within each region group.
avocado %>%
  group_by(region) %>%
  slice_tail(n = 5)
## # A tibble: 270 x 14
## # Groups:   region [54]
##       X1 Date       AveragePrice `Total Volume` `4046` `4225` `4770`
##    <dbl> <date>            <dbl>          <dbl>  <dbl>  <dbl>  <dbl>
##  1     7 2018-02-04         1.52          4125.  118.    420.      0
##  2     8 2018-01-28         1.32          6988.  434.    375.      0
##  3     9 2018-01-21         1.54          3347.   14.7   253.      0
##  4    10 2018-01-14         1.47          4141.    7.3   302.      0
##  5    11 2018-01-07         1.54          4817.   43.5   412.      0
##  6     7 2018-02-04         1.62         11900.  384.   4043.      0
##  7     8 2018-01-28         1.67         14446.  390.   5130.      0
##  8     9 2018-01-21         1.64         18555.  349.   3968.      0
##  9    10 2018-01-14         1.56         16152.  292.   3583.      0
## 10    11 2018-01-07         1.53         15714.  405.   4195.      0
## # … with 260 more rows, and 7 more variables: `Total Bags` <dbl>, `Small
## #   Bags` <dbl>, `Large Bags` <dbl>, `XLarge Bags` <dbl>, type <chr>,
## #   year <dbl>, region <chr>
# Rows with the three smallest NumStud values; ties are kept by default,
# so more than three rows can come back.
grad %>%
  slice_min(NumStud, n = 3)
## # A tibble: 5 x 16
##   subject Inst  AvNumPubs AvNumCits PctFacGrants PctCompletion MedianTimetoDeg…
##   <chr>   <chr>     <dbl>     <dbl>        <dbl>         <dbl>            <dbl>
## 1 psycho… FLOR…      0.23     0.580          0            50               2.5 
## 2 entomo… OREG…      0.44     0.93         100            60.7             5.88
## 3 entomo… UNIV…      1.48     0.41          82.9          64.3             3.9 
## 4 psycho… UNIV…      1.87     3.47          63.6          34               7.5 
## 5 psycho… UNIV…      0.07     1.43           0            72.3             5   
## # … with 9 more variables: PctMinorityFac <dbl>, PctFemaleFac <dbl>,
## #   PctFemaleStud <dbl>, PctIntlStud <dbl>, AvNumPhDs <dbl>, AvGREs <dbl>,
## #   TotFac <dbl>, PctAsstProf <dbl>, NumStud <dbl>
# Two smallest-NumStud rows per subject, with ties dropped so each group
# yields at most two rows.
slice_min(
  group_by(grad, subject),
  order_by = NumStud,
  n = 2,
  with_ties = FALSE
)
## # A tibble: 8 x 16
## # Groups:   subject [4]
##   subject Inst  AvNumPubs AvNumCits PctFacGrants PctCompletion MedianTimetoDeg…
##   <chr>   <chr>     <dbl>     <dbl>        <dbl>         <dbl>            <dbl>
## 1 astron… UNIV…      5.98     2.01         100            53.3             6   
## 2 astron… MICH…      4.56     3.89          87.4          36.7             4.3 
## 3 econom… UNIV…      0.05     0.39          60.9           0               4.83
## 4 econom… UNIV…      0.6      0.71          60            53               6   
## 5 entomo… OREG…      0.44     0.93         100            60.7             5.88
## 6 entomo… UNIV…      1.48     0.41          82.9          64.3             3.9 
## 7 psycho… FLOR…      0.23     0.580          0            50               2.5 
## 8 psycho… UNIV…      1.87     3.47          63.6          34               7.5 
## # … with 9 more variables: PctMinorityFac <dbl>, PctFemaleFac <dbl>,
## #   PctFemaleStud <dbl>, PctIntlStud <dbl>, AvNumPhDs <dbl>, AvGREs <dbl>,
## #   TotFac <dbl>, PctAsstProf <dbl>, NumStud <dbl>
# Rows with the five highest AveragePrice values.
avocado %>%
  slice_max(AveragePrice, n = 5)
## # A tibble: 5 x 14
##      X1 Date       AveragePrice `Total Volume` `4046` `4225` `4770` `Total Bags`
##   <dbl> <date>            <dbl>          <dbl>  <dbl>  <dbl>  <dbl>        <dbl>
## 1     8 2016-10-30         3.25         16701.  2326. 1.11e4     0         3232.
## 2    37 2017-04-16         3.17          3019.  1256. 8.23e1     0         1681.
## 3     7 2016-11-06         3.12         19044.  5898. 1.00e4     0         3106.
## 4    42 2017-03-12         3.05          2068.  1044. 7.74e1     0          947.
## 5    18 2017-08-27         3.04         12656.   419. 4.85e3   145.        7240.
## # … with 6 more variables: `Small Bags` <dbl>, `Large Bags` <dbl>, `XLarge
## #   Bags` <dbl>, type <chr>, year <dbl>, region <chr>
# Two largest-NumStud rows per subject, with ties dropped so each group
# yields at most two rows.
slice_max(
  group_by(grad, subject),
  order_by = NumStud,
  n = 2,
  with_ties = FALSE
)
## # A tibble: 8 x 16
## # Groups:   subject [4]
##   subject Inst  AvNumPubs AvNumCits PctFacGrants PctCompletion MedianTimetoDeg…
##   <chr>   <chr>     <dbl>     <dbl>        <dbl>         <dbl>            <dbl>
## 1 astron… UNIV…      3.4       3.24         86.8          70.7             5   
## 2 astron… UNIV…      3.33      2.84         95            51.3             5.72
## 3 econom… UNIV…      0.61      3.44         54.8          62.5             5.5 
## 4 econom… UNIV…      0.79      2.68         71.4          42.6             5.7 
## 5 entomo… UNIV…      1.49      1            85.4          48               5.33
## 6 entomo… UNIV…      2.69      1.35         95.9          48.7             5   
## 7 psycho… UNIV…      1.39      3.3          57.2          28.1             6.17
## 8 psycho… UNIV…      1.05      1.72         65.6          34.2             6   
## # … with 9 more variables: PctMinorityFac <dbl>, PctFemaleFac <dbl>,
## #   PctFemaleStud <dbl>, PctIntlStud <dbl>, AvNumPhDs <dbl>, AvGREs <dbl>,
## #   TotFac <dbl>, PctAsstProf <dbl>, NumStud <dbl>
# Count rows per subject and show the first three counts.
grad %>%
  count(subject) %>%
  slice_head(n = 3)
## # A tibble: 3 x 2
##   subject        n
##   <chr>      <int>
## 1 astronomy     32
## 2 economics    117
## 3 entomology    27
# Count rows per subject/institution pair and show the first three counts.
grad %>%
  count(subject, Inst) %>%
  slice_head(n = 3)
## # A tibble: 3 x 3
##   subject   Inst                                            n
##   <chr>     <chr>                                       <int>
## 1 astronomy CALIFORNIA INSTITUTE OF TECHNOLOGY              1
## 2 astronomy COLUMBIA UNIVERSITY IN THE CITY OF NEW YORK     1
## 3 astronomy CORNELL UNIVERSITY                              1
# Attach each region's row count as column `n`, then keep only the
# `X1`, `region`, and `n` columns.
avocado_counted <- select(
  add_count(avocado, region),
  X1, region, n
)

# Show the first five rows of the counted table.
slice_head(avocado_counted, n = 5)
## # A tibble: 5 x 3
##      X1 region     n
##   <dbl> <chr>  <int>
## 1     0 Albany   338
## 2     1 Albany   338
## 3     2 Albany   338
## 4     3 Albany   338
## 5     4 Albany   338
# Add a Student2Staff column (NumStud divided by TotFac) and show the
# first five rows.
grad %>%
  mutate(Student2Staff = NumStud / TotFac) %>%
  slice_head(n = 5)
## # A tibble: 5 x 17
##   subject Inst  AvNumPubs AvNumCits PctFacGrants PctCompletion MedianTimetoDeg…
##   <chr>   <chr>     <dbl>     <dbl>        <dbl>         <dbl>            <dbl>
## 1 econom… ARIZ…      0.9       1.57         31.3          31.7             5.6 
## 2 econom… AUBU…      0.79      0.64         77.6          44.4             3.84
## 3 econom… BOST…      0.51      1.03         43.5          46.8             5   
## 4 econom… BOST…      0.49      2.66         36.9          34.2             5.5 
## 5 econom… BRAN…      0.3       3.03         36.8          48.7             5.29
## # … with 10 more variables: PctMinorityFac <dbl>, PctFemaleFac <dbl>,
## #   PctFemaleStud <dbl>, PctIntlStud <dbl>, AvNumPhDs <dbl>, AvGREs <dbl>,
## #   TotFac <dbl>, PctAsstProf <dbl>, NumStud <dbl>, Student2Staff <dbl>
# Overall mean of NumStud together with the total row count.
grad %>%
  summarize(
    mean = mean(NumStud),
    n = n()
  )
## # A tibble: 1 x 2
##    mean     n
##   <dbl> <int>
## 1  54.5   412
# Mean of NumStud and row count for each subject.
grad %>%
  group_by(subject) %>%
  summarize(
    mean = mean(NumStud),
    n = n()
  )
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 4 x 3
##   subject     mean     n
##   <chr>      <dbl> <int>
## 1 astronomy   30.0    32
## 2 economics   60.7   117
## 3 entomology  20.1    27
## 4 psychology  58.7   236
# Keep only rows with more than 100 students.
grad %>%
  filter(NumStud > 100)
## # A tibble: 56 x 16
##    subject Inst  AvNumPubs AvNumCits PctFacGrants PctCompletion MedianTimetoDeg…
##    <chr>   <chr>     <dbl>     <dbl>        <dbl>         <dbl>            <dbl>
##  1 econom… BOST…      0.49      2.66         36.9          34.2             5.5 
##  2 econom… COLU…      0.62      1.52         60.2          43.4             6   
##  3 econom… CORN…      0.71      1.61         59.7          67.9             5.4 
##  4 econom… HARV…      1.17      4.26         68.9          79.7             5   
##  5 econom… MASS…      1.23      3.17         55.7          81.8             4.8 
##  6 econom… NEW …      0.76      2.35         71.3          56.4             5.67
##  7 econom… NORT…      0.53      2.43         48.6          54.6             5.1 
##  8 econom… OHIO…      0.51      1.09         42            32.3             5.75
##  9 econom… STAN…      0.52      2.73         45.3          58.7             6   
## 10 econom… TEXA…      0.46      0.73         43.9          44.1             5   
## # … with 46 more rows, and 9 more variables: PctMinorityFac <dbl>,
## #   PctFemaleFac <dbl>, PctFemaleStud <dbl>, PctIntlStud <dbl>,
## #   AvNumPhDs <dbl>, AvGREs <dbl>, TotFac <dbl>, PctAsstProf <dbl>,
## #   NumStud <dbl>
# Within each institution, keep rows whose NumStud exceeds that
# institution's mean NumStud; the result stays grouped by Inst.
grad %>%
  group_by(Inst) %>%
  filter(NumStud > mean(NumStud))
## # A tibble: 170 x 16
## # Groups:   Inst [127]
##    subject Inst  AvNumPubs AvNumCits PctFacGrants PctCompletion MedianTimetoDeg…
##    <chr>   <chr>     <dbl>     <dbl>        <dbl>         <dbl>            <dbl>
##  1 econom… BOST…      0.51      1.03         43.5          46.8             5   
##  2 econom… BOST…      0.49      2.66         36.9          34.2             5.5 
##  3 econom… BRAN…      0.3       3.03         36.8          48.7             5.29
##  4 econom… BROW…      0.84      2.31         27.1          54.6             6   
##  5 econom… CALI…      0.99      2.31         56.4          83.3             4   
##  6 econom… CARN…      0.43      1.67         35.2          45.6             5.05
##  7 econom… CITY…      0.35      1.06         38.1          27.9             5.2 
##  8 econom… COLU…      0.62      1.52         60.2          43.4             6   
##  9 econom… CORN…      0.71      1.61         59.7          67.9             5.4 
## 10 econom… FORD…      0.27      0.23         16.7          30.1             5.5 
## # … with 160 more rows, and 9 more variables: PctMinorityFac <dbl>,
## #   PctFemaleFac <dbl>, PctFemaleStud <dbl>, PctIntlStud <dbl>,
## #   AvNumPhDs <dbl>, AvGREs <dbl>, TotFac <dbl>, PctAsstProf <dbl>,
## #   NumStud <dbl>
# Keep only the Inst column.
select(grad, Inst)
## # A tibble: 412 x 1
##    Inst                                    
##    <chr>                                   
##  1 ARIZONA STATE UNIVERSITY                
##  2 AUBURN UNIVERSITY                       
##  3 BOSTON COLLEGE                          
##  4 BOSTON UNIVERSITY                       
##  5 BRANDEIS UNIVERSITY                     
##  6 BROWN UNIVERSITY                        
##  7 CALIFORNIA INSTITUTE OF TECHNOLOGY      
##  8 CARNEGIE MELLON UNIVERSITY              
##  9 CITY UNIVERSITY OF NEW YORK GRAD. CENTER
## 10 CLAREMONT GRADUATE UNIVERSITY           
## # … with 402 more rows
# Keep only the subject, Inst, and NumStud columns.
select(grad, subject, Inst, NumStud)
## # A tibble: 412 x 3
##    subject   Inst                                     NumStud
##    <chr>     <chr>                                      <dbl>
##  1 economics ARIZONA STATE UNIVERSITY                      33
##  2 economics AUBURN UNIVERSITY                             21
##  3 economics BOSTON COLLEGE                                64
##  4 economics BOSTON UNIVERSITY                            148
##  5 economics BRANDEIS UNIVERSITY                           24
##  6 economics BROWN UNIVERSITY                              81
##  7 economics CALIFORNIA INSTITUTE OF TECHNOLOGY            32
##  8 economics CARNEGIE MELLON UNIVERSITY                    35
##  9 economics CITY UNIVERSITY OF NEW YORK GRAD. CENTER      96
## 10 economics CLAREMONT GRADUATE UNIVERSITY                 76
## # … with 402 more rows
# Rows with the five smallest TotFac values.
slice_min(grad, order_by = TotFac, n = 5)
## # A tibble: 5 x 16
##   subject Inst  AvNumPubs AvNumCits PctFacGrants PctCompletion MedianTimetoDeg…
##   <chr>   <chr>     <dbl>     <dbl>        <dbl>         <dbl>            <dbl>
## 1 psycho… STAT…      1.05     2.27         100            50.6             5.5 
## 2 psycho… AUBU…      0.52     0.290         37.7          37.5             5.88
## 3 psycho… SETO…      0.04     0              0             0              NA   
## 4 psycho… STAT…      1.14     2.21          50            38.9             6.25
## 5 psycho… STAT…      0.45     1.86           0            44               7.75
## # … with 9 more variables: PctMinorityFac <dbl>, PctFemaleFac <dbl>,
## #   PctFemaleStud <dbl>, PctIntlStud <dbl>, AvNumPhDs <dbl>, AvGREs <dbl>,
## #   TotFac <dbl>, PctAsstProf <dbl>, NumStud <dbl>
# Rows with the four largest NumStud values.
slice_max(grad, order_by = NumStud, n = 4)
## # A tibble: 4 x 16
##   subject Inst  AvNumPubs AvNumCits PctFacGrants PctCompletion MedianTimetoDeg…
##   <chr>   <chr>     <dbl>     <dbl>        <dbl>         <dbl>            <dbl>
## 1 econom… UNIV…      0.61      3.44         54.8          62.5             5.5 
## 2 psycho… UNIV…      1.39      3.3          57.2          28.1             6.17
## 3 econom… UNIV…      0.79      2.68         71.4          42.6             5.7 
## 4 psycho… UNIV…      1.05      1.72         65.6          34.2             6   
## # … with 9 more variables: PctMinorityFac <dbl>, PctFemaleFac <dbl>,
## #   PctFemaleStud <dbl>, PctIntlStud <dbl>, AvNumPhDs <dbl>, AvGREs <dbl>,
## #   TotFac <dbl>, PctAsstProf <dbl>, NumStud <dbl>