Import data

# excel file
data <- read_excel("../00_data/myData_charts.xls")
data
## # A tibble: 16,383 × 29
##     ...1  year unitid institution_name         city_txt state_cd zip_text
##    <dbl> <dbl>  <dbl> <chr>                    <chr>    <chr>       <dbl>
##  1     1  2015 100654 Alabama A & M University Normal   AL          35762
##  2     2  2015 100654 Alabama A & M University Normal   AL          35762
##  3     3  2015 100654 Alabama A & M University Normal   AL          35762
##  4     4  2015 100654 Alabama A & M University Normal   AL          35762
##  5     5  2015 100654 Alabama A & M University Normal   AL          35762
##  6     6  2015 100654 Alabama A & M University Normal   AL          35762
##  7     7  2015 100654 Alabama A & M University Normal   AL          35762
##  8     8  2015 100654 Alabama A & M University Normal   AL          35762
##  9     9  2015 100654 Alabama A & M University Normal   AL          35762
## 10    10  2015 100654 Alabama A & M University Normal   AL          35762
## # ℹ 16,373 more rows
## # ℹ 22 more variables: classification_code <dbl>, classification_name <chr>,
## #   classification_other <chr>, ef_male_count <dbl>, ef_female_count <dbl>,
## #   ef_total_count <dbl>, sector_cd <dbl>, sector_name <chr>, sportscode <dbl>,
## #   partic_men <chr>, partic_women <chr>, partic_coed_men <chr>,
## #   partic_coed_women <chr>, sum_partic_men <dbl>, sum_partic_women <dbl>,
## #   rev_men <chr>, rev_women <chr>, total_rev_menwomen <chr>, exp_men <chr>, …

Apply the following dplyr verbs to your data

Filter rows

filter(data, institution_name == "University of Denver")
## # A tibble: 10 × 29
##     ...1  year unitid institution_name     city_txt state_cd zip_text
##    <dbl> <dbl>  <dbl> <chr>                <chr>    <chr>       <dbl>
##  1  2218  2015 127060 University of Denver Denver   CO          80208
##  2  2219  2015 127060 University of Denver Denver   CO          80208
##  3  2220  2015 127060 University of Denver Denver   CO          80208
##  4  2221  2015 127060 University of Denver Denver   CO          80208
##  5  2222  2015 127060 University of Denver Denver   CO          80208
##  6  2223  2015 127060 University of Denver Denver   CO          80208
##  7  2224  2015 127060 University of Denver Denver   CO          80208
##  8  2225  2015 127060 University of Denver Denver   CO          80208
##  9  2226  2015 127060 University of Denver Denver   CO          80208
## 10  2227  2015 127060 University of Denver Denver   CO          80208
## # ℹ 22 more variables: classification_code <dbl>, classification_name <chr>,
## #   classification_other <chr>, ef_male_count <dbl>, ef_female_count <dbl>,
## #   ef_total_count <dbl>, sector_cd <dbl>, sector_name <chr>, sportscode <dbl>,
## #   partic_men <chr>, partic_women <chr>, partic_coed_men <chr>,
## #   partic_coed_women <chr>, sum_partic_men <dbl>, sum_partic_women <dbl>,
## #   rev_men <chr>, rev_women <chr>, total_rev_menwomen <chr>, exp_men <chr>,
## #   exp_women <chr>, total_exp_menwomen <chr>, sports <chr>

Arrange rows

arrange(data, desc(total_rev_menwomen))
## # A tibble: 16,383 × 29
##     ...1  year unitid institution_name                city_txt state_cd zip_text
##    <dbl> <dbl>  <dbl> <chr>                           <chr>    <chr>       <dbl>
##  1    15  2015 100663 University of Alabama at Birmi… Birming… AL         3.53e8
##  2   141  2015 101480 Jacksonville State University   Jackson… AL         3.63e4
##  3   174  2015 101587 University of West Alabama      Livings… AL         3.55e4
##  4   307  2015 102614 University of Alaska Fairbanks  Fairban… AK         9.98e8
##  5  2104  2015 125462 West Hills College-Coalinga     Coalinga CA         9.32e4
##  6  2271  2015 127732 Northeastern Junior College     Sterling CO         8.08e8
##  7  2658  2015 131469 George Washington University    Washing… DC         2.01e4
##  8  3000  2015 136950 Rollins College                 Winter … FL         3.28e8
##  9  3259  2015 139630 Emmanuel College                Frankli… GA         3.06e4
## 10  3260  2015 139630 Emmanuel College                Frankli… GA         3.06e4
## # ℹ 16,373 more rows
## # ℹ 22 more variables: classification_code <dbl>, classification_name <chr>,
## #   classification_other <chr>, ef_male_count <dbl>, ef_female_count <dbl>,
## #   ef_total_count <dbl>, sector_cd <dbl>, sector_name <chr>, sportscode <dbl>,
## #   partic_men <chr>, partic_women <chr>, partic_coed_men <chr>,
## #   partic_coed_women <chr>, sum_partic_men <dbl>, sum_partic_women <dbl>,
## #   rev_men <chr>, rev_women <chr>, total_rev_menwomen <chr>, exp_men <chr>, …

Select columns

select(data, institution_name, state_cd, sports, total_rev_menwomen)
## # A tibble: 16,383 × 4
##    institution_name         state_cd sports             total_rev_menwomen
##    <chr>                    <chr>    <chr>              <chr>             
##  1 Alabama A & M University AL       Baseball           345592            
##  2 Alabama A & M University AL       Basketball         1959928           
##  3 Alabama A & M University AL       All Track Combined 498907            
##  4 Alabama A & M University AL       Football           2808949           
##  5 Alabama A & M University AL       Golf               78270             
##  6 Alabama A & M University AL       Soccer             410717            
##  7 Alabama A & M University AL       Softball           298164            
##  8 Alabama A & M University AL       Tennis             209419            
##  9 Alabama A & M University AL       Volleyball         321329            
## 10 Alabama A & M University AL       Bowling            127376            
## # ℹ 16,373 more rows

Add columns

mutate(data,
           student_nb = ef_male_count + ef_female_count) %>%
    select(institution_name, sports, student_nb)
## # A tibble: 16,383 × 3
##    institution_name         sports             student_nb
##    <chr>                    <chr>                   <dbl>
##  1 Alabama A & M University Baseball                 4223
##  2 Alabama A & M University Basketball               4223
##  3 Alabama A & M University All Track Combined       4223
##  4 Alabama A & M University Football                 4223
##  5 Alabama A & M University Golf                     4223
##  6 Alabama A & M University Soccer                   4223
##  7 Alabama A & M University Softball                 4223
##  8 Alabama A & M University Tennis                   4223
##  9 Alabama A & M University Volleyball               4223
## 10 Alabama A & M University Bowling                  4223
## # ℹ 16,373 more rows

Summarize by groups

data %>%
    
    # Group by sports
    group_by(sports) %>%
    
    # Calculate student number
    summarize(total_students = sum(ef_male_count + ef_female_count, na.rm = TRUE), .groups = "drop") %>%
    
    # Sort it
    arrange(total_students) %>%
ungroup()
## # A tibble: 36 × 2
##    sports                total_students
##    <chr>                          <dbl>
##  1 Table Tennis                    7346
##  2 Archery                        19024
##  3 Synchronized Swimming          46698
##  4 Badminton                      78373
##  5 Diving                         86982
##  6 Rodeo                          93789
##  7 Squash                        143422
##  8 Sailing                       167597
##  9 Skiing                        198322
## 10 Other Sports                  245166
## # ℹ 26 more rows