Import data
# excel file
data <- read_excel("../00_data/myData_charts.xls")
data
## # A tibble: 16,383 × 29
## ...1 year unitid institution_name city_txt state_cd zip_text
## <dbl> <dbl> <dbl> <chr> <chr> <chr> <dbl>
## 1 1 2015 100654 Alabama A & M University Normal AL 35762
## 2 2 2015 100654 Alabama A & M University Normal AL 35762
## 3 3 2015 100654 Alabama A & M University Normal AL 35762
## 4 4 2015 100654 Alabama A & M University Normal AL 35762
## 5 5 2015 100654 Alabama A & M University Normal AL 35762
## 6 6 2015 100654 Alabama A & M University Normal AL 35762
## 7 7 2015 100654 Alabama A & M University Normal AL 35762
## 8 8 2015 100654 Alabama A & M University Normal AL 35762
## 9 9 2015 100654 Alabama A & M University Normal AL 35762
## 10 10 2015 100654 Alabama A & M University Normal AL 35762
## # ℹ 16,373 more rows
## # ℹ 22 more variables: classification_code <dbl>, classification_name <chr>,
## # classification_other <chr>, ef_male_count <dbl>, ef_female_count <dbl>,
## # ef_total_count <dbl>, sector_cd <dbl>, sector_name <chr>, sportscode <dbl>,
## # partic_men <chr>, partic_women <chr>, partic_coed_men <chr>,
## # partic_coed_women <chr>, sum_partic_men <dbl>, sum_partic_women <dbl>,
## # rev_men <chr>, rev_women <chr>, total_rev_menwomen <chr>, exp_men <chr>, …
Apply the following dplyr verbs to your data
Filter rows
filter(data, institution_name == "University of Denver")
## # A tibble: 10 × 29
## ...1 year unitid institution_name city_txt state_cd zip_text
## <dbl> <dbl> <dbl> <chr> <chr> <chr> <dbl>
## 1 2218 2015 127060 University of Denver Denver CO 80208
## 2 2219 2015 127060 University of Denver Denver CO 80208
## 3 2220 2015 127060 University of Denver Denver CO 80208
## 4 2221 2015 127060 University of Denver Denver CO 80208
## 5 2222 2015 127060 University of Denver Denver CO 80208
## 6 2223 2015 127060 University of Denver Denver CO 80208
## 7 2224 2015 127060 University of Denver Denver CO 80208
## 8 2225 2015 127060 University of Denver Denver CO 80208
## 9 2226 2015 127060 University of Denver Denver CO 80208
## 10 2227 2015 127060 University of Denver Denver CO 80208
## # ℹ 22 more variables: classification_code <dbl>, classification_name <chr>,
## # classification_other <chr>, ef_male_count <dbl>, ef_female_count <dbl>,
## # ef_total_count <dbl>, sector_cd <dbl>, sector_name <chr>, sportscode <dbl>,
## # partic_men <chr>, partic_women <chr>, partic_coed_men <chr>,
## # partic_coed_women <chr>, sum_partic_men <dbl>, sum_partic_women <dbl>,
## # rev_men <chr>, rev_women <chr>, total_rev_menwomen <chr>, exp_men <chr>,
## # exp_women <chr>, total_exp_menwomen <chr>, sports <chr>
Arrange rows
arrange(data, desc(total_rev_menwomen))
## # A tibble: 16,383 × 29
## ...1 year unitid institution_name city_txt state_cd zip_text
## <dbl> <dbl> <dbl> <chr> <chr> <chr> <dbl>
## 1 15 2015 100663 University of Alabama at Birmi… Birming… AL 3.53e8
## 2 141 2015 101480 Jacksonville State University Jackson… AL 3.63e4
## 3 174 2015 101587 University of West Alabama Livings… AL 3.55e4
## 4 307 2015 102614 University of Alaska Fairbanks Fairban… AK 9.98e8
## 5 2104 2015 125462 West Hills College-Coalinga Coalinga CA 9.32e4
## 6 2271 2015 127732 Northeastern Junior College Sterling CO 8.08e8
## 7 2658 2015 131469 George Washington University Washing… DC 2.01e4
## 8 3000 2015 136950 Rollins College Winter … FL 3.28e8
## 9 3259 2015 139630 Emmanuel College Frankli… GA 3.06e4
## 10 3260 2015 139630 Emmanuel College Frankli… GA 3.06e4
## # ℹ 16,373 more rows
## # ℹ 22 more variables: classification_code <dbl>, classification_name <chr>,
## # classification_other <chr>, ef_male_count <dbl>, ef_female_count <dbl>,
## # ef_total_count <dbl>, sector_cd <dbl>, sector_name <chr>, sportscode <dbl>,
## # partic_men <chr>, partic_women <chr>, partic_coed_men <chr>,
## # partic_coed_women <chr>, sum_partic_men <dbl>, sum_partic_women <dbl>,
## # rev_men <chr>, rev_women <chr>, total_rev_menwomen <chr>, exp_men <chr>, …
Select columns
select(data, institution_name, state_cd, sports, total_rev_menwomen)
## # A tibble: 16,383 × 4
## institution_name state_cd sports total_rev_menwomen
## <chr> <chr> <chr> <chr>
## 1 Alabama A & M University AL Baseball 345592
## 2 Alabama A & M University AL Basketball 1959928
## 3 Alabama A & M University AL All Track Combined 498907
## 4 Alabama A & M University AL Football 2808949
## 5 Alabama A & M University AL Golf 78270
## 6 Alabama A & M University AL Soccer 410717
## 7 Alabama A & M University AL Softball 298164
## 8 Alabama A & M University AL Tennis 209419
## 9 Alabama A & M University AL Volleyball 321329
## 10 Alabama A & M University AL Bowling 127376
## # ℹ 16,373 more rows
Add columns
mutate(data,
student_nb = ef_male_count + ef_female_count) %>%
select(institution_name, sports, student_nb)
## # A tibble: 16,383 × 3
## institution_name sports student_nb
## <chr> <chr> <dbl>
## 1 Alabama A & M University Baseball 4223
## 2 Alabama A & M University Basketball 4223
## 3 Alabama A & M University All Track Combined 4223
## 4 Alabama A & M University Football 4223
## 5 Alabama A & M University Golf 4223
## 6 Alabama A & M University Soccer 4223
## 7 Alabama A & M University Softball 4223
## 8 Alabama A & M University Tennis 4223
## 9 Alabama A & M University Volleyball 4223
## 10 Alabama A & M University Bowling 4223
## # ℹ 16,373 more rows
Summarize by groups
data %>%
# Group by sports
group_by(sports) %>%
# Calculate student number
summarize(total_students = sum(ef_male_count + ef_female_count, na.rm = TRUE), .groups = "drop") %>%
# Sort it
arrange(total_students) %>%
ungroup()
## # A tibble: 36 × 2
## sports total_students
## <chr> <dbl>
## 1 Table Tennis 7346
## 2 Archery 19024
## 3 Synchronized Swimming 46698
## 4 Badminton 78373
## 5 Diving 86982
## 6 Rodeo 93789
## 7 Squash 143422
## 8 Sailing 167597
## 9 Skiing 198322
## 10 Other Sports 245166
## # ℹ 26 more rows