# excel file
NCAA <- read_excel("../00_data/myData_charts.xls")
## New names:
## • `` -> `...1`
NCAA
## # A tibble: 16,383 × 29
## ...1 year unitid institution_name city_txt state_cd zip_text
## <dbl> <dbl> <dbl> <chr> <chr> <chr> <dbl>
## 1 1 2015 100654 Alabama A & M University Normal AL 35762
## 2 2 2015 100654 Alabama A & M University Normal AL 35762
## 3 3 2015 100654 Alabama A & M University Normal AL 35762
## 4 4 2015 100654 Alabama A & M University Normal AL 35762
## 5 5 2015 100654 Alabama A & M University Normal AL 35762
## 6 6 2015 100654 Alabama A & M University Normal AL 35762
## 7 7 2015 100654 Alabama A & M University Normal AL 35762
## 8 8 2015 100654 Alabama A & M University Normal AL 35762
## 9 9 2015 100654 Alabama A & M University Normal AL 35762
## 10 10 2015 100654 Alabama A & M University Normal AL 35762
## # ℹ 16,373 more rows
## # ℹ 22 more variables: classification_code <dbl>, classification_name <chr>,
## # classification_other <chr>, ef_male_count <dbl>, ef_female_count <dbl>,
## # ef_total_count <dbl>, sector_cd <dbl>, sector_name <chr>, sportscode <dbl>,
## # partic_men <chr>, partic_women <chr>, partic_coed_men <chr>,
## # partic_coed_women <chr>, sum_partic_men <dbl>, sum_partic_women <dbl>,
## # rev_men <chr>, rev_women <chr>, total_rev_menwomen <chr>, exp_men <chr>, …
NCAA_small <- NCAA %>%
select(institution_name, sports, rev_women, sum_partic_men, sum_partic_women) %>%
filter(institution_name %in% c("University of Denver"))
I’ll use sports
NCAA_small %>% count(sports)
## # A tibble: 10 × 2
## sports n
## <chr> <int>
## 1 Basketball 1
## 2 Golf 1
## 3 Gymnastics 1
## 4 Ice Hockey 1
## 5 Lacrosse 1
## 6 Skiing 1
## 7 Soccer 1
## 8 Swimming and Diving 1
## 9 Tennis 1
## 10 Volleyball 1
x1 <- c("Lacrosse", "Swimming and Diving", "Ice Hockey", "Soccer", "Basketball", "Tenis", "Golf", "Gymnastic", "Volleyball")
sports_levels <- c("Lacrosse", "Swimming and Diving", "Ice Hockey", "Soccer", "Basketball", "Tenis", "Golf", "Gymnastic", "Volleyball")
NCAA_rev <- NCAA_small %>%
mutate(sports = sports %>% factor(levels = sports_levels))
NCAA_rev
## # A tibble: 10 × 5
## institution_name sports rev_women sum_partic_men sum_partic_women
## <chr> <fct> <chr> <dbl> <dbl>
## 1 University of Denver Basketball 1729128 17 18
## 2 University of Denver Golf 617182 7 7
## 3 University of Denver <NA> 1388115 0 14
## 4 University of Denver Ice Hockey NA 27 0
## 5 University of Denver Lacrosse 1271011 51 32
## 6 University of Denver <NA> 812527 9 14
## 7 University of Denver Soccer 1356455 25 24
## 8 University of Denver Swimming and … 1122277 28 27
## 9 University of Denver <NA> 796467 10 8
## 10 University of Denver Volleyball 1349643 0 15
y1 <- factor(x1, levels = sports_levels)
y1
## [1] Lacrosse Swimming and Diving Ice Hockey
## [4] Soccer Basketball Tenis
## [7] Golf Gymnastic Volleyball
## 9 Levels: Lacrosse Swimming and Diving Ice Hockey Soccer Basketball ... Volleyball
Make two bar charts here - one before ordering another after
Unordered
# Transform data: calculate average revenue in women sports by sports
partic_women_by_sports <- NCAA %>%
group_by(sports) %>%
summarise(
avg_partic_women = mean(sum_partic_women, na.rm = TRUE)
)
partic_women_by_sports
## # A tibble: 36 × 2
## sports avg_partic_women
## <chr> <dbl>
## 1 All Track Combined 77.1
## 2 Archery 8.82
## 3 Badminton 9
## 4 Baseball 0
## 5 Basketball 14.4
## 6 Beach Volleyball 15.5
## 7 Bowling 9.35
## 8 Diving 3.14
## 9 Equestrian 31.0
## 10 Fencing 16.5
## # ℹ 26 more rows
ggplot(partic_women_by_sports, aes(avg_partic_women, sports)) + geom_point()
ggplot(partic_women_by_sports, aes(avg_partic_women, fct_reorder(sports, avg_partic_women))) + geom_point()
Show examples of three functions:
NCAA %>%
mutate(sports = fct_recode(sports,
"basketball" = "Basketball",
"golf" = "Golf",
"gymnastics" = "Gymnastics",
"ice hockey" = "Ice Hockey",
"lacrosse" = "Lacrosse",
"skiing" = "Skiing",
"soccer" = "Soccer",
"swimming and diving" = "Swimming and Diving",
"tennis" = "Tennis",
"volleyball" = "Volleyball",
"wrestling" = "Wrestling",
"all track combined" = "All Track Combined",
"badminton" = "Badminton",
"baseball" = "Baseball",
"beach volleyball" = "Beach Volleyball",
"bowling" = "Bowling",
"diving" = "Diving",
"equestrian" = "Equestrian",
"fencing" = "Fencing",
"archery" = "Archery",
"rowing" = "Rowing",
"track and field, outdoor" = "Track and Field, Outdoor",
"field hockey" = "Field Hockey",
"softball" = "Softball",
"sailing" = "Sailing",
"rodeo" = "Rodeo",
"other sports" = "Other Sports",
"squash" = "Squash",
"rifle" = "Rifle",
"table tennis" = "Table Tennis",
"water polo" = "Water Polo",
"swimming" = "Swimming",
"track and field, x-country" = "Track and Field, X-Country",
"football" = "Football"
)) %>%
count(sports)
## # A tibble: 36 × 2
## sports n
## <fct> <int>
## 1 all track combined 733
## 2 archery 11
## 3 badminton 14
## 4 baseball 1583
## 5 basketball 1878
## 6 beach volleyball 77
## 7 bowling 133
## 8 diving 7
## 9 equestrian 67
## 10 fencing 41
## # ℹ 26 more rows
NCAA_small %>%
mutate(sports = fct_collapse(sports,
Lacrosse = "Lacrosse",
Soccer = "Soccer",
Other = c("Basketball", "Volleyball", "Gymnastics", "Skiing", "Tennis", "Golf", "Ice Hockey", "Swimming and Diving"))) %>%
count(sports)
## # A tibble: 3 × 2
## sports n
## <fct> <int>
## 1 Other 8
## 2 Lacrosse 1
## 3 Soccer 1
NCAA_small %>%
mutate(sports = fct_lump(sports)) %>%
count(sports)
## # A tibble: 10 × 2
## sports n
## <fct> <int>
## 1 Basketball 1
## 2 Golf 1
## 3 Gymnastics 1
## 4 Ice Hockey 1
## 5 Lacrosse 1
## 6 Skiing 1
## 7 Soccer 1
## 8 Swimming and Diving 1
## 9 Tennis 1
## 10 Volleyball 1
No need to do anything here.