Import your data

# excel file
NCAA <- read_excel("../00_data/myData_charts.xls")
## New names:
## • `` -> `...1`
NCAA
## # A tibble: 16,383 × 29
##     ...1  year unitid institution_name         city_txt state_cd zip_text
##    <dbl> <dbl>  <dbl> <chr>                    <chr>    <chr>       <dbl>
##  1     1  2015 100654 Alabama A & M University Normal   AL          35762
##  2     2  2015 100654 Alabama A & M University Normal   AL          35762
##  3     3  2015 100654 Alabama A & M University Normal   AL          35762
##  4     4  2015 100654 Alabama A & M University Normal   AL          35762
##  5     5  2015 100654 Alabama A & M University Normal   AL          35762
##  6     6  2015 100654 Alabama A & M University Normal   AL          35762
##  7     7  2015 100654 Alabama A & M University Normal   AL          35762
##  8     8  2015 100654 Alabama A & M University Normal   AL          35762
##  9     9  2015 100654 Alabama A & M University Normal   AL          35762
## 10    10  2015 100654 Alabama A & M University Normal   AL          35762
## # ℹ 16,373 more rows
## # ℹ 22 more variables: classification_code <dbl>, classification_name <chr>,
## #   classification_other <chr>, ef_male_count <dbl>, ef_female_count <dbl>,
## #   ef_total_count <dbl>, sector_cd <dbl>, sector_name <chr>, sportscode <dbl>,
## #   partic_men <chr>, partic_women <chr>, partic_coed_men <chr>,
## #   partic_coed_women <chr>, sum_partic_men <dbl>, sum_partic_women <dbl>,
## #   rev_men <chr>, rev_women <chr>, total_rev_menwomen <chr>, exp_men <chr>, …

Pivoting

NCAA_small <- NCAA %>%
    
    select(institution_name, sports, rev_women, sum_partic_men, sum_partic_women) %>%
    
    filter(institution_name %in% c("University of Denver"))

long to wide form

NCAA_long <- NCAA_small %>%
    
    pivot_longer(cols = c(sum_partic_men, sum_partic_women),
               names_to = "gender",
               values_to = "participation_count")
NCAA_long
## # A tibble: 20 × 5
##    institution_name     sports              rev_women gender participation_count
##    <chr>                <chr>               <chr>     <chr>                <dbl>
##  1 University of Denver Basketball          1729128   sum_p…                  17
##  2 University of Denver Basketball          1729128   sum_p…                  18
##  3 University of Denver Golf                617182    sum_p…                   7
##  4 University of Denver Golf                617182    sum_p…                   7
##  5 University of Denver Gymnastics          1388115   sum_p…                   0
##  6 University of Denver Gymnastics          1388115   sum_p…                  14
##  7 University of Denver Ice Hockey          NA        sum_p…                  27
##  8 University of Denver Ice Hockey          NA        sum_p…                   0
##  9 University of Denver Lacrosse            1271011   sum_p…                  51
## 10 University of Denver Lacrosse            1271011   sum_p…                  32
## 11 University of Denver Skiing              812527    sum_p…                   9
## 12 University of Denver Skiing              812527    sum_p…                  14
## 13 University of Denver Soccer              1356455   sum_p…                  25
## 14 University of Denver Soccer              1356455   sum_p…                  24
## 15 University of Denver Swimming and Diving 1122277   sum_p…                  28
## 16 University of Denver Swimming and Diving 1122277   sum_p…                  27
## 17 University of Denver Tennis              796467    sum_p…                  10
## 18 University of Denver Tennis              796467    sum_p…                   8
## 19 University of Denver Volleyball          1349643   sum_p…                   0
## 20 University of Denver Volleyball          1349643   sum_p…                  15

wide to long form

NCAA_wide <- NCAA_small %>%
    
    pivot_wider(names_from = sports,
                values_from = rev_women)

NCAA_wide
## # A tibble: 10 × 13
##    institution_name  sum_partic_men sum_partic_women Basketball Golf  Gymnastics
##    <chr>                      <dbl>            <dbl> <chr>      <chr> <chr>     
##  1 University of De…             17               18 1729128    <NA>  <NA>      
##  2 University of De…              7                7 <NA>       6171… <NA>      
##  3 University of De…              0               14 <NA>       <NA>  1388115   
##  4 University of De…             27                0 <NA>       <NA>  <NA>      
##  5 University of De…             51               32 <NA>       <NA>  <NA>      
##  6 University of De…              9               14 <NA>       <NA>  <NA>      
##  7 University of De…             25               24 <NA>       <NA>  <NA>      
##  8 University of De…             28               27 <NA>       <NA>  <NA>      
##  9 University of De…             10                8 <NA>       <NA>  <NA>      
## 10 University of De…              0               15 <NA>       <NA>  <NA>      
## # ℹ 7 more variables: `Ice Hockey` <chr>, Lacrosse <chr>, Skiing <chr>,
## #   Soccer <chr>, `Swimming and Diving` <chr>, Tennis <chr>, Volleyball <chr>

Separating and Uniting

NCAA_UD <- NCAA %>%
    
    select(institution_name, sports, rev_women, rev_men, total_rev_menwomen) %>%
    
    filter(institution_name %in% c("University of Denver"))

NCAA_UD
## # A tibble: 10 × 5
##    institution_name     sports              rev_women rev_men total_rev_menwomen
##    <chr>                <chr>               <chr>     <chr>   <chr>             
##  1 University of Denver Basketball          1729128   3361111 5090239           
##  2 University of Denver Golf                617182    604826  1222008           
##  3 University of Denver Gymnastics          1388115   NA      1388115           
##  4 University of Denver Ice Hockey          NA        3021672 3021672           
##  5 University of Denver Lacrosse            1271011   2051197 3322208           
##  6 University of Denver Skiing              812527    812526  1625053           
##  7 University of Denver Soccer              1356455   1161051 2517506           
##  8 University of Denver Swimming and Diving 1122277   964444  2086721           
##  9 University of Denver Tennis              796467    626981  1423448           
## 10 University of Denver Volleyball          1349643   NA      1349643

Unite two columns

NCAA_1 <- NCAA_UD %>%
    
    unite(col = Sport_Institution, c(institution_name,sports), sep = ":", )
NCAA_1
## # A tibble: 10 × 4
##    Sport_Institution                        rev_women rev_men total_rev_menwomen
##    <chr>                                    <chr>     <chr>   <chr>             
##  1 University of Denver:Basketball          1729128   3361111 5090239           
##  2 University of Denver:Golf                617182    604826  1222008           
##  3 University of Denver:Gymnastics          1388115   NA      1388115           
##  4 University of Denver:Ice Hockey          NA        3021672 3021672           
##  5 University of Denver:Lacrosse            1271011   2051197 3322208           
##  6 University of Denver:Skiing              812527    812526  1625053           
##  7 University of Denver:Soccer              1356455   1161051 2517506           
##  8 University of Denver:Swimming and Diving 1122277   964444  2086721           
##  9 University of Denver:Tennis              796467    626981  1423448           
## 10 University of Denver:Volleyball          1349643   NA      1349643

Separate a column

NCAA_2 <- NCAA_1 %>%
    
    separate(col = Sport_Institution, 
             into = c("institution_name", "sports"),
             sep = ":")

NCAA_2
## # A tibble: 10 × 5
##    institution_name     sports              rev_women rev_men total_rev_menwomen
##    <chr>                <chr>               <chr>     <chr>   <chr>             
##  1 University of Denver Basketball          1729128   3361111 5090239           
##  2 University of Denver Golf                617182    604826  1222008           
##  3 University of Denver Gymnastics          1388115   NA      1388115           
##  4 University of Denver Ice Hockey          NA        3021672 3021672           
##  5 University of Denver Lacrosse            1271011   2051197 3322208           
##  6 University of Denver Skiing              812527    812526  1625053           
##  7 University of Denver Soccer              1356455   1161051 2517506           
##  8 University of Denver Swimming and Diving 1122277   964444  2086721           
##  9 University of Denver Tennis              796467    626981  1423448           
## 10 University of Denver Volleyball          1349643   NA      1349643
# key thing: have the same value for sep (:), otherwise it won't split the data too

Missing Values

NCAA_final <- NCAA_UD %>%
    
    mutate(rev_women = as.numeric(rev_women), rev_men = as.numeric(rev_men)) %>%
    
    complete(institution_name, sports) %>% 
    
    fill(institution_name, .direction = "down") %>%
    
    replace_na(list(rev_women = 0, rev_men = 0, total_rev_menwomen = 0))
## Warning: There were 2 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `rev_women = as.numeric(rev_women)`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
NCAA_final
## # A tibble: 10 × 5
##    institution_name     sports              rev_women rev_men total_rev_menwomen
##    <chr>                <chr>                   <dbl>   <dbl> <chr>             
##  1 University of Denver Basketball            1729128 3361111 5090239           
##  2 University of Denver Golf                   617182  604826 1222008           
##  3 University of Denver Gymnastics            1388115       0 1388115           
##  4 University of Denver Ice Hockey                  0 3021672 3021672           
##  5 University of Denver Lacrosse              1271011 2051197 3322208           
##  6 University of Denver Skiing                 812527  812526 1625053           
##  7 University of Denver Soccer                1356455 1161051 2517506           
##  8 University of Denver Swimming and Diving   1122277  964444 2086721           
##  9 University of Denver Tennis                 796467  626981 1423448           
## 10 University of Denver Volleyball            1349643       0 1349643