Import your data

# excel file
NCAA <- read_excel("../00_data/myData_charts.xls")
## New names:
## • `` -> `...1`
NCAA
## # A tibble: 16,383 × 29
##     ...1  year unitid institution_name         city_txt state_cd zip_text
##    <dbl> <dbl>  <dbl> <chr>                    <chr>    <chr>       <dbl>
##  1     1  2015 100654 Alabama A & M University Normal   AL          35762
##  2     2  2015 100654 Alabama A & M University Normal   AL          35762
##  3     3  2015 100654 Alabama A & M University Normal   AL          35762
##  4     4  2015 100654 Alabama A & M University Normal   AL          35762
##  5     5  2015 100654 Alabama A & M University Normal   AL          35762
##  6     6  2015 100654 Alabama A & M University Normal   AL          35762
##  7     7  2015 100654 Alabama A & M University Normal   AL          35762
##  8     8  2015 100654 Alabama A & M University Normal   AL          35762
##  9     9  2015 100654 Alabama A & M University Normal   AL          35762
## 10    10  2015 100654 Alabama A & M University Normal   AL          35762
## # ℹ 16,373 more rows
## # ℹ 22 more variables: classification_code <dbl>, classification_name <chr>,
## #   classification_other <chr>, ef_male_count <dbl>, ef_female_count <dbl>,
## #   ef_total_count <dbl>, sector_cd <dbl>, sector_name <chr>, sportscode <dbl>,
## #   partic_men <chr>, partic_women <chr>, partic_coed_men <chr>,
## #   partic_coed_women <chr>, sum_partic_men <dbl>, sum_partic_women <dbl>,
## #   rev_men <chr>, rev_women <chr>, total_rev_menwomen <chr>, exp_men <chr>, …

Pivoting

NCAA_small <- NCAA %>%
    
    select(institution_name, sports, rev_women, sum_partic_men, sum_partic_women) %>%
    
    filter(institution_name %in% c("University of Denver"))

long to wide form

NCAA_long <- NCAA_small %>%
    
    pivot_longer(cols = c(sum_partic_men, sum_partic_women),
               names_to = "gender",
               values_to = "participation_count")
NCAA_long
## # A tibble: 20 × 5
##    institution_name     sports              rev_women gender participation_count
##    <chr>                <chr>               <chr>     <chr>                <dbl>
##  1 University of Denver Basketball          1729128   sum_p…                  17
##  2 University of Denver Basketball          1729128   sum_p…                  18
##  3 University of Denver Golf                617182    sum_p…                   7
##  4 University of Denver Golf                617182    sum_p…                   7
##  5 University of Denver Gymnastics          1388115   sum_p…                   0
##  6 University of Denver Gymnastics          1388115   sum_p…                  14
##  7 University of Denver Ice Hockey          NA        sum_p…                  27
##  8 University of Denver Ice Hockey          NA        sum_p…                   0
##  9 University of Denver Lacrosse            1271011   sum_p…                  51
## 10 University of Denver Lacrosse            1271011   sum_p…                  32
## 11 University of Denver Skiing              812527    sum_p…                   9
## 12 University of Denver Skiing              812527    sum_p…                  14
## 13 University of Denver Soccer              1356455   sum_p…                  25
## 14 University of Denver Soccer              1356455   sum_p…                  24
## 15 University of Denver Swimming and Diving 1122277   sum_p…                  28
## 16 University of Denver Swimming and Diving 1122277   sum_p…                  27
## 17 University of Denver Tennis              796467    sum_p…                  10
## 18 University of Denver Tennis              796467    sum_p…                   8
## 19 University of Denver Volleyball          1349643   sum_p…                   0
## 20 University of Denver Volleyball          1349643   sum_p…                  15

wide to long form

NCAA_wide <- NCAA_small %>%
    
    pivot_wider(names_from = sports,
                values_from = rev_women)

NCAA_wide
## # A tibble: 10 × 13
##    institution_name  sum_partic_men sum_partic_women Basketball Golf  Gymnastics
##    <chr>                      <dbl>            <dbl> <chr>      <chr> <chr>     
##  1 University of De…             17               18 1729128    <NA>  <NA>      
##  2 University of De…              7                7 <NA>       6171… <NA>      
##  3 University of De…              0               14 <NA>       <NA>  1388115   
##  4 University of De…             27                0 <NA>       <NA>  <NA>      
##  5 University of De…             51               32 <NA>       <NA>  <NA>      
##  6 University of De…              9               14 <NA>       <NA>  <NA>      
##  7 University of De…             25               24 <NA>       <NA>  <NA>      
##  8 University of De…             28               27 <NA>       <NA>  <NA>      
##  9 University of De…             10                8 <NA>       <NA>  <NA>      
## 10 University of De…              0               15 <NA>       <NA>  <NA>      
## # ℹ 7 more variables: `Ice Hockey` <chr>, Lacrosse <chr>, Skiing <chr>,
## #   Soccer <chr>, `Swimming and Diving` <chr>, Tennis <chr>, Volleyball <chr>

Separating and Uniting

NCAA_UD <- NCAA %>%
    
    select(institution_name, sports, rev_women, rev_men, total_rev_menwomen) %>%
    
    filter(institution_name %in% c("University of Denver"))

Separate a column

NCAA_1 <- NCAA_UD %>%
    
    separate(col = total_rev_menwomen, 
             into = c("total_rev_women", "total_rev_men"))
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 10 rows [1, 2, 3, 4, 5,
## 6, 7, 8, 9, 10].
NCAA_1
## # A tibble: 10 × 6
##    institution_name     sports   rev_women rev_men total_rev_women total_rev_men
##    <chr>                <chr>    <chr>     <chr>   <chr>           <chr>        
##  1 University of Denver Basketb… 1729128   3361111 5090239         <NA>         
##  2 University of Denver Golf     617182    604826  1222008         <NA>         
##  3 University of Denver Gymnast… 1388115   NA      1388115         <NA>         
##  4 University of Denver Ice Hoc… NA        3021672 3021672         <NA>         
##  5 University of Denver Lacrosse 1271011   2051197 3322208         <NA>         
##  6 University of Denver Skiing   812527    812526  1625053         <NA>         
##  7 University of Denver Soccer   1356455   1161051 2517506         <NA>         
##  8 University of Denver Swimmin… 1122277   964444  2086721         <NA>         
##  9 University of Denver Tennis   796467    626981  1423448         <NA>         
## 10 University of Denver Volleyb… 1349643   NA      1349643         <NA>

Unite two columns

NCAA_1 %>%
    
    unite(col = "Sport Institution", c(institution_name,sports), sep = ":", )
## # A tibble: 10 × 5
##    `Sport Institution`           rev_women rev_men total_rev_women total_rev_men
##    <chr>                         <chr>     <chr>   <chr>           <chr>        
##  1 University of Denver:Basketb… 1729128   3361111 5090239         <NA>         
##  2 University of Denver:Golf     617182    604826  1222008         <NA>         
##  3 University of Denver:Gymnast… 1388115   NA      1388115         <NA>         
##  4 University of Denver:Ice Hoc… NA        3021672 3021672         <NA>         
##  5 University of Denver:Lacrosse 1271011   2051197 3322208         <NA>         
##  6 University of Denver:Skiing   812527    812526  1625053         <NA>         
##  7 University of Denver:Soccer   1356455   1161051 2517506         <NA>         
##  8 University of Denver:Swimmin… 1122277   964444  2086721         <NA>         
##  9 University of Denver:Tennis   796467    626981  1423448         <NA>         
## 10 University of Denver:Volleyb… 1349643   NA      1349643         <NA>

Missing Values

NCAA_final <- NCAA_UD %>%
    
    mutate(rev_women = as.numeric(rev_women), rev_men = as.numeric(rev_men)) %>%
    
    complete(institution_name, sports) %>% 
    
    fill(institution_name, .direction = "down") %>%
    
    replace_na(list(rev_women = 0, rev_men = 0, total_rev_menwomen = 0))
## Warning: There were 2 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `rev_women = as.numeric(rev_women)`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
NCAA_final
## # A tibble: 10 × 5
##    institution_name     sports              rev_women rev_men total_rev_menwomen
##    <chr>                <chr>                   <dbl>   <dbl> <chr>             
##  1 University of Denver Basketball            1729128 3361111 5090239           
##  2 University of Denver Golf                   617182  604826 1222008           
##  3 University of Denver Gymnastics            1388115       0 1388115           
##  4 University of Denver Ice Hockey                  0 3021672 3021672           
##  5 University of Denver Lacrosse              1271011 2051197 3322208           
##  6 University of Denver Skiing                 812527  812526 1625053           
##  7 University of Denver Soccer                1356455 1161051 2517506           
##  8 University of Denver Swimming and Diving   1122277  964444 2086721           
##  9 University of Denver Tennis                 796467  626981 1423448           
## 10 University of Denver Volleyball            1349643       0 1349643