# Load package
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl) # for importing excel files
# Read the movie data from the Excel workbook.
# readxl reports below that the blank first column header is renamed `...1`.
Movies <- read_excel(path = "../00_data/MyData.xlsx")
## New names:
## • `` -> `...1`
Movies
## # A tibble: 3,401 × 9
##     ...1 release_date movie     production_budget domestic_gross worldwide_gross
##    <dbl> <chr>        <chr>                 <dbl>          <dbl>           <dbl>
##  1     1 6/22/2007    Evan Alm…         175000000      100289690       174131329
##  2     2 7/28/1995    Waterwor…         175000000       88246220       264246220
##  3     3 5/12/2017    King Art…         175000000       39175066       139950708
##  4     4 12/25/2013   47 Ronin          175000000       38362475       151716815
##  5     5 6/22/2018    Jurassic…         170000000      416769345      1304866322
##  6     6 8/1/2014     Guardian…         170000000      333172112       771051335
##  7     7 5/7/2010     Iron Man…         170000000      312433331       621156389
##  8     8 4/4/2014     Captain …         170000000      259746958       714401889
##  9     9 7/11/2014    Dawn of …         170000000      208545589       710644566
## 10    10 11/10/2004   The Pola…         170000000      186493587       310634169
## # ℹ 3,391 more rows
## # ℹ 3 more variables: distributor <chr>, mpaa_rating <chr>, genre <chr>
# Fix the RNG seed so the same five rows are drawn on every run.
set.seed(1234)

# Keep only the genre and the two gross columns, then draw a random sample of
# five rows. `slice_sample()` replaces the superseded `sample_n()`.
Small_Movie <- Movies %>%
  select(genre, worldwide_gross, domestic_gross) %>%
  slice_sample(n = 5)
Small_Movie
## # A tibble: 5 × 3
##   genre     worldwide_gross domestic_gross
##   <chr>               <dbl>          <dbl>
## 1 Drama              407100         343706
## 2 Comedy             351416         351416
## 3 Drama             7785229        1445366
## 4 Drama            20278055       20278055
## 5 Adventure        80767884       26483452

# longer

# Stack the two gross columns: the former column names go into `gross` and
# their values into `amount`, giving two rows per sampled row.
Small_Movie %>%
  pivot_longer(
    cols = c(worldwide_gross, domestic_gross),
    names_to = "gross",
    values_to = "amount"
  )
## # A tibble: 10 × 3
##    genre     gross             amount
##    <chr>     <chr>              <dbl>
##  1 Drama     worldwide_gross   407100
##  2 Drama     domestic_gross    343706
##  3 Comedy    worldwide_gross   351416
##  4 Comedy    domestic_gross    351416
##  5 Drama     worldwide_gross  7785229
##  6 Drama     domestic_gross   1445366
##  7 Drama     worldwide_gross 20278055
##  8 Drama     domestic_gross  20278055
##  9 Adventure worldwide_gross 80767884
## 10 Adventure domestic_gross  26483452

# wider

# Spread `worldwide_gross` into one column per genre. `domestic_gross` is the
# only column left over, so it identifies the rows; a row gets `NA` in every
# genre column other than its own.
Small_Movie %>%
  pivot_wider(
    id_cols = domestic_gross,
    names_from = genre,
    values_from = worldwide_gross
  )
## # A tibble: 5 × 4
##   domestic_gross    Drama Comedy Adventure
##            <dbl>    <dbl>  <dbl>     <dbl>
## 1         343706   407100     NA        NA
## 2         351416       NA 351416        NA
## 3        1445366  7785229     NA        NA
## 4       20278055 20278055     NA        NA
## 5       26483452       NA     NA  80767884

# separate

# `worldwide_gross` holds plain digits, so the default separator (any run of
# non-alphanumeric characters) never matches and each value yields one piece.
# `fill = "right"` states explicitly that the missing second piece becomes
# `NA`, which stops tidyr from warning "Expected 2 pieces".
Small_Movie %>%
  separate(
    worldwide_gross,
    into = c("worldwide_gross", "nothing"),
    fill = "right"
  )
## # A tibble: 5 × 4
##   genre     worldwide_gross nothing domestic_gross
##   <chr>     <chr>           <chr>            <dbl>
## 1 Drama     407100          <NA>            343706
## 2 Comedy    351416          <NA>            351416
## 3 Drama     7785229         <NA>           1445366
## 4 Drama     20278055        <NA>          20278055
## 5 Adventure 80767884        <NA>          26483452
# A negative `sep` is a character position counted from the right end of the
# value, so -3 splits off the last three digits into their own column.
Small_Movie %>%
  separate(
    col = worldwide_gross,
    into = c("worldwide_gross_thousands", "worldwide_gross_ones"),
    sep = -3
  )
## # A tibble: 5 × 4
##   genre     worldwide_gross_thousands worldwide_gross_ones domestic_gross
##   <chr>     <chr>                     <chr>                         <dbl>
## 1 Drama     407                       100                          343706
## 2 Comedy    351                       416                          351416
## 3 Drama     7785                      229                         1445366
## 4 Drama     20278                     055                        20278055
## 5 Adventure 80767                     884                        26483452

# unite

# Paste both gross columns into one character column using the default "_"
# separator. The first argument of `unite()` after the data is `col`, the NAME
# of the new column, so it is passed explicitly here. Writing
# `unite(worldwide_gross, domestic_gross)` would instead create a column called
# `worldwide_gross` that contains only the domestic values.
Small_Movie %>%
  unite(col = "combined_gross", worldwide_gross, domestic_gross)
## # A tibble: 5 × 2
##   genre     combined_gross   
##   <chr>     <chr>            
## 1 Drama     407100_343706    
## 2 Comedy    351416_351416    
## 3 Drama     7785229_1445366  
## 4 Drama     20278055_20278055
## 5 Adventure 80767884_26483452
# Paste the two gross columns into a single character column `total_gross`,
# joined by "+". The values are concatenated as text, not added together.
Small_Movie %>%
  unite(
    col = "total_gross",
    worldwide_gross, domestic_gross,
    sep = "+"
  )
## # A tibble: 5 × 2
##   genre     total_gross      
##   <chr>     <chr>            
## 1 Drama     407100+343706    
## 2 Comedy    351416+351416    
## 3 Drama     7785229+1445366  
## 4 Drama     20278055+20278055
## 5 Adventure 80767884+26483452