# Load packages
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl) # for importing excel files
# Import the movie data set from the Excel workbook, then print it
Movies <- read_excel(path = "../00_data/MyData.xlsx")
## New names:
## • `` -> `...1`
Movies
## # A tibble: 3,401 × 9
## ...1 release_date movie production_budget domestic_gross worldwide_gross
## <dbl> <chr> <chr> <dbl> <dbl> <dbl>
## 1 1 6/22/2007 Evan Alm… 175000000 100289690 174131329
## 2 2 7/28/1995 Waterwor… 175000000 88246220 264246220
## 3 3 5/12/2017 King Art… 175000000 39175066 139950708
## 4 4 12/25/2013 47 Ronin 175000000 38362475 151716815
## 5 5 6/22/2018 Jurassic… 170000000 416769345 1304866322
## 6 6 8/1/2014 Guardian… 170000000 333172112 771051335
## 7 7 5/7/2010 Iron Man… 170000000 312433331 621156389
## 8 8 4/4/2014 Captain … 170000000 259746958 714401889
## 9 9 7/11/2014 Dawn of … 170000000 208545589 710644566
## 10 10 11/10/2004 The Pola… 170000000 186493587 310634169
## # ℹ 3,391 more rows
## # ℹ 3 more variables: distributor <chr>, mpaa_rating <chr>, genre <chr>
# Fix the RNG seed so the same rows are drawn on every run
set.seed(1234)
# Keep genre plus the two gross columns, then draw 5 random rows
Small_Movie <- sample_n(
  select(Movies, genre, worldwide_gross, domestic_gross),
  size = 5
)
Small_Movie
## # A tibble: 5 × 3
## genre worldwide_gross domestic_gross
## <chr> <dbl> <dbl>
## 1 Drama 407100 343706
## 2 Comedy 351416 351416
## 3 Drama 7785229 1445366
## 4 Drama 20278055 20278055
## 5 Adventure 80767884 26483452
# Longer: stack the two gross columns into a name column ("gross")
# and a value column ("amount"), giving two rows per movie
Small_Movie %>%
  pivot_longer(
    cols = c(worldwide_gross, domestic_gross),
    names_to = "gross",
    values_to = "amount"
  )
## # A tibble: 10 × 3
## genre gross amount
## <chr> <chr> <dbl>
## 1 Drama worldwide_gross 407100
## 2 Drama domestic_gross 343706
## 3 Comedy worldwide_gross 351416
## 4 Comedy domestic_gross 351416
## 5 Drama worldwide_gross 7785229
## 6 Drama domestic_gross 1445366
## 7 Drama worldwide_gross 20278055
## 8 Drama domestic_gross 20278055
## 9 Adventure worldwide_gross 80767884
## 10 Adventure domestic_gross 26483452
# Wider: spread worldwide_gross into one column per genre;
# domestic_gross is left as-is and genres a row does not belong to are NA
Small_Movie %>%
  pivot_wider(
    names_from = genre,
    values_from = worldwide_gross
  )
## # A tibble: 5 × 4
## domestic_gross Drama Comedy Adventure
## <dbl> <dbl> <dbl> <dbl>
## 1 343706 407100 NA NA
## 2 351416 NA 351416 NA
## 3 1445366 7785229 NA NA
## 4 20278055 20278055 NA NA
## 5 26483452 NA NA 80767884
# Separate: split worldwide_gross on the default separator.
# The values contain only digits, so nothing is split off: every row
# yields one piece, "nothing" is filled with NA, and tidyr warns about it.
Small_Movie %>%
  separate(
    col = worldwide_gross,
    into = c("worldwide_gross", "nothing")
  )
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 5 rows [1, 2, 3,
## 4, 5].
## # A tibble: 5 × 4
## genre worldwide_gross nothing domestic_gross
## <chr> <chr> <chr> <dbl>
## 1 Drama 407100 <NA> 343706
## 2 Comedy 351416 <NA> 351416
## 3 Drama 7785229 <NA> 1445366
## 4 Drama 20278055 <NA> 20278055
## 5 Adventure 80767884 <NA> 26483452
# Separate by position: a negative sep counts from the right-hand end,
# so this splits off the last three digits of worldwide_gross
Small_Movie %>%
  separate(
    col = worldwide_gross,
    into = c("worldwide_gross_thousands", "worldwide_gross_ones"),
    sep = -3
  )
## # A tibble: 5 × 4
## genre worldwide_gross_thousands worldwide_gross_ones domestic_gross
## <chr> <chr> <chr> <dbl>
## 1 Drama 407 100 343706
## 2 Comedy 351 416 351416
## 3 Drama 7785 229 1445366
## 4 Drama 20278 055 20278055
## 5 Adventure 80767 884 26483452
# Unite: paste worldwide_gross and domestic_gross into a single column.
# The first argument after the data is `col`, the NAME of the new column.
# It is given explicitly here; if it is omitted, the first column listed is
# taken as the output name and only the remaining column is pasted.
# No `sep` is supplied, so the values are joined with the default "_".
Small_Movie %>%
  unite(col = "total_gross", worldwide_gross, domestic_gross)
## # A tibble: 5 × 2
## genre total_gross
## <chr> <chr>
## 1 Drama 407100_343706
## 2 Comedy 351416_351416
## 3 Drama 7785229_1445366
## 4 Drama 20278055_20278055
## 5 Adventure 80767884_26483452
# Unite both gross columns into total_gross, joined with "+"
Small_Movie %>%
  unite(col = "total_gross", worldwide_gross, domestic_gross, sep = "+")
## # A tibble: 5 × 2
## genre total_gross
## <chr> <chr>
## 1 Drama 407100+343706
## 2 Comedy 351416+351416
## 3 Drama 7785229+1445366
## 4 Drama 20278055+20278055
## 5 Adventure 80767884+26483452