# Import the movie data from the Excel workbook, then display the tibble
data <- readxl::read_excel(path = "../00_data/MyData.xlsx")
data
## # A tibble: 3,401 × 9
## ...1 release_date movie production_budget domestic_gross worldwide_gross
## <dbl> <chr> <chr> <dbl> <dbl> <dbl>
## 1 1 6/22/2007 Evan Alm… 175000000 100289690 174131329
## 2 2 7/28/1995 Waterwor… 175000000 88246220 264246220
## 3 3 5/12/2017 King Art… 175000000 39175066 139950708
## 4 4 12/25/2013 47 Ronin 175000000 38362475 151716815
## 5 5 6/22/2018 Jurassic… 170000000 416769345 1304866322
## 6 6 8/1/2014 Guardian… 170000000 333172112 771051335
## 7 7 5/7/2010 Iron Man… 170000000 312433331 621156389
## 8 8 4/4/2014 Captain … 170000000 259746958 714401889
## 9 9 7/11/2014 Dawn of … 170000000 208545589 710644566
## 10 10 11/10/2004 The Pola… 170000000 186493587 310634169
## # ℹ 3,391 more rows
## # ℹ 3 more variables: distributor <chr>, mpaa_rating <chr>, genre <chr>
Hypothesis: the higher a movie's production budget, the higher its profits (examined below using domestic gross as the measure).
# Scatter plot of domestic gross against production budget.
# Points are drawn semi-transparent so overlapping observations stay visible.
ggplot(data, aes(x = production_budget, y = domestic_gross)) +
  geom_point(alpha = .2)
# Same budget-vs-gross scatter plot, split into one panel per genre
# and arranged in two rows of panels.
ggplot(data, aes(x = production_budget, y = domestic_gross)) +
  geom_point(alpha = 0.3) +
  facet_wrap(vars(genre), nrow = 2)
# Smoothed trend of domestic gross as a function of production budget
# (geom_smooth() picks its default smoothing method for the data).
ggplot(data, aes(x = production_budget, y = domestic_gross)) +
  geom_smooth()
The plots suggest a positive relationship: as the production budget increases, the domestic gross of the movie tends to increase as well.