R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Load the movie dataset from a CSV file (absolute, machine-specific path)
# and show the structure of the resulting data frame.
movies <- read.csv(file = "C:/Users/Prasad/Downloads/Book1 half.csv")
str(object = movies)
## 'data.frame':    399 obs. of  12 variables:
##  $ names       : chr  "Creed III" "Avatar: The Way of Water" "The Super Mario Bros. Movie" "Mummies" ...
##  $ Release.Date: chr  "03-02-2023" "12/15/2022 " "04-05-2023" "01-05-2023" ...
##  $ score       : int  73 78 76 70 61 66 80 83 59 58 ...
##  $ genre       : chr  "Drama,?\xffAction" "Science Fiction,?\xffAdventure,?\xffAction" "Animation,?\xffAdventure,?\xffFamily,?\xffFantasy,?\xffComedy" "Animation,?\xffComedy,?\xffFamily,?\xffAdventure,?\xffFantasy" ...
##  $ overview    : chr  "After dominating the boxing world, Adonis Creed has been thriving in both his career and family life. When a childhood friend a "Set more than a decade after the events of the first film, learn the story of the Sully family (Jake, Neytiri, and their kids), "While working underground to fix a water main, Brooklyn plumbers\x83??and brothers\x83??Mario and Luigi are transported down a  "Through a series of unfortunate events, three mummies end up in present-day London and embark on a wacky and hilarious journey  ...
##  $ crew        : chr  "Michael B. Jordan, Adonis Creed, Tessa Thompson, Bianca Taylor, Jonathan Majors, Damien Anderson, Wood Harris, Tony 'Little Duk "Sam Worthington, Jake Sully, Zoe Salda?\xf1a, Neytiri, Sigourney Weaver, Kiri / Dr. Grace Augustine, Stephen Lang, Colonel Mile "Chris Pratt, Mario (voice), Anya Taylor-Joy, Princess Peach (voice), Charlie Day, Luigi (voice), Jack Black, Bowser (voice), Ke "??scar Barber?\xadn, Thut (voice), Ana Esther Alborg, Nefer (voice), Luis P??rez Reina, Carnaby (voice), Mar??a Luisa Sol?\xad, ...
##  $ orig_title  : chr  "Creed III" "Avatar: The Way of Water" "The Super Mario Bros. Movie" " Momias" ...
##  $ status      : chr  " Released" " Released" " Released" " Released" ...
##  $ orig_lang   : chr  " English" " English" " English" " Spanish, Castilian" ...
##  $ budget_x    : num  7.50e+07 4.60e+08 1.00e+08 1.23e+07 7.70e+07 ...
##  $ revenue     : num  2.72e+08 2.32e+09 7.24e+08 3.42e+07 3.41e+08 ...
##  $ country     : chr  "AU" "AU" "AU" "AU" ...
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.4
## ✔ ggplot2   3.4.3     ✔ stringr   1.5.0
## ✔ lubridate 1.9.2     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(ggpubr)

# Derive profit for each movie as revenue minus budget.
movies <- mutate(movies, profit = revenue - budget_x)

# Scatter plot of revenue against budget with a fitted linear trend line.
movies %>%
  ggplot(aes(x = budget_x, y = revenue)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    x = "Budget",
    y = "Revenue",
    title = "Budget vs Revenue"
  )
## `geom_smooth()` using formula = 'y ~ x'

# Correlation between budget and revenue (cor() defaults to Pearson).
cor(x = movies[["budget_x"]], y = movies[["revenue"]])
## [1] 0.6679224
# Scatter plot of profit against budget with a fitted linear trend line.
movies %>%
  ggplot(aes(x = budget_x, y = profit)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    x = "Budget",
    y = "Profit",
    title = "Budget vs Profit"
  )
## `geom_smooth()` using formula = 'y ~ x'

# Correlation between budget and profit (cor() defaults to Pearson).
cor(x = movies[["budget_x"]], y = movies[["profit"]])
## [1] 0.5489757
# Parse Release.Date into Date objects. The raw column mixes two spellings of
# month-day-year ("03-02-2023" and "12/15/2022 " with a trailing space), so
# normalise the separator and strip whitespace before parsing; using the "-"
# format alone turns every "/"-separated entry into NA.
movies <- movies %>%
  mutate(
    Release.Date = as.Date(
      gsub("/", "-", trimws(Release.Date), fixed = TRUE),
      format = "%m-%d-%Y"
    )
  )

# Scatter plot of revenue over release date.
movies %>%
  ggplot(aes(x = Release.Date, y = revenue)) +
  geom_point() +
  labs(
    x = "Release Date",
    y = "Revenue",
    title = "Release Date vs Revenue"
  )
## Warning: Removed 247 rows containing missing values (`geom_point()`).

# Sample mean of revenue; printed so the value shows up in the output.
revenue_mean <- mean(movies[["revenue"]])
print(revenue_mean)
## [1] 406100085
# Sample standard deviation of revenue; printed so the value shows up in the
# output.
revenue_sd <- sd(movies[["revenue"]])
print(revenue_sd)
## [1] 400131204
# Margin of error for a two-sided 95% interval around the mean: the standard
# error (sd / sqrt(n)) scaled by the 97.5th percentile of the standard normal.
margin_error <- (revenue_sd / sqrt(nrow(movies))) * qnorm(p = 0.975)
print(margin_error)
## [1] 39261245
# Interval bounds: mean plus/minus the margin of error.
upper <- revenue_mean + margin_error
lower <- revenue_mean - margin_error

# Report the interval as a single formatted string.
paste0(
  "95% CI: [",
  lower,
  ", ",
  upper,
  "]"
)
## [1] "95% CI: [366838840.735446, 445361330.243]"