This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the **Knit** button, a document is generated that includes both the content and the output of any embedded R code chunks. You can embed an R code chunk like this:
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the movie dataset from a local CSV file and inspect its structure.
# NOTE(review): the absolute path is machine-specific; confirm the file
# location before running on another machine.
movies <- read.csv(file = "C:/Users/Prasad/Downloads/Book1 half.csv")
str(object = movies)
## 'data.frame': 399 obs. of 12 variables:
## $ names : chr "Creed III" "Avatar: The Way of Water" "The Super Mario Bros. Movie" "Mummies" ...
## $ Release.Date: chr "03-02-2023" "12/15/2022 " "04-05-2023" "01-05-2023" ...
## $ score : int 73 78 76 70 61 66 80 83 59 58 ...
## $ genre : chr "Drama,?\xffAction" "Science Fiction,?\xffAdventure,?\xffAction" "Animation,?\xffAdventure,?\xffFamily,?\xffFantasy,?\xffComedy" "Animation,?\xffComedy,?\xffFamily,?\xffAdventure,?\xffFantasy" ...
## $ overview : chr "After dominating the boxing world, Adonis Creed has been thriving in both his career and family life. When a childhood friend a "Set more than a decade after the events of the first film, learn the story of the Sully family (Jake, Neytiri, and their kids), "While working underground to fix a water main, Brooklyn plumbers\x83??and brothers\x83??Mario and Luigi are transported down a "Through a series of unfortunate events, three mummies end up in present-day London and embark on a wacky and hilarious journey ...
## $ crew : chr "Michael B. Jordan, Adonis Creed, Tessa Thompson, Bianca Taylor, Jonathan Majors, Damien Anderson, Wood Harris, Tony 'Little Duk "Sam Worthington, Jake Sully, Zoe Salda?\xf1a, Neytiri, Sigourney Weaver, Kiri / Dr. Grace Augustine, Stephen Lang, Colonel Mile "Chris Pratt, Mario (voice), Anya Taylor-Joy, Princess Peach (voice), Charlie Day, Luigi (voice), Jack Black, Bowser (voice), Ke "??scar Barber?\xadn, Thut (voice), Ana Esther Alborg, Nefer (voice), Luis P??rez Reina, Carnaby (voice), Mar??a Luisa Sol?\xad, ...
## $ orig_title : chr "Creed III" "Avatar: The Way of Water" "The Super Mario Bros. Movie" " Momias" ...
## $ status : chr " Released" " Released" " Released" " Released" ...
## $ orig_lang : chr " English" " English" " English" " Spanish, Castilian" ...
## $ budget_x : num 7.50e+07 4.60e+08 1.00e+08 1.23e+07 7.70e+07 ...
## $ revenue : num 2.72e+08 2.32e+09 7.24e+08 3.42e+07 3.41e+08 ...
## $ country : chr "AU" "AU" "AU" "AU" ...
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.4
## ✔ ggplot2 3.4.3 ✔ stringr 1.5.0
## ✔ lubridate 1.9.2 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(ggpubr)
# Add a profit column: revenue minus budget_x, computed row by row.
movies <- mutate(movies, profit = revenue - budget_x)
# Scatter plot of revenue against budget, overlaid with a linear-model fit.
ggplot(movies, aes(x = budget_x, y = revenue)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    x = "Budget",
    y = "Revenue",
    title = "Budget vs Revenue"
  )
## `geom_smooth()` using formula = 'y ~ x'
# Correlation between budget and revenue (cor() defaults to Pearson).
with(movies, cor(budget_x, revenue))
## [1] 0.6679224
# Scatter plot of profit against budget, overlaid with a linear-model fit.
ggplot(movies, aes(x = budget_x, y = profit)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    x = "Budget",
    y = "Profit",
    title = "Budget vs Profit"
  )
## `geom_smooth()` using formula = 'y ~ x'
# Correlation between budget and profit (cor() defaults to Pearson).
with(movies, cor(budget_x, profit))
## [1] 0.5489757
# Parse release dates written as month-day-year into Date values.
#
# Release.Date mixes dash- and slash-separated values, some with trailing
# whitespace (e.g. "03-02-2023" and "12/15/2022 " in the str() output above).
# A single "%m-%d-%Y" format only matches the dash form, so every
# slash-separated date became NA and was silently dropped from the plot.
# Separators and whitespace are therefore normalised before parsing.
#
# x: character vector of dates in month-day-year order, or a Date vector.
# Returns a Date vector the same length as x; unparseable entries are NA.
parse_release_date <- function(x) {
  if (inherits(x, "Date")) {
    # Already parsed (e.g. the chunk was re-run interactively); keep as is.
    return(x)
  }
  normalised <- gsub("/", "-", trimws(x), fixed = TRUE)
  as.Date(normalised, format = "%m-%d-%Y")
}

movies <- movies %>%
  mutate(Release.Date = parse_release_date(Release.Date))

# Scatter plot of revenue over release date.
ggplot(data = movies, mapping = aes(x = Release.Date, y = revenue)) +
  geom_point() +
  labs(
    title = "Release Date vs Revenue",
    x = "Release Date",
    y = "Revenue"
  )
# 95% confidence interval for mean revenue, using the normal approximation.

# Sample mean of revenue.
revenue_mean <- mean(movies[["revenue"]])
revenue_mean
## [1] 406100085
# Sample standard deviation of revenue.
revenue_sd <- sd(movies[["revenue"]])
revenue_sd
## [1] 400131204
# Margin of error: two-sided 95% z critical value times the standard error
# of the mean (sd / sqrt(n), with n the number of rows in movies).
z_crit <- qnorm(0.975)
std_error <- revenue_sd / sqrt(nrow(movies))
margin_error <- z_crit * std_error
margin_error
## [1] 39261245
# Interval bounds are the mean minus/plus the margin of error.
upper <- revenue_mean + margin_error
lower <- revenue_mean - margin_error
paste0("95% CI: [", lower, ", ", upper, "]")
## [1] "95% CI: [366838840.735446, 445361330.243]"