# Code will be revealed after the submission deadline.
#Load packages and import data
library(ggplot2)
library(dplyr)
library(lubridate)
library(readr)
# Read the raw movie data, then drop incomplete rows in two passes:
# first rows with a missing Budget, then rows with a missing RunTime.
october <- read.csv("october_psichiR_movies.csv")
octBudget <- subset(october, !is.na(Budget))
octRunTimeNA <- subset(octBudget, !is.na(RunTime))
# Descriptive statistics for RunTime.
oct_descriptive <- summary(octRunTimeNA$RunTime)
print(oct_descriptive)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 45.0 82.0 90.0 90.3 96.0 170.0
# Spread must be computed from the raw column. Calling sd() on the summary
# object only measures the spread of its six numbers (min, quartiles, mean,
# max), which is what the previously reported 40.84176 was.
sd(octRunTimeNA$RunTime)
# range() likewise belongs on the raw column; the result equals the Min/Max
# shown in the summary above.
range(octRunTimeNA$RunTime)
## [1] 45 170
# Mean = 90.3, Median = 90, Range = 45 to 170
# STD: take the value printed by sd() above (the earlier figure of 40.84176
# was the SD of the summary statistics, not of RunTime).
# Descriptive statistics for Budget.
oct_budget_description <- summary(octRunTimeNA$Budget)
print(oct_budget_description)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 100 40000 500000 3828124 3000000 70000000
# Spread must be computed from the raw column. Calling sd() on the summary
# object only measures the spread of its six numbers (min, quartiles, mean,
# max), which is what the previously reported 28022369 was.
sd(octRunTimeNA$Budget)
# range() likewise belongs on the raw column; the result equals the Min/Max
# shown in the summary above.
range(octRunTimeNA$Budget)
## [1] 100 70000000
# Mean = 3828124, Median = 500000, Range = 100 to 70000000
# STD: take the value printed by sd() above (the earlier figure of 28022369
# was the SD of the summary statistics, not of Budget).
# Descriptive statistics for Review. This column still contains missing
# values (the summary reports 20 NA's), so NA handling must be explicit.
oct_review_description <- summary(octRunTimeNA$Review)
print(oct_review_description)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.000 3.900 4.900 4.955 6.000 9.600 20
# Spread must be computed from the raw column. Calling sd() on the summary
# object mixes the six summary numbers with the NA count (20), which is what
# the previously reported 6.200773 was.
sd(octRunTimeNA$Review, na.rm = TRUE)
# range() on the summary object returned "1 20" because the NA count was
# treated as a data value; on the raw column it yields the Min/Max shown in
# the summary above (1 to 9.6).
range(octRunTimeNA$Review, na.rm = TRUE)
# Mean = 4.955, Median = 4.9, Range = 1 to 9.6 (20 missing reviews excluded)
# STD: take the value printed by sd() above (the earlier figure of 6.200773
# was not the SD of Review).
# Transform dates: parse Release_Date with lubridate's mdy()
# (month-day-year order) and store the result in a new Date column.
octRunTimeNA[["Date"]] <- mdy(octRunTimeNA[["Release_Date"]])
# Put the original string and the parsed date side by side at the front of
# the data frame so the conversion is easy to eyeball.
octRunTimeNA <- select(octRunTimeNA, Release_Date, Date, everything())
range(octRunTimeNA$Date) # Shows 1905 to 2017
## [1] "1905-07-04" "2017-10-31"
# Budget plotted against release date: a dark-green line with a point at
# every movie, dollar-formatted y axis, centred title and subtitle.
ggplot(data = octRunTimeNA, mapping = aes(x = Date, y = Budget)) +
  geom_line(colour = "darkgreen") +
  geom_point() +
  scale_y_continuous(labels = scales::dollar_format()) +
  labs(
    title = "Movie Budget Over the Years",
    subtitle = "From 1905 to 2017",
    x = "Years",
    y = " " # single space: the y-axis title is effectively blank
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5)
  ) +
  # Stretch the y axis to 80,000,000 so it extends past the largest budget.
  expand_limits(y = 8e7)
# Shapiro-Wilk normality checks on Review, Budget and RunTime.
# The recorded output is kept under each call; every p-value is below 0.05.

shapiro.test(x = octRunTimeNA$Review)
##
## Shapiro-Wilk normality test
##
## data: octRunTimeNA$Review
## W = 0.99551, p-value = 0.03097

shapiro.test(x = octRunTimeNA$Budget)
##
## Shapiro-Wilk normality test
##
## data: octRunTimeNA$Budget
## W = 0.42656, p-value < 2.2e-16

shapiro.test(x = octRunTimeNA$RunTime)
##
## Shapiro-Wilk normality test
##
## data: octRunTimeNA$RunTime
## W = 0.93213, p-value < 2.2e-16
# Pairwise Spearman rank correlations between Review, Budget and RunTime.
# The recorded output is kept under each call, followed by a one-line recap.

# Review vs Budget
cor.test(
  x = octRunTimeNA$Review,
  y = octRunTimeNA$Budget,
  method = "spearman"
)
##
## Spearman's rank correlation rho
##
## data: octRunTimeNA$Review and octRunTimeNA$Budget
## S = 65296279, p-value = 0.5632
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.02132607
# Recap: rho = 0.02132607, p-value = 0.5632

# Review vs RunTime
cor.test(
  x = octRunTimeNA$Review,
  y = octRunTimeNA$RunTime,
  method = "spearman"
)
##
## Spearman's rank correlation rho
##
## data: octRunTimeNA$Review and octRunTimeNA$RunTime
## S = 53524003, p-value = 0.00000006181
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.1977713
# Recap: rho = 0.1977713, p-value = 0.00000006181

# Budget vs RunTime
cor.test(
  x = octRunTimeNA$Budget,
  y = octRunTimeNA$RunTime,
  method = "spearman"
)
##
## Spearman's rank correlation rho
##
## data: octRunTimeNA$Budget and octRunTimeNA$RunTime
## S = 48588830, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.3279512
# Recap: rho = 0.3279512, p-value < 2.2e-16