# OSCAR WINS

library(stringr)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.2.2
# Read the combined Oscar winners CSV from GitHub. stringsAsFactors = FALSE
# asks read.table() to leave text columns as character vectors.
oscar <- read.table(
  file = "https://raw.githubusercontent.com/pm0kjp/IS607_Project3/master/data/Combo_Oscar.csv",
  header = TRUE,
  sep = ",",
  quote = "\"",
  stringsAsFactors = FALSE
)

oscar <- data.frame(oscar)

# Swap missing values for empty strings so the equality comparisons further
# down yield TRUE/FALSE instead of NA.
oscar[is.na(oscar)] <- ""

# CHART 1: Best Picture overlaps
# For every row of `oscar`, flag whether the Best Picture entry equals the
# entry of each other category (surrounding whitespace is trimmed first).
best_picture <- str_trim(oscar$Best_Picture)

# A blank Best Picture cell must never count as a match: without this guard
# two empty cells compare equal and would be reported as an overlap.
has_best_picture <- nzchar(best_picture)

# Returns a logical vector: TRUE where `category` names the same entry as
# Best Picture in that row.
matches_best_picture <- function(category) {
  has_best_picture & best_picture == str_trim(category)
}

BP_BE <- matches_best_picture(oscar$Best_Editing)
BP_BD <- matches_best_picture(oscar$Best_Directing)
BP_BA <- matches_best_picture(oscar$Best_Actor)
BP_BSA <- matches_best_picture(oscar$Best_Supporting_Actor)
BP_BAC <- matches_best_picture(oscar$Best_Actress)
BP_BSAC <- matches_best_picture(oscar$Best_Supporting_Actress)
BP_BS <- matches_best_picture(oscar$Best_Sound)
BP_BSG <- matches_best_picture(oscar$Best_Song)

# One logical column per category; rows line up with the rows of `oscar`.
df <- data.frame(BP_BE, BP_BD, BP_BA, BP_BSA, BP_BAC, BP_BSAC, BP_BS, BP_BSG)

# Number of rows in which each category matched Best Picture.
count_doubles <- data.frame(Overlap = names(df), Count = colSums(df))

ggplot(data = count_doubles, aes(x = Overlap, y = Count, fill = Overlap)) +
  geom_bar(stat = "identity") +
  ggtitle("Best Picture Overlaps Most with Best Director, \nFollowed by Best Editor and Best Actor.") +
  scale_fill_discrete(
    name = "Best Picture \nOverlaps With...",
    breaks = c(
      "BP_BA", "BP_BAC", "BP_BD", "BP_BE",
      "BP_BS", "BP_BSA", "BP_BSAC", "BP_BSG"
    ),
    labels = c(
      "Best Actor", "Best Actress", "Best Directing", "Best Editing",
      "Best Sound", "Best Supporting Actor", "Best Supporting Actress",
      "Best Song"
    )
  ) +
  theme(
    legend.position = "right",
    axis.text.x = element_text(angle = 90, hjust = 1)
  )

# CHART 2: Best Editing overlaps
# For every row of `oscar`, flag whether the Best Editing entry equals the
# entry of each other category (surrounding whitespace is trimmed first).
best_editing <- str_trim(oscar$Best_Editing)

# A blank Best Editing cell must never count as a match: without this guard
# two empty cells (e.g. a row where neither category has an entry) compare
# equal and would be reported as an overlap.
has_best_editing <- nzchar(best_editing)

# Returns a logical vector: TRUE where `category` names the same entry as
# Best Editing in that row.
matches_best_editing <- function(category) {
  has_best_editing & best_editing == str_trim(category)
}

BE_BP <- matches_best_editing(oscar$Best_Picture)
BE_BD <- matches_best_editing(oscar$Best_Directing)
BE_BA <- matches_best_editing(oscar$Best_Actor)
BE_BSA <- matches_best_editing(oscar$Best_Supporting_Actor)
BE_BAC <- matches_best_editing(oscar$Best_Actress)
BE_BSAC <- matches_best_editing(oscar$Best_Supporting_Actress)
BE_BS <- matches_best_editing(oscar$Best_Sound)
BE_BSG <- matches_best_editing(oscar$Best_Song)

# One logical column per category; rows line up with the rows of `oscar`.
df2 <- data.frame(BE_BP, BE_BD, BE_BA, BE_BSA, BE_BAC, BE_BSAC, BE_BS, BE_BSG)

# Number of rows in which each category matched Best Editing.
count_doubles2 <- data.frame(Overlap = names(df2), Count = colSums(df2))

ggplot(data = count_doubles2, aes(x = Overlap, y = Count, fill = Overlap)) +
  geom_bar(stat = "identity") +
  ggtitle("Best Editor Overlaps Most with Best Director, \nFollowed by Best Picture and Best Sound") +
  scale_fill_discrete(
    name = "Best Editor \nOverlaps With...",
    breaks = c(
      "BE_BA", "BE_BAC", "BE_BD", "BE_BP",
      "BE_BS", "BE_BSA", "BE_BSAC", "BE_BSG"
    ),
    labels = c(
      "Best Actor", "Best Actress", "Best Directing", "Best Picture",
      "Best Sound", "Best Supporting Actor", "Best Supporting Actress",
      "Best Song"
    )
  ) +
  theme(
    legend.position = "right",
    axis.text.x = element_text(angle = 90, hjust = 1)
  )

# CHART 3: BP total wins
# rowSums(df) counts, per row, how many of the compared categories matched
# Best Picture; the extra 1 adds the Best Picture award itself back in.
BP_totalwins <- data.frame(
  Year = oscar$Year,
  Wins = rowSums(df) + 1
)

mean(BP_totalwins$Wins)
## [1] 3.345679
ggplot(data = BP_totalwins, aes(x = Year, y = Wins)) +
  geom_bar(stat = "identity") +
  ggtitle("Total Awards Won By The Best Picture; \nAverages 3.3 Awards Per Year; Is Best Editing one of them?")

# CHART 4: Distribution of BP total wins
# Histogram of the Wins column of BP_totalwins.
ggplot(BP_totalwins, aes(x = Wins)) +
  geom_histogram(binwidth = 0.88, fill = "black", color = "white") +
  theme_bw() +
  ggtitle("Distribution of Total Awards Won By The Best Picture; \nLooks Normal.")

# OSCAR NOMINATIONS

library(dplyr)
## Warning: package 'dplyr' was built under R version 3.2.2
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)

# Load the tidy nominations table from GitHub.
# NOTE: lists all nominated movies; 19 categories;
#       0 = not nominated, 1 = won Oscar, 2 = lost Oscar.
oscar_noms <- read.csv(
  file = "https://raw.githubusercontent.com/pm0kjp/IS607_Project3/master/data/tidyoscarnoms.csv"
)

# CHART 5: Best Picture win vs. nomination (won or lost) in other categories
# Restrict to Best Picture winners once (best.picture == 1), then keep the
# winners whose value in each other category is > 0 (per the data note:
# 1 = won, 2 = lost, 0 = not nominated).
# No group_by() is used: both conditions are plain row-wise tests, so grouping
# would not change which rows are kept and would only leave results grouped.
bp_winners <- filter(oscar_noms, best.picture == 1)

BP_LACTOR <- filter(bp_winners, actor.in.a.leading.role > 0)
BP_SACTOR <- filter(bp_winners, actor.in.a.supporting.role > 0)
BP_LACTRESS <- filter(bp_winners, actress.in.a.leading.role > 0)
BP_SACTRESS <- filter(bp_winners, actress.in.a.supporting.role > 0)
BP_ART <- filter(bp_winners, art.direction > 0)
BP_CINEMA <- filter(bp_winners, cinematography > 0)
BP_COSTUME <- filter(bp_winners, costume.design > 0)
BP_DIRECTING <- filter(bp_winners, directing > 0)
BP_FILMEDITING <- filter(bp_winners, film.editing > 0)
BP_MAKEUP <- filter(bp_winners, makeup > 0)
BP_MUSICSCORE <- filter(bp_winners, music..score. > 0)
BP_MUSICSONG <- filter(bp_winners, music..song. > 0)
BP_SOUNDEDITING <- filter(bp_winners, sound.editing > 0)
BP_SOUNDMIXING <- filter(bp_winners, sound.mixing > 0)
BP_VISUALEFF <- filter(bp_winners, visual.effects > 0)
BP_WRTG <- filter(bp_winners, writing > 0)
BP_WRTGADPTSPLAY <- filter(bp_winners, writing..adapted.screenplay. > 0)
BP_WRTGORIGSPLAY <- filter(bp_winners, writing..original.screenplay. > 0)

# Names of the per-category data frames above, in plotting-table order.
overlap_labels <- c(
  "BP_LACTOR", "BP_SACTOR", "BP_LACTRESS", "BP_SACTRESS", "BP_ART",
  "BP_CINEMA", "BP_COSTUME", "BP_DIRECTING", "BP_FILMEDITING", "BP_MAKEUP",
  "BP_MUSICSCORE", "BP_MUSICSONG", "BP_SOUNDEDITING", "BP_SOUNDMIXING",
  "BP_VISUALEFF", "BP_WRTG", "BP_WRTGADPTSPLAY", "BP_WRTGORIGSPLAY"
)

# Look each data frame up by name and count its rows, so the labels and the
# counts come from one vector and cannot fall out of step.
overlap_counts <- data.frame(
  Overlap = overlap_labels,
  Counts = vapply(mget(overlap_labels), nrow, integer(1), USE.NAMES = FALSE)
)

ggplot(data = overlap_counts, aes(x = Overlap, y = Counts, fill = Overlap)) +
  geom_bar(stat = "identity") +
  ggtitle("Best Picture Overlaps Most with Nominations \nfor Directing, Writing and Film Editing") +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 90, hjust = 1)
  )