# OSCAR WINS
library(stringr)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.2.2
# Load the combined winners table: comma-separated text with a header row,
# double-quote quoting, and text columns kept as plain character vectors.
oscar <- read.table(
  "https://raw.githubusercontent.com/pm0kjp/IS607_Project3/master/data/Combo_Oscar.csv",
  header = TRUE,
  sep = ",",
  quote = "\"",
  stringsAsFactors = FALSE
)
# read.table() already returns a data frame, so it is used as-is.
# Swap every NA cell for an empty string so the equality tests further down
# yield TRUE/FALSE rather than NA.
oscar[is.na(oscar)] <- ""
# CHART 1: Best Picture overlaps
# For each row of `oscar`, flag whether the Best Picture entry matches the
# entry in each other award column, then chart how often each match occurs.

# TRUE where two award columns hold the same non-blank entry once surrounding
# whitespace is trimmed. Blank entries never match: NA cells are replaced
# with "" when `oscar` is loaded, and "" == "" would otherwise count a row
# with two missing awards as an overlap. NA inputs also yield FALSE.
same_winner <- function(a, b) {
  a <- str_trim(a)
  b <- str_trim(b)
  !is.na(a) & !is.na(b) & nzchar(a) & a == b
}

BP_BE <- same_winner(oscar$Best_Picture, oscar$Best_Editing)
BP_BD <- same_winner(oscar$Best_Picture, oscar$Best_Directing)
BP_BA <- same_winner(oscar$Best_Picture, oscar$Best_Actor)
BP_BSA <- same_winner(oscar$Best_Picture, oscar$Best_Supporting_Actor)
BP_BAC <- same_winner(oscar$Best_Picture, oscar$Best_Actress)
BP_BSAC <- same_winner(oscar$Best_Picture, oscar$Best_Supporting_Actress)
BP_BS <- same_winner(oscar$Best_Picture, oscar$Best_Sound)
BP_BSG <- same_winner(oscar$Best_Picture, oscar$Best_Song)

# One logical column per category; also reused later for per-row win totals.
df <- data.frame(BP_BE, BP_BD, BP_BA, BP_BSA, BP_BAC, BP_BSAC, BP_BS, BP_BSG)

# colSums() treats TRUE as 1, so each total is the number of matching rows.
count_doubles <- data.frame(Overlap = names(df), Count = colSums(df))

ggplot(data = count_doubles, aes(x = Overlap, y = Count, fill = Overlap)) +
  geom_bar(stat = "identity") +
  ggtitle("Best Picture Overlaps Most with Best Director, \nFollowed by Best Editor and Best Actor.") +
  scale_fill_discrete(
    name = "Best Picture \nOverlaps With...",
    # Breaks are the column codes; labels spell each code out in the legend.
    breaks = c(
      "BP_BA", "BP_BAC", "BP_BD", "BP_BE",
      "BP_BS", "BP_BSA", "BP_BSAC", "BP_BSG"
    ),
    labels = c(
      "Best Actor", "Best Actress", "Best Directing", "Best Editing",
      "Best Sound", "Best Supporting Actor", "Best Supporting Actress",
      "Best Song"
    )
  ) +
  theme(
    legend.position = "right",
    axis.text.x = element_text(angle = 90, hjust = 1)
  )

# CHART 2: Best Editing overlaps
# For each row of `oscar`, flag whether the Best Editing entry matches the
# entry in each other award column, then chart how often each match occurs.

# TRUE where two award columns hold the same non-blank entry once surrounding
# whitespace is trimmed. Blank entries never match: NA cells are replaced
# with "" when `oscar` is loaded, and "" == "" would otherwise count a row
# where both awards are missing as an overlap. NA inputs also yield FALSE.
same_winner <- function(a, b) {
  a <- str_trim(a)
  b <- str_trim(b)
  !is.na(a) & !is.na(b) & nzchar(a) & a == b
}

BE_BP <- same_winner(oscar$Best_Editing, oscar$Best_Picture)
BE_BD <- same_winner(oscar$Best_Editing, oscar$Best_Directing)
BE_BA <- same_winner(oscar$Best_Editing, oscar$Best_Actor)
BE_BSA <- same_winner(oscar$Best_Editing, oscar$Best_Supporting_Actor)
BE_BAC <- same_winner(oscar$Best_Editing, oscar$Best_Actress)
BE_BSAC <- same_winner(oscar$Best_Editing, oscar$Best_Supporting_Actress)
BE_BS <- same_winner(oscar$Best_Editing, oscar$Best_Sound)
BE_BSG <- same_winner(oscar$Best_Editing, oscar$Best_Song)

# One logical column per category.
df2 <- data.frame(BE_BP, BE_BD, BE_BA, BE_BSA, BE_BAC, BE_BSAC, BE_BS, BE_BSG)

# colSums() treats TRUE as 1, so each total is the number of matching rows.
count_doubles2 <- data.frame(Overlap = names(df2), Count = colSums(df2))

# NOTE(review): the ranking stated in the title is hard-coded and was written
# when blank-vs-blank rows still counted as matches; re-check it against the
# corrected counts.
ggplot(data = count_doubles2, aes(x = Overlap, y = Count, fill = Overlap)) +
  geom_bar(stat = "identity") +
  ggtitle("Best Editor Overlaps Most with Best Director, \nFollowed by Best Picture and Best Sound") +
  scale_fill_discrete(
    name = "Best Editor \nOverlaps With...",
    # Breaks are the column codes; labels spell each code out in the legend.
    breaks = c(
      "BE_BA", "BE_BAC", "BE_BD", "BE_BP",
      "BE_BS", "BE_BSA", "BE_BSAC", "BE_BSG"
    ),
    labels = c(
      "Best Actor", "Best Actress", "Best Directing", "Best Picture",
      "Best Sound", "Best Supporting Actor", "Best Supporting Actress",
      "Best Song"
    )
  ) +
  theme(
    legend.position = "right",
    axis.text.x = element_text(angle = 90, hjust = 1)
  )

# CHART 3: total awards won by the Best Picture entry, by year
# rowSums(df) counts, per row, how many of the compared categories matched
# Best Picture; the + 1 adds the Best Picture award itself back in.
BP_totalwins <- data.frame(Year = oscar$Year, Wins = rowSums(df) + 1)

# Average number of awards per row (printed when run at top level).
mean(BP_totalwins$Wins)
## [1] 3.345679

ggplot(data = BP_totalwins, aes(x = Year, y = Wins)) +
  geom_bar(stat = "identity") +
  ggtitle("Total Awards Won By The Best Picture; \nAverages 3.3 Awards Per Year; Is Best Editing one of them?")

# CHART 4: distribution of the Best Picture total-win counts
ggplot(data = BP_totalwins, mapping = aes(x = Wins)) +
  geom_histogram(binwidth = 0.88, fill = "black", color = "white") +
  theme_bw() +
  ggtitle("Distribution of Total Awards Won By The Best Picture; \nLooks Normal.")

# OSCAR NOMINATIONS
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.2.2
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Load the tidy nominations table. A header row and comma separator are
# read.csv()'s defaults, so they are not spelled out.
oscar_noms <- read.csv(
  "https://raw.githubusercontent.com/pm0kjp/IS607_Project3/master/data/tidyoscarnoms.csv"
)
# NOTE: lists all nominated movies; 19 categories;
# coding is 0 = not nominated, 1 = won Oscar, 2 = lost Oscar.
# CHART 5: Best Picture win vs. nomination (win or lose) in other categories
# Category columns are coded 0 = not nominated, 1 = won, 2 = lost, so `> 0`
# keeps films that were nominated in a category whether or not they won it.

# Best Picture winners only. Filtering once replaces eighteen repeats of the
# same condition. The former group_by(Year) is dropped: nothing below is
# computed per group, and it left every result a grouped data frame.
bp_winners <- oscar_noms %>% filter(best.picture == 1)

BP_LACTOR <- bp_winners %>% filter(actor.in.a.leading.role > 0)
BP_SACTOR <- bp_winners %>% filter(actor.in.a.supporting.role > 0)
BP_LACTRESS <- bp_winners %>% filter(actress.in.a.leading.role > 0)
BP_SACTRESS <- bp_winners %>% filter(actress.in.a.supporting.role > 0)
BP_ART <- bp_winners %>% filter(art.direction > 0)
BP_CINEMA <- bp_winners %>% filter(cinematography > 0)
BP_COSTUME <- bp_winners %>% filter(costume.design > 0)
BP_DIRECTING <- bp_winners %>% filter(directing > 0)
BP_FILMEDITING <- bp_winners %>% filter(film.editing > 0)
BP_MAKEUP <- bp_winners %>% filter(makeup > 0)
BP_MUSICSCORE <- bp_winners %>% filter(music..score. > 0)
BP_MUSICSONG <- bp_winners %>% filter(music..song. > 0)
BP_SOUNDEDITING <- bp_winners %>% filter(sound.editing > 0)
BP_SOUNDMIXING <- bp_winners %>% filter(sound.mixing > 0)
BP_VISUALEFF <- bp_winners %>% filter(visual.effects > 0)
BP_WRTG <- bp_winners %>% filter(writing > 0)
BP_WRTGADPTSPLAY <- bp_winners %>% filter(writing..adapted.screenplay. > 0)
BP_WRTGORIGSPLAY <- bp_winners %>% filter(writing..original.screenplay. > 0)

# Pair every label with its subset in one place so the chart labels and the
# row counts cannot drift out of step.
bp_overlaps <- list(
  BP_LACTOR = BP_LACTOR,
  BP_SACTOR = BP_SACTOR,
  BP_LACTRESS = BP_LACTRESS,
  BP_SACTRESS = BP_SACTRESS,
  BP_ART = BP_ART,
  BP_CINEMA = BP_CINEMA,
  BP_COSTUME = BP_COSTUME,
  BP_DIRECTING = BP_DIRECTING,
  BP_FILMEDITING = BP_FILMEDITING,
  BP_MAKEUP = BP_MAKEUP,
  BP_MUSICSCORE = BP_MUSICSCORE,
  BP_MUSICSONG = BP_MUSICSONG,
  BP_SOUNDEDITING = BP_SOUNDEDITING,
  BP_SOUNDMIXING = BP_SOUNDMIXING,
  BP_VISUALEFF = BP_VISUALEFF,
  BP_WRTG = BP_WRTG,
  BP_WRTGADPTSPLAY = BP_WRTGADPTSPLAY,
  BP_WRTGORIGSPLAY = BP_WRTGORIGSPLAY
)

# Each count is the number of rows left in that category's subset.
overlap_counts <- data.frame(
  Overlap = names(bp_overlaps),
  Counts = unname(vapply(bp_overlaps, nrow, integer(1)))
)

ggplot(data = overlap_counts, aes(x = Overlap, y = Counts, fill = Overlap)) +
  geom_bar(stat = "identity") +
  ggtitle("Best Picture Overlaps Most with Nominations \nfor Directing, Writing and Film Editing") +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 90, hjust = 1)
  )
