Visualization 1 Project 3

library(dplyr)

## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(tidyr)
library(ggplot2)
library(ggthemes)

# Load the tidied Oscar winners data set straight from the project repository.
oscar_winners_url <- "https://raw.githubusercontent.com/pm0kjp/IS607_Project3/master/data/tidy_oscar_winners.csv"
Oscar_table <- read.csv(oscar_winners_url, header = TRUE, sep = ",")

This table keeps only the information about which movies won Best Picture, Best Editing, or Best Directing, or did not win any of them.

# Keep columns 2 through 6 of the raw table; the award indicator columns used
# below (Best_Picture, Best_Editing, Best_Directing) are read from this subset.
Oscar_tsub <- data.frame(Oscar_table[2:6])

# Each subset below holds the rows matching one combination of awards.
# Naming scheme: wp/lp = won/lost Best Picture, we/le = won/lost Best Editing,
# wd/ld = won/lost Best Directing.

# Won Best Picture and Best Editing, but not Best Directing.
wp_we <- subset(
  Oscar_tsub,
  Best_Picture == 1 & Best_Editing == 1 & Best_Directing == 0
)

# Won only Best Editing (neither Best Picture nor Best Directing).
lp_we <- subset(
  Oscar_tsub,
  Best_Picture == 0 & Best_Editing == 1 & Best_Directing == 0
)

# Won Best Picture and Best Directing, but not Best Editing.
wp_wd <- subset(
  Oscar_tsub,
  Best_Picture == 1 & Best_Editing == 0 & Best_Directing != 0
)

# Won only Best Directing (neither Best Picture nor Best Editing).
lp_wd <- subset(
  Oscar_tsub,
  Best_Picture == 0 & Best_Editing == 0 & Best_Directing == 1
)

# Won all three: Best Picture, Best Editing and Best Directing.
wp_we_wd <- subset(
  Oscar_tsub,
  Best_Picture == 1 & Best_Editing != 0 & Best_Directing != 0
)

# Won Best Editing and Best Directing, but not Best Picture.
lp_we_wd <- subset(
  Oscar_tsub,
  Best_Picture == 0 & Best_Editing != 0 & Best_Directing != 0
)

# Won Best Picture only (neither Best Editing nor Best Directing).
wp_le_ld <- subset(
  Oscar_tsub,
  Best_Picture == 1 & Best_Editing == 0 & Best_Directing == 0
)


# Row labels for the summary table: the Best Editing / Best Directing outcome.
x <- c(
  "Won Only Best Editing",
  "Won Only Best Directing",
  "Won Both Best Editing and Directing",
  "Lost Both Editing and Directing"
)

# Row counts per outcome among the Best Picture winners, in the order of `x`.
Won_Best_Picture <- as.numeric(
  c(nrow(wp_we), nrow(wp_wd), nrow(wp_we_wd), nrow(wp_le_ld))
)

# Row counts per outcome among the rows that did not win Best Picture.
# No subset is computed for rows that lost all three awards, so the last cell
# is a genuine missing value. A real NA (not the string "NA") keeps the vector
# numeric, so as.numeric() no longer has to parse text and no longer raises
# the "NAs introduced by coercion" warning.
Lost_Best_Picture <- as.numeric(
  c(nrow(lp_we), nrow(lp_wd), nrow(lp_we_wd), NA)
)

# Assemble the summary table. The label column is given an empty header so the
# rendered table shows no title above the row labels.
ct <- data.frame(x, Won_Best_Picture, Lost_Best_Picture)
names(ct)[1] <- ""

# Render the summary as a table with centred columns.
library(knitr)
kable(
  ct,
  align = "c",
  caption = "Summary of Oscars' winners for best picture, editing and directing."
)

Summary of Oscars’ winners for best picture, editing and directing.
	Won_Best_Picture	Lost_Best_Picture
Won Only Best Editing	5	41
Won Only Best Directing	33	13
Won Both Best Editing and Directing	29	6
Lost Both Editing and Directing	14	NA

# Reshape the summary table `ct` into long form (`ct2`): one row per
# combination of Best Picture outcome and award category, as used by the
# plots below.

# Transpose `ct` and turn the result back into a data frame.
ct1 <- ct %>% t() %>% data.frame()
# Copy the row names into a regular Best_Picture column, then discard them.
ct1 <- cbind(Best_Picture = rownames(ct1), ct1)
rownames(ct1) <- NULL
# Name the four value columns after the award categories stored in `x`.
colnames(ct1)[2:5] <- x
# Drop the first row -- presumably the transposed label column of `ct`;
# TODO confirm.
ct1 <- ct1[-1,]
# Stack columns 2-5 into Award / Number_of_Years key-value pairs.
ct2 <- ct1 %>% gather("Award", "Number_of_Years", 2:5)
# Convert the counts to numeric; presumably the transpose left them as text --
# TODO confirm.
ct2$Number_of_Years <- as.numeric(format(ct2$Number_of_Years))


# Bar Graph 1: counts per editing/directing outcome, grouped on the x axis by
# whether Best Picture was won or lost.
ggplot(
  data = ct2,
  aes(x = Best_Picture, y = Number_of_Years, fill = Award)
) +
  geom_bar(stat = "identity", position = "dodge") +
  theme_hc(bgcolor = "darkunica") +
  scale_x_discrete(
    breaks = c("Lost_Best_Picture", "Won_Best_Picture"),
    labels = c("Lost", "Won")
  ) +
  labs(
    x = "Best Pictures",
    y = "Years",
    title = "Best Pictures Vs Best Editing and Directing Picture"
  ) +
  scale_fill_discrete(
    name = "Best Editing/\nDirecting",
    breaks = c(
      "Won Only Best Editing",
      "Won Only Best Directing",
      "Won Both Best Editing and Directing",
      "Lost Both Editing and Directing"
    ),
    labels = c(
      "Won Only Best Editing",
      "Won Only Best Directing",
      "Won Both",
      "Lost Both"
    )
  ) +
  # Applied after theme_hc() so the legend position is not overridden.
  theme(legend.position = "top")

# Bar Graph 2: the same counts, with the editing/directing outcome on the
# x axis and the Best Picture result as the fill colour.
ggplot(
  data = ct2,
  aes(x = Award, y = Number_of_Years, fill = Best_Picture)
) +
  geom_bar(stat = "identity", position = "dodge") +
  theme_hc(bgcolor = "darkunica") +
  # Fix the visible y range to 0-45.
  coord_cartesian(ylim = c(0, 45)) +
  # Tilt the long category labels slightly and move the legend to the top.
  theme(
    axis.text.x = element_text(angle = 10, vjust = .9, hjust = .6),
    legend.position = "top"
  ) +
  scale_fill_discrete(name = "Best Picture", labels = c("Lost", "Won")) +
  labs(
    x = "Best Editing/Directing",
    y = "Years",
    title = "Best Editing and Directing Pictures Vs Best Pictures"
  )

# Finding the percentages for the pie chart.
# Keep only the Best Picture winners and express each category's count as a
# percentage of all winners. The denominator is computed from the data rather
# than hard-coded (it used to be the literal 81, which is the sum of these
# counts), so the shares still add up to 100 if the data set changes.
ct3 <- ct2 %>%
  subset(Best_Picture == "Won_Best_Picture") %>%
  mutate(
    Percent_of_Years =
      (Number_of_Years / sum(Number_of_Years, na.rm = TRUE)) * 100
  ) %>%
  data.frame()
kable(ct3, digits = 2, align = "c", caption = "Table 2")

Table 2
Best_Picture	Award	Number_of_Years	Percent_of_Years
Won_Best_Picture	Won Only Best Editing	5	6.17
Won_Best_Picture	Won Only Best Directing	33	40.74
Won_Best_Picture	Won Both Best Editing and Directing	29	35.80
Won_Best_Picture	Lost Both Editing and Directing	14	17.28

# Pie Chart: a single stacked bar of the percentages, drawn in polar
# coordinates so each award category becomes a slice.
ggplot(
  data = ct3,
  aes(x = "", y = Percent_of_Years, fill = Award)
) +
  geom_bar(stat = "identity", color = "black") +
  coord_polar(theta = "y") +
  scale_fill_discrete(
    name = "Best Editing/Directing",
    breaks = c(
      "Won Only Best Editing",
      "Won Only Best Directing",
      "Won Both Best Editing and Directing",
      "Lost Both Editing and Directing"
    ),
    labels = c(
      "Won Only Best Editing",
      "Won Only Best Directing",
      "Won Both",
      "Lost Both"
    )
  ) +
  # Remove the black outline from the legend keys only.
  guides(fill = guide_legend(override.aes = list(colour = NA))) +
  theme_igray() +
  theme(legend.position = "left") +
  labs(
    y = "Won Best Pictures",
    title = "Percentage of Best Picture Winners Having\nBest Directing and Best Editing Awards."
  )

# Scatter Plot 1: Best Picture against Best Editing, coloured by Year.
# Load the combined Oscar data set used by both scatter plots.
combo_oscar_url <- "https://raw.githubusercontent.com/pm0kjp/IS607_Project3/master/data/Combo_Oscar.csv"
Oscar_table2 <- read.csv(combo_oscar_url, header = TRUE, sep = ",")

ggplot(
  data = Oscar_table2,
  aes(x = Best_Picture, y = Best_Editing)
) +
  geom_point(aes(color = Year)) +
  theme_igray() +
  labs(
    x = "Won Best Pictures",
    y = "Won Best Editing Pictures",
    title = "Oscar's Winners Best Picture Vs Best Editing"
  ) +
  # Hide the tick labels on both axes.
  theme(
    axis.text.x = element_blank(),
    axis.text.y = element_blank()
  )

# Scatter Plot 2: Best Picture against Best Directing, coloured by Year.
ggplot(
  data = Oscar_table2,
  aes(x = Best_Picture, y = Best_Directing)
) +
  geom_point(aes(color = Year)) +
  theme_igray() +
  labs(
    x = "Won Best Pictures",
    y = "Won Best Directing Pictures",
    title = "Oscar's Winners Best Picture Vs Best Directing"
  ) +
  # Hide the tick labels on both axes.
  theme(
    axis.text.x = element_blank(),
    axis.text.y = element_blank()
  )

Visualization 1 Project 3

Nabila Hossain

October 20, 2015