library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
mydata <- read.csv("C:/Users/Diego Diaz/Documents/project3.csv", stringsAsFactors = FALSE)
head(mydata)
## Year Category Nominee Won
## 1 1981 ACTOR IN A LEADING ROLE Reds no
## 2 1981 ACTOR IN A LEADING ROLE On Golden Pond yes
## 3 1981 ACTOR IN A LEADING ROLE Atlantic City no
## 4 1981 ACTOR IN A LEADING ROLE Arthur no
## 5 1981 ACTOR IN A LEADING ROLE Absence of Malice no
## 6 1981 ACTOR IN A SUPPORTING ROLE Only When I Laugh no
str(mydata)
## 'data.frame': 1660 obs. of 4 variables:
## $ Year : int 1981 1981 1981 1981 1981 1981 1981 1981 1981 1981 ...
## $ Category: chr "ACTOR IN A LEADING ROLE" "ACTOR IN A LEADING ROLE" "ACTOR IN A LEADING ROLE" "ACTOR IN A LEADING ROLE" ...
## $ Nominee : chr "Reds " "On Golden Pond " "Atlantic City " "Arthur " ...
## $ Won : chr "no" "yes" "no" "no" ...
mydata2 <- mutate(mydata, TEST = ifelse(Won == "yes", 1, 0))
head(mydata2)
## Year Category Nominee Won TEST
## 1 1981 ACTOR IN A LEADING ROLE Reds no 0
## 2 1981 ACTOR IN A LEADING ROLE On Golden Pond yes 1
## 3 1981 ACTOR IN A LEADING ROLE Atlantic City no 0
## 4 1981 ACTOR IN A LEADING ROLE Arthur no 0
## 5 1981 ACTOR IN A LEADING ROLE Absence of Malice no 0
## 6 1981 ACTOR IN A SUPPORTING ROLE Only When I Laugh no 0
#Rest
rt <- filter(mydata2, Category != "FILM EDITING" & Category != "BEST PICTURE")
#Film Editing
ed <- filter(mydata2, Category == "FILM EDITING")
#Best Picture
bp <- filter(mydata2, Category == "BEST PICTURE")
#Cinematography
cm <- filter(mydata2, Category == "CINEMATOGRAPHY")
#Directing
dr <- filter(mydata2, Category == "DIRECTING")
#Special Effects
sp <- filter(mydata2, Category == "SPECIAL EFFECTS")
#Sound Editing
sm <- filter(mydata2, Category == "SOUND MIXING")
#Actor in leading role
ar <- filter(mydata2, Category == "ACTOR IN A LEADING ROLE")
Rest vs. Best Picture
#Left join for Rest vs. Best Picture
lj1 <- left_join(rt,bp, by = "Nominee")
#Selecting the TEST column
lj1 <- unlist(select(lj1,TEST.y))
#Converting NAs to zeros
lj1[is.na(lj1)] <- 0
Film Editing vs. Best Picture
#Left join for Film Editing vs. Best Picture
lj2 <- left_join(ed,bp, by = "Nominee")
#Selecting the TEST column
lj2 <- unlist(select(lj2,TEST.y))
#Converting NAs to zeros
lj2[is.na(lj2)] <- 0
Cinematography vs. Best Picture
#Left join for Cinematography vs. Best Picture
lj3 <- left_join(cm,bp, by = "Nominee")
#Selecting the TEST column
lj3 <- unlist(select(lj3,TEST.y))
#Converting NAs to zeros
lj3[is.na(lj3)] <- 0
Directing vs. Best Picture
#Left join for Directing vs. Best Picture
lj4 <- left_join(dr,bp, by = "Nominee")
#Selecting the TEST column
lj4 <- unlist(select(lj4,TEST.y))
#Converting NAs to zeros
lj4[is.na(lj4)] <- 0
Special Effects vs. Best Picture
#Left join for Special Effects vs. Best Picture
lj5 <- left_join(sp,bp, by = "Nominee")
#Selecting the TEST column
lj5 <- unlist(select(lj5,TEST.y))
#Converting NAs to zeros
lj5[is.na(lj5)] <- 0
lj5 <- unlist(lj5)
Sound Mixing vs. Best Picture
#Left join for Sound Sound Mixing vs. Best Picture
lj6 <- left_join(sm,bp, by = "Nominee")
#Selecting the TEST column
lj6 <- unlist(select(lj6,TEST.y))
#Converting NAs to zeros
lj6[is.na(lj6)] <- 0
Actor in Leading Role vs. Best Picture
#Left join for Actor in Laeding Role vs. Best Picture
lj7 <- left_join(ar,bp, by = "Nominee")
#Selecting the TEST column
lj7 <- unlist(select(lj7,TEST.y))
#Converting NAs to zeros
lj7[is.na(lj7)] <- 0
Film Editing vs. Rest
F-Test
var.test(lj2,lj1)
##
## F test to compare two variances
##
## data: lj2 and lj1
## F = 2.5919, num df = 165, denom df = 1301, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 2.081404 3.295127
## sample estimates:
## ratio of variances
## 2.591937
T-Test
t.test(lj2,lj1, var.equal=FALSE, paired=FALSE)
##
## Welch Two Sample t-test
##
## data: lj2 and lj1
## t = 4.0773, df = 181.58, p-value = 6.809e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.06618854 0.19032137
## sample estimates:
## mean of x mean of y
## 0.19277108 0.06451613
Film Editing vs. Cinematography
F-Test
var.test(lj2,lj3)
##
## F test to compare two variances
##
## data: lj2 and lj3
## F = 1.3037, num df = 165, denom df = 165, p-value = 0.08942
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.9598425 1.7708502
## sample estimates:
## ratio of variances
## 1.30374
T-Test
t.test(lj2,lj3, var.equal=FALSE, paired=FALSE)
##
## Welch Two Sample t-test
##
## data: lj2 and lj3
## t = 1.3281, df = 324.36, p-value = 0.1851
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.02609324 0.13452697
## sample estimates:
## mean of x mean of y
## 0.1927711 0.1385542
Film Editing vs. Directing
F-Test
var.test(lj2,lj4)
##
## F test to compare two variances
##
## data: lj2 and lj4
## F = 1.0294, num df = 165, denom df = 166, p-value = 0.8522
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.7582784 1.3976771
## sample estimates:
## ratio of variances
## 1.029406
T-Test
t.test(lj2,lj4, var.equal=FALSE, paired=FALSE)
##
## Welch Two Sample t-test
##
## data: lj2 and lj4
## t = 0.16589, df = 330.86, p-value = 0.8683
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.07755435 0.09183903
## sample estimates:
## mean of x mean of y
## 0.1927711 0.1856287
Fild Editing vs. Special Effects
#var.test(lj2,lj5)
T-Test
#t.test(lj2,lj5, var.equal=FALSE, paired=FALSE)
Film Editing vs. Sound Mixing
var.test(lj2,lj6)
##
## F test to compare two variances
##
## data: lj2 and lj6
## F = 1.4498, num df = 165, denom df = 57, p-value = 0.106
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.9227056 2.1771428
## sample estimates:
## ratio of variances
## 1.449764
T-Test
t.test(lj2,lj6, var.equal=FALSE, paired=FALSE)
##
## Welch Two Sample t-test
##
## data: lj2 and lj6
## t = 1.361, df = 118.84, p-value = 0.1761
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.03278894 0.17695180
## sample estimates:
## mean of x mean of y
## 0.1927711 0.1206897
Film Editing vs. Actor in Leading Role
var.test(lj2,lj7)
##
## F test to compare two variances
##
## data: lj2 and lj7
## F = Inf, num df = 165, denom df = 169, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## Inf Inf
## sample estimates:
## ratio of variances
## Inf
T-Test
t.test(lj2,lj7, var.equal=FALSE, paired=FALSE)
##
## Welch Two Sample t-test
##
## data: lj2 and lj7
## t = 6.2772, df = 165, p-value = 2.931e-09
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.1321362 0.2534060
## sample estimates:
## mean of x mean of y
## 0.1927711 0.0000000