Objective

Required Libraries

library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Loading the Data

# Import the nominations table, keeping text columns as plain character vectors.
# NOTE(review): the path is absolute and machine-specific; adjust it before
# running this on another computer.
mydata <- read.csv(
  file = "C:/Users/Diego Diaz/Documents/project3.csv",
  stringsAsFactors = FALSE
)

# Preview the first rows
head(mydata)
##   Year                   Category            Nominee Won
## 1 1981    ACTOR IN A LEADING ROLE              Reds   no
## 2 1981    ACTOR IN A LEADING ROLE    On Golden Pond  yes
## 3 1981    ACTOR IN A LEADING ROLE     Atlantic City   no
## 4 1981    ACTOR IN A LEADING ROLE            Arthur   no
## 5 1981    ACTOR IN A LEADING ROLE Absence of Malice   no
## 6 1981 ACTOR IN A SUPPORTING ROLE Only When I Laugh   no
# Show column types. The Nominee strings printed below end in a space
# (e.g. "Reds "), which matters wherever Nominee is compared or used as a key.
str(object = mydata)
## 'data.frame':    1660 obs. of  4 variables:
##  $ Year    : int  1981 1981 1981 1981 1981 1981 1981 1981 1981 1981 ...
##  $ Category: chr  "ACTOR IN A LEADING ROLE" "ACTOR IN A LEADING ROLE" "ACTOR IN A LEADING ROLE" "ACTOR IN A LEADING ROLE" ...
##  $ Nominee : chr  "Reds " "On Golden Pond " "Atlantic City " "Arthur " ...
##  $ Won     : chr  "no" "yes" "no" "no" ...

Data Transformation

# Build the analysis table:
#   * Nominee is stripped of leading/trailing whitespace. The raw values carry
#     stray padding (str(mydata) shows "Reds " with a trailing space), and
#     Nominee is later used as a join key, where "Reds " and "Reds" would not
#     match. This is the likely reason the Actor in Leading Role comparison
#     finds no Best Picture matches at all.
#   * TEST is a numeric win indicator: 1 when Won is "yes", 0 otherwise.
mydata2 <- mutate(
  mydata,
  Nominee = trimws(Nominee),
  TEST = ifelse(Won == "yes", 1, 0)
)

head(mydata2)
##   Year                   Category            Nominee Won TEST
## 1 1981    ACTOR IN A LEADING ROLE              Reds   no    0
## 2 1981    ACTOR IN A LEADING ROLE    On Golden Pond  yes    1
## 3 1981    ACTOR IN A LEADING ROLE     Atlantic City   no    0
## 4 1981    ACTOR IN A LEADING ROLE            Arthur   no    0
## 5 1981    ACTOR IN A LEADING ROLE Absence of Malice   no    0
## 6 1981 ACTOR IN A SUPPORTING ROLE Only When I Laugh   no    0

Subsetting

# Category subsets of mydata2 used in the comparisons against Best Picture.

# Rest: every category except Film Editing and Best Picture
rt <- mydata2 %>%
  filter(Category != "FILM EDITING", Category != "BEST PICTURE")

# Film Editing
ed <- mydata2 %>%
  filter(Category == "FILM EDITING")

# Best Picture
bp <- mydata2 %>%
  filter(Category == "BEST PICTURE")

# Cinematography
cm <- mydata2 %>%
  filter(Category == "CINEMATOGRAPHY")

# Directing
dr <- mydata2 %>%
  filter(Category == "DIRECTING")

# Special Effects
# NOTE(review): the Special Effects tests further down are commented out;
# confirm that this exact label occurs in the Category column.
sp <- mydata2 %>%
  filter(Category == "SPECIAL EFFECTS")

# Sound Mixing
sm <- mydata2 %>%
  filter(Category == "SOUND MIXING")

# Actor in a Leading Role
ar <- mydata2 %>%
  filter(Category == "ACTOR IN A LEADING ROLE")

Left Joins

Rest vs. Best Picture

# Left join for Rest vs. Best Picture.
# Match on Year as well as Nominee: joining on the title alone would pair a
# nominee with a same-titled Best Picture entry from any other year, which can
# mislabel rows and duplicate them.
lj1 <- left_join(rt, bp, by = c("Year", "Nominee"))

# Keep only the Best Picture win indicator (TEST from the right-hand table)
lj1 <- unlist(select(lj1, TEST.y))

# Nominees without a Best Picture match come back as NA; count them as 0
lj1[is.na(lj1)] <- 0

Film Editing vs. Best Picture

# Left join for Film Editing vs. Best Picture.
# Match on Year as well as Nominee: joining on the title alone would pair a
# nominee with a same-titled Best Picture entry from any other year, which can
# mislabel rows and duplicate them.
lj2 <- left_join(ed, bp, by = c("Year", "Nominee"))

# Keep only the Best Picture win indicator (TEST from the right-hand table)
lj2 <- unlist(select(lj2, TEST.y))

# Nominees without a Best Picture match come back as NA; count them as 0
lj2[is.na(lj2)] <- 0

Cinematography vs. Best Picture

# Left join for Cinematography vs. Best Picture.
# Match on Year as well as Nominee: joining on the title alone would pair a
# nominee with a same-titled Best Picture entry from any other year, which can
# mislabel rows and duplicate them.
lj3 <- left_join(cm, bp, by = c("Year", "Nominee"))

# Keep only the Best Picture win indicator (TEST from the right-hand table)
lj3 <- unlist(select(lj3, TEST.y))

# Nominees without a Best Picture match come back as NA; count them as 0
lj3[is.na(lj3)] <- 0

Directing vs. Best Picture

# Left join for Directing vs. Best Picture.
# Match on Year as well as Nominee: joining on the title alone would pair a
# nominee with a same-titled Best Picture entry from any other year, which can
# mislabel rows and duplicate them.
lj4 <- left_join(dr, bp, by = c("Year", "Nominee"))

# Keep only the Best Picture win indicator (TEST from the right-hand table)
lj4 <- unlist(select(lj4, TEST.y))

# Nominees without a Best Picture match come back as NA; count them as 0
lj4[is.na(lj4)] <- 0

Special Effects vs. Best Picture

# Left join for Special Effects vs. Best Picture.
# Match on Year as well as Nominee: joining on the title alone would pair a
# nominee with a same-titled Best Picture entry from any other year, which can
# mislabel rows and duplicate them.
lj5 <- left_join(sp, bp, by = c("Year", "Nominee"))

# Keep only the Best Picture win indicator (TEST from the right-hand table).
# One unlist() is enough: the result is already a plain vector.
lj5 <- unlist(select(lj5, TEST.y))

# Nominees without a Best Picture match come back as NA; count them as 0
lj5[is.na(lj5)] <- 0

Sound Mixing vs. Best Picture

# Left join for Sound Mixing vs. Best Picture.
# Match on Year as well as Nominee: joining on the title alone would pair a
# nominee with a same-titled Best Picture entry from any other year, which can
# mislabel rows and duplicate them.
lj6 <- left_join(sm, bp, by = c("Year", "Nominee"))

# Keep only the Best Picture win indicator (TEST from the right-hand table)
lj6 <- unlist(select(lj6, TEST.y))

# Nominees without a Best Picture match come back as NA; count them as 0
lj6[is.na(lj6)] <- 0

Actor in Leading Role vs. Best Picture

# Left join for Actor in Leading Role vs. Best Picture.
# Match on Year as well as Nominee: joining on the title alone would pair a
# nominee with a same-titled Best Picture entry from any other year, which can
# mislabel rows and duplicate them.
lj7 <- left_join(ar, bp, by = c("Year", "Nominee"))

# Keep only the Best Picture win indicator (TEST from the right-hand table)
lj7 <- unlist(select(lj7, TEST.y))

# Nominees without a Best Picture match come back as NA; count them as 0
lj7[is.na(lj7)] <- 0

F and T-Test Analysis

Film Editing vs. Rest

F-Test

# Two-sided F test on the ratio of variances: Film Editing (x) vs. Rest (y)
var.test(x = lj2, y = lj1)
## 
##  F test to compare two variances
## 
## data:  lj2 and lj1
## F = 2.5919, num df = 165, denom df = 1301, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  2.081404 3.295127
## sample estimates:
## ratio of variances 
##           2.591937

T-Test

# Unpaired Welch t-test (variances not pooled): Film Editing (x) vs. Rest (y)
t.test(x = lj2, y = lj1, paired = FALSE, var.equal = FALSE)
## 
##  Welch Two Sample t-test
## 
## data:  lj2 and lj1
## t = 4.0773, df = 181.58, p-value = 6.809e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.06618854 0.19032137
## sample estimates:
##  mean of x  mean of y 
## 0.19277108 0.06451613

Film Editing vs. Cinematography

F-Test

# Two-sided F test on the ratio of variances: Film Editing (x) vs. Cinematography (y)
var.test(x = lj2, y = lj3)
## 
##  F test to compare two variances
## 
## data:  lj2 and lj3
## F = 1.3037, num df = 165, denom df = 165, p-value = 0.08942
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.9598425 1.7708502
## sample estimates:
## ratio of variances 
##            1.30374

T-Test

# Unpaired Welch t-test (variances not pooled): Film Editing (x) vs. Cinematography (y)
t.test(x = lj2, y = lj3, paired = FALSE, var.equal = FALSE)
## 
##  Welch Two Sample t-test
## 
## data:  lj2 and lj3
## t = 1.3281, df = 324.36, p-value = 0.1851
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.02609324  0.13452697
## sample estimates:
## mean of x mean of y 
## 0.1927711 0.1385542

Film Editing vs. Directing

F-Test

# Two-sided F test on the ratio of variances: Film Editing (x) vs. Directing (y)
var.test(x = lj2, y = lj4)
## 
##  F test to compare two variances
## 
## data:  lj2 and lj4
## F = 1.0294, num df = 165, denom df = 166, p-value = 0.8522
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.7582784 1.3976771
## sample estimates:
## ratio of variances 
##           1.029406

T-Test

# Unpaired Welch t-test (variances not pooled): Film Editing (x) vs. Directing (y)
t.test(x = lj2, y = lj4, paired = FALSE, var.equal = FALSE)
## 
##  Welch Two Sample t-test
## 
## data:  lj2 and lj4
## t = 0.16589, df = 330.86, p-value = 0.8683
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.07755435  0.09183903
## sample estimates:
## mean of x mean of y 
## 0.1927711 0.1856287

Film Editing vs. Special Effects

#var.test(lj2,lj5)

T-Test

#t.test(lj2,lj5, var.equal=FALSE, paired=FALSE)

Film Editing vs. Sound Mixing

# Two-sided F test on the ratio of variances: Film Editing (x) vs. Sound Mixing (y)
var.test(x = lj2, y = lj6)
## 
##  F test to compare two variances
## 
## data:  lj2 and lj6
## F = 1.4498, num df = 165, denom df = 57, p-value = 0.106
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.9227056 2.1771428
## sample estimates:
## ratio of variances 
##           1.449764

T-Test

# Unpaired Welch t-test (variances not pooled): Film Editing (x) vs. Sound Mixing (y)
t.test(x = lj2, y = lj6, paired = FALSE, var.equal = FALSE)
## 
##  Welch Two Sample t-test
## 
## data:  lj2 and lj6
## t = 1.361, df = 118.84, p-value = 0.1761
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.03278894  0.17695180
## sample estimates:
## mean of x mean of y 
## 0.1927711 0.1206897

Film Editing vs. Actor in Leading Role

# Two-sided F test on the ratio of variances: Film Editing (x) vs. Actor in Leading Role (y)
var.test(x = lj2, y = lj7)
## 
##  F test to compare two variances
## 
## data:  lj2 and lj7
## F = Inf, num df = 165, denom df = 169, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  Inf Inf
## sample estimates:
## ratio of variances 
##                Inf

T-Test

# Unpaired Welch t-test (variances not pooled): Film Editing (x) vs. Actor in Leading Role (y)
t.test(x = lj2, y = lj7, paired = FALSE, var.equal = FALSE)
## 
##  Welch Two Sample t-test
## 
## data:  lj2 and lj7
## t = 6.2772, df = 165, p-value = 2.931e-09
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.1321362 0.2534060
## sample estimates:
## mean of x mean of y 
## 0.1927711 0.0000000

Conclusion