library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stringr)

# Location of the project's master CSV.
url <- "https://raw.githubusercontent.com/mkds/IS607_Project3/gh-pages/Data/Data_All_MASTER_edit.csv"

# Read everything as plain character/numeric columns (no factors).
data <- read.csv(url, stringsAsFactors = FALSE, header = TRUE)

# View() needs a GUI data viewer, so only open it in an interactive session;
# this keeps the script runnable under Rscript or when knitting.
if (interactive()) {
  View(data)
}

# Drop the first 2269 rows and keep the remainder.
# NOTE(review): presumably these are the ceremonies before 1950 (the first rows
# printed further down are from 1950) -- confirm against the CSV.
data <- data[-1:-2269, ]
unique(data$Category)
##   [1] "ACTOR"                                                                                                                                               
##   [2] "ACTOR IN A SUPPORTING ROLE"                                                                                                                          
##   [3] "ACTRESS"                                                                                                                                             
##   [4] "ACTRESS IN A SUPPORTING ROLE"                                                                                                                        
##   [5] "ART DIRECTION (Black-and-White)"                                                                                                                     
##   [6] "ART DIRECTION (Color)"                                                                                                                               
##   [7] "CINEMATOGRAPHY (Black-and-White)"                                                                                                                    
##   [8] "CINEMATOGRAPHY (Color)"                                                                                                                              
##   [9] "COSTUME DESIGN (Black-and-White)"                                                                                                                    
##  [10] "COSTUME DESIGN (Color)"                                                                                                                              
##  [11] "DIRECTING"                                                                                                                                           
##  [12] "DOCUMENTARY (Feature)"                                                                                                                               
##  [13] "DOCUMENTARY (Short Subject)"                                                                                                                         
##  [14] "FILM EDITING"                                                                                                                                        
##  [15] "MUSIC (Music Score of a Dramatic or Comedy Picture)"                                                                                                 
##  [16] "MUSIC (Scoring of a Musical Picture)"                                                                                                                
##  [17] "MUSIC (Song)"                                                                                                                                        
##  [18] "BEST MOTION PICTURE"                                                                                                                                 
##  [19] "SHORT SUBJECT (Cartoon)"                                                                                                                             
##  [20] "SHORT SUBJECT (One-reel)"                                                                                                                            
##  [21] "SHORT SUBJECT (Two-reel)"                                                                                                                            
##  [22] "SOUND RECORDING"                                                                                                                                     
##  [23] "SPECIAL EFFECTS"                                                                                                                                     
##  [24] "WRITING (Motion Picture Story)"                                                                                                                      
##  [25] "WRITING (Screenplay)"                                                                                                                                
##  [26] "WRITING (Story and Screenplay)"                                                                                                                      
##  [27] "HONORARY FOREIGN LANGUAGE FILM AWARD"                                                                                                                
##  [28] "HONORARY AWARD"                                                                                                                                      
##  [29] "IRVING G. THALBERG MEMORIAL AWARD"                                                                                                                   
##  [30] "SCIENTIFIC OR TECHNICAL AWARD (Class II)"                                                                                                            
##  [31] "SCIENTIFIC OR TECHNICAL AWARD (Class III)"                                                                                                           
##  [32] "SCIENTIFIC OR TECHNICAL AWARD (Class I)"                                                                                                             
##  [33] "FOREIGN LANGUAGE FILM"                                                                                                                               
##  [34] "WRITING (Screenplay--Adapted)"                                                                                                                       
##  [35] "WRITING (Screenplay--Original)"                                                                                                                      
##  [36] "JEAN HERSHOLT HUMANITARIAN AWARD"                                                                                                                    
##  [37] "ART DIRECTION"                                                                                                                                       
##  [38] "CINEMATOGRAPHY"                                                                                                                                      
##  [39] "COSTUME DESIGN"                                                                                                                                      
##  [40] "MUSIC (Scoring)"                                                                                                                                     
##  [41] "SHORT SUBJECT (Live Action)"                                                                                                                         
##  [42] "WRITING (Screenplay--based on material from another medium)"                                                                                         
##  [43] "WRITING (Story and Screenplay--written directly for the screen)"                                                                                     
##  [44] "SOUND"                                                                                                                                               
##  [45] "The Longest Day -- Jean Bourgoin; Walter Wottitz; (Henri Persin)"                                                                                    
##  [46] "MUSIC (Music Score--substantially original)"                                                                                                         
##  [47] "MUSIC (Scoring of Music--adaptation or treatment)"                                                                                                   
##  [48] "BEST PICTURE"                                                                                                                                        
##  [49] "SOUND EFFECTS"                                                                                                                                       
##  [50] "SPECIAL VISUAL EFFECTS"                                                                                                                              
##  [51] "MUSIC (Original Music Score)"                                                                                                                        
##  [52] "MUSIC (Original Score--for a motion picture [not a musical])"                                                                                        
##  [53] "MUSIC (Score of a Musical Picture--original or adaptation)"                                                                                          
##  [54] "MUSIC (Song--Original for the Picture)"                                                                                                              
##  [55] "WRITING (Story and Screenplay--based on material not previously published or produced)"                                                              
##  [56] "MUSIC (Original Score)"                                                                                                                              
##  [57] "MUSIC (Original Song Score)"                                                                                                                         
##  [58] "For All We Know from Lovers and Other Strangers -- Music by Fred Karlin; Lyrics by Robb Royer (aka Robb Wilson) and James Griffin (aka Arthur James)"
##  [59] "WRITING (Story and Screenplay--based on factual material or material not previously published or produced)"                                          
##  [60] "MUSIC (Original Dramatic Score)"                                                                                                                     
##  [61] "MUSIC (Scoring: Adaptation and Original Song Score)"                                                                                                 
##  [62] "SHORT SUBJECT (Animated)"                                                                                                                            
##  [63] "SPECIAL ACHIEVEMENT AWARD (Visual Effects)"                                                                                                          
##  [64] "MUSIC (Scoring: Original Song Score and Adaptation -or- Scoring: Adaptation)"                                                                        
##  [65] "SHORT FILM (Animated)"                                                                                                                               
##  [66] "SHORT FILM (Live Action)"                                                                                                                            
##  [67] "WRITING (Original Screenplay)"                                                                                                                       
##  [68] "WRITING (Screenplay Adapted from Other Material)"                                                                                                    
##  [69] "MUSIC (Original Song)"                                                                                                                               
##  [70] "SPECIAL ACHIEVEMENT AWARD (Sound Effects)"                                                                                                           
##  [71] "ACTOR IN A LEADING ROLE"                                                                                                                             
##  [72] "ACTRESS IN A LEADING ROLE"                                                                                                                           
##  [73] "MUSIC (Original Song Score and Its Adaptation or Adaptation Score)"                                                                                  
##  [74] "WRITING (Screenplay Written Directly for the Screen--based on factual material or on story material not previously published or produced)"           
##  [75] "VISUAL EFFECTS"                                                                                                                                      
##  [76] "SPECIAL ACHIEVEMENT AWARD"                                                                                                                           
##  [77] "SPECIAL ACHIEVEMENT AWARD (Sound Effects Editing)"                                                                                                   
##  [78] "MEDAL OF COMMENDATION"                                                                                                                               
##  [79] "MUSIC (Adaptation Score)"                                                                                                                            
##  [80] "WRITING (Screenplay Based on Material from Another Medium)"                                                                                          
##  [81] "WRITING (Screenplay Written Directly for the Screen)"                                                                                                
##  [82] "SCIENTIFIC OR TECHNICAL AWARD (Academy Award of Merit)"                                                                                              
##  [83] "SCIENTIFIC OR TECHNICAL AWARD (Scientific and Engineering Award)"                                                                                    
##  [84] "SCIENTIFIC OR TECHNICAL AWARD (Technical Achievement Award)"                                                                                         
##  [85] "MUSIC (Original Song Score and Its Adaptation -or- Adaptation Score)"                                                                                
##  [86] "SPECIAL ACHIEVEMENT AWARD (Sound Editing)"                                                                                                           
##  [87] "SHORT FILM (Dramatic Live Action)"                                                                                                                   
##  [88] "MAKEUP"                                                                                                                                              
##  [89] "GORDON E. SAWYER AWARD"                                                                                                                              
##  [90] "SOUND EFFECTS EDITING"                                                                                                                               
##  [91] "MUSIC (Original Song Score or Adaptation Score)"                                                                                                     
##  [92] "AWARD OF COMMENDATION"                                                                                                                               
##  [93] "JFK -- Robert Richardson"                                                                                                                            
##  [94] "JFK -- Oliver Stone"                                                                                                                                 
##  [95] "JFK -- Joe Hutshing; Pietro Scalia"                                                                                                                  
##  [96] "JFK -- John Williams"                                                                                                                                
##  [97] "JFK -- A. Kitman Ho and Oliver Stone; Producers"                                                                                                     
##  [98] "JFK -- Michael Minkler; Gregg Landaker; Tod A. Maitland"                                                                                             
##  [99] "WRITING (Screenplay Based on Material Previously Produced or Published)"                                                                             
## [100] "JFK -- Oliver Stone; Zachary Sklar"                                                                                                                  
## [101] "SCIENTIFIC AND TECHNICAL AWARD (Academy Award of Merit)"                                                                                             
## [102] "SCIENTIFIC AND TECHNICAL AWARD (Scientific and Engineering Award)"                                                                                   
## [103] "SCIENTIFIC AND TECHNICAL AWARD (Technical Achievement Award)"                                                                                        
## [104] "MUSIC (Original Musical or Comedy Score)"                                                                                                            
## [105] "JOHN A. BONNER MEDAL OF COMMENDATION"                                                                                                                
## [106] "SOUND EDITING"                                                                                                                                       
## [107] "ANIMATED FEATURE FILM"                                                                                                                               
## [108] "WRITING (Adapted Screenplay)"                                                                                                                        
## [109] "SOUND MIXING"                                                                                                                                        
## [110] "WALL-E -- Ben Burtt and Matthew Wood"                                                                                                                
## [111] "WALL-E -- Tom Myers; Michael Semanick and Ben Burtt"                                                                                                 
## [112] "MAKEUP AND HAIRSTYLING"                                                                                                                              
## [113] "PRODUCTION DESIGN"
# Many awards for the same thing are labeled differently over the years; the replacements below normalize those labels.
# Note on the sound categories: sound editing encompasses all recording of audio, whereas sound mixing is deciding how to merge the recordings for the finished product.

# Category clean-up rules: names are regular expressions, values are the
# replacement text. They are applied one after another, in this order, so a
# later rule sees the result of the earlier ones.
category_rules <- c(
  "(MUSIC).*"                   = "MUSIC",
  "(CINEMATOGRAPHY).*"          = "CINEMATOGRAPHY",
  "(COSTUME DESIGN).*"          = "COSTUME DESIGN",
  "(ART DIRECTION).*"           = "ART DIRECTION",
  ".*(VISUAL EFFECTS)"          = "SPECIAL EFFECTS",
  "(WRITING).*"                 = "WRITING",
  "(MAKEUP).*"                  = "MAKEUP",
  "(ACTOR IN A LEADING ROLE)"   = "ACTOR",
  "(ACTRESS IN A LEADING ROLE)" = "ACTRESS",
  "(BEST MOTION PICTURE)"       = "BEST PICTURE",
  "(SOUND EFFECTS EDITING)"     = "SOUND EDITING",
  "(SOUND RECORDING)"           = "SOUND EDITING",
  "(SOUND EFFECTS)"             = "SOUND EDITING",
  "(SOUND$)"                    = "SOUND MIXING",
  "(For All We Know).*"         = "MUSIC"
)

for (rule_pattern in names(category_rules)) {
  data$Category <- str_replace_all(
    data$Category,
    rule_pattern,
    replacement = category_rules[[rule_pattern]]
  )
}

# Shorten any nominee entry that starts with the song title to title + film.
data$Nominee <- str_replace_all(
  data$Nominee,
  "(For All We Know).*",
  replacement = "For All We Know from Lovers and Other Strangers"
)


# Had some issues with the actors and actresses not pulling properly to the totals.
#
# trim(x): remove every comma from x, then strip leading and trailing
# whitespace. Commas are removed first so that whitespace which only becomes
# leading/trailing once a comma is gone (e.g. "Smith ," or ", x") is stripped
# as well. Returns a character vector the same length as x; NA stays NA.
trim <- function(x) {
  no_commas <- gsub(",", "", x, fixed = TRUE)
  gsub("^[[:space:]]+|[[:space:]]+$", "", no_commas)
}
# Clean every column with trim(). lapply() always returns one element per
# column, whereas sapply() collapses to a plain vector when the frame has a
# single row and would then produce a one-column data frame.
data <- data.frame(lapply(data, trim), check.names = FALSE)

# Restrict the data to the award categories evaluated in this analysis.
keep_categories <- c(
  "ACTOR", "ACTOR IN A SUPPORTING ROLE",
  "ACTRESS", "ACTRESS IN A SUPPORTING ROLE",
  "ART DIRECTION", "CINEMATOGRAPHY",
  "COSTUME DESIGN", "DIRECTING",
  "FILM EDITING", "MUSIC",
  "BEST PICTURE", "SPECIAL EFFECTS",
  "WRITING", "MAKEUP", "SOUND EDITING"
)
datanew <- data %>%
  filter(Category %in% keep_categories)

head(datanew)
##          Year                   Category                Nominee
## 1 1950 (23rd)                      ACTOR The Magnificent Yankee
## 2 1950 (23rd)                      ACTOR     Cyrano de Bergerac
## 3 1950 (23rd)                      ACTOR           Sunset Blvd.
## 4 1950 (23rd)                      ACTOR                 Harvey
## 5 1950 (23rd)                      ACTOR    Father of the Bride
## 6 1950 (23rd) ACTOR IN A SUPPORTING ROLE           Broken Arrow
##   Additional.Info Won.
## 1   Louis Calhern   no
## 2     José Ferrer  yes
## 3  William Holden   no
## 4   James Stewart   no
## 5   Spencer Tracy   no
## 6   Jeff Chandler   no
# The list of distinct categories is now much shorter.
unique(datanew[["Category"]])
##  [1] ACTOR                        ACTOR IN A SUPPORTING ROLE  
##  [3] ACTRESS                      ACTRESS IN A SUPPORTING ROLE
##  [5] ART DIRECTION                CINEMATOGRAPHY              
##  [7] COSTUME DESIGN               DIRECTING                   
##  [9] FILM EDITING                 MUSIC                       
## [11] BEST PICTURE                 SOUND EDITING               
## [13] SPECIAL EFFECTS              WRITING                     
## [15] MAKEUP                      
## 61 Levels: ACTOR ACTOR IN A SUPPORTING ROLE ... WRITING
# Save the filtered, normalized nominations.
write.csv(x = datanew, file = "data_categories.csv", row.names = FALSE)

# Best Picture is no longer wanted as an observation, so drop those rows.
main <- datanew %>%
  filter(Category != "BEST PICTURE")

# Column 5 holds the yes/no win flag; rename it and recode it as 1/0 integers.
names(main)[5] <- "won category"
main[["won category"]] <- main[["won category"]] %>%
  str_replace_all("yes", "1") %>%
  str_replace_all("no", "0") %>%
  as.integer()
head(main)
##          Year                   Category                Nominee
## 1 1950 (23rd)                      ACTOR The Magnificent Yankee
## 2 1950 (23rd)                      ACTOR     Cyrano de Bergerac
## 3 1950 (23rd)                      ACTOR           Sunset Blvd.
## 4 1950 (23rd)                      ACTOR                 Harvey
## 5 1950 (23rd)                      ACTOR    Father of the Bride
## 6 1950 (23rd) ACTOR IN A SUPPORTING ROLE           Broken Arrow
##   Additional.Info won category
## 1   Louis Calhern            0
## 2     José Ferrer            1
## 3  William Holden            0
## 4   James Stewart            0
## 5   Spencer Tracy            0
## 6   Jeff Chandler            0
# Create a separate data frame of Best Picture rows so the two can be merged properly.
bp <- datanew %>%
  filter(Category == "BEST PICTURE")
# Drop columns 1, 2 and 4, leaving the nominee and the yes/no win flag.
bp <- bp[, -c(1, 2, 4)]
names(bp)[2] <- "bp won"

# Every movie here was nominated for Best Picture, so each row gets a 1.
bp$`bp nomination` <- 1L

head(bp)
##                Nominee bp won bp nomination
## 1        All about Eve    yes             1
## 2       Born Yesterday     no             1
## 3  Father of the Bride     no             1
## 4 King Solomon's Mines     no             1
## 5         Sunset Blvd.     no             1
## 6 An American in Paris    yes             1
# Recode the yes/no win flag as 1/0 integers so it can be summed later.
bp[["bp won"]] <- bp[["bp won"]] %>%
  str_replace_all("yes", "1") %>%
  str_replace_all("no", "0") %>%
  as.integer()

# Merge the two to apply the Best Picture info to the per-category nominations.
#
# Matching on the title alone pairs a film with any same-titled Best Picture
# nominee from a different year, and duplicates rows when a title has more than
# one Best Picture nomination. The ceremony year is therefore part of the key.
# `bp` was derived from the BEST PICTURE rows of `datanew` by dropping columns
# only, so its rows are still in the same order and the year can be re-attached
# by position.
bp_keyed <- bp
bp_years <- datanew$Year[which(datanew$Category == "BEST PICTURE")]
stopifnot(length(bp_years) == nrow(bp_keyed))
bp_keyed$Year <- bp_years

combined <- merge(main, bp_keyed, by = c("Year", "Nominee"), all.x = TRUE)

# merge() places the key columns first; restore the Nominee-first column order
# that a merge on Nominee alone would have produced.
combined <- combined[, c("Nominee", setdiff(names(combined), "Nominee"))]


# Make sure the descriptive columns are plain character vectors.
text_cols <- c("Nominee", "Year", "Category", "Additional.Info")
for (col_name in text_cols) {
  combined[[col_name]] <- as.character(combined[[col_name]])
}

# Replace every NA in the data frame with zero (rows without a Best Picture
# match come out of the merge with NA flags).
combined <- replace(combined, is.na(combined), 0)

# Each row is one category nomination, so flag every row with an integer 1.
combined$`category nomination` <- 1L

# The NA fill above can leave the Best Picture flags as doubles; force integers.
for (flag_col in c("bp nomination", "bp won")) {
  combined[[flag_col]] <- as.integer(combined[[flag_col]])
}

# A Best Picture nomination that did not win counts as a loss:
# nominated (1) minus won (0 or 1).
combined[["bp lost"]] <- combined[["bp nomination"]] - combined[["bp won"]]

# Frequency counts per category: total nominations, category wins, and how many
# of those nominations belong to films that were nominated for / won / lost
# Best Picture.
# NOTE: `summary` masks base::summary() for the rest of the session; the name is
# kept because the rest of the script refers to it.
summary <- combined %>%
  group_by(Category) %>%
  summarize(sum(`category nomination`), sum(`won category`), sum(`bp nomination`),
            sum(`bp won`), sum(`bp lost`))

names(summary) <- c("category", "category nomination", "won category",
                    "bp nomination", "bp won", "bp lost")

# View() needs a GUI data viewer, so only open it in an interactive session;
# this keeps the script runnable under Rscript or when knitting.
if (interactive()) {
  View(summary)
}

write.csv(summary, file = "frequencies.csv", row.names = FALSE)