#OVERVIEW

For Assignment 1, I chose to work on the dataset behind the article “How To Spot A Front-Runner On The ‘Bachelor’ Or ‘Bachelorette’”, published in May 2017 by FiveThirtyEight. The article looked at 33 seasons of the show to understand whether getting a rose in Week 1 of the show was associated with a higher chance of winning the season. Their findings indicated that there wasn’t a strong association between the two.

For this assignment, I would like to look at a smaller subset of their data (only one season) to identify whether there are any patterns in elimination and progression for that particular season of the show.

Data for the article was scraped from the Bachelor Nation Wiki.

Article: https://fivethirtyeight.com/features/the-bachelorette/ Data: https://github.com/fivethirtyeight/data/blob/master/bachelorette/bachelorette.csv Data Dictionary: https://github.com/fivethirtyeight/data/tree/master/bachelorette

#Code

library(devtools)
## Loading required package: usethis
library(RCurl)

#Getting the data from the site

library(RCurl)
# Fetch the raw CSV text from the FiveThirtyEight GitHub repository, parse it
# into a data frame, and print a per-column summary.
x <- getURL(
  "https://raw.githubusercontent.com/fivethirtyeight/data/master/bachelorette/bachelorette.csv"
)
y <- read.csv(text = x)
y <- data.frame(y)
summary(y)
##      SHOW              SEASON           CONTESTANT        ELIMINATION.1     
##  Length:921         Length:921         Length:921         Length:921        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##  ELIMINATION.2      ELIMINATION.3      ELIMINATION.4      ELIMINATION.5     
##  Length:921         Length:921         Length:921         Length:921        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##  ELIMINATION.6      ELIMINATION.7      ELIMINATION.8      ELIMINATION.9     
##  Length:921         Length:921         Length:921         Length:921        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##  ELIMINATION.10       DATES.1            DATES.2            DATES.3         
##  Length:921         Length:921         Length:921         Length:921        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##    DATES.4            DATES.5            DATES.6            DATES.7         
##  Length:921         Length:921         Length:921         Length:921        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##    DATES.8            DATES.9            DATES.10        
##  Length:921         Length:921         Length:921        
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character

#Creating a subset of the data to show only the contestants from Season 13

# Keep only the rows of y that belong to season 13 of "Bachelor".
# which() drops rows where the condition is NA, matching subset()'s behaviour.
is_bachelor_13 <- y$SHOW == 'Bachelor' & y$SEASON == 13
Season13 <- y[which(is_bachelor_13), ]
summary(Season13)
##      SHOW              SEASON           CONTESTANT        ELIMINATION.1     
##  Length:25          Length:25          Length:25          Length:25         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##  ELIMINATION.2      ELIMINATION.3      ELIMINATION.4      ELIMINATION.5     
##  Length:25          Length:25          Length:25          Length:25         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##  ELIMINATION.6      ELIMINATION.7      ELIMINATION.8      ELIMINATION.9     
##  Length:25          Length:25          Length:25          Length:25         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##  ELIMINATION.10       DATES.1            DATES.2            DATES.3         
##  Length:25          Length:25          Length:25          Length:25         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##    DATES.4            DATES.5            DATES.6            DATES.7         
##  Length:25          Length:25          Length:25          Length:25         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##    DATES.8            DATES.9            DATES.10        
##  Length:25          Length:25          Length:25         
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character

#Renaming the columns

library (dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Give the ten weekly elimination columns descriptive names. The lookup is a
# named character vector (new name = old name), the form rename() accepts
# through all_of().
# Two fixes over the previous version:
#   * the rename was wrapped in head(), which silently cut Season13 down to
#     its first 6 rows before overwriting it;
#   * every new name carried a trailing space, so columns could only be
#     reached by backtick-quoting the name including that blank.
# NOTE: the output recorded below in this document was captured before these
# fixes (6 rows, names with a trailing space); re-run to refresh it.
elimination_lookup <- setNames(
  paste0("ELIMINATION.", 1:10),
  paste("ELIMINATED IN WEEK", 1:10)
)
Season13 <- rename(Season13, all_of(elimination_lookup))
summary(Season13)
##      SHOW              SEASON           CONTESTANT        ELIMINATED IN WEEK 1 
##  Length:6           Length:6           Length:6           Length:6             
##  Class :character   Class :character   Class :character   Class :character     
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character     
##  ELIMINATED IN WEEK 2  ELIMINATED IN WEEK 3  ELIMINATED IN WEEK 4 
##  Length:6              Length:6              Length:6             
##  Class :character      Class :character      Class :character     
##  Mode  :character      Mode  :character      Mode  :character     
##  ELIMINATED IN WEEK 5  ELIMINATED IN WEEK 6  ELIMINATED IN WEEK 7 
##  Length:6              Length:6              Length:6             
##  Class :character      Class :character      Class :character     
##  Mode  :character      Mode  :character      Mode  :character     
##  ELIMINATED IN WEEK 8  ELIMINATED IN WEEK 9  ELIMINATED IN WEEK 10 
##  Length:6              Length:6              Length:6              
##  Class :character      Class :character      Class :character      
##  Mode  :character      Mode  :character      Mode  :character      
##    DATES.1            DATES.2            DATES.3            DATES.4         
##  Length:6           Length:6           Length:6           Length:6          
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##    DATES.5            DATES.6            DATES.7            DATES.8         
##  Length:6           Length:6           Length:6           Length:6          
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##    DATES.9            DATES.10        
##  Length:6           Length:6          
##  Class :character   Class :character  
##  Mode  :character   Mode  :character
# Compact column-by-column preview of Season13 after the rename.
dplyr::glimpse(Season13)
## Rows: 6
## Columns: 23
## $ SHOW                     <chr> "Bachelor", "Bachelor", "Bachelor", "Bachelo…
## $ SEASON                   <chr> "13", "13", "13", "13", "13", "13"
## $ CONTESTANT               <chr> "13_MELISSA_R", "13_MOLLY_M", "13_JILLIAN_H"…
## $ `ELIMINATED IN WEEK 1 `  <chr> "", "", "", "", "", ""
## $ `ELIMINATED IN WEEK 2 `  <chr> "R", "R", "R", "", "", ""
## $ `ELIMINATED IN WEEK 3 `  <chr> "", "", "R", "", "R", ""
## $ `ELIMINATED IN WEEK 4 `  <chr> "", "R", "", "R", "R", "E"
## $ `ELIMINATED IN WEEK 5 `  <chr> "", "", "", "", "E", ""
## $ `ELIMINATED IN WEEK 6 `  <chr> "", "", "", "E", "", ""
## $ `ELIMINATED IN WEEK 7 `  <chr> "", "", "E", "", "", ""
## $ `ELIMINATED IN WEEK 8 `  <chr> "W", "E", "", "", "", ""
## $ `ELIMINATED IN WEEK 9 `  <chr> "", "", "", "", "", ""
## $ `ELIMINATED IN WEEK 10 ` <chr> "", "", "", "", "", ""
## $ DATES.1                  <chr> "", "", "", "", "", ""
## $ DATES.2                  <chr> "D1", "D8", "D1", "D8", "", "D8"
## $ DATES.3                  <chr> "D8", "", "D8", "D8", "D1", ""
## $ DATES.4                  <chr> "D6", "D1", "D6", "D6", "D2", "D6"
## $ DATES.5                  <chr> "D1", "D3", "D3", "D1", "D3", ""
## $ DATES.6                  <chr> "D1", "D1", "D1", "D1", "", ""
## $ DATES.7                  <chr> "D1", "D1", "D1", "", "", ""
## $ DATES.8                  <chr> "D1", "D1", "", "", "", ""
## $ DATES.9                  <chr> "", "", "", "", "", ""
## $ DATES.10                 <chr> "", "", "", "", "", ""
# Print the contestant identifiers held in Season13.
Season13[["CONTESTANT"]]
## [1] "13_MELISSA_R"   "13_MOLLY_M"     "13_JILLIAN_H"   "13_NAOMI_C"    
## [5] "13_STEPHANIE_H" "13_LAUREN_W"

#FURTHER ANALYSIS

In this assignment, I have created a subset of the main dataset that shows only the data for season 13. In addition, I have renamed the columns for improved readability.

The next steps would be to explore and further transform the data (the dataset currently has no numeric data) to perform analyses.