#OVERVIEW
For Assignment 1, I chose to work on the dataset behind the article “How To Spot A Front-Runner On The ‘Bachelor’ Or ‘Bachelorette’”, published in May 2017 by FiveThirtyEight. The article looked at 33 seasons of the show to understand whether getting a rose in Week 1 was associated with a higher chance of winning the season. Their findings indicated that there wasn’t a strong association between the two.
For this assignment, I would like to look at a smaller subset of their data (only one season) to try to identify whether there are any patterns in elimination and progression for that particular season of the show.
Data for the article was scraped from the Bachelor Nation Wiki.
Article: https://fivethirtyeight.com/features/the-bachelorette/ Data: https://github.com/fivethirtyeight/data/blob/master/bachelorette/bachelorette.csv Data Dictionary: https://github.com/fivethirtyeight/data/tree/master/bachelorette
#Code
library(devtools)
## Loading required package: usethis
library(RCurl)
# Getting the data from the site
# Download the raw CSV as a single string. getURL() is called through its
# namespace, so this chunk does not need RCurl to be attached a second time.
x <- RCurl::getURL(
  "https://raw.githubusercontent.com/fivethirtyeight/data/master/bachelorette/bachelorette.csv"
)
# read.csv() already returns a data.frame, so no data.frame() wrapper is
# needed around it.
y <- read.csv(text = x)
# Overview of the parsed data; the output below reports every column as
# character.
summary(y)
## SHOW SEASON CONTESTANT ELIMINATION.1
## Length:921 Length:921 Length:921 Length:921
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## ELIMINATION.2 ELIMINATION.3 ELIMINATION.4 ELIMINATION.5
## Length:921 Length:921 Length:921 Length:921
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## ELIMINATION.6 ELIMINATION.7 ELIMINATION.8 ELIMINATION.9
## Length:921 Length:921 Length:921 Length:921
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## ELIMINATION.10 DATES.1 DATES.2 DATES.3
## Length:921 Length:921 Length:921 Length:921
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## DATES.4 DATES.5 DATES.6 DATES.7
## Length:921 Length:921 Length:921 Length:921
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## DATES.8 DATES.9 DATES.10
## Length:921 Length:921 Length:921
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
# Creating a subset of the data: rows where SHOW is "Bachelor" and SEASON is 13
# which() drops comparisons that evaluate to NA, so exactly the rows that
# satisfy both conditions are kept, with all columns retained.
Season13 <- y[which(y$SHOW == "Bachelor" & y$SEASON == 13), , drop = FALSE]
summary(Season13)
## SHOW SEASON CONTESTANT ELIMINATION.1
## Length:25 Length:25 Length:25 Length:25
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## ELIMINATION.2 ELIMINATION.3 ELIMINATION.4 ELIMINATION.5
## Length:25 Length:25 Length:25 Length:25
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## ELIMINATION.6 ELIMINATION.7 ELIMINATION.8 ELIMINATION.9
## Length:25 Length:25 Length:25 Length:25
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## ELIMINATION.10 DATES.1 DATES.2 DATES.3
## Length:25 Length:25 Length:25 Length:25
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## DATES.4 DATES.5 DATES.6 DATES.7
## Length:25 Length:25 Length:25 Length:25
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## DATES.8 DATES.9 DATES.10
## Length:25 Length:25 Length:25
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
#Renaming the columns
library (dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Build the rename lookup for the ten ELIMINATION.<n> columns: vector names
# are the new column names, values are the existing ones. The new names keep
# their trailing space so they match the names in the output shown below.
weeks <- seq_len(10)
week_labels <- setNames(
  paste0("ELIMINATION.", weeks),
  paste0("ELIMINATED IN WEEK ", weeks, " ")
)
# Rename on the full Season 13 subset. Assigning head(rename(...)) back to
# Season13 would silently discard all but the first 6 of the 25 contestants.
Season13 <- rename(Season13, all_of(week_labels))
# Preview only the first six rows; Season13 itself keeps every contestant.
summary(head(Season13))
## SHOW SEASON CONTESTANT ELIMINATED IN WEEK 1
## Length:6 Length:6 Length:6 Length:6
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## ELIMINATED IN WEEK 2 ELIMINATED IN WEEK 3 ELIMINATED IN WEEK 4
## Length:6 Length:6 Length:6
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
## ELIMINATED IN WEEK 5 ELIMINATED IN WEEK 6 ELIMINATED IN WEEK 7
## Length:6 Length:6 Length:6
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
## ELIMINATED IN WEEK 8 ELIMINATED IN WEEK 9 ELIMINATED IN WEEK 10
## Length:6 Length:6 Length:6
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
## DATES.1 DATES.2 DATES.3 DATES.4
## Length:6 Length:6 Length:6 Length:6
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## DATES.5 DATES.6 DATES.7 DATES.8
## Length:6 Length:6 Length:6 Length:6
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## DATES.9 DATES.10
## Length:6 Length:6
## Class :character Class :character
## Mode :character Mode :character
glimpse(head(Season13))
## Rows: 6
## Columns: 23
## $ SHOW <chr> "Bachelor", "Bachelor", "Bachelor", "Bachelo…
## $ SEASON <chr> "13", "13", "13", "13", "13", "13"
## $ CONTESTANT <chr> "13_MELISSA_R", "13_MOLLY_M", "13_JILLIAN_H"…
## $ `ELIMINATED IN WEEK 1 ` <chr> "", "", "", "", "", ""
## $ `ELIMINATED IN WEEK 2 ` <chr> "R", "R", "R", "", "", ""
## $ `ELIMINATED IN WEEK 3 ` <chr> "", "", "R", "", "R", ""
## $ `ELIMINATED IN WEEK 4 ` <chr> "", "R", "", "R", "R", "E"
## $ `ELIMINATED IN WEEK 5 ` <chr> "", "", "", "", "E", ""
## $ `ELIMINATED IN WEEK 6 ` <chr> "", "", "", "E", "", ""
## $ `ELIMINATED IN WEEK 7 ` <chr> "", "", "E", "", "", ""
## $ `ELIMINATED IN WEEK 8 ` <chr> "W", "E", "", "", "", ""
## $ `ELIMINATED IN WEEK 9 ` <chr> "", "", "", "", "", ""
## $ `ELIMINATED IN WEEK 10 ` <chr> "", "", "", "", "", ""
## $ DATES.1 <chr> "", "", "", "", "", ""
## $ DATES.2 <chr> "D1", "D8", "D1", "D8", "", "D8"
## $ DATES.3 <chr> "D8", "", "D8", "D8", "D1", ""
## $ DATES.4 <chr> "D6", "D1", "D6", "D6", "D2", "D6"
## $ DATES.5 <chr> "D1", "D3", "D3", "D1", "D3", ""
## $ DATES.6 <chr> "D1", "D1", "D1", "D1", "", ""
## $ DATES.7 <chr> "D1", "D1", "D1", "", "", ""
## $ DATES.8 <chr> "D1", "D1", "", "", "", ""
## $ DATES.9 <chr> "", "", "", "", "", ""
## $ DATES.10 <chr> "", "", "", "", "", ""
head(Season13$CONTESTANT)
## [1] "13_MELISSA_R" "13_MOLLY_M" "13_JILLIAN_H" "13_NAOMI_C"
## [5] "13_STEPHANIE_H" "13_LAUREN_W"
#FURTHER ANALYSIS
In this assignment, I have created a subset of the main dataset that contains only data from Season 13 of the show. In addition, I have renamed the elimination columns for improved readability.
The next steps would be to explore and further transform the data (the dataset currently has no numeric data) to perform analyses.