Patrick
January 5, 2017
This analysis aims to identify key features that might predict the success or failure of a Kickstarter project. The data comes from Cathie So’s Kickstarter Project Statistics. We’ll only be analysing the 4,000 most-backed Kickstarter projects.
We’ve made some modifications to the core data, and we’ve created 3 classes of projects: Games, Movies and Technology. This will help determine whether the category of a project has an impact on its performance.
Much of this code won’t be shown because of the slide format, but you should get a general idea of how we’ve cleaned up the data.
library(stringr)
# Load the Kickstarter export. "most_backed.csv" has to be present in the
# current working directory before this runs.
x <- read.csv("most_backed.csv")
# Append a logical column, caliVar, that is TRUE when the location string
# ends in " CA" (presumably California -- confirm against the data).
x <- cbind(x, caliVar = grepl(" CA$", x$location))
# Convert all pricing to USD
# Hard-coded conversion factors (one unit of the currency -> USD), keyed by
# the currency codes as they appear in x$currency. "usd" maps to 1 so rows
# that are already in dollars pass through unchanged.
usd_rates <- c(
  usd = 1,
  aud = 0.73,
  gbp = 1.23,
  cad = 0.75,
  eur = 1.04,
  sek = 0.11,
  nzd = 0.69,
  dkk = 0.14,
  chf = 0.97
)

# Look up one rate per row; codes without an entry (or NA) come back as NA.
currency_code <- as.character(x$currency)
row_rate <- unname(usd_rates[match(currency_code, names(usd_rates))])
convertible <- !is.na(row_rate)

# Scale both money columns with the same per-row rate.
x$goal[convertible] <- x$goal[convertible] * row_rate[convertible]
x$amt.pledged[convertible] <-
  x$amt.pledged[convertible] * row_rate[convertible]

# Only rows that were actually converted may be relabelled "usd". Rows in a
# currency with no known rate keep their original code so their amounts are
# not silently mistaken for dollars.
if (any(!convertible)) {
  warning(
    "No USD rate for currency code(s): ",
    paste(unique(currency_code[!convertible]), collapse = ", "),
    "; those rows were left unconverted.",
    call. = FALSE
  )
}
currency_code[convertible] <- "usd"
x$currency <- currency_code
# Derived ratios, computed column-wise:
#   pledge.ratio      - amount pledged relative to the funding goal
#   per.backer.pledge - amount pledged divided by the number of backers
x$pledge.ratio <- with(x, amt.pledged / goal)
x$per.backer.pledge <- with(x, amt.pledged / num.backers)
# Create 3 classes of project and combine into new dataset

# Return the rows of `projects` whose blurb contains `keyword`, with a
# kickType column set to that keyword. The label is built with rep() so the
# assignment also works when nothing matches (a length-1 value assigned with
# `$<-` to a zero-row data frame is an error).
tag_kick_type <- function(projects, keyword) {
  hits <- projects[grepl(keyword, projects$blurb), ]
  hits$kickType <- rep(keyword, nrow(hits))
  hits
}

# NOTE(review): matching is case-sensitive and substring-based, so a blurb
# containing only "Game" is not picked up, and a blurb that mentions several
# keywords appears once per class in the combined data -- confirm this is
# intended.
xmovie <- tag_kick_type(x, "movie")
xgame <- tag_kick_type(x, "game")
xtechnology <- tag_kick_type(x, "technology")

# Stack the three subsets and turn the class label into a factor.
kicktypex <- rbind(xmovie, xgame, xtechnology)
kicktypex$kickType <- as.factor(kicktypex$kickType)
# Extra per-project columns for later analysis.
# num.tiers: one more than the number of commas in the pledge.tier string.
kicktypex$num.tiers <- 1 + str_count(kicktypex[["pledge.tier"]], ",")
# per.backer.pledge.mse: squared deviation of each project's per-backer
# pledge from the mean over all rows of kicktypex.
kicktypex$per.backer.pledge.mse <- with(
  kicktypex,
  (per.backer.pledge - mean(per.backer.pledge))^2
)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
## No scatter3d mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode