# Load libraries and set environment options
library(dplyr)
library(tidyr)
library(knitr)
library(readxl)
library(ggplot2)
# Use this option to suppress scientific notation in printing values
# (digits = 2 also limits printed numbers to two significant digits)
options(scipen = 10, digits = 2)
# list of files in working directory
dir()
## [1] "A2_kickstart.xlsx" "Assignment1.png" "Assignment1.Rmd"
getwd()
## [1] "C:/Users/hmvsmith/Documents/MBA677/Assignment 1"
# The input workbook is read by relative path, so the script must be started
# from the assignment directory. A machine-specific setwd() call is avoided on
# purpose: it only works on one computer. Instead, stop early with a clear
# message when the workbook is not in the current working directory.
if (!file.exists("A2_kickstart.xlsx")) {
  stop(
    "A2_kickstart.xlsx not found in ", getwd(),
    "; run this script from the assignment directory.",
    call. = FALSE
  )
}
# list of files in "data files" directory
#dir("data files")
#excel_sheets("A2_kickstart.xlsx")
# Load the downloaded Kickstart workbook (saved as .xlsx) with readxl, replace
# the header row with seven analysis-friendly column names, and keep only the
# rows that have a value in Num_Launches.
kickstart <- read_xlsx("A2_kickstart.xlsx", col_names = TRUE) %>%
  setNames(c(
    "Project_Type",
    "Num_Launches",
    "Num_Success",
    "Total_Pledge_Amt",
    "Num_Pledges",
    "Success_Rate",
    "Avg_Pledge"
  )) %>%
  filter(!is.na(Num_Launches))
# Reminder of how the Success_Rate column is defined
note <- "Success rate = fully funded projects as a % of all projects"
print(note)
## [1] "Success rate = fully funded projects as a % of all projects"
# Take a peek at the data frame: contents first, then structure and names
print(kickstart)
## # A tibble: 13 x 7
## Project_Type Num_Launches Num_Success Total_Pledge_Amt Num_Pledges
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Games 2796 911 83144565 1378143
## 2 Film & Video 9600 3891 57951876 647361
## 3 Design 1882 759 50124041 536469
## 4 Music 9086 5067 34953600 522441
## 5 Technology 831 312 29003932 270912
## 6 Publishing 5634 1666 15311251 262738
## 7 Food 1828 688 11117486 138204
## 8 Art 3783 1837 10477939 155782
## 9 Comics 1170 542 9242233 177070
## 10 Theater 1787 1194 7084968 95225
## 11 Fashion 1659 434 6317799 83067
## 12 Photography 1197 427 3283635 46550
## 13 Dance 512 381 1773304 23807
## # ... with 2 more variables: Success_Rate <dbl>, Avg_Pledge <dbl>
str(kickstart)
## Classes 'tbl_df', 'tbl' and 'data.frame': 13 obs. of 7 variables:
## $ Project_Type : chr "Games" "Film & Video" "Design" "Music" ...
## $ Num_Launches : num 2796 9600 1882 9086 831 ...
## $ Num_Success : num 911 3891 759 5067 312 ...
## $ Total_Pledge_Amt: num 83144565 57951876 50124041 34953600 29003932 ...
## $ Num_Pledges : num 1378143 647361 536469 522441 270912 ...
## $ Success_Rate : num 32.6 40.5 40.3 55.8 37.5 29.6 37.6 48.6 46.3 66.8 ...
## $ Avg_Pledge : num 60.3 89.5 93.4 66.9 107.1 ...
names(kickstart)
## [1] "Project_Type" "Num_Launches" "Num_Success"
## [4] "Total_Pledge_Amt" "Num_Pledges" "Success_Rate"
## [7] "Avg_Pledge"
# Smallest and largest number of pledges across the project types
kickstart %>%
  summarize(
    min_pledged = min(Num_Pledges),
    max_pledged = max(Num_Pledges)
  )
## # A tibble: 1 x 2
## min_pledged max_pledged
## <dbl> <dbl>
## 1 23807 1378143
# Plot everything: one scatter plot per numeric column, each drawn against
# Project_Type on the y axis. k_plot is the shared base plot holding the data.
k_plot <- ggplot(kickstart)
k_plot + geom_point(aes(x = Num_Pledges, y = Project_Type))
k_plot + geom_point(aes(x = Num_Launches, y = Project_Type))
k_plot + geom_point(aes(x = Num_Success, y = Project_Type))
k_plot + geom_point(aes(x = Total_Pledge_Amt, y = Project_Type))
k_plot + geom_point(aes(x = Success_Rate, y = Project_Type))
k_plot + geom_point(aes(x = Avg_Pledge, y = Project_Type))
##
### Set up a function from RStudio to create multiple plots in a single frame. ###
# Multiple plot function (Written by Andrie, Solution Engineer at RStudio )
#
# Draws several plot objects on one page using a grid layout.
#
# Arguments:
# - ...: ggplot objects to draw
# - plotlist: optional list of further ggplot objects, drawn after those in ...
# - file: accepted for compatibility with existing calls; not used by the body
# - cols: Number of columns in layout
# - layout: A matrix specifying the layout. If present, 'cols' is ignored.
#
# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),
# then plot 1 will go in the upper left, 2 will go in the upper right, and
# 3 will go all the way across the bottom.
#
# Returns (invisibly): the result of print() for a single plot, otherwise NULL.
# Calling it with no plots at all is a no-op.
#
multiplot <- function(..., plotlist = NULL, file, cols = 1, layout = NULL) {
  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)
  num_plots <- length(plots)

  # Nothing to draw. Without this guard a 1:0 loop would run backwards and
  # fail when it tries to access plots[[1]].
  if (num_plots == 0) {
    return(invisible(NULL))
  }

  if (num_plots == 1) {
    # A single plot needs no layout; print it on the current device
    print(plots[[1]])
  } else {
    # If layout is NULL, then use 'cols' to determine layout
    if (is.null(layout)) {
      # n_rows: number of rows needed, calculated from the number of columns
      n_rows <- ceiling(num_plots / cols)
      layout <- matrix(seq_len(cols * n_rows), ncol = cols, nrow = n_rows)
    }

    # Set up the page. grid functions are called through the namespace so the
    # function does not depend on the grid package being attached.
    grid::grid.newpage()
    grid::pushViewport(
      grid::viewport(layout = grid::grid.layout(nrow(layout), ncol(layout)))
    )

    # Make each plot, in the correct location
    for (i in seq_len(num_plots)) {
      # Get the row/column positions of the layout cells that hold plot i
      cell <- as.data.frame(which(layout == i, arr.ind = TRUE))
      print(
        plots[[i]],
        vp = grid::viewport(
          layout.pos.row = cell$row,
          layout.pos.col = cell$col
        )
      )
    }
  }
}
# Create graphics with ggplot
# Both charts are drawn side by side into Assignment1.png. The png device
# opened here is closed again at the end of this section so the image file is
# completely written before anything else uses it.
png("Assignment1.png")

# Left panel: total pledge amount per project type (bars, in $100K), ordered by
# total pledge amount and coloured by success rate. The average pledge is
# printed in dark green next to each bar.
k_plot1 <- ggplot(
  kickstart,
  aes(
    reorder(Project_Type, Total_Pledge_Amt),
    Total_Pledge_Amt / 100000,
    color = Success_Rate,
    fill = Success_Rate
  )
) +
  ggtitle("2012 Crowdfunded Projects") +
  theme(plot.title = element_text(hjust = 0.5, size = rel(1.3), face = "bold")) +
  labs(x = NULL, y = "Total pledge amount in $100K \n Average Pledge amount in green") +
  ylim(0, 1000) +
  geom_bar(stat = "identity") +
  #geom_point(mapping=aes(y= Avg_Pledge), pch=5, color="red") +
  theme(legend.position = "none") +
  # x and y are inherited from the plot mapping; only the label is added here
  geom_text(
    aes(label = Avg_Pledge),
    vjust = .5, hjust = -0.5, size = 3.0, color = "dark green"
  ) +
  coord_flip()

# Right panel: number of launches per project type (bars, in hundreds), in the
# same category order as the left panel, so the category labels are blanked.
# The number of successful projects is printed in dark blue next to each bar.
k_plot2 <- ggplot(
  kickstart,
  aes(
    reorder(Project_Type, Total_Pledge_Amt),
    Num_Launches / 100,
    color = Success_Rate,
    fill = Success_Rate
  )
) +
  ggtitle(" ") +
  labs(x = NULL, y = "Number of Launches (in hundreds) \n Number of success in blue") +
  theme(plot.title = element_text(hjust = 0.5), axis.text.y = element_blank()) +
  geom_bar(stat = "identity") +
  geom_text(
    aes(label = Num_Success),
    vjust = .5, hjust = -0.5, size = 3.0, color = "dark blue"
  ) +
  ylim(0, 125) +
  coord_flip()

multiplot(k_plot1, k_plot2, cols = 2)
## Loading required package: grid
# Close the png device so Assignment1.png is flushed to disk; invisible()
# keeps the device number returned by dev.off() out of the output.
invisible(dev.off())