Assignment 1

CrowdFunded Projects

#  Load libraries and set environment options
library(dplyr)
library(tidyr)
library(knitr)
library(readxl)
library(ggplot2)

#  Printing options: scipen = 10 biases output toward fixed rather than
#  scientific notation; digits = 2 limits printed numbers to 2 significant digits
options(scipen = 10, digits = 2)

Set up the code and data directory and environment

Then read in the kickstart xlsx file, omitting the incomplete rows

# list of files in working directory
dir()
## [1] "A2_kickstart.xlsx" "Assignment1.png"   "Assignment1.Rmd"
getwd()
## [1] "C:/Users/hmvsmith/Documents/MBA677/Assignment 1"
# No setwd() call: a hard-coded, user-specific absolute path makes the script
# fail on any other machine. The listing above shows the data file is already
# in the working directory, so run (or knit) the script from the folder that
# holds the data file and fail early with a clear message if it is missing.
stopifnot(file.exists("A2_kickstart.xlsx"))
# list of files in "data files" directory
#dir("data files")
#excel_sheets("A2_kickstart.xlsx")

#  Import the downloaded Kickstart workbook (saved as .xlsx) with readxl
kickstart <- read_xlsx(path = "A2_kickstart.xlsx", col_names = TRUE)

#  Replace the spreadsheet headers with syntactic column names
names(kickstart) <- c(
  "Project_Type", "Num_Launches", "Num_Success", "Total_Pledge_Amt",
  "Num_Pledges", "Success_Rate", "Avg_Pledge"
)

#  Reminder of how the Success_Rate column is defined
note <- "Success rate = fully funded projects as a % of all projects"
print(note)
## [1] "Success rate = fully funded projects as a % of all projects"

#  Keep only the rows that have a launch count
kickstart <- kickstart %>%
  filter(!is.na(Num_Launches))

Take a brief look at the data to make sure the data frame is readable

# Take a peek at the data frame: contents, structure and column names
print(x = kickstart)
## # A tibble: 13 x 7
##    Project_Type Num_Launches Num_Success Total_Pledge_Amt Num_Pledges
##           <chr>        <dbl>       <dbl>            <dbl>       <dbl>
##  1        Games         2796         911         83144565     1378143
##  2 Film & Video         9600        3891         57951876      647361
##  3       Design         1882         759         50124041      536469
##  4        Music         9086        5067         34953600      522441
##  5   Technology          831         312         29003932      270912
##  6   Publishing         5634        1666         15311251      262738
##  7         Food         1828         688         11117486      138204
##  8          Art         3783        1837         10477939      155782
##  9       Comics         1170         542          9242233      177070
## 10      Theater         1787        1194          7084968       95225
## 11      Fashion         1659         434          6317799       83067
## 12  Photography         1197         427          3283635       46550
## 13        Dance          512         381          1773304       23807
## # ... with 2 more variables: Success_Rate <dbl>, Avg_Pledge <dbl>
str(object = kickstart)
## Classes 'tbl_df', 'tbl' and 'data.frame':    13 obs. of  7 variables:
##  $ Project_Type    : chr  "Games" "Film & Video" "Design" "Music" ...
##  $ Num_Launches    : num  2796 9600 1882 9086 831 ...
##  $ Num_Success     : num  911 3891 759 5067 312 ...
##  $ Total_Pledge_Amt: num  83144565 57951876 50124041 34953600 29003932 ...
##  $ Num_Pledges     : num  1378143 647361 536469 522441 270912 ...
##  $ Success_Rate    : num  32.6 40.5 40.3 55.8 37.5 29.6 37.6 48.6 46.3 66.8 ...
##  $ Avg_Pledge      : num  60.3 89.5 93.4 66.9 107.1 ...
names(x = kickstart)
## [1] "Project_Type"     "Num_Launches"     "Num_Success"     
## [4] "Total_Pledge_Amt" "Num_Pledges"      "Success_Rate"    
## [7] "Avg_Pledge"
# Smallest and largest number of pledges across the project types
kickstart %>%
  summarize(min_pledged = min(Num_Pledges), max_pledged = max(Num_Pledges))
## # A tibble: 1 x 2
##   min_pledged max_pledged
##         <dbl>       <dbl>
## 1       23807     1378143

Run some quick plots of points to see what the data looks like

# Plot every numeric column against the project type, one scatter per column
k_plot <- ggplot(data = kickstart)

# Number of pledges
k_plot + geom_point(mapping = aes(x = Num_Pledges, y = Project_Type))

# Number of launches
k_plot + geom_point(mapping = aes(x = Num_Launches, y = Project_Type))

# Number of successful projects
k_plot + geom_point(mapping = aes(x = Num_Success, y = Project_Type))

# Total amount pledged
k_plot + geom_point(mapping = aes(x = Total_Pledge_Amt, y = Project_Type))

# Success rate
k_plot + geom_point(mapping = aes(x = Success_Rate, y = Project_Type))

# Average pledge
k_plot + geom_point(mapping = aes(x = Avg_Pledge, y = Project_Type))

##
### Set up a function from RStudio to create multiple plots in a single frame. ###

# Multiple plot function (Written by Andrie, Solution Engineer at RStudio )
#
# Draws several plots on one page by printing each into a cell of a grid layout.
#
# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects)
# - ...:      plot objects, passed individually
# - plotlist: optional list of plot objects, appended after those given in ...
# - file:     not used by this function; kept so existing calls keep working
# - cols:     Number of columns in layout
# - layout:   A matrix specifying the layout. If present, 'cols' is ignored.
#
# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),
# then plot 1 will go in the upper left, 2 will go in the upper right, and
# 3 will go all the way across the bottom.
#
# With no plots at all, nothing is drawn and NULL is returned invisibly.
# With exactly one plot, it is printed directly without setting up a layout.
#
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
  # grid functions are called through grid:: so the function neither attaches a
  # package as a hidden side effect nor carries on silently if loading fails

  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)

  numPlots <- length(plots)

  # Nothing to draw: return before touching the graphics device. Without this
  # guard an empty call fails with a "subscript out of bounds" error.
  if (numPlots == 0) {
    return(invisible(NULL))
  }

  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    # Make the panel
    # ncol: Number of columns of plots
    # nrow: Number of rows needed, calculated from # of cols
    layout <- matrix(seq_len(cols * ceiling(numPlots/cols)),
                    ncol = cols, nrow = ceiling(numPlots/cols))
  }

  if (numPlots == 1) {
    print(plots[[1]])

  } else {
    # Set up the page
    grid::grid.newpage()
    grid::pushViewport(
      grid::viewport(layout = grid::grid.layout(nrow(layout), ncol(layout)))
    )

    # Make each plot, in the correct location
    for (i in seq_len(numPlots)) {
      # Get the i,j matrix positions of the regions that contain this subplot
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))

      print(plots[[i]], vp = grid::viewport(layout.pos.row = matchidx$row,
                                            layout.pos.col = matchidx$col))
    }
  }
}

Create the final visual using two bar charts in one frame.

#  Create graphics with ggplot
#  Open a PNG device so the combined figure is written to Assignment1.png.
#  The device is closed again right after multiplot() at the end of this chunk.
png("Assignment1.png")

#  Left panel: total pledge amount (in $100K) per project type, bars ordered by
#  total pledge amount and shaded by success rate; the average pledge is
#  printed next to each bar in dark green
k_plot1<-ggplot(kickstart, aes(reorder(Project_Type,Total_Pledge_Amt), Total_Pledge_Amt/100000, color=Success_Rate, fill= Success_Rate)) + 
  ggtitle("2012 Crowdfunded Projects") +
  theme(plot.title = element_text(hjust = 0.5, size=rel(1.3), face="bold")) +
  labs(x=NULL, y="Total pledge amount in $100K \n Average Pledge amount in green") +
  ylim(0, 1000) 
k_plot1<- k_plot1 + geom_bar(stat="identity") +
  #geom_point(mapping=aes(y= Avg_Pledge), pch=5, color="red") +
  theme(legend.position = "none") +
  geom_text(aes(reorder(Project_Type,Total_Pledge_Amt), Total_Pledge_Amt/100000, label=Avg_Pledge), vjust=.5, hjust=-0.5, size=3.0, color="dark green") +

  coord_flip()

#  Right panel: number of launches (in hundreds) in the same project-type order
#  as the left panel, so the y-axis text is blanked out; the number of
#  successful projects is printed next to each bar in dark blue
k_plot2 <-ggplot(kickstart, aes(reorder(Project_Type,Total_Pledge_Amt), Num_Launches/100, color=Success_Rate, fill=Success_Rate)) + 
  ggtitle(" ") +
  labs(x=NULL, y="Number of Launches (in hundreds) \n Number of success in blue") +
  theme(plot.title = element_text(hjust = 0.5), axis.text.y=element_blank()) 
k_plot2 <- k_plot2 + geom_bar(stat="identity") +
  geom_text(aes(reorder(Project_Type,Total_Pledge_Amt), Num_Launches/100, label=Num_Success), vjust=.5, hjust=-0.5, size=3.0, color="dark blue") +
  ylim(0, 125) +
 coord_flip()

#  Draw both panels side by side on the open PNG device
multiplot(k_plot1,k_plot2, cols=2)

#  Close the PNG device so Assignment1.png is finalised on disk and later
#  plots are not silently redirected into the file
invisible(dev.off())
## Loading required package: grid