# MPH 678 Assignment 1

## Load all Libraries needed:
library(dplyr)
library(ggplot2)
library(ggthemes)
library(lubridate)
library(gplots)
library(rworldmap)
library(knitr)
library(readxl)
library(reshape)

## Read the Kickstarter workbook into `datax` and inspect its structure.
# NOTE(review): this is an absolute path on one user's machine, so the script
# only runs where that exact file exists.
kickstart_file <- "C:/Users/mlapika/Downloads/A2_kickstart.xlsx"
datax <- read_xlsx(path = kickstart_file)
str(datax)

## Classes 'tbl_df', 'tbl' and 'data.frame':    15 obs. of  7 variables:
##  $ Crowdfunded projects on Kickstarter, 2012: chr  "Games" "Film & Video" "Design" "Music" ...
##  $ Launched                                 : num  2796 9600 1882 9086 831 ...
##  $ Successful                               : num  911 3891 759 5067 312 ...
##  $ Money pledged, $                         : num  83144565 57951876 50124041 34953600 29003932 ...
##  $ Pledges                                  : num  1378143 647361 536469 522441 270912 ...
##  $ Success rate, %                          : num  32.6 40.5 40.3 55.8 37.5 29.6 37.6 48.6 46.3 66.8 ...
##  $ Average pledge, $                        : num  60.3 89.5 93.4 66.9 107.1 ...

# Optional previews of the raw import (kept commented out):
# head(datax)
# kable(head(datax, 5))
# kable(tail(datax, 5))

## Data transformation ----
# Work on a copy named `kickstarter` so the raw import `datax` stays untouched.
kickstarter <- datax

# Drop the columns the chart does not use. Of the seven imported columns this
# keeps 1 (category), 4 (money pledged, $) and 6 (success rate, %).
kickstarter <- kickstarter[, -c(2, 3, 5, 7)]

# Remove rows whose success rate (now column 3) is missing.
# `[[3]]` extracts a plain vector. The previous `kickstarter[, 3]` returned a
# one-column tibble, so `is.na()` produced a logical matrix, and indexing
# tibble rows with a matrix is deprecated in newer tibble releases.
kickstarter <- kickstarter[!is.na(kickstarter[[3]]), ]

# Give the money column a syntactic name so it can be referenced directly.
colnames(kickstarter)[2] <- "money_pledge"

# Express money pledged in millions of dollars, as shown on the chart.
# transform() hands back a base data.frame with the remaining column names
# made syntactic (e.g. "Success.rate...").
kickstarter <- transform(kickstarter, money_pledge = money_pledge / 1000000)

# Count the remaining NA values per column; vapply() guarantees an integer
# result for every column, unlike sapply().
vapply(kickstarter, function(x) sum(is.na(x)), integer(1))

## Crowdfunded.projects.on.Kickstarter..2012 
##                                         0 
##                              money_pledge 
##                                         0 
##                           Success.rate... 
##                                         0

# In this case we don't have any significant missing data, as the values that show up as missing are just blanks based on the graph

# Make sure `kickstarter` is a plain data frame before reshaping it.
# Ordering by money pledged or by success rate is only available through the
# optional previews below, which are kept commented out.
kickstarter <- data.frame(kickstarter)
# kickstarter
# (head(kickstarter[order(-kickstarter$Success.rate...), c(1, 2, 3)], 15))
# kable(head(kickstarter[order(-kickstarter$money_pledge), c(1, 2, 3)], 15))

# Copy the data and give the columns the display names used by the chart.
data <- kickstarter
colnames(data) <- c(
  "Crowdfunded_Projects_Kickstarter2012",
  "Money Pledged($m)",
  "Success Rate(%)"
)

# Single-measure subsets: category + money pledged, category + success rate.
# Neither is referenced again in the visible part of the script.
data1 <- data[, -3]
data2 <- data[, -2]

## Melt the data to long format: one row per (category, measure) pair.
# The id and measure columns are named explicitly. The earlier call passed a
# misnamed argument that melt() silently ignored, so the id column was only
# found through melt()'s automatic guess ("Using ... as id variables").
mydata <- melt(
  data,
  id.vars = "Crowdfunded_Projects_Kickstarter2012",
  measure.vars = c("Money Pledged($m)", "Success Rate(%)")
)

### Rename the melted columns.
colnames(mydata) <- c("Crowdfunded_Projects_Kickstarter2012", "Category", "Value")

# Turn the category column into a factor whose level order fixes the order in
# which categories are drawn on the chart axis. Any category not listed here
# becomes NA.
mydata$Crowdfunded_Projects_Kickstarter2012 <- factor(
  mydata$Crowdfunded_Projects_Kickstarter2012,
  levels = c(
    "Fashion", "Publishing", "Games", "Photography", "Technology", "Food",
    "Design", "Film & Video", "Comics", "Art", "Music", "Theater", "Dance"
  )
)

 # Dot chart of money pledged ($m) and success rate (%) per Kickstarter category.
# Both measures share one value axis and are told apart by colour; each dot is
# labelled with its value rounded to one decimal. coord_flip() puts the
# categories on the vertical axis.
#
# The stray `size = 8` that used to be passed to ggplot() itself was dropped:
# ggplot() ignores it, and the dot size is set in geom_point() below.
#
# Earlier variant that reordered categories by value (kept for reference):
# ggplot(pledged.tot,aes(x=reorder(Crowdfunded_Projects_Kickstarter2012,+Value),y=Value,color = Category, label=round(Value,1)), size=8)  +
#
# NOTE(review): `size` inside element_rect()/element_line() is deprecated in
# favour of `linewidth` in recent ggplot2 releases; it is kept here so the
# script still runs on older ggplot2 versions.
ggplot(
  mydata,
  aes(
    x = Crowdfunded_Projects_Kickstarter2012,
    y = Value,
    color = Category,
    label = round(Value, 1)
  )
) +
  geom_point(size = 8.2) +
  coord_flip() +
  geom_text(color = "black", size = 3, fontface = "bold") +
  ggtitle("Crowdfunded Projects on Kickstarter 2012") +
  xlab("") +
  ylab("Money Pledged($m) and Success Rate(%)") +
  theme(
    # Legend: bold title, placed under the plot on a light-blue box.
    legend.title = element_text(colour = "black", size = 10, face = "bold"),
    legend.position = "bottom",
    legend.background = element_rect(
      fill = "lightblue", size = 0.5, linetype = "solid", colour = "lightblue"
    ),
    # Title, axis titles, background and horizontal guide lines.
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    axis.title = element_text(size = 8),
    plot.background = element_rect(fill = "#E0F2F7"),
    panel.grid.major.y = element_line(colour = "lightblue", size = 0.1)
  )

# MPH 678 Assignment 1

# Mike Lapika

# 27 July 2018