## Load all Libraries needed:
library(dplyr)
library(ggplot2)
library(ggthemes)
library(lubridate)
library(gplots)
library(rworldmap)
library(knitr)
library(readxl)
library(reshape)
## Read the Kickstarter workbook and inspect its structure.
# NOTE(review): the path below is absolute and machine-specific; adjust it
# when running this script on another machine.
datax <- read_xlsx(path = "C:/Users/mlapika/Downloads/A2_kickstart.xlsx")
str(object = datax)
## Output recorded from a previous run:
## Classes 'tbl_df', 'tbl' and 'data.frame': 15 obs. of 7 variables:
## $ Crowdfunded projects on Kickstarter, 2012: chr "Games" "Film & Video" "Design" "Music" ...
## $ Launched : num 2796 9600 1882 9086 831 ...
## $ Successful : num 911 3891 759 5067 312 ...
## $ Money pledged, $ : num 83144565 57951876 50124041 34953600 29003932 ...
## $ Pledges : num 1378143 647361 536469 522441 270912 ...
## $ Success rate, % : num 32.6 40.5 40.3 55.8 37.5 29.6 37.6 48.6 46.3 66.8 ...
## $ Average pledge, $ : num 60.3 89.5 93.4 66.9 107.1 ...
# Optional previews, left disabled:
# head(datax)
# kable(head(datax, 5))
# kable(tail(datax, 5))
# Continue under the name `kickstarter`.
kickstarter <- datax
## Data transformation
# Drop the columns that are not needed (positions 2, 3, 5 and 7), which leaves
# the project category, the money pledged and the success rate.
kickstarter <- kickstarter[, -c(2, 3, 5, 7)]
# Remove rows whose third column (success rate) is NA. `[[` extracts the
# column as a plain vector, so the row filter is a logical vector rather than
# the one-column structure that `kickstarter[, 3]` returns for a tibble.
kickstarter <- kickstarter[!is.na(kickstarter[[3]]), ]
# Rename the money column so it can be referenced by name in transform().
colnames(kickstarter)[2] <- "money_pledge"
# Divide the money pledged by one million to get the values shown in the graph.
# transform() also returns a plain data.frame with syntactic column names (see
# the names in the output below), so no extra data.frame() call is needed.
kickstarter <- transform(kickstarter, money_pledge = money_pledge / 1e6)
# Count the remaining NA values per column to check whether any column still
# needs cleaning. vapply() pins the result to one integer per column.
vapply(kickstarter, function(x) sum(is.na(x)), integer(1))
## Crowdfunded.projects.on.Kickstarter..2012
## 0
## money_pledge
## 0
## Success.rate...
## 0
# In this case no significant data is missing: the rows removed above were
# just blank entries, based on the graph.
# Order the data from the top by money pledged and by success rate
# (previews left disabled):
# kickstarter
# (head(kickstarter[order(-kickstarter$Success.rate...), c(1,2,3)], 15))
# kable(head(kickstarter[order(-kickstarter$money_pledge), c(1,2,3)], 15))
# Copy with presentation-friendly column names.
data <- kickstarter
colnames(data) <- c(
  "Crowdfunded_Projects_Kickstarter2012",
  "Money Pledged($m)",
  "Success Rate(%)"
)
# Single-measure subsets: data1 drops the success rate column, data2 drops the
# money pledged column.
data1 <- data[, -3]
data2 <- data[, -2]
## Melt the data to long format: one row per project category and measure.
# The id and measure columns are named explicitly through id.vars and
# measure.vars. Previously the measure columns were passed under an argument
# name that melt() does not define, so melt() had to guess the id variable
# (hence the "Using ... as id variables" message).
mydata <- reshape::melt(
  data,
  id.vars = "Crowdfunded_Projects_Kickstarter2012",
  measure.vars = c("Money Pledged($m)", "Success Rate(%)")
)
# Rename the melted columns.
colnames(mydata) <- c("Crowdfunded_Projects_Kickstarter2012", "Category", "Value")
# Turn the project category into a factor with an explicit level order.
# Any category not listed in `levels` becomes NA.
mydata$Crowdfunded_Projects_Kickstarter2012 <- factor(
  mydata$Crowdfunded_Projects_Kickstarter2012,
  levels = c(
    "Fashion", "Publishing", "Games", "Photography", "Technology", "Food",
    "Design", "Film & Video", "Comics", "Art", "Music", "Theater", "Dance"
  )
)
## Dot plot of money pledged ($m) and success rate (%) per project category,
## with each point labelled by its value rounded to one decimal.
# Earlier variant kept for reference (reorders the categories by value and
# uses a `pledged.tot` object that is not defined in this part of the script):
# ggplot(pledged.tot,aes(x=reorder(Crowdfunded_Projects_Kickstarter2012,+Value),y=Value,color = Category, label=round(Value,1)), size=8) +
# The stray `size = 8` previously passed to ggplot() was dropped: it sat
# outside aes() and every layer below sets its own size.
ggplot(
  mydata,
  aes(
    x = Crowdfunded_Projects_Kickstarter2012,
    y = Value,
    color = Category,
    label = round(Value, 1)
  )
) +
  geom_point(size = 8.2) +
  coord_flip() +
  geom_text(color = "black", size = 3, fontface = "bold") +
  ggtitle("Crowdfunded Projects on Kickstarter 2012") +
  xlab("") +
  ylab("Money Pledged($m) and Success Rate(%)") +
  # NOTE(review): `size` in element_rect()/element_line() is kept as written;
  # recent ggplot2 releases prefer `linewidth` there - confirm the installed
  # version before switching.
  theme(
    legend.title = element_text(colour = "black", size = 10, face = "bold"),
    legend.position = "bottom",
    legend.background = element_rect(
      fill = "lightblue", size = 0.5, linetype = "solid", colour = "lightblue"
    ),
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    axis.title = element_text(size = 8),
    plot.background = element_rect(fill = "#E0F2F7"),
    panel.grid.major.y = element_line(colour = "lightblue", size = 0.1)
  )
