Pada bagian ini saya mencoba belajar paket ggplot2. Mohon maaf kalau kurang komunikatif, karena niatnya hanya coba-coba. Pada bagian selanjutnya saya akan mengunggah materi tentang data wrangling, yaitu tentang tibbles.
Walaupun biasa saja, siapa tahu mau dicoba oleh rekan-rekan. Maaf kalau grafiknya jelek (kurang cocok); di sini saya hanya mencoba saja.
Sumber :
Dataset : Super Store Simple
Load Package
library(dplyr)
library(ggplot2)
library(GGally)
library(scales)
Baca Data
# Read the Super Store CSV from the working directory, then convert the three
# grouping columns to factors and the order date to Date in a single step.
stores <- transform(
  read.csv("superstore.csv"),
  segment = as.factor(segment),
  category = as.factor(category),
  sub_category = as.factor(sub_category),
  order_date = as.Date(order_date)
)
# Reference chart of the base-graphics plotting symbols pch 0-20.
# Symbol size (cex) and colour both run from 1 to 7, left to right.

# Empty canvas: type = "n" draws the axes but no points.
plot(x = 1, y = 1, type = "n", xlim = c(1, 7.5), ylim = c(1, 8))

# Top row: symbols 0-6, with their codes printed as scaled text underneath.
points(x = 1:7, y = rep(7, times = 7), pch = 0:6, cex = 1:7, col = 1:7)
text(
  x = 1:7, y = rep(5, times = 7),
  labels = as.character(0:6), cex = 1:7, col = 1:7
)

# Middle row: symbols 7-13, each with its code just to the right.
points(x = 1:7, y = rep(3.5, times = 7), pch = 7:13, cex = 1:7, col = 1:7)
text(x = (1:7) + 0.25, y = rep(3.5, times = 7), labels = as.character(7:13))

# Bottom row: symbols 14-20, labelled the same way.
points(x = 1:7, y = rep(2, times = 7), pch = 14:20, cex = 1:7, col = 1:7)
text(x = (1:7) + 0.25, y = rep(2, times = 7), labels = as.character(14:20))
# Using base plot(): profit against sales, coloured by customer segment.
# The columns are looked up inside `stores` with with() rather than attach(),
# so no copy of the data frame is left on the search path where it could go
# stale or mask other objects.
with(
  stores,
  plot(
    sales, profit,
    pch = 20, cex = 1.2,
    xlab = " Sales", ylab = "Profit",
    main = "Sales dan profit",
    # One colour per segment level, selected by the factor's integer code.
    col = c("slateblue", "firebrick", "darkolivegreen")[as.numeric(segment)]
  )
)
# Legend anchored at data coordinates (0, 5000); colours are listed in the
# same order as the colour vector above.
legend(
  0, 5000,
  c("consumer", "corporate", "home office"),
  cex = 1.2, pch = 20,
  col = c("slateblue", "firebrick", "darkolivegreen")
)

# Using quick plot: same scatter via qplot(). `data = stores` is passed
# explicitly so the call does not depend on attached columns.
qplot(
  sales, profit,
  data = stores,
  main = "sales dan profit", xlab = "sales", ylab = "profit",
  color = segment, shape = segment
)
# Using ggplot(): sales/profit scatter with colour and shape by segment, plus
# a dashed red smoother whose method ggplot2 picks automatically.
ggplot(data = stores, mapping = aes(x = sales, y = profit)) +
  geom_point(mapping = aes(colour = segment, shape = segment), size = 4) +
  geom_smooth(method = "auto", colour = "red", linetype = "dashed") +
  labs(
    title = "sales dan profit",
    subtitle = "nyobaan 1",
    caption = "R lah",
    x = "sales",
    y = "profit"
  ) +
  theme(
    plot.title = element_text(colour = "blue", size = 17, face = "bold"),
    plot.subtitle = element_text(size = 10, face = "bold"),
    # Legend placed inside the panel, near the bottom-right corner.
    legend.position = c(0.9, 0)
  )
# Same scatter with both axes switched to a log10 scale.
# NOTE(review): values <= 0 cannot be log-transformed; if profit contains
# losses those points will be dropped with a warning - confirm this is wanted.
ggplot(data = stores, mapping = aes(x = sales, y = profit)) +
  geom_point(mapping = aes(colour = segment, shape = segment), size = 4) +
  scale_x_log10() +
  scale_y_log10() +
  geom_smooth(method = "auto", colour = "red", linetype = "dashed") +
  labs(
    title = "sales dan profit",
    subtitle = "nyobaan 1",
    caption = "R lah",
    x = "sales",
    y = "profit"
  ) +
  theme(
    plot.title = element_text(colour = "blue", size = 17, face = "bold"),
    plot.subtitle = element_text(size = 10, face = "bold"),
    legend.position = c(0.9, 0)
  )
# The plot can also be split: build the scatter once, keep it in `a`, then
# facet it into one panel per segment (panels laid out as columns).
a <- ggplot(data = stores, mapping = aes(x = sales, y = profit)) +
  geom_point(mapping = aes(colour = segment, shape = segment), size = 4) +
  geom_smooth(method = "auto", colour = "red", linetype = "dashed") +
  labs(
    title = "sales dan profit",
    subtitle = "nyobaan 1",
    caption = "R lah",
    x = "sales",
    y = "profit"
  )
a + facet_grid(. ~ segment)
# Pairwise plot matrix (GGally) of columns 7 to 9 of `stores`.
# NOTE(review): columns are selected by position - presumably sales, quantity
# and profit; confirm against the CSV header.
ggpairs(stores[, 7:9])
# Jitter plot (the author notes being unsure what jitter is): geom_jitter()
# adds a little random noise to each point's position to reduce overplotting.
ggplot(data = stores, mapping = aes(x = quantity, y = sales, colour = segment)) +
  geom_jitter()
# Plain bar plot (base graphics): total sales per segment.
# NOTE: the summary table is stored in `c`. That name shadows base::c as a
# variable (calls to c() still resolve to the function); it is kept because
# later sections of this script read `c`, `sales1`, `segment1` and `segment2`.
c <- stores %>%
  group_by(segment) %>%
  summarise(total_sales = sum(sales))
sales1 <- c$total_sales
segment2 <- as.factor(c$segment)
segment1 <- as.numeric(segment2)

# Only the heights are passed positionally. The second positional argument of
# barplot() is `width`, so passing the segment codes there would draw bars of
# width 1, 2 and 3 instead of equal-width bars.
barplot(
  sales1,
  main = "sales berdasarkan segment",
  xlab = "segment",
  ylab = "sales",
  names.arg = c("consumer", "corporate", "home office"),
  border = "blue",
  density = c(30, 50, 70)
)
# ggplot2 bar plot of sales per segment. geom_col() draws the y values as
# given and stacks the individual rows within each segment's bar.
ggplot(data = stores, mapping = aes(x = segment, y = sales, fill = segment)) +
  geom_col()

# Stacked bar plot: same bars, filled by product category.
ggplot(data = stores, mapping = aes(x = segment, y = sales, fill = category)) +
  geom_col()
# Dodged bar plot: total sales per segment, one bar per category side by side.
# Sales are summed per segment/category first. position_dodge() only shifts
# groups sideways; fed the raw rows it would draw every row of a
# segment/category at the same position, so the visible bar would be the
# largest single sale rather than the total.
stores %>%
  group_by(segment, category) %>%
  summarise(sales = sum(sales)) %>%
  ungroup() %>%
  ggplot(aes(segment, sales, fill = category)) +
  geom_bar(stat = "identity", position = position_dodge())
# Horizontal ggplot2 bar plot of sales per sub-category; sub-categories are
# ordered by their summed sales via reorder(..., FUN = sum).
ggplot(
  data = stores,
  mapping = aes(x = reorder(sub_category, sales, FUN = sum), y = sales)
) +
  geom_col(fill = "brown", alpha = 0.8) +
  # Flip so the sub-category names run down the vertical axis.
  coord_flip() +
  labs(
    title = "Penjualan per subcategory",
    x = "Subcategory",
    y = "Penjualan"
  ) +
  # Thousands separator on the sales axis; axis range fixed to 0-400,000.
  scale_y_continuous(
    labels = scales::number_format(big.mark = ","),
    limits = c(0, 400000)
  ) +
  theme_minimal() +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.minor.y = element_blank()
  )
# Pie chart with base graphics: one slice per segment total. Slices are
# labelled with the numeric segment codes; the legend maps colours to names.
pie(
  x = sales1,
  labels = segment1,
  col = heat.colors(3),
  main = "Data Penjualan Produk"
)
legend(
  x = 1, y = 0.5,
  legend = c("consumer", "corporate", "home office"),
  fill = heat.colors(3),
  cex = 0.8
)
# ggplot2 pie chart: a single stacked bar (x = "") wrapped into polar
# coordinates on the y axis, with all axis decoration removed.
ggplot(data = c, mapping = aes(x = "", y = sales1, fill = segment)) +
  geom_col(width = 1) +
  labs(title = "sales per segment") +
  coord_polar(theta = "y", start = 0, direction = -1) +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    panel.border = element_blank(),
    panel.grid = element_blank()
  )
# ggplot2 pie chart with a percentage label on every slice.
# `persen` is each segment's share of total sales in percent (3 decimals).
persen <- round(sales1 / sum(sales1) * 100, 3)
ggplot(c, aes(x = "", y = sales1, fill = segment)) +
  # The bar layer uses a constant fill vector; the mapped fill (segment) still
  # applies to the label layer and is coloured by scale_fill_manual() below.
  geom_bar(width = 1, stat = "identity", fill = heat.colors(3)) +
  coord_polar("y", start = 0, direction = 1) +
  labs(title = "sales per segment") +
  theme_void() +
  geom_label(
    aes(
      # Midpoint of each slice: half its own value plus everything before it.
      # NOTE(review): this assumes the bars stack in row order - confirm if
      # the bar layer's fill or grouping is ever changed.
      y = sales1 / 2 + c(0, cumsum(sales1)[-length(sales1)]),
      # Segment name, share with an explicit separator and percent sign, then
      # the raw value on a second line.
      label = paste0(segment, " ", persen, "%", "\n value: ", sales1)
    ),
    size = 4
  ) +
  scale_fill_manual(values = c(heat.colors(3))) +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5)
  )
# ggplot2 donut chart: each segment becomes a rectangle spanning its
# cumulative share on y and the band 3-4 on x; polar coordinates bend the
# band into a ring, and xlim starting at 2 leaves the hole in the middle.
c$frac <- sales1 / sum(sales1)                 # share of total sales
c$ymax <- cumsum(c$frac)                       # upper edge of each slice
c$ymin <- c(0, c$ymax[-nrow(c)])               # lower edge = previous upper edge
c$labpo <- (c$ymax + c$ymin) / 2               # label sits at the slice midpoint
c$lab <- paste0(segment2, "\n value: ", sales1)

ggplot(
  data = c,
  mapping = aes(ymax = ymax, ymin = ymin, xmax = 4, xmin = 3, fill = segment)
) +
  geom_rect() +
  geom_label(mapping = aes(y = labpo, label = lab), x = 3.5, size = 6) +
  scale_fill_brewer(palette = 3) +
  coord_polar(theta = "y") +
  xlim(2, 4) +
  theme_void() +
  theme(legend.position = "none")
# Aggregate the order dates by month: every order gets the date of its month
# bin (cut() with breaks = "month"), then profit is totalled per month.
stores$order_date <- as.Date(stores$order_date)
stores$order_month <- as.Date(cut(stores$order_date, breaks = "month"))
stores2 <- summarise(group_by(stores, order_month), profit1 = sum(profit))
# Using base graphics: monthly profit as a dashed line with square markers.
# Columns are read from `stores2` directly instead of attach()-ing the data
# frame, which would leave another copy on the search path.
plot(
  stores2$order_month, stores2$profit1,
  type = "o", pch = 22, lty = 2, col = "blue",
  xlab = "waktu", ylab = "profit"
)
title(main = "data profit per bulan", col.main = "red", font.main = 4)
# Using ggplot2: monthly profit as a line with a point at every month.
ggplot(data = stores2, mapping = aes(x = order_month, y = profit1)) +
  geom_line() +
  geom_point() +
  labs(title = "profit per bulan", x = "waktu", y = "profit") +
  # Centre the bold title.
  theme(plot.title = element_text(size = 14, face = "bold", hjust = 0.5))
# Per segment as well: monthly profit totals for each segment.
# summarise() after a two-key group_by() leaves the result grouped by
# order_month, so the grouping is dropped explicitly to keep later operations
# on `stores3` from being applied per month by accident.
stores3 <- stores %>%
  group_by(order_month, segment) %>%
  summarise(profit1 = sum(profit)) %>%
  ungroup()

# One coloured line per segment; x-axis ticks every four months as mm/yy.
ggplot(stores3, aes(order_month, profit1, colour = segment)) +
  geom_line() +
  geom_point() +
  labs(title = "profit per bulan") +
  theme(plot.title = element_text(size = 14, face = "bold", hjust = 0.5)) +
  xlab("waktu") +
  ylab("profit") +
  scale_x_date(date_breaks = "4 month", date_labels = "%m/%y")
# Keep only the orders with sales between 0 and 100 (inclusive).
datahis <- stores %>%
  filter(sales <= 100, sales >= 0)

# Using base graphics: frequency histogram with ten bins of width 10.
# `breaks` is passed by name instead of as a stray positional argument, and
# FALSE replaces the reassignable shorthand F.
hist(
  datahis$sales,
  breaks = seq(0, 100, 10),
  main = "COntoh Histogram",
  ylab = "frekuensi",
  probability = FALSE
)
# Using ggplot2: histogram of sales with bins of width 11, filled by segment.
ggplot(data = datahis, mapping = aes(x = sales, fill = segment)) +
  geom_histogram(binwidth = 11, colour = "darkblue")

# Second ggplot2 variant: ten bins, with the fill taken from the bin count
# (the layer-level fill replaces the plot-level segment fill) on a
# yellow-to-red gradient.
ggplot(data = datahis, mapping = aes(x = sales, fill = segment)) +
  geom_histogram(mapping = aes(fill = ..count..), bins = 10, colour = "red") +
  labs(title = "contoh histogram", x = "sales", y = "jumlah") +
  scale_fill_gradient(name = "count", low = "yellow", high = "red")
# Draw a random 80% of the rows (sampled by row name) and redraw the
# count-filled histogram on that subset. No set.seed() call is visible here,
# so the sample may differ from run to run.
datahis1 <- sample(rownames(datahis), nrow(datahis) * 0.8)
datahis2 <- datahis[datahis1, ]
ggplot(data = datahis2, mapping = aes(x = sales, fill = segment)) +
  geom_histogram(mapping = aes(fill = ..count..), bins = 10, colour = "red") +
  labs(title = "contoh histogram", x = "sales", y = "jumlah") +
  scale_fill_gradient(name = "count", low = "yellow", high = "red")
map
# Outline data for the "nz" map as a data frame (map_data() needs the maps
# package installed), drawn as blue polygons with black borders.
nz <- map_data(map = "nz")
ggplot(data = nz, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "blue", colour = "black") +
  # Approximate map projection that keeps the aspect ratio sensible.
  coord_quickmap()
Bubble
library(packcircles)
# Toy data: 20 groups, each with a distinct random value between 1 and 100.
dataku <- data.frame(
  group = paste("Group", letters[1:20]),
  value = sample(1:100, 20)
)

# Circle-packing layout with each circle's area taken from the value; the
# layout columns are appended to the data so the labels can use x and y.
packing <- circleProgressiveLayout(dataku$value, sizetype = "area")
dataku <- cbind(dataku, packing)

# Polygon vertices (50 points per circle) for drawing the circles.
dat.gg <- circleLayoutVertices(packing, npoints = 50)

ggplot() +
  geom_polygon(
    data = dat.gg,
    mapping = aes(x = x, y = y, group = id, fill = as.factor(id)),
    colour = "black",
    alpha = 0.6
  ) +
  geom_text(
    data = dataku,
    mapping = aes(x = x, y = y, size = value, label = group)
  ) +
  scale_size_continuous(range = c(1, 4)) +
  theme_void() +
  theme(legend.position = "none") +
  # Equal scaling on both axes so the circles stay round.
  coord_equal()
Box Plot
# Box plot of sales per segment for the filtered orders in `datahis`.
ggplot(data = datahis) +
  geom_boxplot(mapping = aes(x = segment, y = sales))
Heatmap correlation
# Correlation heatmap with base graphics. The three factor columns are
# replaced by their integer codes so they survive the numeric-only filter.
dataheatmap <- transform(
  stores,
  category = as.numeric(category),
  segment = as.numeric(segment),
  sub_category = as.numeric(sub_category)
)
dataheatmap <- select_if(dataheatmap, is.numeric)

# Rowv/Colv = NA switches off the dendrograms and keeps the column order.
heatmap(cor(dataheatmap), Rowv = NA, Colv = NA)
library(reshape)
##
## Attaching package: 'reshape'
## The following object is masked from 'package:dplyr':
##
## rename
# Correlation heatmap with ggplot2: round the correlation matrix to two
# decimals, melt it to long form (reshape names the index columns X1 and X2),
# then draw one tile per pair with the coefficient printed on top.
cor.mat <- round(cor(dataheatmap), 2)
melted.cor.mat <- melt(cor.mat)
ggplot(melted.cor.mat, aes(x = X1, y = X2, fill = value)) +
  geom_tile() +
  geom_text(aes(label = value)) +
  # The fill encodes the correlation coefficient, so the legend says so
  # instead of the "count" title copied over from the histogram.
  scale_fill_gradient("correlation", low = "yellow", high = "red")
network graph
library(igraph)
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:dplyr':
##
## as_data_frame, groups, union
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
# Network graph built from the first 100 rows of `stores`.
stores$customer_id <- as.factor(stores$customer_id)
stores$order_id <- as.factor(stores$order_id)
stores4 <- head(stores, 100)

# Two-column edge list: every row links the value in column 6 to the value in
# column 3.
# NOTE(review): the columns are picked by position - confirm which columns of
# the CSV these are (the flag below is named `isbuyer`, so column 3 is
# presumably the customer).
graph.edge <- as.matrix(stores4[, c(6, 3)])

# graph_from_edgelist() is the current name of the deprecated graph.edgelist().
g <- graph_from_edgelist(graph.edge, directed = FALSE)

# Vertices whose name occurs in the second edge column are drawn small and
# gray; all other vertices are drawn large and black.
isbuyer <- V(g)$name %in% graph.edge[, 2]
plot(
  g,
  vertex.label = NA,
  vertex.color = ifelse(isbuyer, "gray", "black"),
  vertex.size = ifelse(isbuyer, 7, 20)
)