Belajar Package ggplot

Edisi #TohNganggur

Pada bagian ini saya mencoba belajar paket ggplot2. Mohon maaf kalau kurang komunikatif, karena niatnya hanya coba-coba. Pada bagian selanjutnya saya akan upload tentang data wrangling, yaitu tentang tibbles.

Walaupun biasa, siapa tahu mau dicoba sama rekan-rekan. Maaf kalau grafiknya jelek (kurang cocok), di sini hanya nyoba aja.

Sumber :

  1. Sebagian dari Programming Foundation for Data Science di SKILL ACADEMY
  2. Buku Belajar Statistika dengan R dari Prana Ugiana Gio dan Buku Data Mining for Business Analytics (yang kaya kalau tertarik bisa dibeli bukunya, kalau tak mampu bisa dicari di google)
  3. Grup R di telegram (melihat-lihat hasil orang lain)
  4. Sisanya hasil coba-coba

Dataset : Super Store Simple

Load Package

library(dplyr)
library(ggplot2)
library(GGally)
library(scales)

Baca Data

# Read the Super Store dataset from the working directory into a data frame.
stores <- read.csv(file = "superstore.csv")

Part 1

  1. coba-coba
# Turn the three categorical columns into factors in one pass.
stores[c("segment", "category", "sub_category")] <- lapply(
  stores[c("segment", "category", "sub_category")],
  as.factor
)
# Parse order_date into the Date class.
stores[["order_date"]] <- as.Date(stores[["order_date"]])
# Reference chart of the base-graphics plotting symbols (pch) 0 to 20.
# type = "n" sets up the axes without drawing any point.
plot(x = 1, y = 1, type = "n", xlim = c(1, 7.5), ylim = c(1, 8))
# Top row: symbols 0-6, each drawn larger than the last, in colours 1-7.
points(x = 1:7, y = rep(7, 7), pch = 0:6, cex = 1:7, col = 1:7)
# The codes 0-6 printed with the same sizes and colours.
text(x = 1:7, y = rep(5, 7), labels = as.character(0:6), cex = 1:7, col = 1:7)
# Middle row: symbols 7-13, with their codes slightly to the right.
points(x = 1:7, y = rep(3.5, 7), pch = 7:13, cex = 1:7, col = 1:7)
text(x = 1:7 + 0.25, y = rep(3.5, 7), labels = as.character(7:13))
# Bottom row: symbols 14-20, with their codes slightly to the right.
points(x = 1:7, y = rep(2, 7), pch = 14:20, cex = 1:7, col = 1:7)
text(x = 1:7 + 0.25, y = rep(2, 7), labels = as.character(14:20))

  1. Scatter plot
# Scatter plot of sales against profit with base graphics.
# Columns are taken from `stores` explicitly rather than via attach(), so the
# search path is left untouched and later objects cannot be masked by
# same-named columns.
plot(stores$sales, stores$profit,
     pch = 20, cex = 1.2,
     xlab = " Sales", ylab = "Profit",
     main = "Sales dan profit",
     # One colour per segment: the factor's integer codes index the palette.
     col = c("slateblue", "firebrick", "darkolivegreen")[
       as.numeric(stores$segment)
     ])
# Legend anchored at data coordinates (0, 5000), same palette order as above.
legend(0, 5000, c("consumer", "corporate", "home office"),
       cex = 1.2, pch = 20,
       col = c("slateblue", "firebrick", "darkolivegreen"))

# The same scatter plot with qplot(); `data = stores` tells it where the
# columns live. Note that qplot() is deprecated in recent ggplot2 releases.
qplot(sales, profit, data = stores,
      main = "sales dan profit", xlab = "sales", ylab = "profit",
      color = segment, shape = segment)

# Scatter plot with ggplot(): points coloured and shaped by segment, plus a
# dashed red smoother fitted to all points.
ggplot(data = stores, mapping = aes(x = sales, y = profit)) +
  geom_point(mapping = aes(colour = segment, shape = segment), size = 4) +
  geom_smooth(method = "auto", colour = "red", linetype = "dashed") +
  labs(
    title = "sales dan profit",
    subtitle = "nyobaan 1",
    caption = "R lah",
    x = "sales",
    y = "profit"
  ) +
  theme(
    plot.title = element_text(colour = "blue", size = 17, face = "bold"),
    plot.subtitle = element_text(size = 10, face = "bold"),
    # Legend placed at relative panel coordinates (0.9, 0).
    legend.position = c(0.9, 0)
  )

# Same scatter plot with both axes on a log10 scale.
# NOTE(review): log10 is undefined for values <= 0; if sales or profit contain
# such values, those points are dropped with a warning. Confirm this is
# acceptable for the data.
ggplot(data = stores, mapping = aes(x = sales, y = profit)) +
  geom_point(mapping = aes(colour = segment, shape = segment), size = 4) +
  scale_x_log10() +
  scale_y_log10() +
  geom_smooth(method = "auto", colour = "red", linetype = "dashed") +
  labs(
    title = "sales dan profit",
    subtitle = "nyobaan 1",
    caption = "R lah",
    x = "sales",
    y = "profit"
  ) +
  theme(
    plot.title = element_text(colour = "blue", size = 17, face = "bold"),
    plot.subtitle = element_text(size = 10, face = "bold"),
    legend.position = c(0.9, 0)
  )

# The plot can also be stored in an object and extended afterwards.
a <- ggplot(data = stores, mapping = aes(x = sales, y = profit)) +
  geom_point(mapping = aes(colour = segment, shape = segment), size = 4) +
  geom_smooth(method = "auto", colour = "red", linetype = "dashed") +
  labs(
    title = "sales dan profit",
    subtitle = "nyobaan 1",
    caption = "R lah",
    x = "sales",
    y = "profit"
  )
# One panel per segment, laid out side by side.
a + facet_grid(. ~ segment)

# Pairwise plot matrix of columns 7 to 9 of `stores`.
# NOTE(review): the columns are selected by position; confirm which variables
# these are, or select them by name.
ggpairs(stores[, 7:9])

# Jitter plot of quantity against sales, coloured by segment. geom_jitter()
# adds a little random noise to each point to reduce overplotting.
ggplot(stores, aes(x = quantity, y = sales, colour = segment)) +
  geom_jitter()

  1. bar chart
# Base-graphics bar plot of total sales per segment.
# NOTE: the summary table is stored in `c`, which shadows the name of
# base::c(). Calls such as c(1, 2) still work because R skips non-function
# objects when it looks up a function.
c <- stores %>%
  group_by(segment) %>%
  summarise(total_sales = sum(sales))
sales1 <- c$total_sales
segment2 <- as.factor(c$segment)
# Integer codes of the segments; kept because later chunks use them as labels.
segment1 <- as.numeric(segment2)
# Only the heights are passed positionally. The second positional argument of
# barplot() is `width`, so passing the segment codes there would draw bars of
# width 1, 2 and 3.
barplot(sales1,
        main = "sales berdasarkan segment", xlab = "segment",
        ylab = "sales",
        names.arg = c("consumer", "corporate", "home office"),
        border = "blue", density = c(30, 50, 70))

# Bar chart of sales per segment. Each row contributes its own bar piece, and
# the pieces stack up to the segment total.
ggplot(stores, aes(x = segment, y = sales, fill = segment)) +
  geom_col()

# Stacked bar chart: the same totals, split by category through the fill colour.
ggplot(stores, aes(x = segment, y = sales, fill = category)) +
  geom_col()

# Dodged bar chart: total sales per segment, one bar per category.
# Sales are summed per (segment, category) first. Dodging the raw rows would
# draw every row of a group at the same position, on top of each other, so
# each bar would show the largest single row instead of the group total.
stores %>%
  group_by(segment, category) %>%
  summarise(sales = sum(sales)) %>%
  ungroup() %>%
  ggplot(aes(segment, sales, fill = category)) +
  geom_bar(stat = "identity", position = position_dodge())

# Horizontal bar chart of sales per sub-category, with the sub-categories
# ordered by their summed sales.
ggplot(
  data = stores,
  mapping = aes(x = reorder(sub_category, sales, FUN = sum), y = sales)
) +
  geom_col(fill = "brown", alpha = 0.8) +
  coord_flip() +
  labs(
    title = "Penjualan per subcategory",
    x = "Subcategory",
    y = "Penjualan"
  ) +
  # Thousands separator on the value axis; the axis is fixed to 0-400,000.
  scale_y_continuous(
    labels = scales::number_format(big.mark = ","),
    limits = c(0, 400000)
  ) +
  theme_minimal() +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.minor.y = element_blank()
  )

  1. pie chart (paling pusing)
# Pie chart with base graphics.
# The slices are labelled with the numeric segment codes in `segment1`; the
# legend maps the three heat colours to the segment names.
pie(
  x = sales1,
  labels = segment1,
  col = heat.colors(3),
  main = "Data Penjualan Produk"
)
legend(
  x = 1, y = 0.5,
  legend = c("consumer", "corporate", "home office"),
  fill = heat.colors(3),
  cex = 0.8
)

# Pie chart with ggplot2: one stacked bar wrapped into polar coordinates.
# The heights come from the `total_sales` column of the plotted data rather
# than from a separate global vector, so they stay aligned with the rows of
# `c` if it is ever reordered or filtered.
ggplot(c, aes(x = "", y = total_sales, fill = segment)) +
  geom_col(width = 1) +
  labs(title = "sales per segment") +
  coord_polar("y", start = 0, direction = -1) +
  # Strip axes, grid and border so that only the pie and its legend remain.
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    panel.border = element_blank(),
    panel.grid = element_blank(),
    axis.ticks = element_blank(),
    plot.title = element_text(size = 14, face = "bold")
  )

# ggplot2 pie chart labelled with each segment's share of sales in percent.
# `persen` is row-aligned with `c`: one percentage per segment.
persen <- round(sales1 / sum(sales1) * 100, 3)
ggplot(c, aes(x = "", y = total_sales, fill = segment)) +
  # Colours come from the fill scale below instead of a constant `fill`
  # argument, which would override the mapping.
  geom_col(width = 1) +
  coord_polar("y", start = 0, direction = 1) +
  labs(title = "sales per segment") +
  theme_void() +
  # position_stack(vjust = 0.5) centres each label inside its own slice.
  # Bars are stacked in reverse group order, so label positions computed by
  # hand from a forward cumsum() would land on the wrong slices.
  geom_label(
    aes(label = paste0(segment, " ", persen, "%\n value: ", total_sales)),
    position = position_stack(vjust = 0.5),
    size = 4
  ) +
  scale_fill_manual(values = heat.colors(3)) +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5)
  )

# Donut chart with ggplot2.
# Each segment becomes a rectangle spanning [ymin, ymax] of the cumulative
# sales fraction; polar coordinates bend the rectangles into ring sections.
c <- c %>%
  mutate(
    frac = sales1 / sum(sales1),
    ymax = cumsum(frac),
    ymin = c(0, head(ymax, n = -1)),
    # Label position: the middle of each section.
    labpo = (ymax + ymin) / 2,
    lab = paste0(segment2, "\n value: ", sales1)
  )

ggplot(c, aes(xmin = 3, xmax = 4, ymin = ymin, ymax = ymax, fill = segment)) +
  geom_rect() +
  geom_label(aes(y = labpo, label = lab), x = 3.5, size = 6) +
  scale_fill_brewer(palette = 3) +
  coord_polar(theta = "y") +
  # An x range starting below xmin leaves the hole in the middle.
  xlim(c(2, 4)) +
  theme_void() +
  theme(legend.position = "none")

  1. line chart
# Aggregate profit per month.
# order_month is the first day of the month each order_date falls in.
stores[["order_date"]] <- as.Date(stores[["order_date"]])
stores[["order_month"]] <- as.Date(
  cut(stores[["order_date"]], breaks = "month")
)
stores2 <- summarise(
  group_by(stores, order_month),
  profit1 = sum(profit)
)

# Line chart of monthly profit with base graphics.
# The columns are taken from `stores2` explicitly instead of via attach(),
# which would leave the data frame on the search path and risk masking.
plot(stores2$order_month, stores2$profit1,
     type = "o", pch = 22, lty = 2, col = "blue",
     xlab = "waktu", ylab = "profit")
title(main = "data profit per bulan", col.main = "red", font.main = 4)

# Line chart of monthly profit with ggplot2: a line with a point per month.
ggplot(data = stores2, mapping = aes(x = order_month, y = profit1)) +
  geom_line() +
  geom_point() +
  labs(title = "profit per bulan", x = "waktu", y = "profit") +
  # Bold, centred title.
  theme(plot.title = element_text(size = 14, face = "bold", hjust = 0.5))

# Monthly profit per segment: one line per segment.
# ungroup() drops the grouping by order_month that summarise() leaves behind
# after a two-key group_by(), so later verbs on stores3 are not applied per
# month by accident.
stores3 <- stores %>%
  group_by(order_month, segment) %>%
  summarise(profit1 = sum(profit)) %>%
  ungroup()
ggplot(stores3, aes(order_month, profit1, colour = segment)) +
  geom_line() +
  geom_point() +
  labs(title = "profit per bulan") +
  theme(plot.title = element_text(size = 14, face = "bold", hjust = 0.5)) +
  xlab("waktu") +
  ylab("profit") +
  # A tick every four months, labelled as month/year.
  scale_x_date(date_breaks = "4 month", date_labels = "%m/%y")

  1. histogram
# Keep only the rows whose sales lie between 0 and 100 (inclusive).
datahis <- stores %>%
  filter(sales <= 100, sales >= 0)
# Histogram with base graphics: ten bins of width 10, showing counts.
# `breaks` is passed by name, and FALSE is spelled out because `F` is an
# ordinary variable that can be reassigned.
hist(datahis$sales,
     breaks = seq(0, 100, 10),
     probability = FALSE,
     main = "COntoh Histogram", ylab = "frekuensi")

# Histogram with ggplot2: bins of width 11, stacked by segment through the
# fill colour, with dark blue outlines.
ggplot(data = datahis, mapping = aes(x = sales, fill = segment)) +
  geom_histogram(binwidth = 11, colour = "darkblue")

# Histogram whose bars are shaded by their own height (yellow = low count,
# red = high count).
# after_stat(count) replaces the deprecated `..count..` notation. No
# `fill = segment` is mapped at plot level because the layer-level fill
# overrides it anyway.
ggplot(datahis, aes(sales)) +
  geom_histogram(aes(fill = after_stat(count)), bins = 10, col = "red") +
  labs(title = "contoh histogram", x = "sales", y = "jumlah") +
  scale_fill_gradient("count", low = "yellow", high = "red")

# Same histogram on a random 80% sample of the rows (drawn without
# replacement, by row name). floor() makes the whole-number sample size
# explicit. No seed is set, so the sample differs between runs.
datahis1 <- sample(rownames(datahis), floor(nrow(datahis) * 0.8))
datahis2 <- datahis[datahis1, ]
# after_stat(count) replaces the deprecated `..count..` notation. The
# layer-level fill overrides any plot-level fill, so none is mapped there.
ggplot(datahis2, aes(sales)) +
  geom_histogram(aes(fill = after_stat(count)), bins = 10, col = "red") +
  labs(title = "contoh histogram", x = "sales", y = "jumlah") +
  scale_fill_gradient("count", low = "yellow", high = "red")

map

# Outline data for the "nz" map, drawn as blue polygons with black borders.
nz <- map_data(map = "nz")
ggplot(data = nz, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(colour = "black", fill = "blue") +
  # Approximate map projection that keeps the aspect ratio sensible.
  coord_quickmap()

bubble

# Packed-bubble chart of 20 groups with random values.
library(packcircles)
# Twenty groups, each with a distinct random value drawn from 1 to 100.
dataku <- data.frame(
  group = paste("Group", letters[1:20]),
  value = sample(1:100, 20)
)
# Circle layout in which each circle's area represents the group's value.
packing <- circleProgressiveLayout(dataku$value, sizetype = "area")
dataku <- cbind(dataku, packing)
# Polygon outlines (50 points per circle) for drawing the circles.
dat.gg <- circleLayoutVertices(packing, npoints = 50)
ggplot() +
  geom_polygon(
    data = dat.gg,
    mapping = aes(x = x, y = y, group = id, fill = as.factor(id)),
    colour = "black",
    alpha = 0.6
  ) +
  # Group names at the circle centres, with text size scaled by value.
  geom_text(
    data = dataku,
    mapping = aes(x = x, y = y, size = value, label = group)
  ) +
  scale_size_continuous(range = c(1, 4)) +
  theme_void() +
  theme(legend.position = "none") +
  # Equal axis scaling keeps the circles round.
  coord_equal()

Box Plot

# Box plot of sales per segment, using the filtered data in `datahis`.
ggplot(data = datahis, mapping = aes(x = segment, y = sales)) +
  geom_boxplot()

heatmap correlation

# Correlation heat map with base graphics.
# The three factor columns are replaced by their integer codes, then only the
# numeric columns are kept.
# NOTE(review): correlations on integer codes of unordered categories depend
# on the level order; interpret those rows and columns with care.
dataheatmap <- stores %>%
  mutate(
    category = as.numeric(category),
    segment = as.numeric(segment),
    sub_category = as.numeric(sub_category)
  ) %>%
  select_if(is.numeric)

# Rowv = NA and Colv = NA switch off the dendrograms and the reordering.
heatmap(cor(dataheatmap), Rowv = NA, Colv = NA)

# Correlation heat map with ggplot2.
# Attaching reshape masks dplyr::rename (see the console output below).
library(reshape)
## 
## Attaching package: 'reshape'
## The following object is masked from 'package:dplyr':
## 
##     rename
# Correlation matrix rounded to two decimals, then melted to long format with
# one row per cell (columns X1, X2 and value).
cor.mat <- round(cor(dataheatmap), 2)
melted.cor.mat <- melt(cor.mat)
ggplot(melted.cor.mat, aes(x = X1, y = X2, fill = value)) +
  geom_tile() +
  # x and y are inherited from the plot-level mapping.
  geom_text(aes(label = value)) +
  # The fill shows correlation coefficients, so the legend is titled to match.
  scale_fill_gradient("correlation", low = "yellow", high = "red")

network graph

# Network graph built from the first 100 rows of `stores`.
# Attaching igraph masks several dplyr, stats and base functions (see the
# console output below).
library(igraph)
## 
## Attaching package: 'igraph'
## The following objects are masked from 'package:dplyr':
## 
##     as_data_frame, groups, union
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
stores$customer_id <- as.factor(stores$customer_id)
stores$order_id <- as.factor(stores$order_id)
stores4 <- head(stores, 100)
# Two-column edge list taken from columns 6 and 3 of `stores`.
# NOTE(review): the columns are selected by position; presumably these are
# customer_id and order_id. Confirm, or select them by name.
graph.edge <- as.matrix(stores4[, c(6, 3)])
# graph_from_edgelist() is the current name of the deprecated graph.edgelist().
g <- graph_from_edgelist(graph.edge, directed = FALSE)
# TRUE for vertices that appear in the second column of the edge list.
isbuyer <- V(g)$name %in% graph.edge[, 2]
# Those vertices are drawn small and gray, the others large and black.
plot(g,
     vertex.label = NA,
     vertex.color = ifelse(isbuyer, "gray", "black"),
     vertex.size = ifelse(isbuyer, 7, 20))

SEMOGA BERMANFAAT