Tugas Praktikum
1. Library
library(heatmaply) #Untuk plot heatmap Missing Data
library(visdat) #Untuk plot Missing Data
library(reshape2) #Modifikasi DataFrame
library(tidyr) #Modifikasi DataFrame
library(ggplot2) #Plot
library(psych) #Pair Plot
library(DataExplorer) #Corelation Plot
library(graphics)
library(readxl)
library(ggpubr)
library(RColorBrewer)
library(PerformanceAnalytics)
library(tidyverse)
library(hrbrthemes)
library(viridis)
library(gridExtra)
library(ggrepel)
library(plotly)
library(dplyr)
2. Mengakses Data Set
# Read the indicator table from the Excel workbook.
data <- read_excel("newdata.xlsx")
# Remove every literal ", Prop." from the province names. fixed = TRUE stops
# the "." from being interpreted as a regex wildcard (any character).
data$Provinces <- gsub(", Prop.", "", data$Provinces, fixed = TRUE)
# Preview the first rows.
head(data)
## # A tibble: 6 x 8
## Provinces Provinces_ID H2O STA POV MORB POP POV_P
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Bali ID.BA 87.6 78.6 4.01 30.3 4292154 172115.
## 2 Banten ID.BT 74.9 81.5 5.24 32.4 12689736 664942.
## 3 Bengkulu ID.BE 51.7 80.3 15.4 27.1 1963300 302937.
## 4 DI Yogyakarta ID.YO 76.6 80.6 12.1 35.3 3802872 461288.
## 5 DKI Jakarta ID.JK 93.4 83.1 3.57 28.8 10467629 373694.
## 6 Gorontalo ID.GO 79.2 52.4 16.8 37.0 1185492 199281.
3. Rincian Variabel pada Dataset
Variabel-variabel yang terdapat pada dataset ini adalah sebagai
berikut:
1. MORB : Morbidity Rate (in %)
2. STA : Household Access to Safe Sanitation (in % of total
household)
3. H2O : Household Access to Safe Water (in % of total
household)
4. POV : Poverty Rate (in % of population)
5. POP : Total Population (in number of people)
6. POV_P: POV \(\times\) POP / 100 (number of people living in poverty)
4. Histogram
Akan ditampilkan tingkat penyakit di Indonesia pada tahun 2018
# One histogram per indicator (MORB, STA, H2O, POV), each with its own fill.
# No `bins`/`binwidth` is given, so stat_bin() falls back to its default.
# Note: the object `c` shares its name with base::c(); calls such as c(1, 2)
# still resolve to the function because R skips non-function bindings there.
a <- ggplot(data, aes(x = MORB)) +
  geom_histogram(fill = "brown", colour = "midnight blue")
b <- ggplot(data, aes(x = STA)) +
  geom_histogram(fill = "green", colour = "midnight blue")
c <- ggplot(data, aes(x = H2O)) +
  geom_histogram(fill = "black", colour = "midnight blue")
d <- ggplot(data, aes(x = POV)) +
  geom_histogram(fill = "orange", colour = "midnight blue")
# Draw the four panels together in one figure.
ggarrange(a, b, c, d)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
5. Boxplot
# One horizontal boxplot per indicator (the variable is mapped to x).
e <- ggplot(data = data, mapping = aes(x = MORB)) +
  geom_boxplot(fill = "brown", colour = "midnight blue")
f <- ggplot(data = data, mapping = aes(x = STA)) +
  geom_boxplot(fill = "green", colour = "midnight blue")
g <- ggplot(data = data, mapping = aes(x = H2O)) +
  geom_boxplot(fill = "black", colour = "midnight blue")
h <- ggplot(data = data, mapping = aes(x = POV)) +
  geom_boxplot(fill = "orange", colour = "midnight blue")
# Draw the four panels together in one figure.
ggarrange(e, f, g, h)
6. Density
# Kernel density estimate for each indicator, using the same fill colours as
# the histograms and boxplots.
i <- data %>%
  ggplot(aes(x = MORB)) +
  geom_density(fill = "brown", colour = "midnight blue")
j <- data %>%
  ggplot(aes(x = STA)) +
  geom_density(fill = "green", colour = "midnight blue")
k <- data %>%
  ggplot(aes(x = H2O)) +
  geom_density(fill = "black", colour = "midnight blue")
l <- data %>%
  ggplot(aes(x = POV)) +
  geom_density(fill = "orange", colour = "midnight blue")
# Draw the four panels together in one figure.
ggarrange(i, j, k, l)
7. Pie Chart
# Sort rows by POV_P in ascending order and keep the last five, i.e. the five
# provinces with the largest POV_P values.
z <- order(data$POV_P)
data_mis <- tail(data[z, ], 5)
# Five-colour qualitative palette for the slices.
p <- brewer.pal(5, "Pastel1")
# Take the slice labels from the selected rows so each label always matches
# the value it sits on, instead of relying on a hard-coded list of names.
pie(
  data_mis$POV_P,
  labels = data_mis$Provinces,
  border = "white",
  col = p,
  main = "Top 5 Poverty"
)
8. Bar Chart
# Drop columns 2, 7 and 8 by position (Provinces_ID, POP and POV_P in the
# head(data) preview).
data_prop <- data[, -c(2, 7, 8)]
# Horizontal bar chart of POV per province. geom_col() draws bars whose height
# is the y value itself, and geom_text() inherits x and y from the plot-level
# mapping, so only the label aesthetic is given.
ggplot(data_prop, aes(x = Provinces, y = POV)) +
  geom_col(fill = "Red") +
  coord_flip() +
  xlab("Provinsi") +
  ylab("POV") +
  geom_text(aes(label = POV))
9. Scatter Plot
POV vs H2O and STA :
# POV against H2O, with a dashed linear fit and no confidence band.
q <- ggplot(data, aes(x = POV, y = H2O)) +
  geom_point(shape = 18, color = "red") +
  geom_smooth(method = lm, linetype = "dashed", color = "blue", se = FALSE)
# POV against STA, drawn the same way with different colours.
r <- ggplot(data, aes(x = POV, y = STA)) +
  geom_point(shape = 18, color = "purple") +
  geom_smooth(method = lm, linetype = "dashed", color = "green", se = FALSE)
# Draw both panels in one figure.
ggarrange(q, r)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
10. Matrix Correlation
# Drop columns 1, 2, 7 and 8 by position; the correlation output below shows
# the remaining columns are H2O, STA, POV and MORB.
data_cor <- data[, -c(1, 2, 7, 8)]
# Correlation matrix (cor() default method), printed to two decimals.
res <- cor(data_cor)
round(res, 2)
## H2O STA POV MORB
## H2O 1.00 0.33 -0.46 0.27
## STA 0.33 1.00 -0.58 0.03
## POV -0.46 -0.58 1.00 -0.10
## MORB 0.27 0.03 -0.10 1.00
library(corrplot)
## corrplot 0.92 loaded
# Upper-triangle correlation plot with variables ordered by hierarchical
# clustering and labels rotated 45 degrees.
corrplot(res, type = "upper", order = "hclust",
         tl.col = "black", tl.srt = 45)
# Separate statement: the two calls were run together on one line, which does
# not parse. Draws the pairwise correlation chart with histograms.
chart.Correlation(data_cor, histogram = TRUE, pch = 19)
11. Bubble Plot
# Bubble plot of STA against MORB: point size encodes population and colour
# encodes the province, with each point labelled by province name.
data %>%
  # Rescale population to units of 100,000 before mapping it to point size.
  mutate(POP = POP / 100000) %>%
  ggplot(aes(x = STA, y = MORB, size = POP, color = Provinces)) +
  geom_point(alpha = 0.7) +
  scale_size(range = c(1.4, 19), name = "POP") +
  # guide = "none" hides the colour legend; it replaces the deprecated
  # guide = FALSE, which triggers a deprecation warning.
  scale_color_viridis(discrete = TRUE, guide = "none") +
  theme_ipsum() +
  theme(legend.position = "bottom") +
  geom_text_repel(aes(label = Provinces), size = 2)
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
12. Local Regression
# Local regression (loess) of POV on MORB at three smoothing spans, drawn over
# a scatter plot of the raw data.
MORB1 <- range(data$MORB)

# Fit the three smoothers; a smaller span uses a narrower local neighbourhood.
fit <- loess(POV ~ MORB, span = .25, data = data)
fit2 <- loess(POV ~ MORB, span = .5, data = data)
fit3 <- loess(POV ~ MORB, span = .75, data = data)

# Prediction grid across the observed MORB range, in steps of 1 (seq() default).
MORB.grid <- seq(MORB1[1], MORB1[2])

# Scatter plot of the observations, then one fitted curve per span.
plot(
  data$MORB, data$POV,
  xlim = MORB1, cex = .5, col = " darkgrey ",
  xlab = "MORB", ylab = "POV"
)
title(" Local Regression ")
lines(MORB.grid, predict(fit, data.frame(MORB = MORB.grid)),
      col = "red", lwd = 1)
lines(MORB.grid, predict(fit2, data.frame(MORB = MORB.grid)),
      col = "blue", lwd = 1)
lines(MORB.grid, predict(fit3, data.frame(MORB = MORB.grid)),
      col = "green", lwd = 1)
legend(
  "topright",
  legend = c("Span = 0.25", " Span = 0.5", "Span = 0.75"),
  col = c("red", "blue", "green"),
  lty = 1, lwd = 2, cex = .8
)