Tugas Praktikum

1. Library

library(heatmaply) #Untuk plot heatmap Missing Data
library(visdat) #Untuk plot Missing Data
library(reshape2) #Modifikasi DataFrame
library(tidyr) #Modifikasi DataFrame
library(ggplot2) #Plot
library(psych) #Pair Plot
library(DataExplorer) #Corelation Plot
library(graphics)
library(readxl)
library(ggpubr)
library(RColorBrewer)
library(PerformanceAnalytics)
library(tidyverse)
library(hrbrthemes)
library(viridis)
library(gridExtra)
library(ggrepel)
library(plotly)
library(dplyr)

2. Mengakses Data Set

# Load the province-level dataset from the Excel workbook.
data <- read_excel("newdata.xlsx")
# Strip the literal ", Prop." suffix from the province names. fixed = TRUE
# makes gsub() treat the pattern as plain text, so the trailing "." is not
# interpreted as the regex wildcard "any character".
data$Provinces <- gsub(", Prop.", "", data$Provinces, fixed = TRUE)
# Preview the first rows.
head(data)
## # A tibble: 6 x 8
##   Provinces     Provinces_ID   H2O   STA   POV  MORB      POP   POV_P
##   <chr>         <chr>        <dbl> <dbl> <dbl> <dbl>    <dbl>   <dbl>
## 1 Bali          ID.BA         87.6  78.6  4.01  30.3  4292154 172115.
## 2 Banten        ID.BT         74.9  81.5  5.24  32.4 12689736 664942.
## 3 Bengkulu      ID.BE         51.7  80.3 15.4   27.1  1963300 302937.
## 4 DI Yogyakarta ID.YO         76.6  80.6 12.1   35.3  3802872 461288.
## 5 DKI Jakarta   ID.JK         93.4  83.1  3.57  28.8 10467629 373694.
## 6 Gorontalo     ID.GO         79.2  52.4 16.8   37.0  1185492 199281.

3. Rincian Variabel pada Dataset

Variabel-variabel yang terdapat pada dataset ini adalah sebagai berikut :  
1. MORB : Morbidity Rate (in %)  
2. STA : Household Access to safe Sanitation (in % of total Household)  
3. H2O : Household Access to Safe Water (in % of total household)  
4. POV : Poverty Rate (in % of population)  
5. POP : Total Population (in number of people)  
6. POV_P : Number of poor people, (POV / 100) \(\times\) POP  

4. Histogram

Akan ditampilkan tingkat penyakit di Indonesia pada Tahun 2018

# One histogram per variable (MORB, STA, H2O, POV), arranged in a single grid.
a <- ggplot(data, aes(MORB)) +
  geom_histogram(fill = "brown", colour = "midnight blue")
b <- ggplot(data, aes(STA)) +
  geom_histogram(fill = "green", colour = "midnight blue")
c <- ggplot(data, aes(H2O)) +
  geom_histogram(fill = "black", colour = "midnight blue")
d <- ggplot(data, aes(POV)) +
  geom_histogram(fill = "orange", colour = "midnight blue")
ggarrange(a, b, c, d)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

5. Boxplot

# One horizontal boxplot per variable (MORB, STA, H2O, POV), arranged in a grid.
e <- ggplot(data, aes(MORB)) +
  geom_boxplot(fill = "brown", colour = "midnight blue")
f <- ggplot(data, aes(STA)) +
  geom_boxplot(fill = "green", colour = "midnight blue")
g <- ggplot(data, aes(H2O)) +
  geom_boxplot(fill = "black", colour = "midnight blue")
h <- ggplot(data, aes(POV)) +
  geom_boxplot(fill = "orange", colour = "midnight blue")
ggarrange(e, f, g, h)

6. Density

# One kernel density curve per variable (MORB, STA, H2O, POV), arranged in a
# grid.
i <- ggplot(data, aes(MORB)) +
  geom_density(fill = "brown", colour = "midnight blue")
j <- ggplot(data, aes(STA)) +
  geom_density(fill = "green", colour = "midnight blue")
k <- ggplot(data, aes(H2O)) +
  geom_density(fill = "black", colour = "midnight blue")
l <- ggplot(data, aes(POV)) +
  geom_density(fill = "orange", colour = "midnight blue")
ggarrange(i, j, k, l)

7. Pie Chart

# Pie chart of the five provinces with the largest POV_P values.
# order() sorts ascending, so tail() keeps the five largest rows.
z <- order(data$POV_P)
data_mis <- tail(data[z, ], 5)
p <- brewer.pal(5, "Pastel1")
# Take the slice labels from the selected rows so they always match the
# values being plotted, instead of relying on a hand-typed list that goes
# silently wrong when the data or its ordering changes.
pie(
  data_mis$POV_P,
  labels = data_mis$Provinces,
  border = "white",
  col = p,
  main = "Top 5 Poverty"
)

8. Bar Chart

# Horizontal bar chart of POV per province, with each bar labelled by its value.
data_prop <- data[, -c(2, 7, 8)]  # drop columns 2, 7 and 8 by position
ggplot(data_prop, aes(x = Provinces, y = POV)) +
  geom_col(fill = "Red") +
  coord_flip() +
  labs(x = "Provinsi", y = "POV") +
  # x and y are inherited from the plot-level mapping
  geom_text(aes(label = POV))

9. Scatter Plot

POV vs H2O and STA :

# Scatter plots of POV against H2O and against STA, each with a dashed
# linear-model fit line and no confidence band, shown side by side.
q <- ggplot(data, aes(POV, H2O)) +
  geom_point(color = "red", shape = 18) +
  geom_smooth(method = lm, se = FALSE, color = "blue", linetype = "dashed")
r <- ggplot(data, aes(POV, STA)) +
  geom_point(color = "purple", shape = 18) +
  geom_smooth(method = lm, se = FALSE, color = "green", linetype = "dashed")
ggarrange(q, r)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

10. Matrix Correlation

# Correlation matrix of the four rate variables, printed to two decimals.
# These are the columns left after dropping positions 1, 2, 7 and 8.
data_cor <- data[, c("H2O", "STA", "POV", "MORB")]
res <- cor(data_cor)
round(res, digits = 2)
##        H2O   STA   POV  MORB
## H2O   1.00  0.33 -0.46  0.27
## STA   0.33  1.00 -0.58  0.03
## POV  -0.46 -0.58  1.00 -0.10
## MORB  0.27  0.03 -0.10  1.00
library(corrplot)
## corrplot 0.92 loaded
# Upper-triangle correlogram of `res`, with variables ordered by hierarchical
# clustering and black axis labels rotated 45 degrees.
corrplot(
  res,
  type = "upper",
  order = "hclust",
  tl.col = "black",
  tl.srt = 45
)

# PerformanceAnalytics correlation chart of the same columns, with histograms
# enabled.
chart.Correlation(data_cor, histogram = TRUE, pch = 19)

11. Bubble Plot

# Bubble plot of STA (x) against MORB (y): one labelled bubble per province,
# sized by population and coloured by province.
data %>%
  mutate(POP = POP / 100000) %>%  # rescale population to units of 100,000
  ggplot(aes(x = STA, y = MORB, size = POP, color = Provinces)) +
  geom_point(alpha = 0.7) +
  scale_size(range = c(1.4, 19), name = "POP") +
  # guide = "none" hides the colour legend; the former guide = FALSE is
  # deprecated in ggplot2 and raises a warning.
  scale_color_viridis(discrete = TRUE, guide = "none") +
  theme_ipsum() +
  theme(legend.position = "bottom") +
  geom_text_repel(aes(label = Provinces), size = 2)
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

12. Local Regression

# Local regression (loess) of POV on MORB, drawn for three smoothing spans
# on top of the raw scatter.
MORB1 <- range(data$MORB)
plot(data$MORB, data$POV, xlim = MORB1, cex = .5, col = " darkgrey ",
     xlab = "MORB", ylab = "POV")
title(" Local Regression ")
fit <- loess(POV ~ MORB, span = .25, data = data)
fit2 <- loess(POV ~ MORB, span = .5, data = data)
fit3 <- loess(POV ~ MORB, span = .75, data = data)
# Evaluate the fits on a fine, evenly spaced grid that ends exactly at
# max(MORB). A unit-step seq(from, to) stops short of the maximum whenever the
# range is not a whole number, and yields only a handful of points, so the
# curves would be truncated and jagged.
MORB.grid <- seq(from = MORB1[1], to = MORB1[2], length.out = 200)
lines(MORB.grid, predict(fit, data.frame(MORB = MORB.grid)),
      col = "red", lwd = 1)
lines(MORB.grid, predict(fit2, data.frame(MORB = MORB.grid)),
      col = "blue", lwd = 1)
lines(MORB.grid, predict(fit3, data.frame(MORB = MORB.grid)),
      col = "green", lwd = 1)
legend("topright",
       legend = c("Span = 0.25", " Span = 0.5", "Span = 0.75"),
       col = c("red", "blue", "green"), lty = 1, lwd = 2, cex = .8)