Bab 3 Analisis Data Eksploratori 2

1. Package

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(GGally)
library(scatterplot3d)
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout

2. Membaca Data

# Read the nyc.csv table into a data frame and preview its first rows.
# NOTE(review): the path below is absolute and machine-specific, so this chunk
# only runs where that exact file exists -- consider a project-relative path.
nyc.data <- read.csv(
  file = "D:/Kuliah/IPB 2025 Semester 3/Analisis Spasial/Praktikum/Bedah Buku Spasial/Bab2. Eksplorasi Data 1/nyc/nyc.csv"
)
head(x = nyc.data)
##   bor_subb                 NAME CODE         SUBBOROUGH FORHIS06 FORHIS07
## 1      501          North Shore  501        North Shore  37.0657  34.0317
## 2      502           Mid-Island  502         Mid-Island  27.9822  18.1193
## 3      503          South Shore  503        South Shore  10.7019  12.1404
## 4      401              Astoria  401            Astoria  52.0961  53.9585
## 5      402 Sunnyside / Woodside  402 Sunnyside/Woodside  62.7242  69.3969
## 6      403      Jackson Heights  403    Jackson Heights  68.4834  68.5405
##   FORHIS08 FORHIS09 FORWH06 FORWH07 FORWH08 FORWH09 HHSIZ1990 HHSIZ00 HHSIZ02
## 1  27.3767  29.3091 13.2540 11.8768 11.1788 11.1459    2.7146  2.7338  2.7412
## 2  24.0452  31.1566 20.0616 19.8575 22.4870 17.0371    2.8233  2.7176  2.5405
## 3   9.6890  14.6638 10.3060 12.7699  9.3561 10.2830    3.0547  2.8497  2.6525
## 4  54.6968  47.8050 38.3658 35.6551 32.1289 34.6578    2.4279  2.4995  2.3032
## 5  67.0897  58.2963 37.0512 31.9057 32.3264 33.8794    2.4646  2.6287  2.5300
## 6  66.5080  69.1580 34.3999 38.2428 38.1470 26.5347    2.8081  3.1650  2.9108
##   HHSIZ05 HHSIZ08 KIDS2000 KIDS2005 KIDS2006 KIDS2007 KIDS2008 KIDS2009
## 1  2.8010  2.6983  39.2995  43.3788  38.4022  41.5213  39.8390  40.3169
## 2  2.6228  2.5749  36.2234  35.7630  36.9081  37.6798  37.2447  37.8176
## 3  2.6121  2.6483  39.7362  42.5232  40.3577  40.3797  40.4820  35.3880
## 4  2.3227  2.2746  28.4592  27.2223  25.2556  24.8911  22.0364  17.9996
## 5  2.4993  2.4766  29.8808  28.6841  28.1440  26.3675  29.9032  26.2156
## 6  2.8599  2.8604  41.6335  40.2431  39.3135  37.4414  39.4144  39.0377
##   RENT2002 RENT2005 RENT2008 RENTPCT02 RENTPCT05 RENTPCT08 PUBAST90 PUBAST00
## 1      800      900     1000   21.1119   24.8073   28.5344 47.32913 6.005791
## 2      650      800      950   32.3615   27.2584   27.9567 35.18232 2.287034
## 3      750      775      800   23.0547   20.4146   18.1590 23.89404 1.350208
## 4     1000     1100     1400   25.6022   26.7685   28.0467 80.53393 5.204510
## 5     1000     1000     1400   18.8079   22.6752   21.3009 75.51687 2.974139
## 6      910     1000     1100   34.0156   34.8050   27.1032 66.64228 5.332569
##    YRHOM02  YRHOM05  YRHOM08
## 1 10.80507 12.12785 11.54743
## 2 15.24125 15.18311 14.68212
## 3 12.70425 12.97228 13.56149
## 4 12.83917 13.37751 12.54464
## 5 15.38766 12.51879 12.66691
## 6 12.64923 12.58035 11.96598
# List every column name available in the data frame.
colnames(nyc.data)
##  [1] "bor_subb"   "NAME"       "CODE"       "SUBBOROUGH" "FORHIS06"  
##  [6] "FORHIS07"   "FORHIS08"   "FORHIS09"   "FORWH06"    "FORWH07"   
## [11] "FORWH08"    "FORWH09"    "HHSIZ1990"  "HHSIZ00"    "HHSIZ02"   
## [16] "HHSIZ05"    "HHSIZ08"    "KIDS2000"   "KIDS2005"   "KIDS2006"  
## [21] "KIDS2007"   "KIDS2008"   "KIDS2009"   "RENT2002"   "RENT2005"  
## [26] "RENT2008"   "RENTPCT02"  "RENTPCT05"  "RENTPCT08"  "PUBAST90"  
## [31] "PUBAST00"   "YRHOM02"    "YRHOM05"    "YRHOM08"

3. Membuat matriks scatterplot

1. Matriks sebaran dasar

# Scatterplot matrix (GGally::ggscatmat) restricted to four selected variables.
ggscatmat(
  data = nyc.data,
  columns = c("HHSIZ00", "KIDS2000", "YRHOM02", "PUBAST00")
)

2. Matriks sebaran default

# Pairs plot of the same four variables using ggpairs() with its default panels.
ggpairs(
  data = nyc.data,
  columns = c("HHSIZ00", "KIDS2000", "YRHOM02", "PUBAST00")
)

3. Plot sebaran berpasangan

# Pairs plot with plain point panels above the diagonal and binned bar panels
# on the diagonal. No bin count is passed for the diagonal, so stat_bin()
# falls back to its default and reports that in a message.
ggpairs(
  data = nyc.data,
  columns = c("HHSIZ00", "KIDS2000", "YRHOM02", "PUBAST00"),
  upper = list(continuous = "points"),
  diag = list(continuous = "barDiag")
)
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.

4. Matriks sebaran dengan penghalus linier

ggpairs(nyc.data, columns=c("HHSIZ00","KIDS2000", "YRHOM02", "PUBAST00"),
        upper=list(continuous="smooth"),lower=list(continuous="smooth"))

5. Matriks sebaran dengan penghalus loess

# Pairs plot using the "smooth_loess" panel both above and below the diagonal.
ggpairs(
  data = nyc.data,
  columns = c("HHSIZ00", "KIDS2000", "YRHOM02", "PUBAST00"),
  upper = list(continuous = "smooth_loess"),
  lower = list(continuous = "smooth_loess")
)

4. Membuat plot gelembung (Bubble chart)

# Bubble chart: KIDS2000 against PUBAST00, with RENT2002 mapped to both the
# point size and the point colour.
ggplot(
  data = nyc.data,
  mapping = aes(
    x = KIDS2000,
    y = PUBAST00,
    size = RENT2002,
    colour = RENT2002
  )
) +
  geom_point() +
  labs(
    title = "Bubble Chart",
    x = "Percent HH with kids",
    y = "Percent HH with public assistance"
  ) +
  # hjust = 0.5 centres the title horizontally.
  theme(plot.title = element_text(hjust = 0.5))

5. Membuat diagram sebaran 3D

# Static 3D scatterplot: KIDS2000 (x), PUBAST00 (y) and RENT2002 (z),
# drawn as red points with plotting symbol 20.
scatterplot3d(
  x = nyc.data[["KIDS2000"]],
  y = nyc.data[["PUBAST00"]],
  z = nyc.data[["RENT2002"]],
  color = "red",
  pch = 20,
  main = "NYC 3D Scatterplot",
  xlab = "Percent HH with kids",
  ylab = "Percent HH with public assistance",
  zlab = "Median rent"
)

6. Membuat diagram sebaran 3D interaktif

# plotly version of the 3D scatterplot: the same three variables on x/y/z,
# red markers, and a title set for each axis of the 3D scene.
plot_ly(
  data = nyc.data,
  x = ~KIDS2000,
  y = ~PUBAST00,
  z = ~RENT2002
) %>%
  add_markers(marker = list(color = "red")) %>%
  layout(
    scene = list(
      xaxis = list(title = "Percent HH with kids"),
      yaxis = list(title = "Percent HH with public assistance"),
      zaxis = list(title = "Median rent")
    )
  )

7. Membangun plot bersyarat

1. Conditional Plots

# Bin HHSIZ00 into two groups with cut_number() and store the resulting factor
# as a new column, then print it to inspect the interval labels.
nyc.data[["cut.hhsiz"]] <- cut_number(nyc.data[["HHSIZ00"]], n = 2)
nyc.data[["cut.hhsiz"]]
##  [1] (2.72,3.2]  [1.57,2.72] (2.72,3.2]  [1.57,2.72] [1.57,2.72] (2.72,3.2] 
##  [7] (2.72,3.2]  [1.57,2.72] [1.57,2.72] [1.57,2.72] [1.57,2.72] (2.72,3.2] 
## [13] (2.72,3.2]  [1.57,2.72] (2.72,3.2]  (2.72,3.2]  [1.57,2.72] [1.57,2.72]
## [19] [1.57,2.72] [1.57,2.72] [1.57,2.72] [1.57,2.72] [1.57,2.72] [1.57,2.72]
## [25] [1.57,2.72] [1.57,2.72] (2.72,3.2]  (2.72,3.2]  (2.72,3.2]  (2.72,3.2] 
## [31] (2.72,3.2]  (2.72,3.2]  [1.57,2.72] (2.72,3.2]  [1.57,2.72] [1.57,2.72]
## [37] (2.72,3.2]  (2.72,3.2]  (2.72,3.2]  (2.72,3.2]  [1.57,2.72] [1.57,2.72]
## [43] [1.57,2.72] (2.72,3.2]  (2.72,3.2]  (2.72,3.2]  (2.72,3.2]  (2.72,3.2] 
## [49] (2.72,3.2]  [1.57,2.72] (2.72,3.2]  (2.72,3.2]  [1.57,2.72] [1.57,2.72]
## [55] [1.57,2.72]
## Levels: [1.57,2.72] (2.72,3.2]
# Bin YRHOM02 into two groups with cut_number() and store the resulting factor
# as a new column, then print it to inspect the interval labels.
nyc.data[["cut.yrhom"]] <- cut_number(nyc.data[["YRHOM02"]], n = 2)
nyc.data[["cut.yrhom"]]
##  [1] [8.22,12.4] (12.4,16.1] (12.4,16.1] (12.4,16.1] (12.4,16.1] (12.4,16.1]
##  [7] [8.22,12.4] (12.4,16.1] (12.4,16.1] (12.4,16.1] (12.4,16.1] [8.22,12.4]
## [13] (12.4,16.1] (12.4,16.1] (12.4,16.1] (12.4,16.1] (12.4,16.1] [8.22,12.4]
## [19] (12.4,16.1] [8.22,12.4] [8.22,12.4] [8.22,12.4] [8.22,12.4] (12.4,16.1]
## [25] (12.4,16.1] (12.4,16.1] (12.4,16.1] [8.22,12.4] [8.22,12.4] [8.22,12.4]
## [31] [8.22,12.4] [8.22,12.4] (12.4,16.1] [8.22,12.4] (12.4,16.1] [8.22,12.4]
## [37] (12.4,16.1] (12.4,16.1] [8.22,12.4] [8.22,12.4] [8.22,12.4] [8.22,12.4]
## [43] [8.22,12.4] [8.22,12.4] [8.22,12.4] [8.22,12.4] [8.22,12.4] [8.22,12.4]
## [49] [8.22,12.4] (12.4,16.1] (12.4,16.1] [8.22,12.4] (12.4,16.1] (12.4,16.1]
## [55] [8.22,12.4]
## Levels: [8.22,12.4] (12.4,16.1]
# Conditional scatterplot: KIDS2000 vs PUBAST00, one panel per combination of
# the two binned variables (cut.yrhom on rows, cut.hhsiz on columns).
ggplot(nyc.data, mapping = aes(x = KIDS2000, y = PUBAST00)) +
  geom_point() +
  facet_grid(
    rows = vars(cut.yrhom),
    cols = vars(cut.hhsiz),
    as.table = FALSE
  )

# Same conditional scatterplot, with a linear-model smoother added per panel.
# No formula is passed, so geom_smooth() reports the one it picks in a message.
ggplot(nyc.data, mapping = aes(x = KIDS2000, y = PUBAST00)) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_grid(
    rows = vars(cut.yrhom),
    cols = vars(cut.hhsiz),
    as.table = FALSE
  )
## `geom_smooth()` using formula = 'y ~ x'

2. Conditional histogram

# Conditional histogram of PUBAST00 (7 bins), one panel per combination of the
# two binned variables (cut.yrhom on rows, cut.hhsiz on columns).
ggplot(nyc.data, mapping = aes(x = PUBAST00)) +
  geom_histogram(bins = 7) +
  facet_grid(
    rows = vars(cut.yrhom),
    cols = vars(cut.hhsiz),
    as.table = FALSE
  )

Reference: https://spatialanalysis.github.io/handsonspatialdata/exploratory-data-analysis-2.html#fnref12