library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(GGally)
library(scatterplot3d)
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
# Load the NYC sub-borough data set.
# The default location is the original machine-specific path; set the NYC_CSV
# environment variable to point the script at a copy stored elsewhere.
nyc_csv <- Sys.getenv(
  "NYC_CSV",
  unset = "D:/Kuliah/IPB 2025 Semester 3/Analisis Spasial/Praktikum/Bedah Buku Spasial/Bab2. Eksplorasi Data 1/nyc/nyc.csv"
)

# Fail early with a readable message instead of read.csv()'s connection error.
if (!file.exists(nyc_csv)) {
  stop("NYC data file not found: ", nyc_csv, call. = FALSE)
}

nyc.data <- read.csv(nyc_csv)

# Preview the first rows to confirm the columns were parsed as expected.
head(nyc.data)
## bor_subb NAME CODE SUBBOROUGH FORHIS06 FORHIS07
## 1 501 North Shore 501 North Shore 37.0657 34.0317
## 2 502 Mid-Island 502 Mid-Island 27.9822 18.1193
## 3 503 South Shore 503 South Shore 10.7019 12.1404
## 4 401 Astoria 401 Astoria 52.0961 53.9585
## 5 402 Sunnyside / Woodside 402 Sunnyside/Woodside 62.7242 69.3969
## 6 403 Jackson Heights 403 Jackson Heights 68.4834 68.5405
## FORHIS08 FORHIS09 FORWH06 FORWH07 FORWH08 FORWH09 HHSIZ1990 HHSIZ00 HHSIZ02
## 1 27.3767 29.3091 13.2540 11.8768 11.1788 11.1459 2.7146 2.7338 2.7412
## 2 24.0452 31.1566 20.0616 19.8575 22.4870 17.0371 2.8233 2.7176 2.5405
## 3 9.6890 14.6638 10.3060 12.7699 9.3561 10.2830 3.0547 2.8497 2.6525
## 4 54.6968 47.8050 38.3658 35.6551 32.1289 34.6578 2.4279 2.4995 2.3032
## 5 67.0897 58.2963 37.0512 31.9057 32.3264 33.8794 2.4646 2.6287 2.5300
## 6 66.5080 69.1580 34.3999 38.2428 38.1470 26.5347 2.8081 3.1650 2.9108
## HHSIZ05 HHSIZ08 KIDS2000 KIDS2005 KIDS2006 KIDS2007 KIDS2008 KIDS2009
## 1 2.8010 2.6983 39.2995 43.3788 38.4022 41.5213 39.8390 40.3169
## 2 2.6228 2.5749 36.2234 35.7630 36.9081 37.6798 37.2447 37.8176
## 3 2.6121 2.6483 39.7362 42.5232 40.3577 40.3797 40.4820 35.3880
## 4 2.3227 2.2746 28.4592 27.2223 25.2556 24.8911 22.0364 17.9996
## 5 2.4993 2.4766 29.8808 28.6841 28.1440 26.3675 29.9032 26.2156
## 6 2.8599 2.8604 41.6335 40.2431 39.3135 37.4414 39.4144 39.0377
## RENT2002 RENT2005 RENT2008 RENTPCT02 RENTPCT05 RENTPCT08 PUBAST90 PUBAST00
## 1 800 900 1000 21.1119 24.8073 28.5344 47.32913 6.005791
## 2 650 800 950 32.3615 27.2584 27.9567 35.18232 2.287034
## 3 750 775 800 23.0547 20.4146 18.1590 23.89404 1.350208
## 4 1000 1100 1400 25.6022 26.7685 28.0467 80.53393 5.204510
## 5 1000 1000 1400 18.8079 22.6752 21.3009 75.51687 2.974139
## 6 910 1000 1100 34.0156 34.8050 27.1032 66.64228 5.332569
## YRHOM02 YRHOM05 YRHOM08
## 1 10.80507 12.12785 11.54743
## 2 15.24125 15.18311 14.68212
## 3 12.70425 12.97228 13.56149
## 4 12.83917 13.37751 12.54464
## 5 15.38766 12.51879 12.66691
## 6 12.64923 12.58035 11.96598
# List every column available in the data set.
colnames(nyc.data)
## [1] "bor_subb" "NAME" "CODE" "SUBBOROUGH" "FORHIS06"
## [6] "FORHIS07" "FORHIS08" "FORHIS09" "FORWH06" "FORWH07"
## [11] "FORWH08" "FORWH09" "HHSIZ1990" "HHSIZ00" "HHSIZ02"
## [16] "HHSIZ05" "HHSIZ08" "KIDS2000" "KIDS2005" "KIDS2006"
## [21] "KIDS2007" "KIDS2008" "KIDS2009" "RENT2002" "RENT2005"
## [26] "RENT2008" "RENTPCT02" "RENTPCT05" "RENTPCT08" "PUBAST90"
## [31] "PUBAST00" "YRHOM02" "YRHOM05" "YRHOM08"
# The same four columns feed every scatterplot matrix in this section, so
# name them once instead of repeating the vector in each call.
pair_cols <- c("HHSIZ00", "KIDS2000", "YRHOM02", "PUBAST00")

# Scatterplot matrix drawn with ggscatmat().
ggscatmat(nyc.data, columns = pair_cols)

# Scatterplot matrix drawn with ggpairs() and its default panels.
ggpairs(nyc.data, columns = pair_cols)

# Points in the upper triangle and histograms on the diagonal. The bin count
# is given explicitly (7, the value the faceted PUBAST00 histogram later in
# this script uses) so stat_bin() no longer falls back to 30 bins and prints
# its "Pick better value `binwidth`" message once per diagonal panel.
ggpairs(
  nyc.data,
  columns = pair_cols,
  upper = list(continuous = "points"),
  diag = list(continuous = wrap("barDiag", bins = 7))
)

# "smooth" panels in both the upper and lower triangles.
ggpairs(
  nyc.data,
  columns = pair_cols,
  upper = list(continuous = "smooth"),
  lower = list(continuous = "smooth")
)

# "smooth_loess" panels in both the upper and lower triangles.
ggpairs(
  nyc.data,
  columns = pair_cols,
  upper = list(continuous = "smooth_loess"),
  lower = list(continuous = "smooth_loess")
)
# Bubble chart: KIDS2000 against PUBAST00, with RENT2002 mapped to both the
# size and the colour of each point. The title is centred via the theme.
ggplot(
  nyc.data,
  aes(x = KIDS2000, y = PUBAST00, size = RENT2002, colour = RENT2002)
) +
  geom_point() +
  labs(
    title = "Bubble Chart",
    x = "Percent HH with kids",
    y = "Percent HH with public assistance"
  ) +
  theme(plot.title = element_text(hjust = 0.5))
# Static 3D scatterplot of KIDS2000 (x), PUBAST00 (y) and RENT2002 (z),
# drawn with small filled red points.
scatterplot3d(
  x = nyc.data[["KIDS2000"]],
  y = nyc.data[["PUBAST00"]],
  z = nyc.data[["RENT2002"]],
  color = "red",
  pch = 20,
  main = "NYC 3D Scatterplot",
  xlab = "Percent HH with kids",
  ylab = "Percent HH with public assistance",
  zlab = "Median rent"
)
# Interactive 3D scatterplot of the same three variables with plotly.
# layout() is namespaced because plotly's version masks graphics::layout().
plot_ly(nyc.data, x = ~KIDS2000, y = ~PUBAST00, z = ~RENT2002) |>
  add_markers(marker = list(color = "red")) |>
  plotly::layout(
    scene = list(
      xaxis = list(title = "Percent HH with kids"),
      yaxis = list(title = "Percent HH with public assistance"),
      zaxis = list(title = "Median rent")
    )
  )
# Bin HHSIZ00 into two intervals with cut_number() and store the resulting
# factor as a conditioning variable for the faceted plots; then show it.
nyc.data[["cut.hhsiz"]] <- ggplot2::cut_number(nyc.data[["HHSIZ00"]], n = 2)
nyc.data[["cut.hhsiz"]]
## [1] (2.72,3.2] [1.57,2.72] (2.72,3.2] [1.57,2.72] [1.57,2.72] (2.72,3.2]
## [7] (2.72,3.2] [1.57,2.72] [1.57,2.72] [1.57,2.72] [1.57,2.72] (2.72,3.2]
## [13] (2.72,3.2] [1.57,2.72] (2.72,3.2] (2.72,3.2] [1.57,2.72] [1.57,2.72]
## [19] [1.57,2.72] [1.57,2.72] [1.57,2.72] [1.57,2.72] [1.57,2.72] [1.57,2.72]
## [25] [1.57,2.72] [1.57,2.72] (2.72,3.2] (2.72,3.2] (2.72,3.2] (2.72,3.2]
## [31] (2.72,3.2] (2.72,3.2] [1.57,2.72] (2.72,3.2] [1.57,2.72] [1.57,2.72]
## [37] (2.72,3.2] (2.72,3.2] (2.72,3.2] (2.72,3.2] [1.57,2.72] [1.57,2.72]
## [43] [1.57,2.72] (2.72,3.2] (2.72,3.2] (2.72,3.2] (2.72,3.2] (2.72,3.2]
## [49] (2.72,3.2] [1.57,2.72] (2.72,3.2] (2.72,3.2] [1.57,2.72] [1.57,2.72]
## [55] [1.57,2.72]
## Levels: [1.57,2.72] (2.72,3.2]
# Bin YRHOM02 into two intervals with cut_number() and store the resulting
# factor as the second conditioning variable; then show it.
nyc.data[["cut.yrhom"]] <- ggplot2::cut_number(nyc.data[["YRHOM02"]], n = 2)
nyc.data[["cut.yrhom"]]
## [1] [8.22,12.4] (12.4,16.1] (12.4,16.1] (12.4,16.1] (12.4,16.1] (12.4,16.1]
## [7] [8.22,12.4] (12.4,16.1] (12.4,16.1] (12.4,16.1] (12.4,16.1] [8.22,12.4]
## [13] (12.4,16.1] (12.4,16.1] (12.4,16.1] (12.4,16.1] (12.4,16.1] [8.22,12.4]
## [19] (12.4,16.1] [8.22,12.4] [8.22,12.4] [8.22,12.4] [8.22,12.4] (12.4,16.1]
## [25] (12.4,16.1] (12.4,16.1] (12.4,16.1] [8.22,12.4] [8.22,12.4] [8.22,12.4]
## [31] [8.22,12.4] [8.22,12.4] (12.4,16.1] [8.22,12.4] (12.4,16.1] [8.22,12.4]
## [37] (12.4,16.1] (12.4,16.1] [8.22,12.4] [8.22,12.4] [8.22,12.4] [8.22,12.4]
## [43] [8.22,12.4] [8.22,12.4] [8.22,12.4] [8.22,12.4] [8.22,12.4] [8.22,12.4]
## [49] [8.22,12.4] (12.4,16.1] (12.4,16.1] [8.22,12.4] (12.4,16.1] (12.4,16.1]
## [55] [8.22,12.4]
## Levels: [8.22,12.4] (12.4,16.1]
# Conditioning layout shared by all three plots below: rows are the
# cut.yrhom intervals, columns the cut.hhsiz intervals. as.table = FALSE asks
# facet_grid() for a plot-like rather than table-like panel arrangement.
facet_by_cuts <- facet_grid(cut.yrhom ~ cut.hhsiz, as.table = FALSE)

# Base scatter of KIDS2000 against PUBAST00, reused by the first two plots.
kids_vs_pubast <- ggplot(nyc.data, aes(x = KIDS2000, y = PUBAST00)) +
  geom_point()

# Conditional scatterplots.
kids_vs_pubast + facet_by_cuts

# Conditional scatterplots with a linear fit per panel. The formula is spelled
# out so geom_smooth() does not print its "using formula = 'y ~ x'" message.
kids_vs_pubast +
  geom_smooth(method = "lm", formula = y ~ x) +
  facet_by_cuts

# Conditional histograms of PUBAST00 with seven bins per panel.
ggplot(nyc.data, aes(x = PUBAST00)) +
  geom_histogram(bins = 7) +
  facet_by_cuts
# Reference: https://spatialanalysis.github.io/handsonspatialdata/exploratory-data-analysis-2.html#fnref12