Package yang digunakan:
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggthemes)
library(gap)
## Warning: package 'gap' was built under R version 4.5.2
## Loading required package: gap.datasets
## Warning: package 'gap.datasets' was built under R version 4.5.2
## gap version 1.6
# Load the NYC sub-borough attribute table from its CSV file into a tibble.
nyc.data <- read_csv(
  file = "D:/Kuliah/IPB 2025 Semester 3/Analisis Spasial/Praktikum/Bedah Buku Spasial/Bab2. Eksplorasi Data 1/nyc/nyc.csv"
)
## Rows: 55 Columns: 34
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): NAME, SUBBOROUGH
## dbl (32): bor_subb, CODE, FORHIS06, FORHIS07, FORHIS08, FORHIS09, FORWH06, F...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first six rows of the table.
head(nyc.data, n = 6)
## # A tibble: 6 × 34
## bor_subb NAME CODE SUBBOROUGH FORHIS06 FORHIS07 FORHIS08 FORHIS09 FORWH06
## <dbl> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 501 North S… 501 North Sho… 37.1 34.0 27.4 29.3 13.3
## 2 502 Mid-Isl… 502 Mid-Island 28.0 18.1 24.0 31.2 20.1
## 3 503 South S… 503 South Sho… 10.7 12.1 9.69 14.7 10.3
## 4 401 Astoria 401 Astoria 52.1 54.0 54.7 47.8 38.4
## 5 402 Sunnysi… 402 Sunnyside… 62.7 69.4 67.1 58.3 37.1
## 6 403 Jackson… 403 Jackson H… 68.5 68.5 66.5 69.2 34.4
## # ℹ 25 more variables: FORWH07 <dbl>, FORWH08 <dbl>, FORWH09 <dbl>,
## # HHSIZ1990 <dbl>, HHSIZ00 <dbl>, HHSIZ02 <dbl>, HHSIZ05 <dbl>,
## # HHSIZ08 <dbl>, KIDS2000 <dbl>, KIDS2005 <dbl>, KIDS2006 <dbl>,
## # KIDS2007 <dbl>, KIDS2008 <dbl>, KIDS2009 <dbl>, RENT2002 <dbl>,
## # RENT2005 <dbl>, RENT2008 <dbl>, RENTPCT02 <dbl>, RENTPCT05 <dbl>,
## # RENTPCT08 <dbl>, PUBAST90 <dbl>, PUBAST00 <dbl>, YRHOM02 <dbl>,
## # YRHOM05 <dbl>, YRHOM08 <dbl>
# List the column names as read from the file.
colnames(nyc.data)
## [1] "bor_subb" "NAME" "CODE" "SUBBOROUGH" "FORHIS06"
## [6] "FORHIS07" "FORHIS08" "FORHIS09" "FORWH06" "FORWH07"
## [11] "FORWH08" "FORWH09" "HHSIZ1990" "HHSIZ00" "HHSIZ02"
## [16] "HHSIZ05" "HHSIZ08" "KIDS2000" "KIDS2005" "KIDS2006"
## [21] "KIDS2007" "KIDS2008" "KIDS2009" "RENT2002" "RENT2005"
## [26] "RENT2008" "RENTPCT02" "RENTPCT05" "RENTPCT08" "PUBAST90"
## [31] "PUBAST00" "YRHOM02" "YRHOM05" "YRHOM08"
# Lower-case the names of three variables (KIDS2009, KIDS2000, PUBAST00),
# then list the column names again to confirm the change.
nyc.data <- rename(
  nyc.data,
  kids2009 = KIDS2009,
  kids2000 = KIDS2000,
  pubast00 = PUBAST00
)
colnames(nyc.data)
## [1] "bor_subb" "NAME" "CODE" "SUBBOROUGH" "FORHIS06"
## [6] "FORHIS07" "FORHIS08" "FORHIS09" "FORWH06" "FORWH07"
## [11] "FORWH08" "FORWH09" "HHSIZ1990" "HHSIZ00" "HHSIZ02"
## [16] "HHSIZ05" "HHSIZ08" "kids2000" "KIDS2005" "KIDS2006"
## [21] "KIDS2007" "KIDS2008" "kids2009" "RENT2002" "RENT2005"
## [26] "RENT2008" "RENTPCT02" "RENTPCT05" "RENTPCT08" "PUBAST90"
## [31] "pubast00" "YRHOM02" "YRHOM05" "YRHOM08"
Kita mulai dengan perintah histogram sederhana. Seperti di buku kerja GeoDa, kita akan menggunakan variabel kids2009.
# Histogram of kids2009 drawn with 7 bins.
nyc.data %>%
  ggplot(mapping = aes(x = kids2009)) +
  geom_histogram(bins = 7)
# Same histogram of kids2009, now with 5 bins.
nyc.data %>%
  ggplot(mapping = aes(x = kids2009)) +
  geom_histogram(bins = 5)
# Seven-bin histogram of kids2009 with axis labels, a title, and the
# built-in minimal theme.
ggplot(nyc.data, aes(x = kids2009)) +
  geom_histogram(bins = 7) +
  labs(
    x = "Percent kids in 2009",
    y = "Frequency",
    title = "Example Histogram"
  ) +
  theme_minimal()
# Seven-bin histogram of kids2009 with axis labels, a title, and the
# built-in classic theme.
ggplot(nyc.data, aes(x = kids2009)) +
  geom_histogram(bins = 7) +
  labs(
    x = "Percent kids in 2009",
    y = "Frequency",
    title = "Example Histogram"
  ) +
  theme_classic()
# Seven-bin histogram of kids2009 with axis labels, a title, and the
# Tufte theme (theme_tufte() is not part of ggplot2 itself).
ggplot(nyc.data, aes(x = kids2009)) +
  geom_histogram(bins = 7) +
  labs(
    x = "Percent kids in 2009",
    y = "Frequency",
    title = "Example Histogram"
  ) +
  theme_tufte()
Menyimpan (sebagian) spesifikasi grafik ke dalam suatu objek agar dapat digunakan kembali
# Keep the unlabelled seven-bin histogram in an object so that further
# components can be added to it afterwards.
baseplt <- nyc.data %>%
  ggplot(mapping = aes(x = kids2009)) +
  geom_histogram(bins = 7)
# Add labels to the stored plot and centre the title (hjust = 0.5).
baseplt +
  labs(
    x = "Percent kids in 2009",
    y = "Frequency",
    title = "Example Histogram"
  ) +
  theme(plot.title = element_text(hjust = 0.5))
# Box plot of kids2009; the empty string on x gives a single, unlabelled box.
box.plt <- nyc.data %>%
  ggplot(mapping = aes(x = "", y = kids2009)) +
  geom_boxplot()
box.plt
# Extract the statistics ggplot2 computed for the (first and only) layer.
box.dta <- layer_data(box.plt, i = 1L)
box.dta
## ymin lower middle upper ymax outliers notchupper notchlower x
## 1 8.6623 26.69425 33.5284 39.6773 48.1308 0 36.2944 30.7624 1
## flipped_aes PANEL group ymin_final ymax_final xmin xmax order xid newx
## 1 FALSE 1 1 0 48.1308 0.625 1.375 1 1 1
## new_width weight colour fill size alpha shape linetype linewidth width
## 1 0.75 1 #333333FF white 1.5 NA 19 1 0.5 0.9
box.desc <- function(box.lyr, mult = 1.5) {
  # Compute the lower and upper fences of a box plot.
  #
  # box.lyr: a box plot layer_data object; it must provide the components
  #          `lower` and `upper` (the two quartiles bounding the box)
  # mult:    the multiplier for the fence calculation, default = 1.5
  #
  # Returns a numeric vector c(lower fence, upper fence).
  #
  # Without this check a wrong input (e.g. an object lacking `lower` or
  # `upper`) would silently yield numeric(0) instead of failing.
  if (!all(c("lower", "upper") %in% names(box.lyr))) {
    stop("`box.lyr` must contain `lower` and `upper`", call. = FALSE)
  }
  if (!is.numeric(mult)) {
    stop("`mult` must be numeric", call. = FALSE)
  }
  q.lo <- box.lyr[["lower"]]
  q.hi <- box.lyr[["upper"]]
  iqr <- q.hi - q.lo # inter-quartile range
  c(q.lo - mult * iqr, q.hi + mult * iqr)
}
# Lower and upper fences of the box plot, using the default multiplier.
box.desc(box.dta, mult = 1.5)
## [1] 7.219675 59.151875
# Same box plot of kids2009, but with coef = 3 passed to geom_boxplot().
box.plt3 <- nyc.data %>%
  ggplot(mapping = aes(x = "", y = kids2009)) +
  geom_boxplot(coef = 3)
box.plt3
# Extract the statistics ggplot2 computed for the (first and only) layer.
box.dta3 <- layer_data(box.plt3, i = 1L)
box.dta3
## ymin lower middle upper ymax outliers notchupper notchlower x
## 1 0 26.69425 33.5284 39.6773 48.1308 36.2944 30.7624 1
## flipped_aes PANEL group ymin_final ymax_final xmin xmax order xid newx
## 1 FALSE 1 1 0 48.1308 0.625 1.375 1 1 1
## new_width weight colour fill size alpha shape linetype linewidth width
## 1 0.75 1 #333333FF white 1.5 NA 19 1 0.5 0.9
# Fences for the coef = 3 box plot, computed with a matching multiplier of 3.
box.desc(box.lyr = box.dta3, mult = 3.0)
## [1] -12.25490 78.62645
Karena pagar bawah kini bernilai negatif (−12,25), nilai pengamatan 0 tidak lagi dianggap sebagai outlier.
# Base plot object holding only the data and the aesthetic mapping.
base.plt <- nyc.data %>%
  ggplot(mapping = aes(x = "", y = kids2009))
# Layers are drawn in the order listed: points, then the box, then the
# error bars.
base.plt +
  geom_point(colour = "blue", alpha = 0.5) +
  geom_boxplot(colour = "black", fill = "purple", outlier.colour = "red") +
  stat_boxplot(geom = "errorbar") + # error bars
  labs(x = "", title = "Example Box Plot") +
  theme(plot.title = element_text(hjust = 0.5))
# Scatter plot of pubast00 (y) against kids2000 (x).
nyc.data %>%
  ggplot(mapping = aes(x = kids2000, y = pubast00)) +
  geom_point()
# Scatter plot with a linear (lm) fit drawn on top of the points.
nyc.data %>%
  ggplot(mapping = aes(x = kids2000, y = pubast00)) +
  geom_point() +
  geom_smooth(method = lm, color = "blue") +
  labs(title = "Linear Smoother") +
  theme(plot.title = element_text(hjust = 0.5))
## `geom_smooth()` using formula = 'y ~ x'
# Scatter plot with a loess fit; se = FALSE is passed to geom_smooth().
nyc.data %>%
  ggplot(mapping = aes(x = kids2000, y = pubast00)) +
  geom_point() +
  geom_smooth(method = loess, color = "blue", se = FALSE) +
  labs(title = "Loess Smoother") +
  theme(plot.title = element_text(hjust = 0.5))
## `geom_smooth()` using formula = 'y ~ x'
# Loess fit with span = 0.4; the curve is added before the points, so the
# points are drawn on top of it.
nyc.data %>%
  ggplot(mapping = aes(x = kids2000, y = pubast00)) +
  stat_smooth(method = "loess", span = 0.4, color = "blue", se = FALSE) +
  geom_point() +
  labs(title = "Loess Smoother - Span=0.4") +
  theme(plot.title = element_text(hjust = 0.5))
## `geom_smooth()` using formula = 'y ~ x'
# Loess fit with span = 0.2, drawn underneath the points.
nyc.data %>%
  ggplot(mapping = aes(x = kids2000, y = pubast00)) +
  geom_smooth(method = "loess", span = 0.2, color = "blue", se = FALSE) +
  geom_point() +
  labs(title = "Loess Smoother - Span=0.2") +
  theme(plot.title = element_text(hjust = 0.5))
## `geom_smooth()` using formula = 'y ~ x'
# geom_smooth() with no method given; the message printed below shows that
# ggplot2 picks method = 'loess' for these data.
nyc.data %>%
  ggplot(mapping = aes(x = kids2000, y = pubast00)) +
  geom_smooth(color = "blue", se = FALSE) +
  geom_point() +
  labs(title = "LOWESS Smoother") +
  theme(plot.title = element_text(hjust = 0.5))
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# Explicit loess fit with span = 0.4, drawn underneath the points.
nyc.data %>%
  ggplot(mapping = aes(x = kids2000, y = pubast00)) +
  geom_smooth(method = "loess", span = 0.4, color = "blue", se = FALSE) +
  geom_point() +
  labs(title = "LOWESS Smoother - Span = 0.4") +
  theme(plot.title = element_text(hjust = 0.5))
## `geom_smooth()` using formula = 'y ~ x'
# Overlay several fits in one plot; mapping a constant string to `color`
# inside aes() gives each curve its own legend entry.
nyc.data %>%
  ggplot(mapping = aes(x = kids2000, y = pubast00)) +
  # Loess fit labelled "Lowess"
  geom_smooth(aes(color = "Lowess"), method = "loess", se = FALSE) +
  # Linear regression fit
  geom_smooth(aes(color = "Linear"), method = "lm", se = FALSE) +
  # Second loess fit (optional; it coincides with the first one), dashed
  geom_smooth(
    aes(color = "Loess"),
    method = "loess",
    se = FALSE,
    linetype = "dashed"
  ) +
  # Scatter points, drawn last so they sit on top of the curves
  geom_point() +
  labs(title = "Comparison of Smoothing Methods", color = "Method") +
  theme(plot.title = element_text(hjust = 0.5))
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
# The two filters below need a `manbronx` column holding "Select" / "Rest",
# which no earlier visible step creates, so it is derived here first.
# NOTE(review): presumably "Select" means the Manhattan and Bronx
# sub-boroughs; this assumes the hundreds digit of bor_subb is the borough
# code with 1 = Bronx and 3 = Manhattan -- confirm against the data
# dictionary. A correct split reproduces the 20 / 35 rows shown by dim().
nyc.data <- nyc.data %>%
  mutate(
    manbronx = if_else((bor_subb %/% 100) %in% c(1, 3), "Select", "Rest")
  )
# Split the table into the selected sub-boroughs and the remainder.
nyc.select <- nyc.data %>% filter(manbronx == "Select")
nyc.rest <- nyc.data %>% filter(manbronx == "Rest")
dim(nyc.select)
## [1] 20 35
dim(nyc.rest)
## [1] 35 35
# Fit the same simple regression of pubast00 on kids2000 separately in each
# subset, then print the fit for the selected subset.
reg.select <- lm(formula = pubast00 ~ kids2000, data = nyc.select)
reg.rest <- lm(formula = pubast00 ~ kids2000, data = nyc.rest)
summary(object = reg.select)
##
## Call:
## lm(formula = pubast00 ~ kids2000, data = nyc.select)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.0486 -1.4829 0.3248 2.0625 4.7156
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.74763 1.84198 -2.577 0.019 *
## kids2000 0.47225 0.05071 9.313 2.64e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.406 on 18 degrees of freedom
## Multiple R-squared: 0.8281, Adjusted R-squared: 0.8186
## F-statistic: 86.73 on 1 and 18 DF, p-value: 2.639e-08
# Print the regression fit for the remaining sub-boroughs.
summary(object = reg.rest)
##
## Call:
## lm(formula = pubast00 ~ kids2000, data = nyc.rest)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.4415 -2.8231 -0.3905 1.9686 8.2359
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.01398 3.33123 -2.106 0.042939 *
## kids2000 0.37260 0.08648 4.308 0.000139 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.957 on 33 degrees of freedom
## Multiple R-squared: 0.36, Adjusted R-squared: 0.3406
## F-statistic: 18.56 on 1 and 33 DF, p-value: 0.0001391
# Chow test comparing the two subset regressions: the response and regressor
# of the selected subset come first, followed by those of the rest.
chow <- chow.test(
  y1 = nyc.select$pubast00,
  x1 = nyc.select$kids2000,
  y2 = nyc.rest$pubast00,
  x2 = nyc.rest$kids2000
)
chow
## F value d.f.1 d.f.2 P value
## 1.534013e+01 2.000000e+00 5.100000e+01 6.082099e-06
Reference:
https://spatialanalysis.github.io/handsonspatialdata/exploratory-data-analysis-1.html