packages = c('olsrr', 'corrplot', 'ggpubr', 'sf', 'spdep', 'GWmodel', 'tmap', 'tidyverse','geobr','ggplot2','dplyr','rio','data.table')
for (p in packages){
if(!require(p, character.only = T)){
install.packages(p)
}
library(p,character.only = T)
}
## Loading required package: olsrr
##
## Attaching package: 'olsrr'
## The following object is masked from 'package:datasets':
##
## rivers
## Loading required package: corrplot
## corrplot 0.84 loaded
## Loading required package: ggpubr
## Loading required package: ggplot2
## Loading required package: sf
## Linking to GEOS 3.7.2, GDAL 2.4.2, PROJ 5.2.0
## Loading required package: spdep
## Loading required package: sp
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
## Loading required package: GWmodel
## Loading required package: maptools
## Checking rgeos availability: TRUE
## Loading required package: robustbase
## Loading required package: Rcpp
## Loading required package: spatialreg
## Loading required package: Matrix
## Registered S3 methods overwritten by 'spatialreg':
## method from
## residuals.stsls spdep
## deviance.stsls spdep
## coef.stsls spdep
## print.stsls spdep
## summary.stsls spdep
## print.summary.stsls spdep
## residuals.gmsar spdep
## deviance.gmsar spdep
## coef.gmsar spdep
## fitted.gmsar spdep
## print.gmsar spdep
## summary.gmsar spdep
## print.summary.gmsar spdep
## print.lagmess spdep
## summary.lagmess spdep
## print.summary.lagmess spdep
## residuals.lagmess spdep
## deviance.lagmess spdep
## coef.lagmess spdep
## fitted.lagmess spdep
## logLik.lagmess spdep
## fitted.SFResult spdep
## print.SFResult spdep
## fitted.ME_res spdep
## print.ME_res spdep
## print.lagImpact spdep
## plot.lagImpact spdep
## summary.lagImpact spdep
## HPDinterval.lagImpact spdep
## print.summary.lagImpact spdep
## print.sarlm spdep
## summary.sarlm spdep
## residuals.sarlm spdep
## deviance.sarlm spdep
## coef.sarlm spdep
## vcov.sarlm spdep
## fitted.sarlm spdep
## logLik.sarlm spdep
## anova.sarlm spdep
## predict.sarlm spdep
## print.summary.sarlm spdep
## print.sarlm.pred spdep
## as.data.frame.sarlm.pred spdep
## residuals.spautolm spdep
## deviance.spautolm spdep
## coef.spautolm spdep
## fitted.spautolm spdep
## print.spautolm spdep
## summary.spautolm spdep
## logLik.spautolm spdep
## print.summary.spautolm spdep
## print.WXImpact spdep
## summary.WXImpact spdep
## print.summary.WXImpact spdep
## predict.SLX spdep
##
## Attaching package: 'spatialreg'
## The following objects are masked from 'package:spdep':
##
## anova.sarlm, as_dgRMatrix_listw, as_dsCMatrix_I, as_dsCMatrix_IrW,
## as_dsTMatrix_listw, as.spam.listw, bptest.sarlm, can.be.simmed,
## cheb_setup, coef.gmsar, coef.sarlm, coef.spautolm, coef.stsls,
## create_WX, deviance.gmsar, deviance.sarlm, deviance.spautolm,
## deviance.stsls, do_ldet, eigen_pre_setup, eigen_setup, eigenw,
## errorsarlm, fitted.gmsar, fitted.ME_res, fitted.sarlm,
## fitted.SFResult, fitted.spautolm, get.ClusterOption,
## get.coresOption, get.mcOption, get.VerboseOption,
## get.ZeroPolicyOption, GMargminImage, GMerrorsar, griffith_sone,
## gstsls, Hausman.test, HPDinterval.lagImpact, impacts, intImpacts,
## Jacobian_W, jacobianSetup, l_max, lagmess, lagsarlm, lextrB,
## lextrS, lextrW, lmSLX, logLik.sarlm, logLik.spautolm, LR.sarlm,
## LR1.sarlm, LR1.spautolm, LU_prepermutate_setup, LU_setup,
## Matrix_J_setup, Matrix_setup, mcdet_setup, MCMCsamp, ME, mom_calc,
## mom_calc_int2, moments_setup, powerWeights, predict.sarlm,
## predict.SLX, print.gmsar, print.ME_res, print.sarlm,
## print.sarlm.pred, print.SFResult, print.spautolm, print.stsls,
## print.summary.gmsar, print.summary.sarlm, print.summary.spautolm,
## print.summary.stsls, residuals.gmsar, residuals.sarlm,
## residuals.spautolm, residuals.stsls, sacsarlm, SE_classic_setup,
## SE_interp_setup, SE_whichMin_setup, set.ClusterOption,
## set.coresOption, set.mcOption, set.VerboseOption,
## set.ZeroPolicyOption, similar.listw, spam_setup, spam_update_setup,
## SpatialFiltering, spautolm, spBreg_err, spBreg_lag, spBreg_sac,
## stsls, subgraph_eigenw, summary.gmsar, summary.sarlm,
## summary.spautolm, summary.stsls, trW, vcov.sarlm, Wald1.sarlm
## Welcome to GWmodel version 2.1-4.
## The new version of GWmodel 2.1-4 now is readyLoading required package: tmap
## Loading required package: tidyverse
## ── Attaching packages ───────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ tibble 2.1.3 ✓ dplyr 0.8.4
## ✓ tidyr 1.0.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ✓ purrr 0.3.3
## ── Conflicts ──────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x tidyr::expand() masks Matrix::expand()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x tidyr::pack() masks Matrix::pack()
## x tidyr::unpack() masks Matrix::unpack()
## Loading required package: geobr
## Loading required package: rio
## Loading required package: data.table
##
## Attaching package: 'data.table'
##
## The following objects are masked from 'package:dplyr':
##
## between, first, last
##
## The following object is masked from 'package:purrr':
##
## transpose
install_formats()
## [1] TRUE
muni_sf <- st_read(dsn = "data/geospatial", layer = "muni_sf")
## Reading layer `muni_sf' from data source `/Users/daniel/Google Drive (daniel.soh.2017@smu.edu.sg)/SMU | GD/1. Geospatial Analytics and Applications/Take Home Ex/Ex04/data/geospatial' using driver `ESRI Shapefile'
## Simple feature collection with 5572 features and 4 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -73.99045 ymin: -33.75118 xmax: -28.83594 ymax: 5.271841
## CRS: 4674
brazil_cities <- fread("data/aspatial/BRAZIL_CITIES.csv")
## Warning in require_bit64_if_needed(ans): Some columns are type 'integer64'
## but package bit64 is not installed. Those columns will print as strange
## looking floating point data. There is no need to reload the data. Simply
## install.packages('bit64') to obtain the integer64 print method and print the
## data again.
dataDict <- fread ("data/aspatial/Data_Dictionary.csv")
Check missing fields
sum(is.na(brazil_cities))
## [1] 35131
colnames(brazil_cities)
## [1] "CITY" "STATE" "CAPITAL"
## [4] "IBGE_RES_POP" "IBGE_RES_POP_BRAS" "IBGE_RES_POP_ESTR"
## [7] "IBGE_DU" "IBGE_DU_URBAN" "IBGE_DU_RURAL"
## [10] "IBGE_POP" "IBGE_1" "IBGE_1-4"
## [13] "IBGE_5-9" "IBGE_10-14" "IBGE_15-59"
## [16] "IBGE_60+" "IBGE_PLANTED_AREA" "IBGE_CROP_PRODUCTION_$"
## [19] "IDHM Ranking 2010" "IDHM" "IDHM_Renda"
## [22] "IDHM_Longevidade" "IDHM_Educacao" "LONG"
## [25] "LAT" "ALT" "PAY_TV"
## [28] "FIXED_PHONES" "AREA" "REGIAO_TUR"
## [31] "CATEGORIA_TUR" "ESTIMATED_POP" "RURAL_URBAN"
## [34] "GVA_AGROPEC" "GVA_INDUSTRY" "GVA_SERVICES"
## [37] "GVA_PUBLIC" "GVA_TOTAL" "TAXES"
## [40] "GDP" "POP_GDP" "GDP_CAPITA"
## [43] "GVA_MAIN" "MUN_EXPENDIT" "COMP_TOT"
## [46] "COMP_A" "COMP_B" "COMP_C"
## [49] "COMP_D" "COMP_E" "COMP_F"
## [52] "COMP_G" "COMP_H" "COMP_I"
## [55] "COMP_J" "COMP_K" "COMP_L"
## [58] "COMP_M" "COMP_N" "COMP_O"
## [61] "COMP_P" "COMP_Q" "COMP_R"
## [64] "COMP_S" "COMP_T" "COMP_U"
## [67] "HOTELS" "BEDS" "Pr_Agencies"
## [70] "Pu_Agencies" "Pr_Bank" "Pu_Bank"
## [73] "Pr_Assets" "Pu_Assets" "Cars"
## [76] "Motorcycles" "Wheeled_tractor" "UBER"
## [79] "MAC" "WAL-MART" "POST_OFFICES"
popData <- brazil_cities %>% select('CITY','ESTIMATED_POP','GDP_CAPITA','LAT','LONG')
Check for missing fields
sum(is.na(popData))
## [1] 24
Check missing values in popData
popData[!complete.cases(popData), ]
## CITY ESTIMATED_POP GDP_CAPITA LAT LONG
## 1: Balneário Rincão 12570 17788.63 NA NA
## 2: Lagoa Dos Patos NA NA NA NA
## 3: Mojuí Dos Campos 15982 8831.56 NA NA
## 4: Paraíso Das Águas 5455 92163.92 NA NA
## 5: Pescaria Brava 10022 8341.33 NA NA
## 6: Pinhal Da Serra 1965 181845.18 NA NA
## 7: Pinto Bandeira 2968 18184.45 NA NA
## 8: Santa Teresinha 10345 NA -12.77285 -39.52114
## 9: Santa Terezinha NA 6796.16 NA NA
## 10: São Caetano NA NA NA NA
Fill missing values in popData
popData.cleaned <- popData
# Add missing lat long
popData.cleaned$LAT[popData$CITY == "Balneário Rincão"] <- -28.8344
popData.cleaned$LONG[popData$CITY == "Balneário Rincão"] <- -49.2361
popData.cleaned$LAT[popData$CITY == "Lagoa Dos Patos"] <- -31.0697
popData.cleaned $LONG[popData$CITY == "Lagoa Dos Patos"] <- -51.4725
popData.cleaned$LAT[popData$CITY == "Mojuí Dos Campos"] <- -2.68472
popData.cleaned$LONG[popData$CITY == "Mojuí Dos Campos"] <- -54.6431
popData.cleaned$LAT[popData$CITY == "Paraíso Das Águas"] <- -19.0257
popData.cleaned$LONG[popData$CITY == "Paraíso Das Águas"] <- -53.0102
popData.cleaned$LAT[popData$CITY == "Pescaria Brava"] <- -28.4247
popData.cleaned$LONG[popData$CITY == "Pescaria Brava"] <- -48.8956
popData.cleaned$LAT[popData$CITY == "Pinhal Da Serra"] <- -27.8747
popData.cleaned$LONG[popData$CITY == "Pinhal Da Serra"] <- -51.1733
popData.cleaned$LAT[popData$CITY == "Pinto Bandeira"] <- -29.0978
popData.cleaned$LONG[popData$CITY == "Pinto Bandeira"] <- -51.4503
popData.cleaned$LAT[popData$CITY == "Santa Terezinha"] <- -12.7498
popData.cleaned$LONG[popData$CITY == "Santa Terezinha"] <- -39.5183
popData.cleaned$LAT[popData$CITY == "São Caetano"] <- -8.33
popData.cleaned$LONG[popData$CITY == "São Caetano"] <- -36.1459
# Add population values
# https://www.citypopulation.de/en/brazil/regiaosudeste/admin/minas_gerais/3137304__lagoa_dos_patos/
popData.cleaned$ESTIMATED_POP[popData$CITY == "Lagoa Dos Patos"] <- 4102
# https://www.citypopulation.de/en/brazil/regiaonordeste/admin/pernambuco/2612802__santa_terezinha/
popData.cleaned$ESTIMATED_POP[popData$CITY == "Santa Terezinha"] <- 11815
# https://www.citypopulation.de/en/brazil/saopaulo/3548807__s%C3%A3o_caetano_do_sul/
popData.cleaned$ESTIMATED_POP[popData$CITY == "São Caetano"] <- 161127
# Add GDP capita values
# https://cidades.ibge.gov.br/brasil/mg/lagoa-dos-patos/panorama
popData.cleaned$GDP_CAPITA[popData$CITY == "Lagoa Dos Patos"] <- 10518.36
# https://cidades.ibge.gov.br/brasil/pb/santa-teresinha/panorama
popData.cleaned$GDP_CAPITA[popData$CITY == "Santa Teresinha"] <- 9757.39
popData.cleaned$GDP_CAPITA[popData$CITY == "São Caetano"] <- 82119.69
Check if there is any missing fields
popData.cleaned[!complete.cases(popData.cleaned), ]
## Empty data.table (0 rows and 5 cols): CITY,ESTIMATED_POP,GDP_CAPITA,LAT,LONG
No missing fields
popData.cleaned.sf <- st_as_sf(popData.cleaned,
coords = c("LONG", "LAT"),
crs=4326)
head(popData.cleaned.sf)
## Simple feature collection with 6 features and 3 fields
## geometry type: POINT
## dimension: XY
## bbox: xmin: -49.44055 ymin: -19.15585 xmax: -39.04755 ymax: -1.72347
## CRS: EPSG:4326
## CITY ESTIMATED_POP GDP_CAPITA geometry
## 1 Abadia De Goiás 8583 20664.57 POINT (-49.44055 -16.75881)
## 2 Abadia Dos Dourados 6972 25591.70 POINT (-47.39683 -18.48756)
## 3 Abadiânia 19614 15628.40 POINT (-48.71881 -16.18267)
## 4 Abaeté 23223 18250.42 POINT (-45.44619 -19.15585)
## 5 Abaetetuba 156292 8222.36 POINT (-48.8844 -1.72347)
## 6 Abaiara 11663 6370.41 POINT (-39.04755 -7.356977)
popData.cleaned.sf.setcrs <- st_transform(popData.cleaned.sf, 4674)
muniGDP <- st_join(muni_sf, popData.cleaned.sf.setcrs)
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
Set geometry null, use data tables to find duplicates
# duplicated(muniGDP.GeoNull) | duplicated(muniGDP.GeoNull, fromLast = TRUE)
# muniGDP.GeoNull <- muniGDP %>% st_set_geometry(NULL)
tm_shape(muniGDP) + tm_polygons(col= "GDP_CAPITA")
Check missing values
muniGDP[!complete.cases(muniGDP$GDP_CAPITA), ]
## Simple feature collection with 6 features and 7 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -53.53197 ymin: -33.61653 xmax: -37.3611 ymax: -7.356656
## CRS: 4674
## code_mn name_mn cod_stt abbrv_s CITY ESTIMATED_POP GDP_CAPITA
## 1612 2612802 Santa Terezinha 26 PE <NA> NA NA
## 2671 3137304 Lagoa Dos Patos 31 MG <NA> NA NA
## 4544 4215679 Santa Terezinha 42 SC <NA> NA NA
## 4607 4300001 Lagoa Mirim 43 RS <NA> NA NA
## 4608 4300002 Lagoa Dos Patos 43 RS <NA> NA NA
## 5303 5107776 Santa Terezinha 51 MT <NA> NA NA
## geometry
## 1612 MULTIPOLYGON (((-37.43182 -...
## 2671 MULTIPOLYGON (((-44.56744 -...
## 4544 MULTIPOLYGON (((-50.01573 -...
## 4607 MULTIPOLYGON (((-52.62241 -...
## 4608 MULTIPOLYGON (((-51.2719 -3...
## 5303 MULTIPOLYGON (((-50.84289 -...
# apply(muniGDP, 2, function(x) any(is.na(x)))
tm_shape(muni_sf)+
tm_polygons() +
tm_shape(popData.cleaned.sf)+
tm_dots(col="orange")
# Available data sets
datasets <- list_geobr()
print(datasets, n=21)
## # A tibble: 22 x 4
## `function` geography years source
## <chr> <chr> <chr> <chr>
## 1 `read_country` Country 1872, 1900, 1911, 1920, 1933… IBGE
## 2 `read_region` Region 2000, 2001, 2010, 2013, 2014… IBGE
## 3 `read_state` States 1872, 1900, 1911, 1920, 1933… IBGE
## 4 `read_meso_regi… Meso region 2000, 2001, 2010, 2013, 2014… IBGE
## 5 `read_micro_reg… Micro region 2000, 2001, 2010, 2013, 2014… IBGE
## 6 `read_intermedi… Intermediate region 2017 IBGE
## 7 `read_immediate… Immediate region 2017 IBGE
## 8 `read_municipal… Municipality 1872, 1900, 1911, 1920, 1933… IBGE
## 9 `read_weighting… Census weighting ar… 2010 IBGE
## 10 `read_census_tr… Census tract (setor… 2000, 2010 IBGE
## 11 `read_municipal… Municipality seats … 1872, 1900, 1911, 1920, 1933… IBGE
## 12 `read_statistic… Statistical Grid of… 2010 IBGE
## 13 `read_metro_are… Metropolitan areas 1970, 2001, 2002, 2003, 2005… IBGE
## 14 `read_urban_are… Urban footprints 2005, 2015 IBGE
## 15 `read_amazon` Brazil's Legal Amaz… 2012 MMA
## 16 `read_biomes` Biomes 2004, 2019 IBGE
## 17 `read_conservat… Environmental Conse… 201909 MMA
## 18 `read_disaster_… Disaster risk areas 2010 CEMADEN …
## 19 `read_indigenou… Indigenous lands 201907 FUNAI
## 20 `read_semiarid` Semi Arid region 2005, 2017 IBGE
## 21 `read_health_fa… Health facilities 2015 CNES, Da…
## # … with 1 more row