# Packages used by this analysis. Each one is installed on first use if it is
# not already available, and then attached.
packages <- c(
  "olsrr", "corrplot", "ggpubr", "sf", "spdep", "GWmodel", "tmap",
  "tidyverse", "geobr", "fastDummies"
)
for (p in packages) {
  # require() returns FALSE (instead of raising an error) when the package is
  # missing, which is what lets us fall through to install.packages().
  # TRUE is spelled out because `T` is an ordinary variable that can be
  # reassigned.
  if (!require(p, character.only = TRUE)) {
    install.packages(p)
  }
  library(p, character.only = TRUE)
}
## Loading required package: olsrr
##
## Attaching package: 'olsrr'
## The following object is masked from 'package:datasets':
##
## rivers
## Loading required package: corrplot
## corrplot 0.84 loaded
## Loading required package: ggpubr
## Loading required package: ggplot2
## Loading required package: sf
## Linking to GEOS 3.6.1, GDAL 2.2.3, PROJ 4.9.3
## Loading required package: spdep
## Loading required package: sp
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
## Loading required package: GWmodel
## Loading required package: maptools
## Checking rgeos availability: TRUE
## Loading required package: robustbase
## Loading required package: Rcpp
## Loading required package: spatialreg
## Loading required package: Matrix
## Registered S3 methods overwritten by 'spatialreg':
## method from
## residuals.stsls spdep
## deviance.stsls spdep
## coef.stsls spdep
## print.stsls spdep
## summary.stsls spdep
## print.summary.stsls spdep
## residuals.gmsar spdep
## deviance.gmsar spdep
## coef.gmsar spdep
## fitted.gmsar spdep
## print.gmsar spdep
## summary.gmsar spdep
## print.summary.gmsar spdep
## print.lagmess spdep
## summary.lagmess spdep
## print.summary.lagmess spdep
## residuals.lagmess spdep
## deviance.lagmess spdep
## coef.lagmess spdep
## fitted.lagmess spdep
## logLik.lagmess spdep
## fitted.SFResult spdep
## print.SFResult spdep
## fitted.ME_res spdep
## print.ME_res spdep
## print.lagImpact spdep
## plot.lagImpact spdep
## summary.lagImpact spdep
## HPDinterval.lagImpact spdep
## print.summary.lagImpact spdep
## print.sarlm spdep
## summary.sarlm spdep
## residuals.sarlm spdep
## deviance.sarlm spdep
## coef.sarlm spdep
## vcov.sarlm spdep
## fitted.sarlm spdep
## logLik.sarlm spdep
## anova.sarlm spdep
## predict.sarlm spdep
## print.summary.sarlm spdep
## print.sarlm.pred spdep
## as.data.frame.sarlm.pred spdep
## residuals.spautolm spdep
## deviance.spautolm spdep
## coef.spautolm spdep
## fitted.spautolm spdep
## print.spautolm spdep
## summary.spautolm spdep
## logLik.spautolm spdep
## print.summary.spautolm spdep
## print.WXImpact spdep
## summary.WXImpact spdep
## print.summary.WXImpact spdep
## predict.SLX spdep
##
## Attaching package: 'spatialreg'
## The following objects are masked from 'package:spdep':
##
## anova.sarlm, as.spam.listw, as_dgRMatrix_listw, as_dsCMatrix_I,
## as_dsCMatrix_IrW, as_dsTMatrix_listw, bptest.sarlm, can.be.simmed,
## cheb_setup, coef.gmsar, coef.sarlm, coef.spautolm, coef.stsls,
## create_WX, deviance.gmsar, deviance.sarlm, deviance.spautolm,
## deviance.stsls, do_ldet, eigen_pre_setup, eigen_setup, eigenw,
## errorsarlm, fitted.gmsar, fitted.ME_res, fitted.sarlm,
## fitted.SFResult, fitted.spautolm, get.ClusterOption,
## get.coresOption, get.mcOption, get.VerboseOption,
## get.ZeroPolicyOption, GMargminImage, GMerrorsar, griffith_sone,
## gstsls, Hausman.test, HPDinterval.lagImpact, impacts, intImpacts,
## Jacobian_W, jacobianSetup, l_max, lagmess, lagsarlm, lextrB,
## lextrS, lextrW, lmSLX, logLik.sarlm, logLik.spautolm, LR.sarlm,
## LR1.sarlm, LR1.spautolm, LU_prepermutate_setup, LU_setup,
## Matrix_J_setup, Matrix_setup, mcdet_setup, MCMCsamp, ME, mom_calc,
## mom_calc_int2, moments_setup, powerWeights, predict.sarlm,
## predict.SLX, print.gmsar, print.ME_res, print.sarlm,
## print.sarlm.pred, print.SFResult, print.spautolm, print.stsls,
## print.summary.gmsar, print.summary.sarlm, print.summary.spautolm,
## print.summary.stsls, residuals.gmsar, residuals.sarlm,
## residuals.spautolm, residuals.stsls, sacsarlm, SE_classic_setup,
## SE_interp_setup, SE_whichMin_setup, set.ClusterOption,
## set.coresOption, set.mcOption, set.VerboseOption,
## set.ZeroPolicyOption, similar.listw, spam_setup, spam_update_setup,
## SpatialFiltering, spautolm, spBreg_err, spBreg_lag, spBreg_sac,
## stsls, subgraph_eigenw, summary.gmsar, summary.sarlm,
## summary.spautolm, summary.stsls, trW, vcov.sarlm, Wald1.sarlm
## Welcome to GWmodel version 2.1-4.
## The new version of GWmodel 2.1-4 now is readyLoading required package: tmap
## Loading required package: tidyverse
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v tibble 2.1.3 v dplyr 0.8.5
## v tidyr 1.0.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## v purrr 0.3.3
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x tidyr::expand() masks Matrix::expand()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x tidyr::pack() masks Matrix::pack()
## x tidyr::unpack() masks Matrix::unpack()
## Loading required package: geobr
## Loading required package: fastDummies
The data set is loaded into a data frame named brazil. Because the source data is in Portuguese, it contains special (accented) characters. Hence, instead of relying on the default encoding, the encoding is explicitly set to Latin-1.
# Read the aspatial city attribute table; the first row holds the column names.
# NOTE(review): base R documents "latin1" and "UTF-8" as the recognised values
# of `encoding`; "latin-1" may be silently ignored -- confirm against the
# actual encoding of the CSV before changing it.
brazil <- read.csv(
  file = "data/aspatial/BRAZIL_CITIES.csv",
  header = TRUE,
  encoding = "latin-1"
)
anyNA() returns TRUE (see the output below), which shows that the data contains missing values. Hence, a summary is produced to identify which fields contain them.
# TRUE when at least one value in `brazil` is NA.
anyNA(brazil)
## [1] TRUE
# Per-column summary; the "NA's" entries show which columns have missing values.
summary(brazil)
## CITY STATE CAPITAL IBGE_RES_POP
## Bom Jesus : 5 MG : 853 Min. :0.000000 Min. : 805
## São Domingos: 5 SP : 645 1st Qu.:0.000000 1st Qu.: 5235
## Bonito : 4 RS : 498 Median :0.000000 Median : 10934
## Planalto : 4 BA : 418 Mean :0.004845 Mean : 34278
## Santa Helena: 4 PR : 399 3rd Qu.:0.000000 3rd Qu.: 23424
## Santa Inês : 4 SC : 295 Max. :1.000000 Max. :11253503
## (Other) :5547 (Other):2465 NA's :8
## IBGE_RES_POP_BRAS IBGE_RES_POP_ESTR IBGE_DU IBGE_DU_URBAN
## Min. : 805 Min. : 0.0 Min. : 239 Min. : 60
## 1st Qu.: 5230 1st Qu.: 0.0 1st Qu.: 1572 1st Qu.: 874
## Median : 10926 Median : 0.0 Median : 3174 Median : 1846
## Mean : 34200 Mean : 77.5 Mean : 10303 Mean : 8859
## 3rd Qu.: 23390 3rd Qu.: 10.0 3rd Qu.: 6726 3rd Qu.: 4624
## Max. :11133776 Max. :119727.0 Max. :3576148 Max. :3548433
## NA's :8 NA's :8 NA's :10 NA's :10
## IBGE_DU_RURAL IBGE_POP IBGE_1 IBGE_1.4
## Min. : 3 Min. : 174 Min. : 0.0 Min. : 5
## 1st Qu.: 487 1st Qu.: 2801 1st Qu.: 38.0 1st Qu.: 158
## Median : 931 Median : 6170 Median : 92.0 Median : 376
## Mean : 1463 Mean : 27595 Mean : 383.3 Mean : 1544
## 3rd Qu.: 1832 3rd Qu.: 15302 3rd Qu.: 232.0 3rd Qu.: 951
## Max. :33809 Max. :10463636 Max. :129464.0 Max. :514794
## NA's :81 NA's :8 NA's :8 NA's :8
## IBGE_5.9 IBGE_10.14 IBGE_15.59 IBGE_60.
## Min. : 7 Min. : 12 Min. : 94 Min. : 29
## 1st Qu.: 220 1st Qu.: 259 1st Qu.: 1734 1st Qu.: 341
## Median : 516 Median : 588 Median : 3841 Median : 722
## Mean : 2069 Mean : 2381 Mean : 18212 Mean : 3004
## 3rd Qu.: 1300 3rd Qu.: 1478 3rd Qu.: 9628 3rd Qu.: 1724
## Max. :684443 Max. :783702 Max. :7058221 Max. :1293012
## NA's :8 NA's :8 NA's :8 NA's :8
## IBGE_PLANTED_AREA IBGE_CROP_PRODUCTION_. IDHM.Ranking.2010 IDHM
## Min. : 0.0 Min. : 0 Min. : 1 Min. :0.4180
## 1st Qu.: 910.2 1st Qu.: 2326 1st Qu.:1392 1st Qu.:0.5990
## Median : 3471.5 Median : 13846 Median :2783 Median :0.6650
## Mean : 14179.9 Mean : 57384 Mean :2783 Mean :0.6592
## 3rd Qu.: 11194.2 3rd Qu.: 55619 3rd Qu.:4174 3rd Qu.:0.7180
## Max. :1205669.0 Max. :3274885 Max. :5565 Max. :0.8620
## NA's :3 NA's :3 NA's :8 NA's :8
## IDHM_Renda IDHM_Longevidade IDHM_Educacao LONG
## Min. :0.4000 Min. :0.6720 Min. :0.2070 Min. :-72.92
## 1st Qu.:0.5720 1st Qu.:0.7690 1st Qu.:0.4900 1st Qu.:-50.87
## Median :0.6540 Median :0.8080 Median :0.5600 Median :-46.52
## Mean :0.6429 Mean :0.8016 Mean :0.5591 Mean :-46.23
## 3rd Qu.:0.7070 3rd Qu.:0.8360 3rd Qu.:0.6310 3rd Qu.:-41.41
## Max. :0.8910 Max. :0.8940 Max. :0.8250 Max. :-32.44
## NA's :8 NA's :8 NA's :8
## LAT ALT PAY_TV FIXED_PHONES
## Min. :-33.688 Min. : 0.0 Min. : 1 Min. : 3
## 1st Qu.:-22.843 1st Qu.: 169.8 1st Qu.: 88 1st Qu.: 119
## Median :-18.091 Median : 406.5 Median : 247 Median : 327
## Mean :-16.451 Mean : 893.8 Mean : 3094 Mean : 6567
## 3rd Qu.: -8.490 3rd Qu.: 628.9 3rd Qu.: 815 3rd Qu.: 1151
## Max. : 4.585 Max. :874579.0 Max. :2047668 Max. :5543127
## NA's :9 NA's :3 NA's :3
## AREA REGIAO_TUR CATEGORIA_TUR
## Min. : 3.57 :2288 :2288
## 1st Qu.: 204.44 Corredores Das Águas: 59 A: 51
## Median : 416.59 Vale Do Contestado : 45 B: 168
## Mean : 1517.44 Amazônia Atlântica : 40 C: 521
## 3rd Qu.: 1026.57 Araguaia-Tocantins : 39 D:1892
## Max. :159533.33 Cariri : 37 E: 653
## NA's :3 (Other) :3065
## ESTIMATED_POP RURAL_URBAN GVA_AGROPEC
## Min. : 786 : 3 Min. : 0
## 1st Qu.: 5454 Intermediário Adjacente: 686 1st Qu.: 4189
## Median : 11590 Intermediário Remoto : 60 Median : 20426
## Mean : 37432 Rural Adjacente :3040 Mean : 47271
## 3rd Qu.: 25296 Rural Remoto : 323 3rd Qu.: 51227
## Max. :12176866 Sem classificação : 5 Max. :1402282
## NA's :3 Urbano :1456 NA's :3
## GVA_INDUSTRY GVA_SERVICES GVA_PUBLIC GVA_TOTAL
## Min. : 1 Min. : 2 Min. : 7 Min. : 17
## 1st Qu.: 1726 1st Qu.: 10112 1st Qu.: 17267 1st Qu.: 42253
## Median : 7424 Median : 31211 Median : 35866 Median : 119492
## Mean : 175928 Mean : 489451 Mean : 123768 Mean : 832987
## 3rd Qu.: 41022 3rd Qu.: 115406 3rd Qu.: 89245 3rd Qu.: 313963
## Max. :63306755 Max. :464656988 Max. :41902893 Max. :569910503
## NA's :3 NA's :3 NA's :3 NA's :3
## TAXES GDP POP_GDP GDP_CAPITA
## Min. : -14159 Min. : 15 Min. : 815 Min. : 3191
## 1st Qu.: 1305 1st Qu.: 43709 1st Qu.: 5483 1st Qu.: 9058
## Median : 5100 Median : 125153 Median : 11578 Median : 15870
## Mean : 118864 Mean : 954584 Mean : 36998 Mean : 21126
## 3rd Qu.: 22197 3rd Qu.: 329539 3rd Qu.: 25085 3rd Qu.: 26155
## Max. :117125387 Max. :687035890 Max. :12038175 Max. :314638
## NA's :3 NA's :3 NA's :3 NA's :3
## GVA_MAIN
## Administração, defesa, educação e saúde públicas e seguridade social :2725
## Demais serviços :1477
## Agricultura, inclusive apoio à agricultura e a pós colheita : 735
## Indústrias de transformação : 261
## Pecuária, inclusive apoio à pecuária : 161
## Eletricidade e gás, água, esgoto, atividades de gestão de resíduos e descontaminação: 98
## (Other) : 116
## MUN_EXPENDIT COMP_TOT COMP_A COMP_B
## Min. :1.421e+06 Min. : 6.0 Min. : 0.00 Min. : 0.000
## 1st Qu.:1.573e+07 1st Qu.: 68.0 1st Qu.: 1.00 1st Qu.: 0.000
## Median :2.746e+07 Median : 162.0 Median : 2.00 Median : 0.000
## Mean :1.043e+08 Mean : 906.8 Mean : 18.25 Mean : 1.852
## 3rd Qu.:5.666e+07 3rd Qu.: 448.0 3rd Qu.: 8.00 3rd Qu.: 2.000
## Max. :4.577e+10 Max. :530446.0 Max. :1948.00 Max. :274.000
## NA's :1492 NA's :3 NA's :3 NA's :3
## COMP_C COMP_D COMP_E COMP_F
## Min. : 0.00 Min. : 0.0000 Min. : 0.000 Min. : 0.00
## 1st Qu.: 3.00 1st Qu.: 0.0000 1st Qu.: 0.000 1st Qu.: 1.00
## Median : 11.00 Median : 0.0000 Median : 0.000 Median : 4.00
## Mean : 73.44 Mean : 0.4262 Mean : 2.029 Mean : 43.26
## 3rd Qu.: 39.00 3rd Qu.: 0.0000 3rd Qu.: 1.000 3rd Qu.: 15.00
## Max. :31566.00 Max. :332.0000 Max. :657.000 Max. :25222.00
## NA's :3 NA's :3 NA's :3 NA's :3
## COMP_G COMP_H COMP_I COMP_J
## Min. : 1.0 Min. : 0 Min. : 0.00 Min. : 0.00
## 1st Qu.: 32.0 1st Qu.: 1 1st Qu.: 2.00 1st Qu.: 0.00
## Median : 74.5 Median : 7 Median : 7.00 Median : 1.00
## Mean : 348.0 Mean : 41 Mean : 55.88 Mean : 24.74
## 3rd Qu.: 199.0 3rd Qu.: 25 3rd Qu.: 24.00 3rd Qu.: 5.00
## Max. :150633.0 Max. :19515 Max. :29290.00 Max. :38720.00
## NA's :3 NA's :3 NA's :3 NA's :3
## COMP_K COMP_L COMP_M COMP_N
## Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.0
## 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 1.00 1st Qu.: 1.0
## Median : 0.00 Median : 0.00 Median : 4.00 Median : 4.0
## Mean : 15.55 Mean : 15.14 Mean : 51.29 Mean : 83.7
## 3rd Qu.: 2.00 3rd Qu.: 3.00 3rd Qu.: 13.00 3rd Qu.: 14.0
## Max. :23738.00 Max. :14003.00 Max. :49181.00 Max. :76757.0
## NA's :3 NA's :3 NA's :3 NA's :3
## COMP_O COMP_P COMP_Q COMP_R
## Min. : 0.000 Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 2.000 1st Qu.: 2.00 1st Qu.: 1.00 1st Qu.: 0.00
## Median : 2.000 Median : 6.00 Median : 3.00 Median : 2.00
## Mean : 3.269 Mean : 30.96 Mean : 34.15 Mean : 12.18
## 3rd Qu.: 3.000 3rd Qu.: 17.00 3rd Qu.: 12.00 3rd Qu.: 6.00
## Max. :204.000 Max. :16030.00 Max. :22248.00 Max. :6687.00
## NA's :3 NA's :3 NA's :3 NA's :3
## COMP_S COMP_T COMP_U HOTELS
## Min. : 0.00 Min. :0 Min. : 0.00000 Min. : 1.000
## 1st Qu.: 5.00 1st Qu.:0 1st Qu.: 0.00000 1st Qu.: 1.000
## Median : 12.00 Median :0 Median : 0.00000 Median : 1.000
## Mean : 51.61 Mean :0 Mean : 0.05027 Mean : 3.131
## 3rd Qu.: 31.00 3rd Qu.:0 3rd Qu.: 0.00000 3rd Qu.: 3.000
## Max. :24832.00 Max. :0 Max. :123.00000 Max. :97.000
## NA's :3 NA's :3 NA's :3 NA's :4686
## BEDS Pr_Agencies Pu_Agencies Pr_Bank
## Min. : 2.0 Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 40.0 1st Qu.: 0.000 1st Qu.: 1.000 1st Qu.: 0.000
## Median : 82.0 Median : 1.000 Median : 2.000 Median : 1.000
## Mean : 257.5 Mean : 3.383 Mean : 2.829 Mean : 1.312
## 3rd Qu.: 200.0 3rd Qu.: 2.000 3rd Qu.: 2.000 3rd Qu.: 2.000
## Max. :13247.0 Max. :1693.000 Max. :626.000 Max. :83.000
## NA's :4686 NA's :2231 NA's :2231 NA's :2231
## Pu_Bank Pr_Assets Pu_Assets Cars
## Min. :0.00 Min. :0.000e+00 Min. :0.000e+00 Min. : 2
## 1st Qu.:1.00 1st Qu.:0.000e+00 1st Qu.:4.047e+07 1st Qu.: 602
## Median :2.00 Median :3.231e+07 Median :1.339e+08 Median : 1438
## Mean :1.58 Mean :9.187e+09 Mean :6.005e+09 Mean : 9859
## 3rd Qu.:2.00 3rd Qu.:1.148e+08 3rd Qu.:4.970e+08 3rd Qu.: 4086
## Max. :8.00 Max. :1.950e+13 Max. :8.020e+12 Max. :5740995
## NA's :2231 NA's :2231 NA's :2231 NA's :11
## Motorcycles Wheeled_tractor UBER MAC
## Min. : 4 Min. : 0.000 Min. :1 Min. : 1.000
## 1st Qu.: 591 1st Qu.: 0.000 1st Qu.:1 1st Qu.: 1.000
## Median : 1285 Median : 0.000 Median :1 Median : 2.000
## Mean : 4879 Mean : 5.754 Mean :1 Mean : 4.277
## 3rd Qu.: 3294 3rd Qu.: 1.000 3rd Qu.:1 3rd Qu.: 3.000
## Max. :1134570 Max. :3236.000 Max. :1 Max. :130.000
## NA's :11 NA's :11 NA's :5448 NA's :5407
## WAL.MART POST_OFFICES
## Min. : 1.000 Min. : 1.000
## 1st Qu.: 1.000 1st Qu.: 1.000
## Median : 1.000 Median : 1.000
## Mean : 2.059 Mean : 2.081
## 3rd Qu.: 1.750 3rd Qu.: 2.000
## Max. :26.000 Max. :225.000
## NA's :5471 NA's :120
Currently, the brazil data frame is aspatial. We will convert it to an sf object. The code chunk below converts the brazil data frame into a simple feature data frame by using st_as_sf() of the sf package.
Notice that st_transform() of the sf package is used to convert the coordinates from crs = 4674 to crs = 5530.
# Build point features from the LONG/LAT columns (declared as EPSG:4674) and
# reproject them to EPSG:5530 in a single nested call.
brazil_sf <- st_transform(
  st_as_sf(brazil, coords = c("LONG", "LAT"), crs = 4674),
  crs = 5530
)
# Print the result to check the geometry type and the CRS.
brazil_sf
## Simple feature collection with 5573 features and 79 fields
## geometry type: POINT
## dimension: XY
## bbox: xmin: 2913284 ymin: 6270906 xmax: 7394984 ymax: 10510050
## epsg (SRID): 5530
## proj4string: +proj=poly +lat_0=0 +lon_0=-54 +x_0=5000000 +y_0=10000000 +ellps=aust_SA +towgs84=-67.35,3.88,-38.22,0,0,0,0 +units=m +no_defs
## First 10 features:
## CITY STATE CAPITAL IBGE_RES_POP IBGE_RES_POP_BRAS
## 1 Abadia De Goiás GO 0 6876 6876
## 2 Abadia Dos Dourados MG 0 6704 6704
## 3 Abadiânia GO 0 15757 15609
## 4 Abaeté MG 0 22690 22690
## 5 Abaetetuba PA 0 141100 141040
## 6 Abaiara CE 0 10496 10496
## 7 Abaíra BA 0 8316 8316
## 8 Abaré BA 0 17064 17064
## 9 Abatiá PR 0 7764 7764
## 10 Abdon Batista SC 0 2653 2653
## IBGE_RES_POP_ESTR IBGE_DU IBGE_DU_URBAN IBGE_DU_RURAL IBGE_POP IBGE_1
## 1 0 2137 1546 591 5300 69
## 2 0 2328 1481 847 4154 38
## 3 148 4655 3233 1422 10656 139
## 4 0 7694 6667 1027 18464 176
## 5 60 31061 19057 12004 82956 1354
## 6 0 2791 1251 1540 4538 98
## 7 0 2572 1193 1379 3725 37
## 8 0 4332 2379 1953 8994 167
## 9 0 2499 1877 622 5685 69
## 10 0 848 234 614 724 12
## IBGE_1.4 IBGE_5.9 IBGE_10.14 IBGE_15.59 IBGE_60. IBGE_PLANTED_AREA
## 1 318 438 517 3542 416 319
## 2 207 260 351 2709 589 4479
## 3 650 894 1087 6896 990 10307
## 4 856 1233 1539 11979 2681 1862
## 5 5567 7618 8905 53516 5996 25200
## 6 323 421 483 2631 582 2598
## 7 156 263 277 2319 673 895
## 8 733 978 927 5386 803 2058
## 9 302 370 483 3650 811 1197
## 10 32 49 63 479 89 5502
## IBGE_CROP_PRODUCTION_. IDHM.Ranking.2010 IDHM IDHM_Renda IDHM_Longevidade
## 1 1843 1689 0.708 0.687 0.830
## 2 18017 2207 0.690 0.693 0.839
## 3 33085 2202 0.690 0.671 0.841
## 4 7502 1994 0.698 0.720 0.848
## 5 700872 3530 0.628 0.579 0.798
## 6 5234 3522 0.628 0.540 0.748
## 7 3999 4086 0.603 0.577 0.746
## 8 22761 4756 0.575 0.533 0.776
## 9 9943 2258 0.687 0.676 0.804
## 10 26195 2092 0.690 0.660 0.812
## IDHM_Educacao ALT PAY_TV FIXED_PHONES AREA
## 1 0.622 893.60 360 842 147.26
## 2 0.563 753.12 77 296 881.06
## 3 0.579 1017.55 227 720 1045.13
## 4 0.556 644.74 1230 1716 1817.07
## 5 0.537 10.12 3389 1218 1610.65
## 6 0.612 403.11 29 34 180.08
## 7 0.510 674.22 952 335 538.68
## 8 0.460 316.38 51 222 1604.92
## 9 0.596 579.30 55 392 228.72
## 10 0.625 720.98 109 260 237.16
## REGIAO_TUR CATEGORIA_TUR ESTIMATED_POP
## 1 8583
## 2 Caminhos Do Cerrado D 6972
## 3 Região Turística Do Ouro E Cristais C 19614
## 4 Lago De Três Marias D 23223
## 5 Araguaia-Tocantins D 156292
## 6 11663
## 7 Chapada Diamantina D 8767
## 8 19814
## 9 7507
## 10 Vale Do Contestado D 2577
## RURAL_URBAN GVA_AGROPEC GVA_INDUSTRY GVA_SERVICES GVA_PUBLIC GVA_TOTAL
## 1 Urbano 6.20 27991.25 74750.32 36915.04 145857.60
## 2 Rural Adjacente 50524.57 25917.70 62689.23 28083.79 167215.28
## 3 Rural Adjacente 42.84 16728.30 138198.58 63396.20 261161.91
## 4 Urbano 113824.60 31002.62 172.33 86081.41 403241.27
## 5 Urbano 140463.72 58610.00 468128.69 486872.40 1154074.81
## 6 Rural Adjacente 4435.16 5.88 22.81 35989.96 69108.67
## 7 Rural Remoto 12.41 3437.43 17990.74 28463.74 62304.83
## 8 Rural Remoto 9176.40 6.70 36921.84 65.75 118.55
## 9 Rural Adjacente 73340.52 8839.71 42999.37 34103.49 159283.08
## 10 Rural Adjacente 24996.75 3578.87 16011.10 17842.64 62429.36
## TAXES GDP POP_GDP GDP_CAPITA
## 1 20554.20 166.41 8053 20664.57
## 2 12873.50 180.09 7037 25591.70
## 3 26822.58 287984.49 18427 15628.40
## 4 26994.09 430235.36 23574 18250.42
## 5 95180.48 1249255.29 151934 8222.36
## 6 4042.79 73151.46 11483 6370.41
## 7 2019.77 64324.59 9212 6982.70
## 8 6.21 124754.26 19939 6256.80
## 9 5.77 165048.21 7795 21173.60
## 10 2312.65 64742.01 2617 24739.02
## GVA_MAIN
## 1 Demais serviços
## 2 Demais serviços
## 3 Demais serviços
## 4 Demais serviços
## 5 Administração, defesa, educação e saúde públicas e seguridade social
## 6 Administração, defesa, educação e saúde públicas e seguridade social
## 7 Administração, defesa, educação e saúde públicas e seguridade social
## 8 Administração, defesa, educação e saúde públicas e seguridade social
## 9 Agricultura, inclusive apoio à agricultura e a pós colheita
## 10 Administração, defesa, educação e saúde públicas e seguridade social
## MUN_EXPENDIT COMP_TOT COMP_A COMP_B COMP_C COMP_D COMP_E COMP_F COMP_G
## 1 28227691 284 5 1 56 0 2 29 110
## 2 17909274 476 6 6 30 1 2 34 190
## 3 37513019 288 5 9 26 0 2 7 117
## 4 NA 621 18 1 40 0 1 20 303
## 5 NA 931 4 2 43 0 1 27 500
## 6 NA 86 1 0 4 0 0 6 48
## 7 NA 191 6 0 8 0 1 4 97
## 8 NA 87 2 0 3 0 0 0 71
## 9 NA 285 5 0 20 0 1 10 133
## 10 19506956 69 2 0 4 0 0 2 35
## COMP_H COMP_I COMP_J COMP_K COMP_L COMP_M COMP_N COMP_O COMP_P COMP_Q COMP_R
## 1 26 4 5 0 2 10 12 4 6 6 1
## 2 70 28 11 0 4 15 29 2 9 14 6
## 3 12 57 2 1 0 7 15 3 11 5 1
## 4 62 30 9 6 4 28 27 2 15 19 9
## 5 16 31 6 1 1 22 16 2 155 33 15
## 6 2 10 2 0 0 2 3 2 0 2 0
## 7 5 5 3 1 0 5 5 2 8 1 2
## 8 0 1 1 0 0 0 1 2 0 2 0
## 9 18 14 8 0 4 11 26 2 8 9 4
## 10 8 3 1 1 0 4 0 2 1 3 0
## COMP_S COMP_T COMP_U HOTELS BEDS Pr_Agencies Pu_Agencies Pr_Bank Pu_Bank
## 1 5 0 0 NA NA NA NA NA NA
## 2 19 0 0 NA NA NA NA NA NA
## 3 8 0 0 1 34 1 1 1 1
## 4 27 0 0 NA NA 2 2 2 2
## 5 56 0 0 NA NA 2 4 2 4
## 6 4 0 0 NA NA NA NA NA NA
## 7 38 0 0 1 24 NA NA NA NA
## 8 4 0 0 NA NA 1 0 1 0
## 9 12 0 0 NA NA 0 1 0 1
## 10 3 0 0 NA NA 0 1 0 1
## Pr_Assets Pu_Assets Cars Motorcycles Wheeled_tractor UBER MAC WAL.MART
## 1 NA NA 2158 1246 0 NA NA NA
## 2 NA NA 2227 1142 0 NA NA NA
## 3 33724584 67091904 2838 1426 0 NA NA NA
## 4 44974716 371922572 6928 2953 0 NA NA NA
## 5 76181384 800078483 5277 25661 0 NA NA NA
## 6 NA NA 553 1674 0 NA NA NA
## 7 NA NA 896 696 0 NA NA NA
## 8 21823314 0 613 1532 0 NA NA NA
## 9 0 45976288 2168 912 0 NA NA NA
## 10 0 42909056 976 345 2 NA NA NA
## POST_OFFICES geometry
## 1 1 POINT (5486143 8140850)
## 2 1 POINT (5697257 7942355)
## 3 3 POINT (5564742 8202931)
## 4 4 POINT (5899497 7859088)
## 5 2 POINT (5569259 9808703)
## 6 1 POINT (6650627 9158923)
## 7 1 POINT (6336662 8501282)
## 8 1 POINT (6637397 9003125)
## 9 1 POINT (5377218 7417426)
## 10 1 POINT (5293673 6941399)
This data set is obtained from the internet through the geobr package. The geobr geospatial data is available for several years, but for this exercise we will use the data for 2016. Hence, year = 2016 is specified to ensure that this year's data is extracted.
# Download the 2016 municipality boundaries with geobr. No municipality code is
# given, so (per the log below) the data for the whole country is loaded.
municipality <- read_municipality(year = 2016)
## Using year 2016
## Loading data for the whole country. This might take a few minutes.
##
|
| | 0%
|
|=== | 4%
|
|===== | 7%
|
|======== | 11%
|
|========== | 15%
|
|============= | 19%
|
|================ | 22%
|
|================== | 26%
|
|===================== | 30%
|
|======================= | 33%
|
|========================== | 37%
|
|============================= | 41%
|
|=============================== | 44%
|
|================================== | 48%
|
|==================================== | 52%
|
|======================================= | 56%
|
|========================================= | 59%
|
|============================================ | 63%
|
|=============================================== | 67%
|
|================================================= | 70%
|
|==================================================== | 74%
|
|====================================================== | 78%
|
|========================================================= | 81%
|
|============================================================ | 85%
|
|============================================================== | 89%
|
|================================================================= | 93%
|
|=================================================================== | 96%
|
|======================================================================| 100%
# Print the object to inspect its fields, geometry type and CRS.
municipality
## Simple feature collection with 5572 features and 4 fields
## geometry type: GEOMETRY
## dimension: XY
## bbox: xmin: -73.99045 ymin: -33.75118 xmax: -28.83594 ymax: 5.271841
## epsg (SRID): 4674
## proj4string: +proj=longlat +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +no_defs
## First 10 features:
## code_muni name_muni code_state abbrev_state
## 1 1100015 Alta Floresta D'oeste 11 RO
## 2 1100023 Ariquemes 11 RO
## 3 1100031 Cabixi 11 RO
## 4 1100049 Cacoal 11 RO
## 5 1100056 Cerejeiras 11 RO
## 6 1100064 Colorado Do Oeste 11 RO
## 7 1100072 Corumbiara 11 RO
## 8 1100080 Costa Marques 11 RO
## 9 1100098 Espigão D'oeste 11 RO
## 10 1100106 Guajará-Mirim 11 RO
## geom
## 1 POLYGON ((-62.19465 -11.827...
## 2 POLYGON ((-62.53648 -9.7322...
## 3 POLYGON ((-60.37075 -13.363...
## 4 POLYGON ((-61.0008 -11.2973...
## 5 POLYGON ((-61.49976 -13.005...
## 6 POLYGON ((-60.50475 -12.966...
## 7 POLYGON ((-61.34273 -12.666...
## 8 POLYGON ((-63.71199 -11.650...
## 9 POLYGON ((-60.94827 -10.988...
## 10 POLYGON ((-65.37724 -10.431...
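Based on the output above, the data uses EPSG:4674, a geographic coordinate system whose coordinates are in degrees (longitude/latitude). We want projected x, y coordinates in metres instead, so the data is transformed to EPSG:5530. After the transformation, the output below shows that the geometry is now in projected x, y coordinates and that crs = 5530.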
# Reproject the municipality boundaries to EPSG:5530, the same CRS that
# brazil_sf is transformed to.
municipality_sf <- municipality %>%
  st_transform(crs = 5530)
# Print the result to confirm the new CRS and projected coordinates.
municipality_sf
## Simple feature collection with 5572 features and 4 fields
## geometry type: GEOMETRY
## dimension: XY
## bbox: xmin: 2794591 ymin: 6263815 xmax: 7615330 ymax: 10586410
## epsg (SRID): 5530
## proj4string: +proj=poly +lat_0=0 +lon_0=-54 +x_0=5000000 +y_0=10000000 +ellps=aust_SA +towgs84=-67.35,3.88,-38.22,0,0,0,0 +units=m +no_defs
## First 10 features:
## code_muni name_muni code_state abbrev_state
## 1 1100015 Alta Floresta D'oeste 11 RO
## 2 1100023 Ariquemes 11 RO
## 3 1100031 Cabixi 11 RO
## 4 1100049 Cacoal 11 RO
## 5 1100056 Cerejeiras 11 RO
## 6 1100064 Colorado Do Oeste 11 RO
## 7 1100072 Corumbiara 11 RO
## 8 1100080 Costa Marques 11 RO
## 9 1100098 Espigão D'oeste 11 RO
## 10 1100106 Guajará-Mirim 11 RO
## geom
## 1 POLYGON ((4107199 8678957, ...
## 2 POLYGON ((4063462 8912011, ...
## 3 POLYGON ((4310019 8513235, ...
## 4 POLYGON ((4235803 8741535, ...
## 5 POLYGON ((4186578 8549768, ...
## 6 POLYGON ((4294369 8557024, ...
## 7 POLYGON ((4202534 8588007, ...
## 8 POLYGON ((3941254 8693538, ...
## 9 POLYGON ((4240732 8776064, ...
## 10 POLYGON ((3754608 8824106, ...
The dependent variable is GDP per capita, so we have to decide which variables to use as independent variables. Based on the content of the available variables, some are not related to GDP per capita whereas others have a strong relationship with it. Hence, we have to eliminate the irrelevant variables.
# Keep the join keys (CITY, STATE), the dependent variable (GDP_CAPITA) and the
# candidate explanatory variables.
# Columns are selected by name rather than by position: the former positional
# indices 44:62, 64 and 65 resolve to COMP_A..COMP_S, COMP_U and HOTELS in
# brazil_sf, and would silently pick different columns if the input layout
# ever changed.
brazil_sf_filter <- brazil_sf %>%
  select(
    CITY, STATE, GDP_CAPITA, IBGE_RES_POP_BRAS, RURAL_URBAN,
    IBGE_RES_POP_ESTR, IBGE_DU_URBAN, IBGE_DU_RURAL, IBGE_15.59,
    IBGE_PLANTED_AREA, IBGE_CROP_PRODUCTION_.,
    IDHM, IDHM_Renda, IDHM_Longevidade, IDHM_Educacao,
    PAY_TV, FIXED_PHONES, AREA, MUN_EXPENDIT,
    COMP_A:COMP_S, COMP_U, HOTELS,
    Pr_Bank, Pu_Bank, Pr_Assets, Pu_Assets,
    Cars, Motorcycles, POST_OFFICES
  )
The Brazil data contains special (accented) characters, which also come in upper and lower case. Based on the printed output of municipality and brazil, the two data sets do not capitalise their names consistently. This would be a problem when joining the data, because the join is case-sensitive. Hence, the city names are converted to lower case before joining with the municipality data set, to prevent failed matches.
# Lower-case the city names so the later join on names is not affected by
# capitalisation, then drop the geometry to obtain a plain attribute table.
# A direct mutate() replaces mutate_at() + funs(), since funs() is deprecated
# as of dplyr 0.8.0.
brazil_filter <- brazil_sf_filter %>%
  mutate(CITY = tolower(CITY)) %>%
  st_set_geometry(NULL)
## Warning: funs() is soft deprecated as of dplyr 0.8.0
## Please use a list of either functions or lambdas:
##
## # Simple named list:
## list(mean = mean, median = median)
##
## # Auto named with `tibble::lst()`:
## tibble::lst(mean, median)
##
## # Using lambdas
## list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once per session.
As mentioned above, the same applies to municipality_sf, so its municipality names are also converted to lower case.
# Lower-case the municipality names so they match the lower-cased CITY values.
# A direct mutate() replaces mutate_at() + funs(), since funs() is deprecated
# as of dplyr 0.8.0.
municipality_sf <- municipality_sf %>%
  mutate(name_muni = tolower(name_muni))
# Convert both tables to plain data frames before joining them.
brazil_filter <- as.data.frame(brazil_filter)
municipality_df <- as.data.frame(municipality_sf)
Both data sets are joined on their common fields (city name and state). City names alone are not unique (for example, Bom Jesus appears five times in the summary above), so the state is used together with the city name to ensure that each record is matched to the correct municipality.
# Attach the city attributes to every municipality. All municipalities are
# kept; the match is on the name and the state abbreviation together.
brazil_valuemap <- municipality_df %>%
  left_join(
    brazil_filter,
    by = c("name_muni" = "CITY", "abbrev_state" = "STATE")
  )
## Warning: Column `abbrev_state`/`STATE` joining factors with different levels,
## coercing to character vector
# Summarise the joined table to see which columns have missing values.
summary(brazil_valuemap)
## code_muni name_muni code_state abbrev_state
## Min. :1100015 Length:5572 31 : 853 Length:5572
## 1st Qu.:2512175 Class :character 35 : 645 Class :character
## Median :3146354 Mode :character 43 : 499 Mode :character
## Mean :3253966 29 : 417
## 3rd Qu.:4119264 41 : 399
## Max. :5300108 42 : 295
## (Other):2464
## geom GDP_CAPITA IBGE_RES_POP_BRAS
## MULTIPOLYGON :2773 Min. : 3191 Min. : 805
## POLYGON :2799 1st Qu.: 9058 1st Qu.: 5230
## epsg:5530 : 0 Median : 15870 Median : 10926
## +proj=poly...: 0 Mean : 21126 Mean : 34200
## 3rd Qu.: 26155 3rd Qu.: 23390
## Max. :314638 Max. :11133776
## NA's :2 NA's :7
## RURAL_URBAN IBGE_RES_POP_ESTR IBGE_DU_URBAN
## Rural Adjacente :3040 Min. : 0.0 Min. : 60
## Urbano :1456 1st Qu.: 0.0 1st Qu.: 874
## Intermediário Adjacente: 686 Median : 0.0 Median : 1846
## Rural Remoto : 323 Mean : 77.5 Mean : 8859
## Intermediário Remoto : 60 3rd Qu.: 10.0 3rd Qu.: 4624
## (Other) : 6 Max. :119727.0 Max. :3548433
## NA's : 1 NA's :7 NA's :9
## IBGE_DU_RURAL IBGE_15.59 IBGE_PLANTED_AREA IBGE_CROP_PRODUCTION_.
## Min. : 3 Min. : 94 Min. : 0.0 Min. : 0
## 1st Qu.: 487 1st Qu.: 1734 1st Qu.: 910.2 1st Qu.: 2326
## Median : 931 Median : 3841 Median : 3471.5 Median : 13846
## Mean : 1463 Mean : 18212 Mean : 14179.9 Mean : 57384
## 3rd Qu.: 1832 3rd Qu.: 9628 3rd Qu.: 11194.2 3rd Qu.: 55619
## Max. :33809 Max. :7058221 Max. :1205669.0 Max. :3274885
## NA's :80 NA's :7 NA's :2 NA's :2
## IDHM IDHM_Renda IDHM_Longevidade IDHM_Educacao
## Min. :0.4180 Min. :0.4000 Min. :0.6720 Min. :0.2070
## 1st Qu.:0.5990 1st Qu.:0.5720 1st Qu.:0.7690 1st Qu.:0.4900
## Median :0.6650 Median :0.6540 Median :0.8080 Median :0.5600
## Mean :0.6592 Mean :0.6429 Mean :0.8016 Mean :0.5591
## 3rd Qu.:0.7180 3rd Qu.:0.7070 3rd Qu.:0.8360 3rd Qu.:0.6310
## Max. :0.8620 Max. :0.8910 Max. :0.8940 Max. :0.8250
## NA's :8 NA's :8 NA's :8 NA's :8
## PAY_TV FIXED_PHONES AREA MUN_EXPENDIT
## Min. : 1 Min. : 3 Min. : 3.57 Min. :1.421e+06
## 1st Qu.: 88 1st Qu.: 119 1st Qu.: 204.44 1st Qu.:1.573e+07
## Median : 247 Median : 327 Median : 416.59 Median :2.746e+07
## Mean : 3095 Mean : 6568 Mean : 1517.44 Mean :1.043e+08
## 3rd Qu.: 815 3rd Qu.: 1151 3rd Qu.: 1026.57 3rd Qu.:5.672e+07
## Max. :2047668 Max. :5543127 Max. :159533.33 Max. :4.577e+10
## NA's :3 NA's :3 NA's :2 NA's :1493
## COMP_A COMP_B COMP_C COMP_D
## Min. : 0.00 Min. : 0.000 Min. : 0.00 Min. : 0.0000
## 1st Qu.: 1.00 1st Qu.: 0.000 1st Qu.: 3.00 1st Qu.: 0.0000
## Median : 2.00 Median : 0.000 Median : 11.00 Median : 0.0000
## Mean : 18.25 Mean : 1.852 Mean : 73.44 Mean : 0.4262
## 3rd Qu.: 8.00 3rd Qu.: 2.000 3rd Qu.: 39.00 3rd Qu.: 0.0000
## Max. :1948.00 Max. :274.000 Max. :31566.00 Max. :332.0000
## NA's :2 NA's :2 NA's :2 NA's :2
## COMP_E COMP_F COMP_G COMP_H
## Min. : 0.000 Min. : 0.00 Min. : 1.0 Min. : 0
## 1st Qu.: 0.000 1st Qu.: 1.00 1st Qu.: 32.0 1st Qu.: 1
## Median : 0.000 Median : 4.00 Median : 74.5 Median : 7
## Mean : 2.029 Mean : 43.26 Mean : 348.0 Mean : 41
## 3rd Qu.: 1.000 3rd Qu.: 15.00 3rd Qu.: 199.0 3rd Qu.: 25
## Max. :657.000 Max. :25222.00 Max. :150633.0 Max. :19515
## NA's :2 NA's :2 NA's :2 NA's :2
## COMP_I COMP_J COMP_K COMP_L
## Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 2.00 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.00
## Median : 7.00 Median : 1.00 Median : 0.00 Median : 0.00
## Mean : 55.88 Mean : 24.74 Mean : 15.55 Mean : 15.14
## 3rd Qu.: 24.00 3rd Qu.: 5.00 3rd Qu.: 2.00 3rd Qu.: 3.00
## Max. :29290.00 Max. :38720.00 Max. :23738.00 Max. :14003.00
## NA's :2 NA's :2 NA's :2 NA's :2
## COMP_M COMP_N COMP_O COMP_P
## Min. : 0.00 Min. : 0.0 Min. : 0.000 Min. : 0.00
## 1st Qu.: 1.00 1st Qu.: 1.0 1st Qu.: 2.000 1st Qu.: 2.00
## Median : 4.00 Median : 4.0 Median : 2.000 Median : 6.00
## Mean : 51.29 Mean : 83.7 Mean : 3.269 Mean : 30.96
## 3rd Qu.: 13.00 3rd Qu.: 14.0 3rd Qu.: 3.000 3rd Qu.: 17.00
## Max. :49181.00 Max. :76757.0 Max. :204.000 Max. :16030.00
## NA's :2 NA's :2 NA's :2 NA's :2
## COMP_Q COMP_R COMP_S COMP_U
## Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00000
## 1st Qu.: 1.00 1st Qu.: 0.00 1st Qu.: 5.00 1st Qu.: 0.00000
## Median : 3.00 Median : 2.00 Median : 12.00 Median : 0.00000
## Mean : 34.15 Mean : 12.18 Mean : 51.61 Mean : 0.05027
## 3rd Qu.: 12.00 3rd Qu.: 6.00 3rd Qu.: 31.00 3rd Qu.: 0.00000
## Max. :22248.00 Max. :6687.00 Max. :24832.00 Max. :123.00000
## NA's :2 NA's :2 NA's :2 NA's :2
## HOTELS Pr_Bank Pu_Bank Pr_Assets
## Min. : 1.000 Min. : 0.000 Min. :0.00 Min. :0.000e+00
## 1st Qu.: 1.000 1st Qu.: 0.000 1st Qu.:1.00 1st Qu.:0.000e+00
## Median : 1.000 Median : 1.000 Median :2.00 Median :3.231e+07
## Mean : 3.131 Mean : 1.312 Mean :1.58 Mean :9.187e+09
## 3rd Qu.: 3.000 3rd Qu.: 2.000 3rd Qu.:2.00 3rd Qu.:1.148e+08
## Max. :97.000 Max. :83.000 Max. :8.00 Max. :1.950e+13
## NA's :4685 NA's :2230 NA's :2230 NA's :2230
## Pu_Assets Cars Motorcycles POST_OFFICES
## Min. :0.000e+00 Min. : 2 Min. : 4 Min. : 1.000
## 1st Qu.:4.047e+07 1st Qu.: 602 1st Qu.: 591 1st Qu.: 1.000
## Median :1.339e+08 Median : 1440 Median : 1285 Median : 1.000
## Mean :6.005e+09 Mean : 9861 Mean : 4879 Mean : 2.081
## 3rd Qu.:4.970e+08 3rd Qu.: 4086 3rd Qu.: 3295 3rd Qu.: 2.000
## Max. :8.020e+12 Max. :5740995 Max. :1134570 Max. :225.000
## NA's :2230 NA's :11 NA's :11 NA's :119
Looking through the data set, there are two municipalities without a GDP_CAPITA value. Replacing the missing value with zero is not a good idea, because no value was actually provided and a zero would be treated as a real observation. Hence, instead of entering a zero in the field, it is wiser to omit these two records completely to prevent any inaccuracy.
# Drop the municipalities that have no GDP_CAPITA value. The rows are selected
# by the missing value itself rather than by hard-coded row positions, so the
# filter keeps working if the row order or row count of brazil_valuemap changes.
brazil_valuemap_1 <- brazil_valuemap[!is.na(brazil_valuemap$GDP_CAPITA), ]
# Every NA that remains, in any column, is replaced with 0.
brazil_valuemap_1[is.na(brazil_valuemap_1)] <- 0
summary(brazil_valuemap_1)
## code_muni name_muni code_state abbrev_state
## Min. :1100015 Length:5570 31 : 853 Length:5570
## 1st Qu.:2512126 Class :character 35 : 645 Class :character
## Median :3146280 Mode :character 43 : 497 Mode :character
## Mean :3253591 29 : 417
## 3rd Qu.:4119190 41 : 399
## Max. :5300108 42 : 295
## (Other):2464
## geom GDP_CAPITA IBGE_RES_POP_BRAS
## MULTIPOLYGON :2773 Min. : 3191 Min. : 0
## POLYGON :2797 1st Qu.: 9058 1st Qu.: 5217
## epsg:5530 : 0 Median : 15870 Median : 10920
## +proj=poly...: 0 Mean : 21126 Mean : 34170
## 3rd Qu.: 26155 3rd Qu.: 23380
## Max. :314638 Max. :11133776
##
## RURAL_URBAN IBGE_RES_POP_ESTR IBGE_DU_URBAN
## : 0 Min. : 0.00 Min. : 0
## Intermediário Adjacente: 686 1st Qu.: 0.00 1st Qu.: 871
## Intermediário Remoto : 60 Median : 0.00 Median : 1840
## Rural Adjacente :3040 Mean : 77.44 Mean : 8848
## Rural Remoto : 323 3rd Qu.: 10.00 3rd Qu.: 4619
## Sem classificação : 5 Max. :119727.00 Max. :3548433
## Urbano :1456
## IBGE_DU_RURAL IBGE_15.59 IBGE_PLANTED_AREA IBGE_CROP_PRODUCTION_.
## Min. : 0 Min. : 0 Min. : 0.0 Min. : 0
## 1st Qu.: 471 1st Qu.: 1730 1st Qu.: 910.2 1st Qu.: 2326
## Median : 917 Median : 3837 Median : 3471.5 Median : 13846
## Mean : 1442 Mean : 18196 Mean : 14179.9 Mean : 57384
## 3rd Qu.: 1813 3rd Qu.: 9591 3rd Qu.: 11194.2 3rd Qu.: 55619
## Max. :33809 Max. :7058221 Max. :1205669.0 Max. :3274885
##
## IDHM IDHM_Renda IDHM_Longevidade IDHM_Educacao
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.5990 1st Qu.:0.5720 1st Qu.:0.7690 1st Qu.:0.4900
## Median :0.6650 Median :0.6540 Median :0.8080 Median :0.5600
## Mean :0.6585 Mean :0.6422 Mean :0.8007 Mean :0.5585
## 3rd Qu.:0.7180 3rd Qu.:0.7070 3rd Qu.:0.8360 3rd Qu.:0.6310
## Max. :0.8620 Max. :0.8910 Max. :0.8940 Max. :0.8250
##
## PAY_TV FIXED_PHONES AREA MUN_EXPENDIT
## Min. : 0 Min. : 0 Min. : 0.0 Min. :0.000e+00
## 1st Qu.: 88 1st Qu.: 118 1st Qu.: 204.3 1st Qu.:0.000e+00
## Median : 247 Median : 327 Median : 415.9 Median :1.774e+07
## Mean : 3094 Mean : 6567 Mean : 1515.6 Mean :7.641e+07
## 3rd Qu.: 815 3rd Qu.: 1151 3rd Qu.: 1026.2 3rd Qu.:4.150e+07
## Max. :2047668 Max. :5543127 Max. :159533.3 Max. :4.577e+10
##
## COMP_A COMP_B COMP_C COMP_D
## Min. : 0.00 Min. : 0.000 Min. : 0.00 Min. : 0.0000
## 1st Qu.: 1.00 1st Qu.: 0.000 1st Qu.: 3.00 1st Qu.: 0.0000
## Median : 2.00 Median : 0.000 Median : 11.00 Median : 0.0000
## Mean : 18.25 Mean : 1.852 Mean : 73.44 Mean : 0.4262
## 3rd Qu.: 8.00 3rd Qu.: 2.000 3rd Qu.: 39.00 3rd Qu.: 0.0000
## Max. :1948.00 Max. :274.000 Max. :31566.00 Max. :332.0000
##
## COMP_E COMP_F COMP_G COMP_H
## Min. : 0.000 Min. : 0.00 Min. : 1.0 Min. : 0
## 1st Qu.: 0.000 1st Qu.: 1.00 1st Qu.: 32.0 1st Qu.: 1
## Median : 0.000 Median : 4.00 Median : 74.5 Median : 7
## Mean : 2.029 Mean : 43.26 Mean : 348.0 Mean : 41
## 3rd Qu.: 1.000 3rd Qu.: 15.00 3rd Qu.: 199.0 3rd Qu.: 25
## Max. :657.000 Max. :25222.00 Max. :150633.0 Max. :19515
##
## COMP_I COMP_J COMP_K COMP_L
## Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 2.00 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.00
## Median : 7.00 Median : 1.00 Median : 0.00 Median : 0.00
## Mean : 55.88 Mean : 24.74 Mean : 15.55 Mean : 15.14
## 3rd Qu.: 24.00 3rd Qu.: 5.00 3rd Qu.: 2.00 3rd Qu.: 3.00
## Max. :29290.00 Max. :38720.00 Max. :23738.00 Max. :14003.00
##
## COMP_M COMP_N COMP_O COMP_P
## Min. : 0.00 Min. : 0.0 Min. : 0.000 Min. : 0.00
## 1st Qu.: 1.00 1st Qu.: 1.0 1st Qu.: 2.000 1st Qu.: 2.00
## Median : 4.00 Median : 4.0 Median : 2.000 Median : 6.00
## Mean : 51.29 Mean : 83.7 Mean : 3.269 Mean : 30.96
## 3rd Qu.: 13.00 3rd Qu.: 14.0 3rd Qu.: 3.000 3rd Qu.: 17.00
## Max. :49181.00 Max. :76757.0 Max. :204.000 Max. :16030.00
##
## COMP_Q COMP_R COMP_S COMP_U
## Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00000
## 1st Qu.: 1.00 1st Qu.: 0.00 1st Qu.: 5.00 1st Qu.: 0.00000
## Median : 3.00 Median : 2.00 Median : 12.00 Median : 0.00000
## Mean : 34.15 Mean : 12.18 Mean : 51.61 Mean : 0.05027
## 3rd Qu.: 12.00 3rd Qu.: 6.00 3rd Qu.: 31.00 3rd Qu.: 0.00000
## Max. :22248.00 Max. :6687.00 Max. :24832.00 Max. :123.00000
##
## HOTELS Pr_Bank Pu_Bank Pr_Assets
## Min. : 0.0000 Min. : 0.0000 Min. :0.0000 Min. :0.000e+00
## 1st Qu.: 0.0000 1st Qu.: 0.0000 1st Qu.:0.0000 1st Qu.:0.000e+00
## Median : 0.0000 Median : 0.0000 Median :1.0000 Median :0.000e+00
## Mean : 0.4986 Mean : 0.7873 Mean :0.9479 Mean :5.512e+09
## 3rd Qu.: 0.0000 3rd Qu.: 1.0000 3rd Qu.:2.0000 3rd Qu.:4.775e+07
## Max. :97.0000 Max. :83.0000 Max. :8.0000 Max. :1.950e+13
##
## Pu_Assets Cars Motorcycles POST_OFFICES
## Min. :0.000e+00 Min. : 0 Min. : 0.0 Min. : 0.000
## 1st Qu.:0.000e+00 1st Qu.: 599 1st Qu.: 589.2 1st Qu.: 1.000
## Median :2.323e+07 Median : 1433 Median : 1282.5 Median : 1.000
## Mean :3.603e+09 Mean : 9845 Mean : 4871.5 Mean : 2.037
## 3rd Qu.:1.994e+08 3rd Qu.: 4084 3rd Qu.: 3292.8 3rd Qu.: 2.000
## Max. :8.020e+12 Max. :5740995 Max. :1134570.0 Max. :225.000
##
# Convert the unfiltered and the filtered table to sf objects so that both can
# be mapped.
brazil_valuemap_sf  <- sf::st_as_sf(brazil_valuemap)
brazil_valuemap_sf1 <- sf::st_as_sf(brazil_valuemap_1)
plot1 shows brazil_valuemap_sf, i.e. the data before any municipality is removed, whereas plot2 shows brazil_valuemap_sf1, i.e. the data after removing the municipalities that have an empty GDP_CAPITA.
# Quantile-classed choropleths of GDP_CAPITA with faint borders:
# plot1 maps the unfiltered table, plot2 the filtered one.
plot1 <- tm_shape(brazil_valuemap_sf) +
  tm_fill(col = "GDP_CAPITA", style = "quantile") +
  tm_borders(alpha = 0.2)

plot2 <- tm_shape(brazil_valuemap_sf1) +
  tm_fill(col = "GDP_CAPITA", style = "quantile") +
  tm_borders(alpha = 0.2)

# Show both maps together for comparison.
tmap_arrange(plot1, plot2)
## Warning: The shape brazil_valuemap_sf is invalid. See sf::st_is_valid
## Warning: The shape brazil_valuemap_sf1 is invalid. See sf::st_is_valid
## Warning: The shape brazil_valuemap_sf is invalid. See sf::st_is_valid
## Warning: The shape brazil_valuemap_sf1 is invalid. See sf::st_is_valid
Based on the choropleth map of the distribution of GDP_CAPITA, the lighter shades are located in the top part of the map, whereas the darker shades are located from the middle to the bottom of the map.
# Convert the filtered sf object back to a data frame. Note that this
# overwrites the original brazil_valuemap with the filtered table.
brazil_valuemap <- as.data.frame(brazil_valuemap_sf1)
Looking at the selected independent variables, there is one column (RURAL_URBAN) that is categorical rather than numeric. In order to make full use of the data, we have to convert it into dummy variables, i.e. one binary column per category. We will first create a data set which consists only of the RURAL_URBAN values.
# Pull the categorical RURAL_URBAN column out into its own table.
RURAL_URBAN <- dplyr::select(brazil_valuemap_1, RURAL_URBAN)
By using the dummy_cols() function of the fastDummies package, we are able to create dummy variables for RURAL_URBAN, so that every category is converted into a binary (0/1) column.
# Expand RURAL_URBAN into one 0/1 indicator column per category.
RURAL_URBAN <- fastDummies::dummy_cols(RURAL_URBAN)

# Replace the generated names that contain spaces with underscore-separated
# names; RURAL_URBAN_Urbano keeps its generated name.
RURAL_URBAN <- dplyr::rename(
  RURAL_URBAN,
  Intermediário_Adjacente = `RURAL_URBAN_Intermediário Adjacente`,
  Intermediário_Remoto = `RURAL_URBAN_Intermediário Remoto`,
  Rural_Adjacente = `RURAL_URBAN_Rural Adjacente`,
  Rural_Remoto = `RURAL_URBAN_Rural Remoto`,
  Sem_classificação = `RURAL_URBAN_Sem classificação`
)

# Keep columns 3 to 8 only (selected by position).
# NOTE(review): presumably these are the six category indicators and columns
# 1-2 are the original column plus the empty-level dummy -- confirm.
RURAL_URBAN <- dplyr::select(RURAL_URBAN, 3:8)

# Swap the categorical column in the main table for the indicator columns.
brazil_valuemap <- subset(brazil_valuemap, select = -c(RURAL_URBAN))
brazil_valuemap <- cbind(brazil_valuemap, RURAL_URBAN)

# Move column 49 to the front and keep every other column in its order.
# NOTE(review): column 49 is presumably the geometry column -- confirm.
brazil_valuemap <- brazil_valuemap[, c(49, 1:48, 50:55)]
# Correlation matrix of columns 7 to 55, drawn as numbers in the upper triangle.
corrplot(
  cor(brazil_valuemap[, 7:55]),
  diag = FALSE,
  order = "AOE",
  tl.pos = "td",
  tl.cex = 0.5,
  method = "number",
  type = "upper",
  number.cex = 0.3
)
# First multiple linear regression of GDP_CAPITA on the candidate predictors.
# All six RURAL_URBAN indicator columns are included; the summary reports one
# coefficient as "not defined because of singularities" (RURAL_URBAN_Urbano is
# NA), so that category effectively acts as the reference level.
gdp.mlr <- lm(
  formula = GDP_CAPITA ~ COMP_A + IBGE_15.59 + COMP_B + COMP_O + Pu_Bank +
    Pr_Assets + Pu_Assets + IBGE_DU_RURAL + IBGE_CROP_PRODUCTION_. + AREA +
    IDHM_Longevidade + IDHM_Educacao + Intermediário_Adjacente +
    Intermediário_Remoto + Rural_Adjacente + Rural_Remoto +
    Sem_classificação + RURAL_URBAN_Urbano,
  data = brazil_valuemap
)
summary(gdp.mlr)
##
## Call:
## lm(formula = GDP_CAPITA ~ COMP_A + IBGE_15.59 + COMP_B + COMP_O +
## Pu_Bank + Pr_Assets + Pu_Assets + IBGE_DU_RURAL + IBGE_CROP_PRODUCTION_. +
## AREA + IDHM_Longevidade + IDHM_Educacao + Intermediário_Adjacente +
## Intermediário_Remoto + Rural_Adjacente + Rural_Remoto + Sem_classificação +
## RURAL_URBAN_Urbano, data = brazil_valuemap)
##
## Residuals:
## Min 1Q Median 3Q Max
## -55528 -7323 -2859 2512 283396
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.752e+04 4.954e+03 -15.646 < 2e-16 ***
## COMP_A -2.506e+00 3.011e+00 -0.833 0.40513
## IBGE_15.59 -8.328e-03 5.419e-03 -1.537 0.12438
## COMP_B -1.098e+01 4.906e+01 -0.224 0.82299
## COMP_O 9.856e+01 7.361e+01 1.339 0.18067
## Pu_Bank 1.343e+03 3.265e+02 4.112 3.97e-05 ***
## Pr_Assets 5.110e-09 1.662e-09 3.075 0.00212 **
## Pu_Assets 3.693e-09 2.621e-09 1.409 0.15899
## IBGE_DU_RURAL -1.184e+00 1.734e-01 -6.828 9.50e-12 ***
## IBGE_CROP_PRODUCTION_. 2.971e-02 1.666e-03 17.831 < 2e-16 ***
## AREA 6.350e-03 4.639e-02 0.137 0.89115
## IDHM_Longevidade 8.473e+04 7.324e+03 11.569 < 2e-16 ***
## IDHM_Educacao 5.209e+04 3.923e+03 13.278 < 2e-16 ***
## Intermediário_Adjacente -1.784e+03 8.501e+02 -2.099 0.03589 *
## Intermediário_Remoto 3.367e+03 2.430e+03 1.386 0.16592
## Rural_Adjacente 4.930e+02 7.404e+02 0.666 0.50552
## Rural_Remoto 1.868e+03 1.238e+03 1.509 0.13130
## Sem_classificação 1.034e+05 9.222e+03 11.209 < 2e-16 ***
## RURAL_URBAN_Urbano NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17350 on 5552 degrees of freedom
## Multiple R-squared: 0.2743, Adjusted R-squared: 0.272
## F-statistic: 123.4 on 17 and 5552 DF, p-value: < 2.2e-16
With reference to the report above, it is clear that not all the independent variables are statistically significant. We will revise the model by removing those variables which are not statistically significant.
# Revised model: only the predictors kept after the first fit.
gdp.mlr1 <- lm(
  formula = GDP_CAPITA ~ Pu_Bank + Pr_Assets + IBGE_DU_RURAL +
    IBGE_CROP_PRODUCTION_. + IDHM_Longevidade + IDHM_Educacao +
    Intermediário_Adjacente + Sem_classificação,
  data = brazil_valuemap
)
summary(gdp.mlr1)
##
## Call:
## lm(formula = GDP_CAPITA ~ Pu_Bank + Pr_Assets + IBGE_DU_RURAL +
## IBGE_CROP_PRODUCTION_. + IDHM_Longevidade + IDHM_Educacao +
## Intermediário_Adjacente + Sem_classificação, data = brazil_valuemap)
##
## Residuals:
## Min 1Q Median 3Q Max
## -57152 -7273 -2803 2446 283343
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.627e+04 4.847e+03 -15.735 < 2e-16 ***
## Pu_Bank 1.144e+03 2.753e+02 4.153 3.32e-05 ***
## Pr_Assets 3.391e-09 8.556e-10 3.963 7.48e-05 ***
## IBGE_DU_RURAL -1.151e+00 1.688e-01 -6.815 1.04e-11 ***
## IBGE_CROP_PRODUCTION_. 3.011e-02 1.633e-03 18.442 < 2e-16 ***
## IDHM_Longevidade 8.580e+04 7.258e+03 11.821 < 2e-16 ***
## IDHM_Educacao 4.960e+04 3.638e+03 13.634 < 2e-16 ***
## Intermediário_Adjacente -2.159e+03 7.126e+02 -3.030 0.00246 **
## Sem_classificação 1.023e+05 9.168e+03 11.153 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17350 on 5561 degrees of freedom
## Multiple R-squared: 0.2726, Adjusted R-squared: 0.2716
## F-statistic: 260.6 on 8 and 5561 DF, p-value: < 2.2e-16
The adjusted R-squared is 0.2716, which implies that the independent variables explain only about 27% of the variation in GDP per capita. Each statement below describes the expected change in GDP per capita when all other variables are held constant. When Pu_Bank increases by 1 unit, GDP per capita increases by 1.144e+03 reais
When Pr_Assets increases by 1 unit, GDP per capita increases by 3.391e-09 reais
When IBGE_DU_RURAL increases by 1 unit, GDP per capita decreases by 1.151e+00 reais
When IBGE_CROP_PRODUCTION_. increases by 1 unit, GDP per capita increases by 3.011e-02 reais
When IDHM_Longevidade increases by 1 unit, GDP per capita increases by 8.580e+04 reais
When IDHM_Educacao increases by 1 unit, GDP per capita increases by 4.960e+04 reais
When the municipality belongs to Intermediário_Adjacente, GDP per capita is lower by 2.159e+03 reais
When the municipality belongs to Sem_classificação, GDP per capita is higher by 1.023e+05 reais
# Tolerance and variance inflation factor (VIF) of each predictor in gdp.mlr1.
ols_vif_tol(gdp.mlr1)
## Variables Tolerance VIF
## 1 Pu_Bank 0.6236533 1.603455
## 2 Pr_Assets 0.9549051 1.047224
## 3 IBGE_DU_RURAL 0.6635635 1.507015
## 4 IBGE_CROP_PRODUCTION_. 0.9242048 1.082011
## 5 IDHM_Longevidade 0.3821807 2.616563
## 6 IDHM_Educacao 0.4521191 2.211807
## 7 Intermediário_Adjacente 0.9859876 1.014211
## 8 Sem_classificação 0.7171250 1.394457
Since the VIF of every independent variable is less than 10, we can safely conclude that there is no sign of multicollinearity among the independent variables.
# Residuals-versus-fitted-values plot for gdp.mlr1.
ols_plot_resid_fit(gdp.mlr1)
The figure above reveals that most of the data points are scattered around the 0 line, hence we can safely conclude that the relationships between the dependent variable and the independent variables are linear.
Next, ols_plot_resid_hist() of the olsrr package is used to test the normality assumption.
# Histogram of the gdp.mlr1 residuals.
ols_plot_resid_hist(gdp.mlr1)
The figure reveals that the residuals of the multiple linear regression model (i.e. gdp.mlr1) resemble a normal distribution.
The regression model we are building uses geographically referenced attributes, hence it is also important for us to visualise the residuals of the model on a map. First, we will export the residuals of the regression model (gdp.mlr1) and save them as a data frame.
# Extract the residuals of gdp.mlr1 as a one-column data frame. The column is
# named after the expression text (`gdp.mlr1$residuals`), and the rename() that
# follows refers to that name, so the expression must stay exactly as written.
mlr.output <- as.data.frame(gdp.mlr1$residuals)
Next, we will join the newly created data frame with brazil_valuemap to create the brazil_valuemap.res object.
# Append the residual column to the attribute table and rename it to MLR_RES.
brazil_valuemap.res <- dplyr::rename(
  cbind(brazil_valuemap, mlr.output),
  MLR_RES = `gdp.mlr1$residuals`
)
class(brazil_valuemap.res)
## [1] "data.frame"
# Rebuild an sf object from the table with residuals, convert it to a Spatial
# object, and map the residuals in quantile classes.
brazil_valuemap.res.sf <- sf::st_as_sf(brazil_valuemap.res)
gdp.sp <- sf::as_Spatial(brazil_valuemap.res.sf)

tm_shape(gdp.sp) +
  tm_fill(col = "MLR_RES", style = "quantile") +
  tm_borders(alpha = 0.2)
## Warning: The shape gdp.sp is invalid. See sf::st_is_valid
## Variable "MLR_RES" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
Based on the choropleth map, there are signs of spatial autocorrelation across the map
When the spatial object is converted to a data frame, the geometry column is dropped automatically.
# Convert the Spatial object gdp.sp to a data frame.
gdp <- as.data.frame(gdp.sp)
Create a new data frame consisting of three columns: city, state and geometry (the geometry is kept because the object is in sf format). The city names are converted to lower case.
# Keep only the join keys CITY and STATE and lower-case CITY so that it can be
# matched against name_muni in the join below.
# A plain mutate() replaces mutate_at(..., funs(tolower)): funs() has been
# deprecated since dplyr 0.8.0, and the result is the same.
brazil_sf_filter <- brazil_sf_filter %>%
  dplyr::select(CITY, STATE) %>%
  mutate(CITY = tolower(CITY))

# Attach the columns of brazil_sf_filter to every row of gdp, matching on
# municipality name and state abbreviation.
br_gdp <- left_join(
  gdp,
  brazil_sf_filter,
  by = c("name_muni" = "CITY", "abbrev_state" = "STATE")
)
## Warning: Column `abbrev_state`/`STATE` joining character vector and factor,
## coercing into character vector
br_gdp now contains the geometry points of the municipalities; we convert it into a spatial points data frame.
# Turn the joined table into an sf object and then into a Spatial object.
br_gdp.sp <- sf::as_Spatial(sf::st_as_sf(br_gdp))
To prove that our observation is indeed true, the Moran’s I test will be performed.
First, we will compute the distance-based weight matrix by using dnearneigh() function of spdep.
# Distance-band neighbours: every pair of points whose distance lies between
# 0 and 400000 coordinate units is linked (longlat = FALSE).
# NOTE(review): the units are presumably metres -- confirm against the CRS.
nb <- dnearneigh(
  coordinates(br_gdp.sp),
  d1 = 0,
  d2 = 400000,
  longlat = FALSE
)
summary(nb)
## Neighbour list object:
## Number of regions: 5570
## Number of nonzero links: 3381502
## Percentage nonzero weights: 10.89932
## Average number of links: 607.0919
## Link number distribution:
##
## 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## 5 3 4 4 12 6 4 6 3 5 5 7 3 2 2 4
## 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41
## 2 3 1 2 4 3 6 1 5 4 6 3 6 3 5 7
## 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57
## 3 5 2 4 2 3 3 6 3 4 6 4 5 3 5 6
## 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 74
## 7 7 7 5 2 4 3 5 7 1 4 1 2 1 2 3
## 75 76 77 78 79 80 81 82 83 84 85 86 87 88 90 91
## 3 1 1 3 2 6 4 2 1 2 3 2 3 2 2 5
## 92 93 94 95 96 98 99 100 102 103 104 105 107 108 109 110
## 2 2 2 4 1 1 4 4 3 2 3 3 2 2 1 3
## 111 112 113 116 117 118 119 122 123 124 125 126 127 128 129 131
## 5 1 1 1 4 2 1 2 1 5 3 1 4 4 4 2
## 132 133 134 135 137 138 139 140 141 142 143 145 146 147 148 149
## 2 4 1 3 2 2 3 1 2 2 3 1 4 2 2 2
## 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165
## 7 4 1 3 2 4 4 5 1 2 4 1 6 3 3 7
## 166 167 168 170 171 172 173 174 175 176 177 178 179 180 181 182
## 5 3 12 2 3 7 8 6 8 3 4 4 6 7 4 2
## 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
## 4 3 3 5 4 4 5 5 1 4 3 2 6 2 1 2
## 199 200 201 202 204 205 206 207 208 209 210 211 212 213 215 216
## 2 5 1 3 5 5 3 2 3 3 2 5 4 2 2 2
## 217 218 219 220 221 222 224 225 227 228 229 230 231 232 233 234
## 1 2 2 3 2 2 3 6 6 3 2 2 2 2 5 5
## 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250
## 2 2 8 3 1 4 5 5 4 5 3 3 4 1 1 4
## 251 252 253 254 255 256 257 259 260 261 262 263 265 266 267 268
## 4 3 2 5 5 3 9 4 3 2 8 3 5 2 4 1
## 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284
## 4 8 5 6 6 1 5 6 3 3 3 5 5 4 9 5
## 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300
## 4 4 2 4 1 3 5 7 8 8 5 3 3 4 8 6
## 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316
## 7 2 2 2 3 4 9 6 7 5 4 8 5 6 4 4
## 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332
## 3 8 8 8 6 5 1 5 1 2 3 8 3 5 2 4
## 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348
## 3 6 4 7 4 3 7 7 4 6 5 5 2 6 8 4
## 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364
## 4 7 6 9 4 4 5 4 5 4 4 4 3 6 2 7
## 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380
## 3 8 2 10 5 7 5 3 4 4 5 4 6 7 6 4
## 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396
## 2 2 8 7 4 3 3 4 5 9 4 6 5 5 4 5
## 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412
## 9 5 5 7 6 7 2 5 8 9 7 8 9 7 9 9
## 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428
## 7 10 14 6 13 4 4 5 7 4 8 6 4 6 7 7
## 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444
## 5 9 3 4 7 4 8 5 6 6 6 5 6 7 4 5
## 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460
## 3 4 3 6 3 2 6 2 3 5 10 10 6 5 3 3
## 461 462 463 464 465 466 467 468 469 470 471 472 473 475 476 477
## 7 3 5 7 3 4 6 4 5 3 4 4 3 5 2 1
## 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493
## 3 1 8 3 3 4 2 2 5 4 3 2 2 2 4 3
## 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509
## 5 7 3 4 2 5 1 1 1 7 9 4 2 6 2 4
## 512 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528
## 3 1 3 1 3 3 3 3 3 4 1 2 2 6 5 2
## 530 531 533 534 535 536 537 538 539 540 541 542 543 544 545 546
## 1 3 1 4 8 4 2 2 5 1 7 3 1 4 3 1
## 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562
## 2 4 3 2 4 7 3 5 6 7 6 5 5 6 8 5
## 563 564 565 566 567 568 569 570 571 572 573 574 575 576 578 579
## 4 6 1 9 3 2 8 7 2 4 4 4 1 5 8 1
## 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595
## 5 3 6 3 2 8 2 1 3 3 7 4 5 6 7 3
## 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611
## 2 3 6 9 6 6 4 8 7 3 7 2 6 5 9 6
## 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627
## 3 3 6 8 10 5 6 11 2 5 8 6 12 7 9 8
## 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643
## 9 6 4 9 11 7 5 5 11 13 9 6 7 4 6 3
## 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659
## 10 5 4 6 6 3 11 7 5 10 4 9 7 2 9 7
## 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675
## 3 7 6 6 3 5 9 6 5 11 11 5 5 10 5 11
## 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691
## 6 5 7 11 6 10 11 5 7 9 4 10 5 7 11 7
## 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707
## 10 9 7 6 7 4 11 9 6 6 5 8 10 7 5 9
## 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723
## 13 6 9 6 2 9 8 7 9 8 6 8 8 7 9 6
## 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739
## 8 14 3 14 7 9 8 5 4 9 11 10 14 7 5 4
## 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755
## 7 11 11 5 10 11 9 9 10 12 6 11 8 8 6 3
## 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771
## 7 8 5 12 11 11 10 10 9 9 8 8 11 5 14 11
## 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787
## 8 7 9 11 8 5 9 11 10 9 11 11 11 9 10 12
## 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803
## 12 6 10 8 12 6 11 6 10 8 7 8 10 12 10 16
## 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819
## 11 13 10 7 8 9 6 8 7 6 8 11 13 10 3 13
## 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835
## 16 6 14 7 5 10 7 4 9 10 8 8 9 10 2 10
## 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851
## 6 5 6 9 9 7 10 9 6 8 8 10 8 2 8 9
## 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867
## 12 5 11 8 7 8 7 8 4 6 3 3 8 7 6 4
## 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883
## 2 17 1 7 3 7 7 8 8 11 7 4 5 6 11 9
## 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899
## 8 7 5 5 3 5 6 8 5 7 5 6 2 8 7 8
## 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915
## 6 7 5 5 6 7 8 2 12 10 3 10 5 5 9 6
## 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931
## 10 10 4 9 6 6 12 10 9 4 8 5 6 4 2 5
## 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947
## 7 9 7 7 9 8 2 4 6 7 8 4 4 7 11 5
## 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963
## 5 9 7 3 7 5 1 8 6 7 8 3 6 12 7 7
## 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979
## 9 3 7 3 7 3 3 9 4 6 8 5 8 5 8 5
## 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995
## 7 10 5 6 7 8 4 3 4 8 4 5 9 4 5 7
## 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011
## 1 7 11 5 4 2 9 8 2 6 8 11 9 9 6 9
## 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027
## 12 11 14 7 15 15 3 11 12 10 10 9 6 8 3 2
## 1028 1029 1030 1031 1033 1034 1035 1036 1038 1039 1041 1043 1044 1045 1051 1052
## 3 8 8 2 3 3 3 1 2 2 1 2 1 1 2 1
## 1053 1062 1069 1075 1077 1080 1082 1092
## 2 1 4 1 1 1 1 1
## 5 least connected regions:
## 124 126 147 306 1526 with 10 links
## 1 most connected region:
## 3947 with 1092 links
# Convert the neighbour list into a spatial weights list using style "W".
nb_lw <- nb2listw(neighbours = nb, style = "W")
summary(nb_lw)
## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 5570
## Number of nonzero links: 3381502
## Percentage nonzero weights: 10.89932
## Average number of links: 607.0919
## Link number distribution:
##
## 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## 5 3 4 4 12 6 4 6 3 5 5 7 3 2 2 4
## 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41
## 2 3 1 2 4 3 6 1 5 4 6 3 6 3 5 7
## 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57
## 3 5 2 4 2 3 3 6 3 4 6 4 5 3 5 6
## 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 74
## 7 7 7 5 2 4 3 5 7 1 4 1 2 1 2 3
## 75 76 77 78 79 80 81 82 83 84 85 86 87 88 90 91
## 3 1 1 3 2 6 4 2 1 2 3 2 3 2 2 5
## 92 93 94 95 96 98 99 100 102 103 104 105 107 108 109 110
## 2 2 2 4 1 1 4 4 3 2 3 3 2 2 1 3
## 111 112 113 116 117 118 119 122 123 124 125 126 127 128 129 131
## 5 1 1 1 4 2 1 2 1 5 3 1 4 4 4 2
## 132 133 134 135 137 138 139 140 141 142 143 145 146 147 148 149
## 2 4 1 3 2 2 3 1 2 2 3 1 4 2 2 2
## 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165
## 7 4 1 3 2 4 4 5 1 2 4 1 6 3 3 7
## 166 167 168 170 171 172 173 174 175 176 177 178 179 180 181 182
## 5 3 12 2 3 7 8 6 8 3 4 4 6 7 4 2
## 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
## 4 3 3 5 4 4 5 5 1 4 3 2 6 2 1 2
## 199 200 201 202 204 205 206 207 208 209 210 211 212 213 215 216
## 2 5 1 3 5 5 3 2 3 3 2 5 4 2 2 2
## 217 218 219 220 221 222 224 225 227 228 229 230 231 232 233 234
## 1 2 2 3 2 2 3 6 6 3 2 2 2 2 5 5
## 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250
## 2 2 8 3 1 4 5 5 4 5 3 3 4 1 1 4
## 251 252 253 254 255 256 257 259 260 261 262 263 265 266 267 268
## 4 3 2 5 5 3 9 4 3 2 8 3 5 2 4 1
## 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284
## 4 8 5 6 6 1 5 6 3 3 3 5 5 4 9 5
## 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300
## 4 4 2 4 1 3 5 7 8 8 5 3 3 4 8 6
## 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316
## 7 2 2 2 3 4 9 6 7 5 4 8 5 6 4 4
## 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332
## 3 8 8 8 6 5 1 5 1 2 3 8 3 5 2 4
## 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348
## 3 6 4 7 4 3 7 7 4 6 5 5 2 6 8 4
## 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364
## 4 7 6 9 4 4 5 4 5 4 4 4 3 6 2 7
## 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380
## 3 8 2 10 5 7 5 3 4 4 5 4 6 7 6 4
## 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396
## 2 2 8 7 4 3 3 4 5 9 4 6 5 5 4 5
## 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412
## 9 5 5 7 6 7 2 5 8 9 7 8 9 7 9 9
## 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428
## 7 10 14 6 13 4 4 5 7 4 8 6 4 6 7 7
## 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444
## 5 9 3 4 7 4 8 5 6 6 6 5 6 7 4 5
## 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460
## 3 4 3 6 3 2 6 2 3 5 10 10 6 5 3 3
## 461 462 463 464 465 466 467 468 469 470 471 472 473 475 476 477
## 7 3 5 7 3 4 6 4 5 3 4 4 3 5 2 1
## 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493
## 3 1 8 3 3 4 2 2 5 4 3 2 2 2 4 3
## 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509
## 5 7 3 4 2 5 1 1 1 7 9 4 2 6 2 4
## 512 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528
## 3 1 3 1 3 3 3 3 3 4 1 2 2 6 5 2
## 530 531 533 534 535 536 537 538 539 540 541 542 543 544 545 546
## 1 3 1 4 8 4 2 2 5 1 7 3 1 4 3 1
## 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562
## 2 4 3 2 4 7 3 5 6 7 6 5 5 6 8 5
## 563 564 565 566 567 568 569 570 571 572 573 574 575 576 578 579
## 4 6 1 9 3 2 8 7 2 4 4 4 1 5 8 1
## 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595
## 5 3 6 3 2 8 2 1 3 3 7 4 5 6 7 3
## 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611
## 2 3 6 9 6 6 4 8 7 3 7 2 6 5 9 6
## 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627
## 3 3 6 8 10 5 6 11 2 5 8 6 12 7 9 8
## 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643
## 9 6 4 9 11 7 5 5 11 13 9 6 7 4 6 3
## 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659
## 10 5 4 6 6 3 11 7 5 10 4 9 7 2 9 7
## 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675
## 3 7 6 6 3 5 9 6 5 11 11 5 5 10 5 11
## 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691
## 6 5 7 11 6 10 11 5 7 9 4 10 5 7 11 7
## 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707
## 10 9 7 6 7 4 11 9 6 6 5 8 10 7 5 9
## 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723
## 13 6 9 6 2 9 8 7 9 8 6 8 8 7 9 6
## 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739
## 8 14 3 14 7 9 8 5 4 9 11 10 14 7 5 4
## 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755
## 7 11 11 5 10 11 9 9 10 12 6 11 8 8 6 3
## 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771
## 7 8 5 12 11 11 10 10 9 9 8 8 11 5 14 11
## 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787
## 8 7 9 11 8 5 9 11 10 9 11 11 11 9 10 12
## 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803
## 12 6 10 8 12 6 11 6 10 8 7 8 10 12 10 16
## 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819
## 11 13 10 7 8 9 6 8 7 6 8 11 13 10 3 13
## 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835
## 16 6 14 7 5 10 7 4 9 10 8 8 9 10 2 10
## 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851
## 6 5 6 9 9 7 10 9 6 8 8 10 8 2 8 9
## 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867
## 12 5 11 8 7 8 7 8 4 6 3 3 8 7 6 4
## 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883
## 2 17 1 7 3 7 7 8 8 11 7 4 5 6 11 9
## 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899
## 8 7 5 5 3 5 6 8 5 7 5 6 2 8 7 8
## 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915
## 6 7 5 5 6 7 8 2 12 10 3 10 5 5 9 6
## 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931
## 10 10 4 9 6 6 12 10 9 4 8 5 6 4 2 5
## 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947
## 7 9 7 7 9 8 2 4 6 7 8 4 4 7 11 5
## 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963
## 5 9 7 3 7 5 1 8 6 7 8 3 6 12 7 7
## 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979
## 9 3 7 3 7 3 3 9 4 6 8 5 8 5 8 5
## 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995
## 7 10 5 6 7 8 4 3 4 8 4 5 9 4 5 7
## 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011
## 1 7 11 5 4 2 9 8 2 6 8 11 9 9 6 9
## 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027
## 12 11 14 7 15 15 3 11 12 10 10 9 6 8 3 2
## 1028 1029 1030 1031 1033 1034 1035 1036 1038 1039 1041 1043 1044 1045 1051 1052
## 3 8 8 2 3 3 3 1 2 2 1 2 1 1 2 1
## 1053 1062 1069 1075 1077 1080 1082 1092
## 2 1 4 1 1 1 1 1
## 5 least connected regions:
## 124 126 147 306 1526 with 10 links
## 1 most connected region:
## 3947 with 1092 links
##
## Weights style: W
## Weights constants summary:
## n nn S0 S1 S2
## W 5570 31024900 5570 38.90204 22403.03
# Global Moran's I test on the residuals of gdp.mlr1 using the weights nb_lw.
lm.morantest(model = gdp.mlr1, listw = nb_lw)
##
## Global Moran I for regression residuals
##
## data:
## model: lm(formula = GDP_CAPITA ~ Pu_Bank + Pr_Assets + IBGE_DU_RURAL +
## IBGE_CROP_PRODUCTION_. + IDHM_Longevidade + IDHM_Educacao +
## Intermediário_Adjacente + Sem_classificação, data = brazil_valuemap)
## weights: nb_lw
##
## Moran I statistic standard deviate = 24.937, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 2.576960e-02 -3.672959e-04 1.098585e-06
The Global Moran’s I test for residual spatial autocorrelation shows that its p-value is less than 0.00000000000000022 (2.2e-16), which is less than the alpha value of 0.05. Hence, we reject the null hypothesis that the residuals are randomly distributed.
Since the observed Global Moran’s I = 0.02576960, which is greater than 0, we can infer that the residuals resemble a clustered distribution.
Hypothesis is:
H0 : The residuals are randomly distributed
H1 : The residuals are not randomly distributed
# Search for the fixed (non-adaptive) GWR bandwidth by cross-validation, using
# a Gaussian kernel and the same formula as gdp.mlr1.
bw.fixed <- bw.gwr(
  formula = GDP_CAPITA ~ Pu_Bank + Pr_Assets + IBGE_DU_RURAL +
    IBGE_CROP_PRODUCTION_. + IDHM_Longevidade + IDHM_Educacao +
    Intermediário_Adjacente + Sem_classificação,
  data = br_gdp.sp,
  approach = "CV",
  kernel = "gaussian",
  adaptive = FALSE,
  longlat = FALSE
)
## Take a cup of tea and have a break, it will take a few minutes.
## -----A kind suggestion from GWmodel development group
## Fixed bandwidth: 3813094 CV score: 1.707958e+12
## Fixed bandwidth: 2357093 CV score: 1.702272e+12
## Fixed bandwidth: 1457235 CV score: 1.690388e+12
## Fixed bandwidth: 901092 CV score: 1.67108e+12
## Fixed bandwidth: 557376.8 CV score: 1.651448e+12
## Fixed bandwidth: 344949.1 CV score: 1.637433e+12
## Fixed bandwidth: 213661.5 CV score: 1.6762e+12
## Fixed bandwidth: 426089.2 CV score: 1.644207e+12
## Fixed bandwidth: 294801.7 CV score: 1.632855e+12
## Fixed bandwidth: 263808.9 CV score: 1.632973e+12
## Fixed bandwidth: 313956.3 CV score: 1.634283e+12
## Fixed bandwidth: 282963.5 CV score: 1.632443e+12
## Fixed bandwidth: 275647.1 CV score: 1.632434e+12
## Fixed bandwidth: 271125.3 CV score: 1.632547e+12
## Fixed bandwidth: 278441.7 CV score: 1.632412e+12
## Fixed bandwidth: 280168.9 CV score: 1.632415e+12
## Fixed bandwidth: 277374.2 CV score: 1.632417e+12
## Fixed bandwidth: 279101.4 CV score: 1.632412e+12
## Fixed bandwidth: 279509.1 CV score: 1.632412e+12
## Fixed bandwidth: 278849.4 CV score: 1.632412e+12
## Fixed bandwidth: 278693.7 CV score: 1.632412e+12
## Fixed bandwidth: 278945.7 CV score: 1.632412e+12
## Fixed bandwidth: 279005.2 CV score: 1.632412e+12
## Fixed bandwidth: 278908.9 CV score: 1.632412e+12
## Fixed bandwidth: 278968.4 CV score: 1.632412e+12
## Fixed bandwidth: 278931.6 CV score: 1.632412e+12
## Fixed bandwidth: 278954.4 CV score: 1.632412e+12
## Fixed bandwidth: 278959.7 CV score: 1.632412e+12
## Fixed bandwidth: 278951 CV score: 1.632412e+12
## Fixed bandwidth: 278956.4 CV score: 1.632412e+12
## Fixed bandwidth: 278953.1 CV score: 1.632412e+12
## Fixed bandwidth: 278955.1 CV score: 1.632412e+12
## Fixed bandwidth: 278953.9 CV score: 1.632412e+12
## Fixed bandwidth: 278953.6 CV score: 1.632412e+12
## Fixed bandwidth: 278954.1 CV score: 1.632412e+12
## Fixed bandwidth: 278953.8 CV score: 1.632412e+12
## Fixed bandwidth: 278953.9 CV score: 1.632412e+12
## Fixed bandwidth: 278953.8 CV score: 1.632412e+12
## Fixed bandwidth: 278953.9 CV score: 1.632412e+12
## Fixed bandwidth: 278953.9 CV score: 1.632412e+12
## Fixed bandwidth: 278953.9 CV score: 1.632412e+12
## Fixed bandwidth: 278953.9 CV score: 1.632412e+12
## Fixed bandwidth: 278953.9 CV score: 1.632412e+12
## Fixed bandwidth: 278953.9 CV score: 1.632412e+12
## Fixed bandwidth: 278953.9 CV score: 1.632412e+12
## Fixed bandwidth: 278953.9 CV score: 1.632412e+12
## Fixed bandwidth: 278953.9 CV score: 1.632412e+12
## Fixed bandwidth: 278953.9 CV score: 1.632412e+12
## Fixed bandwidth: 278953.9 CV score: 1.632412e+12
## Fixed bandwidth: 278953.9 CV score: 1.632412e+12
## Fixed bandwidth: 278953.9 CV score: 1.632412e+12
## Fixed bandwidth: 278953.9 CV score: 1.632412e+12
## Fixed bandwidth: 278953.9 CV score: 1.632412e+12
The result shows that the recommended bandwidth is 278953.9 metres
# Fit the basic GWR model with the fixed bandwidth stored in bw.fixed and a
# Gaussian kernel.
gwr.fixed <- gwr.basic(
  formula = GDP_CAPITA ~ Pu_Bank + Pr_Assets + IBGE_DU_RURAL +
    IBGE_CROP_PRODUCTION_. + IDHM_Longevidade + IDHM_Educacao +
    Intermediário_Adjacente + Sem_classificação,
  data = br_gdp.sp,
  bw = bw.fixed,
  kernel = "gaussian",
  longlat = FALSE
)
The output is saved in a list of class “gwrm”. The code below can be used to display the model output.
# Display the fitted fixed-bandwidth GWR model.
print(gwr.fixed)
## ***********************************************************************
## * Package GWmodel *
## ***********************************************************************
## Program starts at: 2020-06-01 01:46:12
## Call:
## gwr.basic(formula = GDP_CAPITA ~ Pu_Bank + Pr_Assets + IBGE_DU_RURAL +
## IBGE_CROP_PRODUCTION_. + IDHM_Longevidade + IDHM_Educacao +
## Intermediário_Adjacente + Sem_classificação, data = br_gdp.sp,
## bw = bw.fixed, kernel = "gaussian", longlat = FALSE)
##
## Dependent (y) variable: GDP_CAPITA
## Independent variables: Pu_Bank Pr_Assets IBGE_DU_RURAL IBGE_CROP_PRODUCTION_. IDHM_Longevidade IDHM_Educacao Intermediário_Adjacente Sem_classificação
## Number of data points: 5570
## ***********************************************************************
## * Results of Global Regression *
## ***********************************************************************
##
## Call:
## lm(formula = formula, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -57152 -7273 -2803 2446 283343
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.627e+04 4.847e+03 -15.735 < 2e-16 ***
## Pu_Bank 1.144e+03 2.753e+02 4.153 3.32e-05 ***
## Pr_Assets 3.391e-09 8.556e-10 3.963 7.48e-05 ***
## IBGE_DU_RURAL -1.151e+00 1.688e-01 -6.815 1.04e-11 ***
## IBGE_CROP_PRODUCTION_. 3.011e-02 1.633e-03 18.442 < 2e-16 ***
## IDHM_Longevidade 8.580e+04 7.258e+03 11.821 < 2e-16 ***
## IDHM_Educacao 4.960e+04 3.638e+03 13.634 < 2e-16 ***
## Intermediário_Adjacente -2.159e+03 7.126e+02 -3.030 0.00246 **
## Sem_classificação 1.023e+05 9.168e+03 11.153 < 2e-16 ***
##
## ---Significance stars
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Residual standard error: 17350 on 5561 degrees of freedom
## Multiple R-squared: 0.2726
## Adjusted R-squared: 0.2716
## F-statistic: 260.6 on 8 and 5561 DF, p-value: < 2.2e-16
## ***Extra Diagnostic information
## Residual sum of squares: 1.674621e+12
## Sigma(hat): 17342.38
## AIC: 124561.5
## AICc: 124561.5
## ***********************************************************************
## * Results of Geographically Weighted Regression *
## ***********************************************************************
##
## *********************Model calibration information*********************
## Kernel function: gaussian
## Fixed bandwidth: 278953.9
## Regression points: the same locations as observations are used.
## Distance metric: Euclidean distance metric is used.
##
## ****************Summary of GWR coefficient estimates:******************
## Min. 1st Qu. Median 3rd Qu.
## Intercept -1.6429e+05 -6.6110e+04 -5.5332e+04 -1.0863e+04
## Pu_Bank -2.1997e+03 7.5391e+02 1.4644e+03 2.2272e+03
## Pr_Assets -9.5155e-07 3.2438e-09 4.5363e-09 1.2673e-07
## IBGE_DU_RURAL -7.0772e+00 -1.4395e+00 -8.9156e-01 -5.8992e-01
## IBGE_CROP_PRODUCTION_. -2.1804e-03 1.7176e-02 2.2388e-02 2.9993e-02
## IDHM_Longevidade -3.4239e+03 1.7678e+04 6.0431e+04 8.1058e+04
## IDHM_Educacao -1.6838e+04 1.5331e+04 3.4725e+04 5.6234e+04
## Intermediário_Adjacente -7.9035e+03 -2.6510e+03 -1.9195e+03 -8.3240e+02
## Sem_classificação -9.6496e+03 5.4642e+04 8.4945e+04 1.1760e+05
## Max.
## Intercept 1.3148e+04
## Pu_Bank 4.0008e+03
## Pr_Assets 0.0000e+00
## IBGE_DU_RURAL 8.8840e-01
## IBGE_CROP_PRODUCTION_. 7.5800e-02
## IDHM_Longevidade 2.2149e+05
## IDHM_Educacao 8.3895e+04
## Intermediário_Adjacente 1.1537e+04
## Sem_classificação 2.3099e+05
## ************************Diagnostic information*************************
## Number of data points: 5570
## Effective number of parameters (2trace(S) - trace(S'S)): 176.9071
## Effective degrees of freedom (n-2trace(S) + trace(S'S)): 5393.093
## AICc (GWR book, Fotheringham, et al. 2002, p. 61, eq 2.33): 124297.7
## AIC (GWR book, Fotheringham, et al. 2002,GWR p. 96, eq. 4.22): 124159.8
## Residual sum of squares: 1.527761e+12
## R-square value: 0.3364246
## Adjusted R-square value: 0.3146536
##
## ***********************************************************************
## Program stops at: 2020-06-01 01:46:25
# Select an adaptive (adaptive = TRUE) bandwidth for a Gaussian kernel using
# the cross-validation ("CV") approach. The distance matrix is computed up
# front from the coordinates of br_gdp.sp and passed in via dMat.
bw.adaptive <- bw.gwr(
  formula = GDP_CAPITA ~ Pu_Bank + Pr_Assets + IBGE_DU_RURAL +
    IBGE_CROP_PRODUCTION_. + IDHM_Longevidade + IDHM_Educacao +
    Intermediário_Adjacente + Sem_classificação,
  data = br_gdp.sp,
  approach = "CV",
  kernel = "gaussian",
  adaptive = TRUE,
  longlat = FALSE,
  dMat = gw.dist(dp.locat = coordinates(br_gdp.sp))
)
## Take a cup of tea and have a break, it will take a few minutes.
## -----A kind suggestion from GWmodel development group
## Adaptive bandwidth: 3450 CV score: 1.694039e+12
## Adaptive bandwidth: 2140 CV score: 1.678698e+12
## Adaptive bandwidth: 1330 CV score: 1.657367e+12
## Adaptive bandwidth: 829 CV score: 1.644962e+12
## Adaptive bandwidth: 520 CV score: 1.633098e+12
## Adaptive bandwidth: 328 CV score: 1.623361e+12
## Adaptive bandwidth: 210 CV score: 1.625011e+12
## Adaptive bandwidth: 401 CV score: 1.626733e+12
## Adaptive bandwidth: 282 CV score: 1.623024e+12
## Adaptive bandwidth: 254 CV score: 1.623645e+12
## Adaptive bandwidth: 299 CV score: 1.623059e+12
## Adaptive bandwidth: 271 CV score: 1.6234e+12
## Adaptive bandwidth: 288 CV score: 1.62317e+12
## Adaptive bandwidth: 277 CV score: 1.62349e+12
## Adaptive bandwidth: 284 CV score: 1.623136e+12
## Adaptive bandwidth: 279 CV score: 1.623381e+12
## Adaptive bandwidth: 282 CV score: 1.623024e+12
The result shows that 282 is the recommended number of nearest neighbours (data points) to be used as the adaptive bandwidth.
# Fit the basic GWR model with the adaptive bandwidth stored in bw.adaptive
# and a Gaussian kernel, then display the fitted model.
gwr.adaptive <- gwr.basic(
  formula = GDP_CAPITA ~ Pu_Bank + Pr_Assets + IBGE_DU_RURAL +
    IBGE_CROP_PRODUCTION_. + IDHM_Longevidade + IDHM_Educacao +
    Intermediário_Adjacente + Sem_classificação,
  data = br_gdp.sp,
  bw = bw.adaptive,
  kernel = "gaussian",
  adaptive = TRUE,
  longlat = FALSE
)
print(gwr.adaptive)
## ***********************************************************************
## * Package GWmodel *
## ***********************************************************************
## Program starts at: 2020-06-01 01:48:36
## Call:
## gwr.basic(formula = GDP_CAPITA ~ Pu_Bank + Pr_Assets + IBGE_DU_RURAL +
## IBGE_CROP_PRODUCTION_. + IDHM_Longevidade + IDHM_Educacao +
## Intermediário_Adjacente + Sem_classificação, data = br_gdp.sp,
## bw = bw.adaptive, kernel = "gaussian", adaptive = TRUE, longlat = FALSE)
##
## Dependent (y) variable: GDP_CAPITA
## Independent variables: Pu_Bank Pr_Assets IBGE_DU_RURAL IBGE_CROP_PRODUCTION_. IDHM_Longevidade IDHM_Educacao Intermediário_Adjacente Sem_classificação
## Number of data points: 5570
## ***********************************************************************
## * Results of Global Regression *
## ***********************************************************************
##
## Call:
## lm(formula = formula, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -57152 -7273 -2803 2446 283343
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.627e+04 4.847e+03 -15.735 < 2e-16 ***
## Pu_Bank 1.144e+03 2.753e+02 4.153 3.32e-05 ***
## Pr_Assets 3.391e-09 8.556e-10 3.963 7.48e-05 ***
## IBGE_DU_RURAL -1.151e+00 1.688e-01 -6.815 1.04e-11 ***
## IBGE_CROP_PRODUCTION_. 3.011e-02 1.633e-03 18.442 < 2e-16 ***
## IDHM_Longevidade 8.580e+04 7.258e+03 11.821 < 2e-16 ***
## IDHM_Educacao 4.960e+04 3.638e+03 13.634 < 2e-16 ***
## Intermediário_Adjacente -2.159e+03 7.126e+02 -3.030 0.00246 **
## Sem_classificação 1.023e+05 9.168e+03 11.153 < 2e-16 ***
##
## ---Significance stars
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Residual standard error: 17350 on 5561 degrees of freedom
## Multiple R-squared: 0.2726
## Adjusted R-squared: 0.2716
## F-statistic: 260.6 on 8 and 5561 DF, p-value: < 2.2e-16
## ***Extra Diagnostic information
## Residual sum of squares: 1.674621e+12
## Sigma(hat): 17342.38
## AIC: 124561.5
## AICc: 124561.5
## ***********************************************************************
## * Results of Geographically Weighted Regression *
## ***********************************************************************
##
## *********************Model calibration information*********************
## Kernel function: gaussian
## Adaptive bandwidth: 282 (number of nearest neighbours)
## Regression points: the same locations as observations are used.
## Distance metric: Euclidean distance metric is used.
##
## ****************Summary of GWR coefficient estimates:******************
## Min. 1st Qu. Median 3rd Qu.
## Intercept -1.0795e+05 -7.1517e+04 -5.2314e+04 -1.2104e+04
## Pu_Bank -1.6477e+03 5.2667e+02 1.3530e+03 2.1932e+03
## Pr_Assets -5.0407e-08 3.4879e-09 4.3324e-09 2.9855e-08
## IBGE_DU_RURAL -4.3035e+00 -1.4747e+00 -8.2904e-01 -5.9725e-01
## IBGE_CROP_PRODUCTION_. -2.2955e-02 1.6534e-02 2.4066e-02 3.2382e-02
## IDHM_Longevidade -1.9890e+04 1.7301e+04 5.9419e+04 8.4781e+04
## IDHM_Educacao 4.2785e+01 1.7253e+04 4.0043e+04 5.6818e+04
## Intermediário_Adjacente -5.9038e+03 -2.6448e+03 -1.8566e+03 -1.0095e+03
## Sem_classificação 5.0026e+03 5.6872e+04 8.9566e+04 1.2350e+05
## Max.
## Intercept 2387.1860
## Pu_Bank 4690.7946
## Pr_Assets 0.0000
## IBGE_DU_RURAL 0.3513
## IBGE_CROP_PRODUCTION_. 0.0765
## IDHM_Longevidade 115856.7073
## IDHM_Educacao 106665.2836
## Intermediário_Adjacente -49.9717
## Sem_classificação 187964.4826
## ************************Diagnostic information*************************
## Number of data points: 5570
## Effective number of parameters (2trace(S) - trace(S'S)): 109.5094
## Effective degrees of freedom (n-2trace(S) + trace(S'S)): 5460.491
## AICc (GWR book, Fotheringham, et al. 2002, p. 61, eq 2.33): 124193.5
## AIC (GWR book, Fotheringham, et al. 2002,GWR p. 96, eq. 4.22): 124110.6
## Residual sum of squares: 1.528231e+12
## R-square value: 0.3362204
## Adjusted R-square value: 0.322906
##
## ***********************************************************************
## Program stops at: 2020-06-01 01:48:55
Based on the geographically weighted regression with the adaptive bandwidth, the adjusted R-squared is 0.322906. This means that the above independent variables, together with the location of the municipality, explain about 32% of the variation in GDP per capita, which is higher than the multiple linear regression model (adjusted R-squared of 0.2716). Therefore, we can safely conclude that the location of the municipality also plays a part in affecting the GDP per capita.
# Convert the SDF component of the adaptive GWR result to an sf object and
# transform it to CRS 5530.
gdp.sf.adaptive <- st_transform(st_as_sf(gwr.adaptive$SDF), crs = 5530)
Preparation for local R2
# Turn the GWR output into a plain data frame and drop its geometry column.
gwr.adaptive.output <- gdp.sf.adaptive %>%
  as.data.frame() %>%
  subset(select = -c(geometry))

# Column-bind the GWR output (as a matrix) onto brazil_valuemap.res.sf.
brazil_valuemap.sf.adaptive <- cbind(
  brazil_valuemap.res.sf,
  as.matrix(gwr.adaptive.output)
)
# Choropleth of the Local_R2 column, classed by quantiles, with faint borders.
tm_shape(brazil_valuemap.sf.adaptive) +
  tm_fill("Local_R2", style = "quantile") +
  tm_borders(alpha = 0.1)
## Warning: The shape brazil_valuemap.sf.adaptive is invalid. See sf::st_is_valid
Local R2 is higher in less densely populated areas.
As observed from the map above, local R2 is highest around the mountainous region in the North East region. In more densely populated areas, local R2 is 0.6 - 0.8. This could be due to certain factors in the populated areas not being considered during the regression calculation. This is further supported by the low R-squared values obtained when using multiple linear regression.