In this take-home assignment, I will calibrate an explanatory model using conventional least squares regression and geographically weighted regression, and then describe the analysis results.
The R packages needed for this take home exercise are as follows:
- Spatial data handling: sf and spdep
- Geospatial analysis: ClustGeo
- Attribute data handling: tidyverse, especially readr and dplyr
- Choropleth mapping: tmap
- ggplot2-based graphs: ggpubr
- Geospatial statistical modelling: GWmodel
- Access to official spatial data sets of Brazil: geobr
# Packages used in this document.
packages <- c(
  "rgdal", "spdep", "raster", "ClustGeo", "tmap", "sf", "ggpubr", "cluster",
  "heatmaply", "corrplot", "tidyverse", "cleangeo", "GWmodel", "geobr", "olsrr"
)

# Install any package that is not yet available, then attach it.
# requireNamespace() is used purely as an availability check; library() does
# the attaching and stops with an error if the installation did not succeed.
for (p in packages) {
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p)
  }
  library(p, character.only = TRUE)
}
# Read the aspatial cities CSV into `municipality`.
municipality <- readr::read_csv(file = "data/aspatial/BRAZIL_CITIES.csv")
## Parsed with column specification:
## cols(
## .default = col_double(),
## CITY = col_character(),
## STATE = col_character(),
## REGIAO_TUR = col_character(),
## CATEGORIA_TUR = col_character(),
## RURAL_URBAN = col_character(),
## GVA_MAIN = col_character()
## )
## See spec(...) for full column specifications.
# Fetch the 2016 municipality boundaries for the whole country via geobr.
brazil_sp <- geobr::read_municipality(code_muni = "all", year = 2016)
## Using year 2016
## Loading data for the whole country. This might take a few minutes.
##
|
| | 0%
|
|=== | 4%
|
|===== | 7%
|
|======== | 11%
|
|========== | 15%
|
|============= | 19%
|
|================ | 22%
|
|================== | 26%
|
|===================== | 30%
|
|======================= | 33%
|
|========================== | 37%
|
|============================= | 41%
|
|=============================== | 44%
|
|================================== | 48%
|
|==================================== | 52%
|
|======================================= | 56%
|
|========================================= | 59%
|
|============================================ | 63%
|
|=============================================== | 67%
|
|================================================= | 70%
|
|==================================================== | 74%
|
|====================================================== | 78%
|
|========================================================= | 81%
|
|============================================================ | 85%
|
|============================================================== | 89%
|
|================================================================= | 93%
|
|=================================================================== | 96%
|
|======================================================================| 100%
# Inspect the column names, column types and leading values of the table.
glimpse(municipality)
## Rows: 5,571
## Columns: 81
## $ CITY <chr> "Abadia De Goiás", "Abadia Dos Dourados", ...
## $ STATE <chr> "GO", "MG", "GO", "MG", "PA", "CE", "BA", ...
## $ CAPITAL <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ IBGE_RES_POP <dbl> 6876, 6704, 15757, 22690, 141100, 10496, 8...
## $ IBGE_RES_POP_BRAS <dbl> 6876, 6704, 15609, 22690, 141040, 10496, 8...
## $ IBGE_RES_POP_ESTR <dbl> 0, 0, 148, 0, 60, 0, 0, 0, 0, 0, 0, 16, 17...
## $ IBGE_DU <dbl> 2137, 2328, 4655, 7694, 31061, 2791, 2572,...
## $ IBGE_DU_URBAN <dbl> 1546, 1481, 3233, 6667, 19057, 1251, 1193,...
## $ IBGE_DU_RURAL <dbl> 591, 847, 1422, 1027, 12004, 1540, 1379, 1...
## $ IBGE_POP <dbl> 5300, 4154, 10656, 18464, 82956, 4538, 372...
## $ IBGE_1 <dbl> 69, 38, 139, 176, 1354, 98, 37, 167, 69, 1...
## $ `IBGE_1-4` <dbl> 318, 207, 650, 856, 5567, 323, 156, 733, 3...
## $ `IBGE_5-9` <dbl> 438, 260, 894, 1233, 7618, 421, 263, 978, ...
## $ `IBGE_10-14` <dbl> 517, 351, 1087, 1539, 8905, 483, 277, 927,...
## $ `IBGE_15-59` <dbl> 3542, 2709, 6896, 11979, 53516, 2631, 2319...
## $ `IBGE_60+` <dbl> 416, 589, 990, 2681, 5996, 582, 673, 803, ...
## $ IBGE_PLANTED_AREA <dbl> 319, 4479, 10307, 1862, 25200, 2598, 895, ...
## $ `IBGE_CROP_PRODUCTION_$` <dbl> 1843, 18017, 33085, 7502, 700872, 5234, 39...
## $ `IDHM Ranking 2010` <dbl> 1689, 2207, 2202, 1994, 3530, 3522, 4086, ...
## $ IDHM <dbl> 0.708, 0.690, 0.690, 0.698, 0.628, 0.628, ...
## $ IDHM_Renda <dbl> 0.687, 0.693, 0.671, 0.720, 0.579, 0.540, ...
## $ IDHM_Longevidade <dbl> 0.830, 0.839, 0.841, 0.848, 0.798, 0.748, ...
## $ IDHM_Educacao <dbl> 0.622, 0.563, 0.579, 0.556, 0.537, 0.612, ...
## $ LONG <dbl> -49.44055, -47.39683, -48.71881, -45.44619...
## $ LAT <dbl> -16.758812, -18.487565, -16.182672, -19.15...
## $ ALT <dbl> 893.60, 753.12, 1017.55, 644.74, 10.12, 40...
## $ PAY_TV <dbl> 360, 77, 227, 1230, 3389, 29, 952, 51, 55,...
## $ FIXED_PHONES <dbl> 842, 296, 720, 1716, 1218, 34, 335, 222, 3...
## $ AREA <dbl> 147.26, 881.06, 1045.13, 1817.07, 1610.65,...
## $ REGIAO_TUR <chr> NA, "Caminhos Do Cerrado", "Região Turísti...
## $ CATEGORIA_TUR <chr> NA, "D", "C", "D", "D", NA, "D", NA, NA, "...
## $ ESTIMATED_POP <dbl> 8583, 6972, 19614, 23223, 156292, 11663, 8...
## $ RURAL_URBAN <chr> "Urbano", "Rural Adjacente", "Rural Adjace...
## $ GVA_AGROPEC <dbl> 6.20, 50524.57, 42.84, 113824.60, 140463.7...
## $ GVA_INDUSTRY <dbl> 27991.25, 25917.70, 16728.30, 31002.62, 58...
## $ GVA_SERVICES <dbl> 74750.32, 62689.23, 138198.58, 172.33, 468...
## $ GVA_PUBLIC <dbl> 36915.04, 28083.79, 63396.20, 86081.41, 48...
## $ GVA_TOTAL <dbl> 145857.60, 167215.28, 261161.91, 403241.27...
## $ TAXES <dbl> 20554.20, 12873.50, 26822.58, 26994.09, 95...
## $ GDP <dbl> 166.41, 180.09, 287984.49, 430235.36, 1249...
## $ POP_GDP <dbl> 8053, 7037, 18427, 23574, 151934, 11483, 9...
## $ GDP_CAPITA <dbl> 20664.57, 25591.70, 15628.40, 18250.42, 82...
## $ GVA_MAIN <chr> "Demais serviços", "Demais serviços", "Dem...
## $ MUN_EXPENDIT <dbl> 28227691, 17909274, 37513019, NA, NA, NA, ...
## $ COMP_TOT <dbl> 284, 476, 288, 621, 931, 86, 191, 87, 285,...
## $ COMP_A <dbl> 5, 6, 5, 18, 4, 1, 6, 2, 5, 2, 0, 8, 3, 1,...
## $ COMP_B <dbl> 1, 6, 9, 1, 2, 0, 0, 0, 0, 0, 0, 2, 2, 0, ...
## $ COMP_C <dbl> 56, 30, 26, 40, 43, 4, 8, 3, 20, 4, 9, 40,...
## $ COMP_D <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, ...
## $ COMP_E <dbl> 2, 2, 2, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 2, ...
## $ COMP_F <dbl> 29, 34, 7, 20, 27, 6, 4, 0, 10, 2, 0, 25, ...
## $ COMP_G <dbl> 110, 190, 117, 303, 500, 48, 97, 71, 133, ...
## $ COMP_H <dbl> 26, 70, 12, 62, 16, 2, 5, 0, 18, 8, 1, 67,...
## $ COMP_I <dbl> 4, 28, 57, 30, 31, 10, 5, 1, 14, 3, 0, 25,...
## $ COMP_J <dbl> 5, 11, 2, 9, 6, 2, 3, 1, 8, 1, 1, 9, 5, 14...
## $ COMP_K <dbl> 0, 0, 1, 6, 1, 0, 1, 0, 0, 1, 0, 4, 3, 3, ...
## $ COMP_L <dbl> 2, 4, 0, 4, 1, 0, 0, 0, 4, 0, 0, 7, 4, 4, ...
## $ COMP_M <dbl> 10, 15, 7, 28, 22, 2, 5, 0, 11, 4, 2, 26, ...
## $ COMP_N <dbl> 12, 29, 15, 27, 16, 3, 5, 1, 26, 0, 1, 16,...
## $ COMP_O <dbl> 4, 2, 3, 2, 2, 2, 2, 2, 2, 2, 6, 2, 4, 2, ...
## $ COMP_P <dbl> 6, 9, 11, 15, 155, 0, 8, 0, 8, 1, 6, 14, 1...
## $ COMP_Q <dbl> 6, 14, 5, 19, 33, 2, 1, 2, 9, 3, 0, 13, 22...
## $ COMP_R <dbl> 1, 6, 1, 9, 15, 0, 2, 0, 4, 0, 0, 4, 6, 6,...
## $ COMP_S <dbl> 5, 19, 8, 27, 56, 4, 38, 4, 12, 3, 4, 23, ...
## $ COMP_T <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ COMP_U <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ HOTELS <dbl> NA, NA, 1, NA, NA, NA, 1, NA, NA, NA, NA, ...
## $ BEDS <dbl> NA, NA, 34, NA, NA, NA, 24, NA, NA, NA, NA...
## $ Pr_Agencies <dbl> NA, NA, 1, 2, 2, NA, NA, 1, 0, 0, 0, 1, 0,...
## $ Pu_Agencies <dbl> NA, NA, 1, 2, 4, NA, NA, 0, 1, 1, 1, 2, 1,...
## $ Pr_Bank <dbl> NA, NA, 1, 2, 2, NA, NA, 1, 0, 0, 0, 1, 0,...
## $ Pu_Bank <dbl> NA, NA, 1, 2, 4, NA, NA, 0, 1, 1, 1, 2, 1,...
## $ Pr_Assets <dbl> NA, NA, 33724584, 44974716, 76181384, NA, ...
## $ Pu_Assets <dbl> NA, NA, 67091904, 371922572, 800078483, NA...
## $ Cars <dbl> 2158, 2227, 2838, 6928, 5277, 553, 896, 61...
## $ Motorcycles <dbl> 1246, 1142, 1426, 2953, 25661, 1674, 696, ...
## $ Wheeled_tractor <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 1, 0, 0, ...
## $ UBER <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## $ MAC <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## $ `WAL-MART` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA...
## $ POST_OFFICES <dbl> 1, 1, 3, 4, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, ...
# Per-column summary (quartiles for numeric columns, NA counts where present).
summary(municipality)
## CITY STATE CAPITAL IBGE_RES_POP
## Length:5571 Length:5571 Min. :0.000000 Min. : 805
## Class :character Class :character 1st Qu.:0.000000 1st Qu.: 5235
## Mode :character Mode :character Median :0.000000 Median : 10934
## Mean :0.004847 Mean : 34278
## 3rd Qu.:0.000000 3rd Qu.: 23424
## Max. :1.000000 Max. :11253503
## NA's :6
## IBGE_RES_POP_BRAS IBGE_RES_POP_ESTR IBGE_DU IBGE_DU_URBAN
## Min. : 805 Min. : 0.0 Min. : 239 Min. : 60
## 1st Qu.: 5230 1st Qu.: 0.0 1st Qu.: 1572 1st Qu.: 874
## Median : 10926 Median : 0.0 Median : 3174 Median : 1846
## Mean : 34200 Mean : 77.5 Mean : 10303 Mean : 8859
## 3rd Qu.: 23390 3rd Qu.: 10.0 3rd Qu.: 6726 3rd Qu.: 4624
## Max. :11133776 Max. :119727.0 Max. :3576148 Max. :3548433
## NA's :6 NA's :6 NA's :8 NA's :8
## IBGE_DU_RURAL IBGE_POP IBGE_1 IBGE_1-4
## Min. : 3 Min. : 174 Min. : 0.0 Min. : 5
## 1st Qu.: 487 1st Qu.: 2801 1st Qu.: 38.0 1st Qu.: 158
## Median : 931 Median : 6170 Median : 92.0 Median : 376
## Mean : 1463 Mean : 27595 Mean : 383.3 Mean : 1544
## 3rd Qu.: 1832 3rd Qu.: 15302 3rd Qu.: 232.0 3rd Qu.: 951
## Max. :33809 Max. :10463636 Max. :129464.0 Max. :514794
## NA's :79 NA's :6 NA's :6 NA's :6
## IBGE_5-9 IBGE_10-14 IBGE_15-59 IBGE_60+
## Min. : 7 Min. : 12 Min. : 94 Min. : 29
## 1st Qu.: 220 1st Qu.: 259 1st Qu.: 1734 1st Qu.: 341
## Median : 516 Median : 588 Median : 3841 Median : 722
## Mean : 2069 Mean : 2381 Mean : 18212 Mean : 3004
## 3rd Qu.: 1300 3rd Qu.: 1478 3rd Qu.: 9628 3rd Qu.: 1724
## Max. :684443 Max. :783702 Max. :7058221 Max. :1293012
## NA's :6 NA's :6 NA's :6 NA's :6
## IBGE_PLANTED_AREA IBGE_CROP_PRODUCTION_$ IDHM Ranking 2010 IDHM
## Min. : 0.0 Min. : 0 Min. : 1 Min. :0.4180
## 1st Qu.: 910.2 1st Qu.: 2326 1st Qu.:1392 1st Qu.:0.5990
## Median : 3471.5 Median : 13846 Median :2782 Median :0.6650
## Mean : 14179.9 Mean : 57384 Mean :2783 Mean :0.6592
## 3rd Qu.: 11194.2 3rd Qu.: 55619 3rd Qu.:4173 3rd Qu.:0.7180
## Max. :1205669.0 Max. :3274885 Max. :5565 Max. :0.8620
## NA's :1 NA's :1 NA's :7 NA's :6
## IDHM_Renda IDHM_Longevidade IDHM_Educacao LONG
## Min. :0.4000 Min. :0.1870 Min. :0.2070 Min. :-72.92
## 1st Qu.:0.5720 1st Qu.:0.7690 1st Qu.:0.4900 1st Qu.:-50.87
## Median :0.6540 Median :0.8080 Median :0.5600 Median :-46.53
## Mean :0.6429 Mean :0.8015 Mean :0.5591 Mean :-46.23
## 3rd Qu.:0.7070 3rd Qu.:0.8360 3rd Qu.:0.6310 3rd Qu.:-41.41
## Max. :0.8910 Max. :0.8940 Max. :0.8250 Max. :-32.44
## NA's :6 NA's :6 NA's :6
## LAT ALT PAY_TV FIXED_PHONES
## Min. :-33.688 Min. : 0.0 Min. : 1 Min. : 3
## 1st Qu.:-22.843 1st Qu.: 169.7 1st Qu.: 88 1st Qu.: 119
## Median :-18.097 Median : 406.5 Median : 247 Median : 327
## Mean :-16.453 Mean : 894.0 Mean : 3095 Mean : 6568
## 3rd Qu.: -8.493 3rd Qu.: 629.0 3rd Qu.: 815 3rd Qu.: 1151
## Max. : 4.585 Max. :874579.0 Max. :2047668 Max. :5543127
## NA's :8 NA's :2 NA's :2
## AREA REGIAO_TUR CATEGORIA_TUR ESTIMATED_POP
## Min. : 3.57 Length:5571 Length:5571 Min. : 786
## 1st Qu.: 204.38 Class :character Class :character 1st Qu.: 5454
## Median : 415.92 Mode :character Mode :character Median : 11591
## Mean : 1517.18 Mean : 37437
## 3rd Qu.: 1026.51 3rd Qu.: 25297
## Max. :159533.33 Max. :12176866
## NA's :2
## RURAL_URBAN GVA_AGROPEC GVA_INDUSTRY GVA_SERVICES
## Length:5571 Min. : 0 Min. : 1 Min. : 2
## Class :character 1st Qu.: 4189 1st Qu.: 1726 1st Qu.: 10112
## Mode :character Median : 20426 Median : 7424 Median : 31211
## Mean : 47271 Mean : 175928 Mean : 489451
## 3rd Qu.: 51227 3rd Qu.: 41022 3rd Qu.: 115406
## Max. :1402282 Max. :63306755 Max. :464656988
## NA's :1 NA's :1 NA's :1
## GVA_PUBLIC GVA_TOTAL TAXES GDP
## Min. : 7 Min. : 17 Min. : -14159 Min. : 15
## 1st Qu.: 17267 1st Qu.: 42253 1st Qu.: 1305 1st Qu.: 43709
## Median : 35866 Median : 119492 Median : 5100 Median : 125153
## Mean : 123768 Mean : 832987 Mean : 118864 Mean : 954584
## 3rd Qu.: 89245 3rd Qu.: 313963 3rd Qu.: 22197 3rd Qu.: 329539
## Max. :41902893 Max. :569910503 Max. :117125387 Max. :687035890
## NA's :1 NA's :1 NA's :1 NA's :1
## POP_GDP GDP_CAPITA GVA_MAIN MUN_EXPENDIT
## Min. : 815 Min. : 3191 Length:5571 Min. :1.421e+06
## 1st Qu.: 5483 1st Qu.: 9058 Class :character 1st Qu.:1.573e+07
## Median : 11578 Median : 15870 Mode :character Median :2.746e+07
## Mean : 36998 Mean : 21126 Mean :1.043e+08
## 3rd Qu.: 25085 3rd Qu.: 26155 3rd Qu.:5.672e+07
## Max. :12038175 Max. :314638 Max. :4.577e+10
## NA's :1 NA's :1 NA's :1492
## COMP_TOT COMP_A COMP_B COMP_C
## Min. : 6.0 Min. : 0.00 Min. : 0.000 Min. : 0.00
## 1st Qu.: 68.0 1st Qu.: 1.00 1st Qu.: 0.000 1st Qu.: 3.00
## Median : 162.0 Median : 2.00 Median : 0.000 Median : 11.00
## Mean : 906.8 Mean : 18.25 Mean : 1.852 Mean : 73.44
## 3rd Qu.: 448.0 3rd Qu.: 8.00 3rd Qu.: 2.000 3rd Qu.: 39.00
## Max. :530446.0 Max. :1948.00 Max. :274.000 Max. :31566.00
## NA's :1 NA's :1 NA's :1 NA's :1
## COMP_D COMP_E COMP_F COMP_G
## Min. : 0.0000 Min. : 0.000 Min. : 0.00 Min. : 1.0
## 1st Qu.: 0.0000 1st Qu.: 0.000 1st Qu.: 1.00 1st Qu.: 32.0
## Median : 0.0000 Median : 0.000 Median : 4.00 Median : 74.5
## Mean : 0.4262 Mean : 2.029 Mean : 43.26 Mean : 348.0
## 3rd Qu.: 0.0000 3rd Qu.: 1.000 3rd Qu.: 15.00 3rd Qu.: 199.0
## Max. :332.0000 Max. :657.000 Max. :25222.00 Max. :150633.0
## NA's :1 NA's :1 NA's :1 NA's :1
## COMP_H COMP_I COMP_J COMP_K
## Min. : 0 Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 1 1st Qu.: 2.00 1st Qu.: 0.00 1st Qu.: 0.00
## Median : 7 Median : 7.00 Median : 1.00 Median : 0.00
## Mean : 41 Mean : 55.88 Mean : 24.74 Mean : 15.55
## 3rd Qu.: 25 3rd Qu.: 24.00 3rd Qu.: 5.00 3rd Qu.: 2.00
## Max. :19515 Max. :29290.00 Max. :38720.00 Max. :23738.00
## NA's :1 NA's :1 NA's :1 NA's :1
## COMP_L COMP_M COMP_N COMP_O
## Min. : 0.00 Min. : 0.00 Min. : 0.0 Min. : 0.000
## 1st Qu.: 0.00 1st Qu.: 1.00 1st Qu.: 1.0 1st Qu.: 2.000
## Median : 0.00 Median : 4.00 Median : 4.0 Median : 2.000
## Mean : 15.14 Mean : 51.29 Mean : 83.7 Mean : 3.269
## 3rd Qu.: 3.00 3rd Qu.: 13.00 3rd Qu.: 14.0 3rd Qu.: 3.000
## Max. :14003.00 Max. :49181.00 Max. :76757.0 Max. :204.000
## NA's :1 NA's :1 NA's :1 NA's :1
## COMP_P COMP_Q COMP_R COMP_S
## Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 2.00 1st Qu.: 1.00 1st Qu.: 0.00 1st Qu.: 5.00
## Median : 6.00 Median : 3.00 Median : 2.00 Median : 12.00
## Mean : 30.96 Mean : 34.15 Mean : 12.18 Mean : 51.61
## 3rd Qu.: 17.00 3rd Qu.: 12.00 3rd Qu.: 6.00 3rd Qu.: 31.00
## Max. :16030.00 Max. :22248.00 Max. :6687.00 Max. :24832.00
## NA's :1 NA's :1 NA's :1 NA's :1
## COMP_T COMP_U HOTELS BEDS
## Min. :0 Min. : 0.00000 Min. : 1.000 Min. : 2.0
## 1st Qu.:0 1st Qu.: 0.00000 1st Qu.: 1.000 1st Qu.: 40.0
## Median :0 Median : 0.00000 Median : 1.000 Median : 82.0
## Mean :0 Mean : 0.05027 Mean : 3.131 Mean : 257.5
## 3rd Qu.:0 3rd Qu.: 0.00000 3rd Qu.: 3.000 3rd Qu.: 200.0
## Max. :0 Max. :123.00000 Max. :97.000 Max. :13247.0
## NA's :1 NA's :1 NA's :4684 NA's :4684
## Pr_Agencies Pu_Agencies Pr_Bank Pu_Bank
## Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. :0.00
## 1st Qu.: 0.000 1st Qu.: 1.000 1st Qu.: 0.000 1st Qu.:1.00
## Median : 1.000 Median : 2.000 Median : 1.000 Median :2.00
## Mean : 3.383 Mean : 2.829 Mean : 1.312 Mean :1.58
## 3rd Qu.: 2.000 3rd Qu.: 2.000 3rd Qu.: 2.000 3rd Qu.:2.00
## Max. :1693.000 Max. :626.000 Max. :83.000 Max. :8.00
## NA's :2229 NA's :2229 NA's :2229 NA's :2229
## Pr_Assets Pu_Assets Cars Motorcycles
## Min. :0.000e+00 Min. :0.000e+00 Min. : 2 Min. : 4
## 1st Qu.:0.000e+00 1st Qu.:4.047e+07 1st Qu.: 602 1st Qu.: 591
## Median :3.231e+07 Median :1.339e+08 Median : 1440 Median : 1285
## Mean :9.187e+09 Mean :6.005e+09 Mean : 9861 Mean : 4879
## 3rd Qu.:1.148e+08 3rd Qu.:4.970e+08 3rd Qu.: 4086 3rd Qu.: 3295
## Max. :1.950e+13 Max. :8.020e+12 Max. :5740995 Max. :1134570
## NA's :2229 NA's :2229 NA's :10 NA's :10
## Wheeled_tractor UBER MAC WAL-MART
## Min. : 0.000 Min. :1 Min. : 1.000 Min. : 1.000
## 1st Qu.: 0.000 1st Qu.:1 1st Qu.: 1.000 1st Qu.: 1.000
## Median : 0.000 Median :1 Median : 2.000 Median : 1.000
## Mean : 5.755 Mean :1 Mean : 4.277 Mean : 2.059
## 3rd Qu.: 1.000 3rd Qu.:1 3rd Qu.: 3.000 3rd Qu.: 1.750
## Max. :3236.000 Max. :1 Max. :130.000 Max. :26.000
## NA's :10 NA's :5446 NA's :5405 NA's :5469
## POST_OFFICES
## Min. : 1.000
## 1st Qu.: 1.000
## Median : 1.000
## Mean : 2.081
## 3rd Qu.: 2.000
## Max. :225.000
## NA's :118
I noticed that some columns, such as STATE, CAPITAL, REGIAO_TUR, CATEGORIA_TUR, and UBER, are not set to the correct data type. Therefore, I will convert them to factors so that the summary results are much clearer.
I also identified that there are municipalities without a GDP_CAPITA value; let’s find out more.
# Upper-case the city names and convert the categorical columns to factors so
# that summary() reports level counts instead of "Length/Class/Mode".
# Uses mutate()/across() rather than mutate_at() + funs(): funs() is
# deprecated and mutate_at() is superseded in current dplyr.
municipality <- municipality %>%
  mutate(
    CITY = factor(toupper(CITY)),
    across(c(STATE, CAPITAL, REGIAO_TUR, CATEGORIA_TUR, UBER), factor)
  )
summary(municipality)
## CITY STATE CAPITAL IBGE_RES_POP
## BOM JESUS : 5 MG : 853 0:5544 Min. : 805
## SÃO DOMINGOS: 5 SP : 645 1: 27 1st Qu.: 5235
## BONITO : 4 RS : 498 Median : 10934
## PLANALTO : 4 BA : 417 Mean : 34278
## SANTA HELENA: 4 PR : 399 3rd Qu.: 23424
## SANTA INÊS : 4 SC : 295 Max. :11253503
## (Other) :5545 (Other):2464 NA's :6
## IBGE_RES_POP_BRAS IBGE_RES_POP_ESTR IBGE_DU IBGE_DU_URBAN
## Min. : 805 Min. : 0.0 Min. : 239 Min. : 60
## 1st Qu.: 5230 1st Qu.: 0.0 1st Qu.: 1572 1st Qu.: 874
## Median : 10926 Median : 0.0 Median : 3174 Median : 1846
## Mean : 34200 Mean : 77.5 Mean : 10303 Mean : 8859
## 3rd Qu.: 23390 3rd Qu.: 10.0 3rd Qu.: 6726 3rd Qu.: 4624
## Max. :11133776 Max. :119727.0 Max. :3576148 Max. :3548433
## NA's :6 NA's :6 NA's :8 NA's :8
## IBGE_DU_RURAL IBGE_POP IBGE_1 IBGE_1-4
## Min. : 3 Min. : 174 Min. : 0.0 Min. : 5
## 1st Qu.: 487 1st Qu.: 2801 1st Qu.: 38.0 1st Qu.: 158
## Median : 931 Median : 6170 Median : 92.0 Median : 376
## Mean : 1463 Mean : 27595 Mean : 383.3 Mean : 1544
## 3rd Qu.: 1832 3rd Qu.: 15302 3rd Qu.: 232.0 3rd Qu.: 951
## Max. :33809 Max. :10463636 Max. :129464.0 Max. :514794
## NA's :79 NA's :6 NA's :6 NA's :6
## IBGE_5-9 IBGE_10-14 IBGE_15-59 IBGE_60+
## Min. : 7 Min. : 12 Min. : 94 Min. : 29
## 1st Qu.: 220 1st Qu.: 259 1st Qu.: 1734 1st Qu.: 341
## Median : 516 Median : 588 Median : 3841 Median : 722
## Mean : 2069 Mean : 2381 Mean : 18212 Mean : 3004
## 3rd Qu.: 1300 3rd Qu.: 1478 3rd Qu.: 9628 3rd Qu.: 1724
## Max. :684443 Max. :783702 Max. :7058221 Max. :1293012
## NA's :6 NA's :6 NA's :6 NA's :6
## IBGE_PLANTED_AREA IBGE_CROP_PRODUCTION_$ IDHM Ranking 2010 IDHM
## Min. : 0.0 Min. : 0 Min. : 1 Min. :0.4180
## 1st Qu.: 910.2 1st Qu.: 2326 1st Qu.:1392 1st Qu.:0.5990
## Median : 3471.5 Median : 13846 Median :2782 Median :0.6650
## Mean : 14179.9 Mean : 57384 Mean :2783 Mean :0.6592
## 3rd Qu.: 11194.2 3rd Qu.: 55619 3rd Qu.:4173 3rd Qu.:0.7180
## Max. :1205669.0 Max. :3274885 Max. :5565 Max. :0.8620
## NA's :1 NA's :1 NA's :7 NA's :6
## IDHM_Renda IDHM_Longevidade IDHM_Educacao LONG
## Min. :0.4000 Min. :0.1870 Min. :0.2070 Min. :-72.92
## 1st Qu.:0.5720 1st Qu.:0.7690 1st Qu.:0.4900 1st Qu.:-50.87
## Median :0.6540 Median :0.8080 Median :0.5600 Median :-46.53
## Mean :0.6429 Mean :0.8015 Mean :0.5591 Mean :-46.23
## 3rd Qu.:0.7070 3rd Qu.:0.8360 3rd Qu.:0.6310 3rd Qu.:-41.41
## Max. :0.8910 Max. :0.8940 Max. :0.8250 Max. :-32.44
## NA's :6 NA's :6 NA's :6
## LAT ALT PAY_TV FIXED_PHONES
## Min. :-33.688 Min. : 0.0 Min. : 1 Min. : 3
## 1st Qu.:-22.843 1st Qu.: 169.7 1st Qu.: 88 1st Qu.: 119
## Median :-18.097 Median : 406.5 Median : 247 Median : 327
## Mean :-16.453 Mean : 894.0 Mean : 3095 Mean : 6568
## 3rd Qu.: -8.493 3rd Qu.: 629.0 3rd Qu.: 815 3rd Qu.: 1151
## Max. : 4.585 Max. :874579.0 Max. :2047668 Max. :5543127
## NA's :8 NA's :2 NA's :2
## AREA REGIAO_TUR CATEGORIA_TUR
## Min. : 3.57 Corredores Das Águas: 59 A : 51
## 1st Qu.: 204.38 Vale Do Contestado : 45 B : 168
## Median : 415.92 Amazônia Atlântica : 40 C : 521
## Mean : 1517.18 Araguaia-Tocantins : 39 D :1891
## 3rd Qu.: 1026.51 Cariri : 37 E : 653
## Max. :159533.33 (Other) :3064 NA's:2287
## NA's :2287
## ESTIMATED_POP RURAL_URBAN GVA_AGROPEC GVA_INDUSTRY
## Min. : 786 Length:5571 Min. : 0 Min. : 1
## 1st Qu.: 5454 Class :character 1st Qu.: 4189 1st Qu.: 1726
## Median : 11591 Mode :character Median : 20426 Median : 7424
## Mean : 37437 Mean : 47271 Mean : 175928
## 3rd Qu.: 25297 3rd Qu.: 51227 3rd Qu.: 41022
## Max. :12176866 Max. :1402282 Max. :63306755
## NA's :2 NA's :1 NA's :1
## GVA_SERVICES GVA_PUBLIC GVA_TOTAL TAXES
## Min. : 2 Min. : 7 Min. : 17 Min. : -14159
## 1st Qu.: 10112 1st Qu.: 17267 1st Qu.: 42253 1st Qu.: 1305
## Median : 31211 Median : 35866 Median : 119492 Median : 5100
## Mean : 489451 Mean : 123768 Mean : 832987 Mean : 118864
## 3rd Qu.: 115406 3rd Qu.: 89245 3rd Qu.: 313963 3rd Qu.: 22197
## Max. :464656988 Max. :41902893 Max. :569910503 Max. :117125387
## NA's :1 NA's :1 NA's :1 NA's :1
## GDP POP_GDP GDP_CAPITA GVA_MAIN
## Min. : 15 Min. : 815 Min. : 3191 Length:5571
## 1st Qu.: 43709 1st Qu.: 5483 1st Qu.: 9058 Class :character
## Median : 125153 Median : 11578 Median : 15870 Mode :character
## Mean : 954584 Mean : 36998 Mean : 21126
## 3rd Qu.: 329539 3rd Qu.: 25085 3rd Qu.: 26155
## Max. :687035890 Max. :12038175 Max. :314638
## NA's :1 NA's :1 NA's :1
## MUN_EXPENDIT COMP_TOT COMP_A COMP_B
## Min. :1.421e+06 Min. : 6.0 Min. : 0.00 Min. : 0.000
## 1st Qu.:1.573e+07 1st Qu.: 68.0 1st Qu.: 1.00 1st Qu.: 0.000
## Median :2.746e+07 Median : 162.0 Median : 2.00 Median : 0.000
## Mean :1.043e+08 Mean : 906.8 Mean : 18.25 Mean : 1.852
## 3rd Qu.:5.672e+07 3rd Qu.: 448.0 3rd Qu.: 8.00 3rd Qu.: 2.000
## Max. :4.577e+10 Max. :530446.0 Max. :1948.00 Max. :274.000
## NA's :1492 NA's :1 NA's :1 NA's :1
## COMP_C COMP_D COMP_E COMP_F
## Min. : 0.00 Min. : 0.0000 Min. : 0.000 Min. : 0.00
## 1st Qu.: 3.00 1st Qu.: 0.0000 1st Qu.: 0.000 1st Qu.: 1.00
## Median : 11.00 Median : 0.0000 Median : 0.000 Median : 4.00
## Mean : 73.44 Mean : 0.4262 Mean : 2.029 Mean : 43.26
## 3rd Qu.: 39.00 3rd Qu.: 0.0000 3rd Qu.: 1.000 3rd Qu.: 15.00
## Max. :31566.00 Max. :332.0000 Max. :657.000 Max. :25222.00
## NA's :1 NA's :1 NA's :1 NA's :1
## COMP_G COMP_H COMP_I COMP_J
## Min. : 1.0 Min. : 0 Min. : 0.00 Min. : 0.00
## 1st Qu.: 32.0 1st Qu.: 1 1st Qu.: 2.00 1st Qu.: 0.00
## Median : 74.5 Median : 7 Median : 7.00 Median : 1.00
## Mean : 348.0 Mean : 41 Mean : 55.88 Mean : 24.74
## 3rd Qu.: 199.0 3rd Qu.: 25 3rd Qu.: 24.00 3rd Qu.: 5.00
## Max. :150633.0 Max. :19515 Max. :29290.00 Max. :38720.00
## NA's :1 NA's :1 NA's :1 NA's :1
## COMP_K COMP_L COMP_M COMP_N
## Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.0
## 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 1.00 1st Qu.: 1.0
## Median : 0.00 Median : 0.00 Median : 4.00 Median : 4.0
## Mean : 15.55 Mean : 15.14 Mean : 51.29 Mean : 83.7
## 3rd Qu.: 2.00 3rd Qu.: 3.00 3rd Qu.: 13.00 3rd Qu.: 14.0
## Max. :23738.00 Max. :14003.00 Max. :49181.00 Max. :76757.0
## NA's :1 NA's :1 NA's :1 NA's :1
## COMP_O COMP_P COMP_Q COMP_R
## Min. : 0.000 Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 2.000 1st Qu.: 2.00 1st Qu.: 1.00 1st Qu.: 0.00
## Median : 2.000 Median : 6.00 Median : 3.00 Median : 2.00
## Mean : 3.269 Mean : 30.96 Mean : 34.15 Mean : 12.18
## 3rd Qu.: 3.000 3rd Qu.: 17.00 3rd Qu.: 12.00 3rd Qu.: 6.00
## Max. :204.000 Max. :16030.00 Max. :22248.00 Max. :6687.00
## NA's :1 NA's :1 NA's :1 NA's :1
## COMP_S COMP_T COMP_U HOTELS
## Min. : 0.00 Min. :0 Min. : 0.00000 Min. : 1.000
## 1st Qu.: 5.00 1st Qu.:0 1st Qu.: 0.00000 1st Qu.: 1.000
## Median : 12.00 Median :0 Median : 0.00000 Median : 1.000
## Mean : 51.61 Mean :0 Mean : 0.05027 Mean : 3.131
## 3rd Qu.: 31.00 3rd Qu.:0 3rd Qu.: 0.00000 3rd Qu.: 3.000
## Max. :24832.00 Max. :0 Max. :123.00000 Max. :97.000
## NA's :1 NA's :1 NA's :1 NA's :4684
## BEDS Pr_Agencies Pu_Agencies Pr_Bank
## Min. : 2.0 Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 40.0 1st Qu.: 0.000 1st Qu.: 1.000 1st Qu.: 0.000
## Median : 82.0 Median : 1.000 Median : 2.000 Median : 1.000
## Mean : 257.5 Mean : 3.383 Mean : 2.829 Mean : 1.312
## 3rd Qu.: 200.0 3rd Qu.: 2.000 3rd Qu.: 2.000 3rd Qu.: 2.000
## Max. :13247.0 Max. :1693.000 Max. :626.000 Max. :83.000
## NA's :4684 NA's :2229 NA's :2229 NA's :2229
## Pu_Bank Pr_Assets Pu_Assets Cars
## Min. :0.00 Min. :0.000e+00 Min. :0.000e+00 Min. : 2
## 1st Qu.:1.00 1st Qu.:0.000e+00 1st Qu.:4.047e+07 1st Qu.: 602
## Median :2.00 Median :3.231e+07 Median :1.339e+08 Median : 1440
## Mean :1.58 Mean :9.187e+09 Mean :6.005e+09 Mean : 9861
## 3rd Qu.:2.00 3rd Qu.:1.148e+08 3rd Qu.:4.970e+08 3rd Qu.: 4086
## Max. :8.00 Max. :1.950e+13 Max. :8.020e+12 Max. :5740995
## NA's :2229 NA's :2229 NA's :2229 NA's :10
## Motorcycles Wheeled_tractor UBER MAC
## Min. : 4 Min. : 0.000 1 : 125 Min. : 1.000
## 1st Qu.: 591 1st Qu.: 0.000 NA's:5446 1st Qu.: 1.000
## Median : 1285 Median : 0.000 Median : 2.000
## Mean : 4879 Mean : 5.755 Mean : 4.277
## 3rd Qu.: 3295 3rd Qu.: 1.000 3rd Qu.: 3.000
## Max. :1134570 Max. :3236.000 Max. :130.000
## NA's :10 NA's :10 NA's :5405
## WAL-MART POST_OFFICES
## Min. : 1.000 Min. : 1.000
## 1st Qu.: 1.000 1st Qu.: 1.000
## Median : 1.000 Median : 1.000
## Mean : 2.059 Mean : 2.081
## 3rd Qu.: 1.750 3rd Qu.: 2.000
## Max. :26.000 Max. :225.000
## NA's :5469 NA's :118
# Upper-case the municipality names and store them as a factor, mirroring the
# treatment of CITY in the aspatial table.
# Plain mutate() replaces mutate_at() + funs(): funs() is deprecated and
# mutate_at() is superseded in current dplyr.
brazil_sp <- brazil_sp %>%
  mutate(name_muni = factor(toupper(name_muni)))
summary(brazil_sp)
## code_muni name_muni code_state abbrev_state
## Min. :1100015 BOM JESUS : 5 31 : 853 MG : 853
## 1st Qu.:2512175 SÃO DOMINGOS: 5 35 : 645 SP : 645
## Median :3146354 BONITO : 4 43 : 499 RS : 499
## Mean :3253966 PLANALTO : 4 29 : 417 BA : 417
## 3rd Qu.:4119264 SANTA HELENA: 4 41 : 399 PR : 399
## Max. :5300108 SANTA INÊS : 4 42 : 295 SC : 295
## (Other) :5546 (Other):2464 (Other):2464
## geom
## MULTIPOLYGON :2773
## POLYGON :2799
## epsg:4674 : 0
## +proj=long...: 0
##
##
##
Comparing abbrev_state with STATE from municipality, there is a difference for state RS (499 municipalities in the geospatial data versus 498 in the aspatial data).
# Number of rows per STATE in the aspatial table, largest first.
count(municipality, STATE, sort = TRUE)
## # A tibble: 27 x 2
## STATE n
## <fct> <int>
## 1 MG 853
## 2 SP 645
## 3 RS 498
## 4 BA 417
## 5 PR 399
## 6 SC 295
## 7 GO 246
## 8 PI 224
## 9 PB 223
## 10 MA 217
## # ... with 17 more rows
# Number of features per abbrev_state in the geospatial layer, largest first.
# Because brazil_sp is an sf object, the printed result also carries one
# geometry per state alongside the count.
brazil_sp %>%
group_by(abbrev_state) %>%
tally(sort=TRUE)
## Simple feature collection with 27 features and 2 fields
## geometry type: GEOMETRY
## dimension: XY
## bbox: xmin: -73.99045 ymin: -33.75118 xmax: -28.83594 ymax: 5.271841
## geographic CRS: SIRGAS 2000
## # A tibble: 27 x 3
## abbrev_state n geom
## <fct> <int> <GEOMETRY [°]>
## 1 MG 853 POLYGON ((-45.73032 -22.61888, -45.72688 -22.62098, -45.7~
## 2 SP 645 MULTIPOLYGON (((-47.41529 -24.67558, -47.4203 -24.67827, ~
## 3 RS 499 POLYGON ((-53.42198 -33.74379, -53.42642 -33.73917, -53.4~
## 4 BA 417 MULTIPOLYGON (((-39.49154 -17.99766, -39.5011 -18.00894, ~
## 5 PR 399 POLYGON ((-51.25217 -26.35151, -51.25865 -26.35935, -51.2~
## 6 SC 295 MULTIPOLYGON (((-49.51647 -29.0953, -49.55068 -29.13209, ~
## 7 GO 246 POLYGON ((-51.09259 -19.3072, -51.09259 -19.3072, -51.097~
## 8 PI 224 POLYGON ((-44.8638 -10.88523, -44.88443 -10.90231, -44.89~
## 9 PB 223 MULTIPOLYGON (((-36.57259 -7.935975, -36.57999 -7.946684,~
## 10 MA 217 MULTIPOLYGON (((-45.9384 -8.784939, -45.93744 -8.791058, ~
## # ... with 17 more rows
# Check whether a city named LAGOA MIRIM exists in the aspatial table.
municipality %>%
  select(CITY, STATE) %>%
  filter(CITY == "LAGOA MIRIM")
## # A tibble: 0 x 2
## # ... with 2 variables: CITY <fct>, STATE <fct>
# Look up LAGOA MIRIM in the geospatial layer and show its state.
# The comparison is evaluated row by row, so no grouping is needed; the
# previous group_by(abbrev_state) only caused a grouped object to be returned.
brazil_sp %>%
  filter(name_muni == "LAGOA MIRIM") %>%
  select(name_muni, abbrev_state)
## Simple feature collection with 1 feature and 2 fields
## geometry type: POLYGON
## dimension: XY
## bbox: xmin: -53.53197 ymin: -33.61653 xmax: -52.58641 ymax: -32.14398
## geographic CRS: SIRGAS 2000
## # A tibble: 1 x 3
## # Groups: abbrev_state [1]
## name_muni abbrev_state geom
## <fct> <fct> <POLYGON [°]>
## 1 LAGOA MIRIM RS ((-52.62241 -32.14662, -52.62802 -32.15108, -52.6308~
LAGOA MIRIM, RS is in the geospatial data but does not exist in the municipality aspatial data. Therefore, I will filter this municipality out of the geospatial data for consistency before proceeding.
# Remove LAGOA MIRIM (RS) from the geospatial layer because it has no
# counterpart in the aspatial table.
# Match on state as well as name: municipality names repeat across states, so
# a name-only filter could remove an unrelated municipality.
brazil_sp <- brazil_sp %>%
  filter(!(name_muni == "LAGOA MIRIM" & abbrev_state == "RS"))
# List the state/city combinations whose GDP_CAPITA is missing, with counts.
municipality %>%
  filter(is.na(GDP_CAPITA)) %>%
  group_by(STATE, CITY) %>%
  tally(sort = TRUE)
## # A tibble: 1 x 3
## # Groups: STATE [1]
## STATE CITY n
## <fct> <fct> <int>
## 1 RS LAGOA DOS PATOS 1
# Show every row (all columns) whose city name is LAGOA DOS PATOS.
filter(municipality, CITY == "LAGOA DOS PATOS")
## # A tibble: 2 x 81
## CITY STATE CAPITAL IBGE_RES_POP IBGE_RES_POP_BR~ IBGE_RES_POP_ES~ IBGE_DU
## <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl>
## 1 LAGO~ MG 0 4225 4225 0 1193
## 2 LAGO~ RS 0 NA NA NA NA
## # ... with 74 more variables: IBGE_DU_URBAN <dbl>, IBGE_DU_RURAL <dbl>,
## # IBGE_POP <dbl>, IBGE_1 <dbl>, `IBGE_1-4` <dbl>, `IBGE_5-9` <dbl>,
## # `IBGE_10-14` <dbl>, `IBGE_15-59` <dbl>, `IBGE_60+` <dbl>,
## # IBGE_PLANTED_AREA <dbl>, `IBGE_CROP_PRODUCTION_$` <dbl>, `IDHM Ranking
## # 2010` <dbl>, IDHM <dbl>, IDHM_Renda <dbl>, IDHM_Longevidade <dbl>,
## # IDHM_Educacao <dbl>, LONG <dbl>, LAT <dbl>, ALT <dbl>, PAY_TV <dbl>,
## # FIXED_PHONES <dbl>, AREA <dbl>, REGIAO_TUR <fct>, CATEGORIA_TUR <fct>,
## # ESTIMATED_POP <dbl>, RURAL_URBAN <chr>, GVA_AGROPEC <dbl>,
## # GVA_INDUSTRY <dbl>, GVA_SERVICES <dbl>, GVA_PUBLIC <dbl>, GVA_TOTAL <dbl>,
## # TAXES <dbl>, GDP <dbl>, POP_GDP <dbl>, GDP_CAPITA <dbl>, GVA_MAIN <chr>,
## # MUN_EXPENDIT <dbl>, COMP_TOT <dbl>, COMP_A <dbl>, COMP_B <dbl>,
## # COMP_C <dbl>, COMP_D <dbl>, COMP_E <dbl>, COMP_F <dbl>, COMP_G <dbl>,
## # COMP_H <dbl>, COMP_I <dbl>, COMP_J <dbl>, COMP_K <dbl>, COMP_L <dbl>,
## # COMP_M <dbl>, COMP_N <dbl>, COMP_O <dbl>, COMP_P <dbl>, COMP_Q <dbl>,
## # COMP_R <dbl>, COMP_S <dbl>, COMP_T <dbl>, COMP_U <dbl>, HOTELS <dbl>,
## # BEDS <dbl>, Pr_Agencies <dbl>, Pu_Agencies <dbl>, Pr_Bank <dbl>,
## # Pu_Bank <dbl>, Pr_Assets <dbl>, Pu_Assets <dbl>, Cars <dbl>,
## # Motorcycles <dbl>, Wheeled_tractor <dbl>, UBER <fct>, MAC <dbl>,
## # `WAL-MART` <dbl>, POST_OFFICES <dbl>
After investigating further, I found that a city name such as “LAGOA DOS PATOS” can exist in more than one state (here, both MG and RS). Therefore, I will need to join the data not just by city but also by state later.
# Keep only the rows with a non-missing GDP_CAPITA, then re-check the summary.
municipality <- drop_na(municipality, GDP_CAPITA)
summary(municipality)
## CITY STATE CAPITAL IBGE_RES_POP
## BOM JESUS : 5 MG : 853 0:5543 Min. : 805
## SÃO DOMINGOS: 5 SP : 645 1: 27 1st Qu.: 5235
## BONITO : 4 RS : 497 Median : 10934
## PLANALTO : 4 BA : 417 Mean : 34278
## SANTA HELENA: 4 PR : 399 3rd Qu.: 23424
## SANTA INÊS : 4 SC : 295 Max. :11253503
## (Other) :5544 (Other):2464 NA's :5
## IBGE_RES_POP_BRAS IBGE_RES_POP_ESTR IBGE_DU IBGE_DU_URBAN
## Min. : 805 Min. : 0.0 Min. : 239 Min. : 60
## 1st Qu.: 5230 1st Qu.: 0.0 1st Qu.: 1572 1st Qu.: 874
## Median : 10926 Median : 0.0 Median : 3174 Median : 1846
## Mean : 34200 Mean : 77.5 Mean : 10303 Mean : 8859
## 3rd Qu.: 23390 3rd Qu.: 10.0 3rd Qu.: 6726 3rd Qu.: 4624
## Max. :11133776 Max. :119727.0 Max. :3576148 Max. :3548433
## NA's :5 NA's :5 NA's :7 NA's :7
## IBGE_DU_RURAL IBGE_POP IBGE_1 IBGE_1-4
## Min. : 3 Min. : 174 Min. : 0.0 Min. : 5
## 1st Qu.: 487 1st Qu.: 2801 1st Qu.: 38.0 1st Qu.: 158
## Median : 931 Median : 6170 Median : 92.0 Median : 376
## Mean : 1463 Mean : 27595 Mean : 383.3 Mean : 1544
## 3rd Qu.: 1832 3rd Qu.: 15302 3rd Qu.: 232.0 3rd Qu.: 951
## Max. :33809 Max. :10463636 Max. :129464.0 Max. :514794
## NA's :78 NA's :5 NA's :5 NA's :5
## IBGE_5-9 IBGE_10-14 IBGE_15-59 IBGE_60+
## Min. : 7 Min. : 12 Min. : 94 Min. : 29
## 1st Qu.: 220 1st Qu.: 259 1st Qu.: 1734 1st Qu.: 341
## Median : 516 Median : 588 Median : 3841 Median : 722
## Mean : 2069 Mean : 2381 Mean : 18212 Mean : 3004
## 3rd Qu.: 1300 3rd Qu.: 1478 3rd Qu.: 9628 3rd Qu.: 1724
## Max. :684443 Max. :783702 Max. :7058221 Max. :1293012
## NA's :5 NA's :5 NA's :5 NA's :5
## IBGE_PLANTED_AREA IBGE_CROP_PRODUCTION_$ IDHM Ranking 2010 IDHM
## Min. : 0.0 Min. : 0 Min. : 1 Min. :0.4180
## 1st Qu.: 910.2 1st Qu.: 2326 1st Qu.:1392 1st Qu.:0.5990
## Median : 3471.5 Median : 13846 Median :2782 Median :0.6650
## Mean : 14179.9 Mean : 57384 Mean :2783 Mean :0.6592
## 3rd Qu.: 11194.2 3rd Qu.: 55619 3rd Qu.:4173 3rd Qu.:0.7180
## Max. :1205669.0 Max. :3274885 Max. :5565 Max. :0.8620
## NA's :6 NA's :5
## IDHM_Renda IDHM_Longevidade IDHM_Educacao LONG
## Min. :0.4000 Min. :0.1870 Min. :0.2070 Min. :-72.92
## 1st Qu.:0.5720 1st Qu.:0.7690 1st Qu.:0.4900 1st Qu.:-50.87
## Median :0.6540 Median :0.8080 Median :0.5600 Median :-46.53
## Mean :0.6429 Mean :0.8015 Mean :0.5591 Mean :-46.23
## 3rd Qu.:0.7070 3rd Qu.:0.8360 3rd Qu.:0.6310 3rd Qu.:-41.41
## Max. :0.8910 Max. :0.8940 Max. :0.8250 Max. :-32.44
## NA's :5 NA's :5 NA's :5
## LAT ALT PAY_TV FIXED_PHONES
## Min. :-33.688 Min. : 0.0 Min. : 1 Min. : 3
## 1st Qu.:-22.842 1st Qu.: 169.7 1st Qu.: 88 1st Qu.: 119
## Median :-18.094 Median : 406.5 Median : 247 Median : 327
## Mean :-16.451 Mean : 894.0 Mean : 3095 Mean : 6568
## 3rd Qu.: -8.491 3rd Qu.: 629.0 3rd Qu.: 815 3rd Qu.: 1151
## Max. : 4.585 Max. :874579.0 Max. :2047668 Max. :5543127
## NA's :7 NA's :1 NA's :1
## AREA REGIAO_TUR CATEGORIA_TUR
## Min. : 3.57 Corredores Das Águas: 59 A : 51
## 1st Qu.: 204.35 Vale Do Contestado : 45 B : 168
## Median : 415.87 Amazônia Atlântica : 40 C : 521
## Mean : 1515.63 Araguaia-Tocantins : 39 D :1891
## 3rd Qu.: 1026.16 Cariri : 37 E : 653
## Max. :159533.33 (Other) :3064 NA's:2286
## NA's :2286
## ESTIMATED_POP RURAL_URBAN GVA_AGROPEC GVA_INDUSTRY
## Min. : 786 Length:5570 Min. : 0 Min. : 1
## 1st Qu.: 5454 Class :character 1st Qu.: 4189 1st Qu.: 1726
## Median : 11591 Mode :character Median : 20426 Median : 7424
## Mean : 37437 Mean : 47271 Mean : 175928
## 3rd Qu.: 25297 3rd Qu.: 51227 3rd Qu.: 41022
## Max. :12176866 Max. :1402282 Max. :63306755
## NA's :1
## GVA_SERVICES GVA_PUBLIC GVA_TOTAL TAXES
## Min. : 2 Min. : 7 Min. : 17 Min. : -14159
## 1st Qu.: 10112 1st Qu.: 17267 1st Qu.: 42253 1st Qu.: 1305
## Median : 31211 Median : 35866 Median : 119492 Median : 5100
## Mean : 489451 Mean : 123768 Mean : 832987 Mean : 118864
## 3rd Qu.: 115406 3rd Qu.: 89245 3rd Qu.: 313963 3rd Qu.: 22197
## Max. :464656988 Max. :41902893 Max. :569910503 Max. :117125387
##
## GDP POP_GDP GDP_CAPITA GVA_MAIN
## Min. : 15 Min. : 815 Min. : 3191 Length:5570
## 1st Qu.: 43709 1st Qu.: 5483 1st Qu.: 9058 Class :character
## Median : 125153 Median : 11578 Median : 15870 Mode :character
## Mean : 954584 Mean : 36998 Mean : 21126
## 3rd Qu.: 329539 3rd Qu.: 25085 3rd Qu.: 26155
## Max. :687035890 Max. :12038175 Max. :314638
##
## MUN_EXPENDIT COMP_TOT COMP_A COMP_B
## Min. :1.421e+06 Min. : 6.0 Min. : 0.00 Min. : 0.000
## 1st Qu.:1.573e+07 1st Qu.: 68.0 1st Qu.: 1.00 1st Qu.: 0.000
## Median :2.746e+07 Median : 162.0 Median : 2.00 Median : 0.000
## Mean :1.043e+08 Mean : 906.8 Mean : 18.25 Mean : 1.852
## 3rd Qu.:5.672e+07 3rd Qu.: 448.0 3rd Qu.: 8.00 3rd Qu.: 2.000
## Max. :4.577e+10 Max. :530446.0 Max. :1948.00 Max. :274.000
## NA's :1491
## COMP_C COMP_D COMP_E COMP_F
## Min. : 0.00 Min. : 0.0000 Min. : 0.000 Min. : 0.00
## 1st Qu.: 3.00 1st Qu.: 0.0000 1st Qu.: 0.000 1st Qu.: 1.00
## Median : 11.00 Median : 0.0000 Median : 0.000 Median : 4.00
## Mean : 73.44 Mean : 0.4262 Mean : 2.029 Mean : 43.26
## 3rd Qu.: 39.00 3rd Qu.: 0.0000 3rd Qu.: 1.000 3rd Qu.: 15.00
## Max. :31566.00 Max. :332.0000 Max. :657.000 Max. :25222.00
##
## COMP_G COMP_H COMP_I COMP_J
## Min. : 1.0 Min. : 0 Min. : 0.00 Min. : 0.00
## 1st Qu.: 32.0 1st Qu.: 1 1st Qu.: 2.00 1st Qu.: 0.00
## Median : 74.5 Median : 7 Median : 7.00 Median : 1.00
## Mean : 348.0 Mean : 41 Mean : 55.88 Mean : 24.74
## 3rd Qu.: 199.0 3rd Qu.: 25 3rd Qu.: 24.00 3rd Qu.: 5.00
## Max. :150633.0 Max. :19515 Max. :29290.00 Max. :38720.00
##
## COMP_K COMP_L COMP_M COMP_N
## Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.0
## 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 1.00 1st Qu.: 1.0
## Median : 0.00 Median : 0.00 Median : 4.00 Median : 4.0
## Mean : 15.55 Mean : 15.14 Mean : 51.29 Mean : 83.7
## 3rd Qu.: 2.00 3rd Qu.: 3.00 3rd Qu.: 13.00 3rd Qu.: 14.0
## Max. :23738.00 Max. :14003.00 Max. :49181.00 Max. :76757.0
##
## COMP_O COMP_P COMP_Q COMP_R
## Min. : 0.000 Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 2.000 1st Qu.: 2.00 1st Qu.: 1.00 1st Qu.: 0.00
## Median : 2.000 Median : 6.00 Median : 3.00 Median : 2.00
## Mean : 3.269 Mean : 30.96 Mean : 34.15 Mean : 12.18
## 3rd Qu.: 3.000 3rd Qu.: 17.00 3rd Qu.: 12.00 3rd Qu.: 6.00
## Max. :204.000 Max. :16030.00 Max. :22248.00 Max. :6687.00
##
## COMP_S COMP_T COMP_U HOTELS
## Min. : 0.00 Min. :0 Min. : 0.00000 Min. : 1.000
## 1st Qu.: 5.00 1st Qu.:0 1st Qu.: 0.00000 1st Qu.: 1.000
## Median : 12.00 Median :0 Median : 0.00000 Median : 1.000
## Mean : 51.61 Mean :0 Mean : 0.05027 Mean : 3.131
## 3rd Qu.: 31.00 3rd Qu.:0 3rd Qu.: 0.00000 3rd Qu.: 3.000
## Max. :24832.00 Max. :0 Max. :123.00000 Max. :97.000
## NA's :4683
## BEDS Pr_Agencies Pu_Agencies Pr_Bank
## Min. : 2.0 Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 40.0 1st Qu.: 0.000 1st Qu.: 1.000 1st Qu.: 0.000
## Median : 82.0 Median : 1.000 Median : 2.000 Median : 1.000
## Mean : 257.5 Mean : 3.383 Mean : 2.829 Mean : 1.312
## 3rd Qu.: 200.0 3rd Qu.: 2.000 3rd Qu.: 2.000 3rd Qu.: 2.000
## Max. :13247.0 Max. :1693.000 Max. :626.000 Max. :83.000
## NA's :4683 NA's :2228 NA's :2228 NA's :2228
## Pu_Bank Pr_Assets Pu_Assets Cars
## Min. :0.00 Min. :0.000e+00 Min. :0.000e+00 Min. : 2
## 1st Qu.:1.00 1st Qu.:0.000e+00 1st Qu.:4.047e+07 1st Qu.: 602
## Median :2.00 Median :3.231e+07 Median :1.339e+08 Median : 1440
## Mean :1.58 Mean :9.187e+09 Mean :6.005e+09 Mean : 9861
## 3rd Qu.:2.00 3rd Qu.:1.148e+08 3rd Qu.:4.970e+08 3rd Qu.: 4086
## Max. :8.00 Max. :1.950e+13 Max. :8.020e+12 Max. :5740995
## NA's :2228 NA's :2228 NA's :2228 NA's :9
## Motorcycles Wheeled_tractor UBER MAC
## Min. : 4 Min. : 0.000 1 : 125 Min. : 1.000
## 1st Qu.: 591 1st Qu.: 0.000 NA's:5445 1st Qu.: 1.000
## Median : 1285 Median : 0.000 Median : 2.000
## Mean : 4879 Mean : 5.755 Mean : 4.277
## 3rd Qu.: 3295 3rd Qu.: 1.000 3rd Qu.: 3.000
## Max. :1134570 Max. :3236.000 Max. :130.000
## NA's :9 NA's :9 NA's :5404
## WAL-MART POST_OFFICES
## Min. : 1.000 Min. : 1.000
## 1st Qu.: 1.000 1st Qu.: 1.000
## Median : 1.000 Median : 1.000
## Mean : 2.059 Mean : 2.081
## 3rd Qu.: 1.750 3rd Qu.: 2.000
## Max. :26.000 Max. :225.000
## NA's :5468 NA's :117
The LAGOA DOS PATOS municipality is removed so that it will not affect the later analysis. This is to eliminate the possibility that other variables would have an influence on the GDP per capita.
# Ten municipalities with the highest GDP per capita.
municipality %>%
  arrange(desc(GDP_CAPITA)) %>%
  head(n = 10)
## # A tibble: 10 x 81
## CITY STATE CAPITAL IBGE_RES_POP IBGE_RES_POP_BR~ IBGE_RES_POP_ES~ IBGE_DU
## <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl>
## 1 PAUL~ SP 0 82146 81967 179 24311
## 2 SELV~ MS 0 6287 6287 0 2003
## 3 SÃO ~ BA 0 33183 33183 0 9503
## 4 TRIU~ RS 0 25793 25787 6 8635
## 5 BREJ~ SP 0 2573 2565 8 822
## 6 SEBA~ SP 0 3031 3031 0 1055
## 7 LOUV~ SP 0 37125 37062 63 11056
## 8 CAMP~ MT 0 5154 5147 7 1409
## 9 MERI~ SP 0 3855 3852 3 1262
## 10 EXTR~ MG 0 28599 28543 56 9057
## # ... with 74 more variables: IBGE_DU_URBAN <dbl>, IBGE_DU_RURAL <dbl>,
## # IBGE_POP <dbl>, IBGE_1 <dbl>, `IBGE_1-4` <dbl>, `IBGE_5-9` <dbl>,
## # `IBGE_10-14` <dbl>, `IBGE_15-59` <dbl>, `IBGE_60+` <dbl>,
## # IBGE_PLANTED_AREA <dbl>, `IBGE_CROP_PRODUCTION_$` <dbl>, `IDHM Ranking
## # 2010` <dbl>, IDHM <dbl>, IDHM_Renda <dbl>, IDHM_Longevidade <dbl>,
## # IDHM_Educacao <dbl>, LONG <dbl>, LAT <dbl>, ALT <dbl>, PAY_TV <dbl>,
## # FIXED_PHONES <dbl>, AREA <dbl>, REGIAO_TUR <fct>, CATEGORIA_TUR <fct>,
## # ESTIMATED_POP <dbl>, RURAL_URBAN <chr>, GVA_AGROPEC <dbl>,
## # GVA_INDUSTRY <dbl>, GVA_SERVICES <dbl>, GVA_PUBLIC <dbl>, GVA_TOTAL <dbl>,
## # TAXES <dbl>, GDP <dbl>, POP_GDP <dbl>, GDP_CAPITA <dbl>, GVA_MAIN <chr>,
## # MUN_EXPENDIT <dbl>, COMP_TOT <dbl>, COMP_A <dbl>, COMP_B <dbl>,
## # COMP_C <dbl>, COMP_D <dbl>, COMP_E <dbl>, COMP_F <dbl>, COMP_G <dbl>,
## # COMP_H <dbl>, COMP_I <dbl>, COMP_J <dbl>, COMP_K <dbl>, COMP_L <dbl>,
## # COMP_M <dbl>, COMP_N <dbl>, COMP_O <dbl>, COMP_P <dbl>, COMP_Q <dbl>,
## # COMP_R <dbl>, COMP_S <dbl>, COMP_T <dbl>, COMP_U <dbl>, HOTELS <dbl>,
## # BEDS <dbl>, Pr_Agencies <dbl>, Pu_Agencies <dbl>, Pr_Bank <dbl>,
## # Pu_Bank <dbl>, Pr_Assets <dbl>, Pu_Assets <dbl>, Cars <dbl>,
## # Motorcycles <dbl>, Wheeled_tractor <dbl>, UBER <fct>, MAC <dbl>,
## # `WAL-MART` <dbl>, POST_OFFICES <dbl>
The municipality with the highest GDP per capita is PAULÍNIA, with R$314637.7.
# Ten municipalities with the lowest GDP per capita.
municipality %>%
  arrange(GDP_CAPITA) %>%
  head(n = 10)
## # A tibble: 10 x 81
## CITY STATE CAPITAL IBGE_RES_POP IBGE_RES_POP_BR~ IBGE_RES_POP_ES~ IBGE_DU
## <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl>
## 1 NOVO~ BA 0 15051 15051 0 3649
## 2 NINA~ MA 0 12464 12464 0 2541
## 3 PENA~ MA 0 34267 34259 8 7901
## 4 IPIX~ AM 0 22254 22254 0 3478
## 5 PIRE~ CE 0 10216 10216 0 2790
## 6 SANT~ MA 0 11661 11661 0 2500
## 7 CAJA~ MA 0 10593 10593 0 2594
## 8 SATU~ MA 0 11990 11990 0 2508
## 9 MATÕ~ MA 0 13794 13794 0 2540
## 10 CAET~ BA 0 13639 13639 0 3210
## # ... with 74 more variables: IBGE_DU_URBAN <dbl>, IBGE_DU_RURAL <dbl>,
## # IBGE_POP <dbl>, IBGE_1 <dbl>, `IBGE_1-4` <dbl>, `IBGE_5-9` <dbl>,
## # `IBGE_10-14` <dbl>, `IBGE_15-59` <dbl>, `IBGE_60+` <dbl>,
## # IBGE_PLANTED_AREA <dbl>, `IBGE_CROP_PRODUCTION_$` <dbl>, `IDHM Ranking
## # 2010` <dbl>, IDHM <dbl>, IDHM_Renda <dbl>, IDHM_Longevidade <dbl>,
## # IDHM_Educacao <dbl>, LONG <dbl>, LAT <dbl>, ALT <dbl>, PAY_TV <dbl>,
## # FIXED_PHONES <dbl>, AREA <dbl>, REGIAO_TUR <fct>, CATEGORIA_TUR <fct>,
## # ESTIMATED_POP <dbl>, RURAL_URBAN <chr>, GVA_AGROPEC <dbl>,
## # GVA_INDUSTRY <dbl>, GVA_SERVICES <dbl>, GVA_PUBLIC <dbl>, GVA_TOTAL <dbl>,
## # TAXES <dbl>, GDP <dbl>, POP_GDP <dbl>, GDP_CAPITA <dbl>, GVA_MAIN <chr>,
## # MUN_EXPENDIT <dbl>, COMP_TOT <dbl>, COMP_A <dbl>, COMP_B <dbl>,
## # COMP_C <dbl>, COMP_D <dbl>, COMP_E <dbl>, COMP_F <dbl>, COMP_G <dbl>,
## # COMP_H <dbl>, COMP_I <dbl>, COMP_J <dbl>, COMP_K <dbl>, COMP_L <dbl>,
## # COMP_M <dbl>, COMP_N <dbl>, COMP_O <dbl>, COMP_P <dbl>, COMP_Q <dbl>,
## # COMP_R <dbl>, COMP_S <dbl>, COMP_T <dbl>, COMP_U <dbl>, HOTELS <dbl>,
## # BEDS <dbl>, Pr_Agencies <dbl>, Pu_Agencies <dbl>, Pr_Bank <dbl>,
## # Pu_Bank <dbl>, Pr_Assets <dbl>, Pu_Assets <dbl>, Cars <dbl>,
## # Motorcycles <dbl>, Wheeled_tractor <dbl>, UBER <fct>, MAC <dbl>,
## # `WAL-MART` <dbl>, POST_OFFICES <dbl>
The municipality with the lowest GDP per capita is NOVO TRIUNFO, with R$3190.57.
# List the municipalities whose IBGE_15-59 age-group count is missing.
filter(municipality, is.na(`IBGE_15-59`))
## # A tibble: 5 x 81
## CITY STATE CAPITAL IBGE_RES_POP IBGE_RES_POP_BR~ IBGE_RES_POP_ES~ IBGE_DU
## <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl>
## 1 BALN~ SC 0 NA NA NA NA
## 2 MOJU~ PA 0 NA NA NA NA
## 3 PARA~ MS 0 NA NA NA NA
## 4 PESC~ SC 0 NA NA NA NA
## 5 PINT~ RS 0 NA NA NA NA
## # ... with 74 more variables: IBGE_DU_URBAN <dbl>, IBGE_DU_RURAL <dbl>,
## # IBGE_POP <dbl>, IBGE_1 <dbl>, `IBGE_1-4` <dbl>, `IBGE_5-9` <dbl>,
## # `IBGE_10-14` <dbl>, `IBGE_15-59` <dbl>, `IBGE_60+` <dbl>,
## # IBGE_PLANTED_AREA <dbl>, `IBGE_CROP_PRODUCTION_$` <dbl>, `IDHM Ranking
## # 2010` <dbl>, IDHM <dbl>, IDHM_Renda <dbl>, IDHM_Longevidade <dbl>,
## # IDHM_Educacao <dbl>, LONG <dbl>, LAT <dbl>, ALT <dbl>, PAY_TV <dbl>,
## # FIXED_PHONES <dbl>, AREA <dbl>, REGIAO_TUR <fct>, CATEGORIA_TUR <fct>,
## # ESTIMATED_POP <dbl>, RURAL_URBAN <chr>, GVA_AGROPEC <dbl>,
## # GVA_INDUSTRY <dbl>, GVA_SERVICES <dbl>, GVA_PUBLIC <dbl>, GVA_TOTAL <dbl>,
## # TAXES <dbl>, GDP <dbl>, POP_GDP <dbl>, GDP_CAPITA <dbl>, GVA_MAIN <chr>,
## # MUN_EXPENDIT <dbl>, COMP_TOT <dbl>, COMP_A <dbl>, COMP_B <dbl>,
## # COMP_C <dbl>, COMP_D <dbl>, COMP_E <dbl>, COMP_F <dbl>, COMP_G <dbl>,
## # COMP_H <dbl>, COMP_I <dbl>, COMP_J <dbl>, COMP_K <dbl>, COMP_L <dbl>,
## # COMP_M <dbl>, COMP_N <dbl>, COMP_O <dbl>, COMP_P <dbl>, COMP_Q <dbl>,
## # COMP_R <dbl>, COMP_S <dbl>, COMP_T <dbl>, COMP_U <dbl>, HOTELS <dbl>,
## # BEDS <dbl>, Pr_Agencies <dbl>, Pu_Agencies <dbl>, Pr_Bank <dbl>,
## # Pu_Bank <dbl>, Pr_Assets <dbl>, Pu_Assets <dbl>, Cars <dbl>,
## # Motorcycles <dbl>, Wheeled_tractor <dbl>, UBER <fct>, MAC <dbl>,
## # `WAL-MART` <dbl>, POST_OFFICES <dbl>
It seems that these 5 municipalities do not have data for the various age groups, especially IBGE_15-59, which is the age group that should be contributing actively to the economy. However, these 5 municipalities do have ESTIMATED_POP, IBGE_CROP_PRODUCTION_$, and GDP. It seems that the data isn’t accurate, so I decided to remove them for a more accurate analysis.
# Drop the municipalities that have no IBGE_15-59 value.
municipality <- drop_na(municipality, `IBGE_15-59`)

# Re-check the per-column summary after the removal.
municipality %>%
  summary()
## CITY STATE CAPITAL IBGE_RES_POP
## BOM JESUS : 5 MG : 853 0:5538 Min. : 805
## SÃO DOMINGOS: 5 SP : 645 1: 27 1st Qu.: 5235
## BONITO : 4 RS : 496 Median : 10934
## PLANALTO : 4 BA : 417 Mean : 34278
## SANTA HELENA: 4 PR : 399 3rd Qu.: 23424
## SANTA INÊS : 4 SC : 293 Max. :11253503
## (Other) :5539 (Other):2462
## IBGE_RES_POP_BRAS IBGE_RES_POP_ESTR IBGE_DU IBGE_DU_URBAN
## Min. : 805 Min. : 0.0 Min. : 239 Min. : 60
## 1st Qu.: 5230 1st Qu.: 0.0 1st Qu.: 1572 1st Qu.: 874
## Median : 10926 Median : 0.0 Median : 3174 Median : 1846
## Mean : 34200 Mean : 77.5 Mean : 10303 Mean : 8859
## 3rd Qu.: 23390 3rd Qu.: 10.0 3rd Qu.: 6726 3rd Qu.: 4624
## Max. :11133776 Max. :119727.0 Max. :3576148 Max. :3548433
## NA's :2 NA's :2
## IBGE_DU_RURAL IBGE_POP IBGE_1 IBGE_1-4
## Min. : 3 Min. : 174 Min. : 0.0 Min. : 5
## 1st Qu.: 487 1st Qu.: 2801 1st Qu.: 38.0 1st Qu.: 158
## Median : 931 Median : 6170 Median : 92.0 Median : 376
## Mean : 1463 Mean : 27595 Mean : 383.3 Mean : 1544
## 3rd Qu.: 1832 3rd Qu.: 15302 3rd Qu.: 232.0 3rd Qu.: 951
## Max. :33809 Max. :10463636 Max. :129464.0 Max. :514794
## NA's :73
## IBGE_5-9 IBGE_10-14 IBGE_15-59 IBGE_60+
## Min. : 7 Min. : 12 Min. : 94 Min. : 29
## 1st Qu.: 220 1st Qu.: 259 1st Qu.: 1734 1st Qu.: 341
## Median : 516 Median : 588 Median : 3841 Median : 722
## Mean : 2069 Mean : 2381 Mean : 18212 Mean : 3004
## 3rd Qu.: 1300 3rd Qu.: 1478 3rd Qu.: 9628 3rd Qu.: 1724
## Max. :684443 Max. :783702 Max. :7058221 Max. :1293012
##
## IBGE_PLANTED_AREA IBGE_CROP_PRODUCTION_$ IDHM Ranking 2010 IDHM
## Min. : 0 Min. : 0 Min. : 1 Min. :0.4180
## 1st Qu.: 911 1st Qu.: 2327 1st Qu.:1392 1st Qu.:0.5990
## Median : 3473 Median : 13845 Median :2782 Median :0.6650
## Mean : 14170 Mean : 57351 Mean :2783 Mean :0.6592
## 3rd Qu.: 11174 3rd Qu.: 55550 3rd Qu.:4173 3rd Qu.:0.7180
## Max. :1205669 Max. :3274885 Max. :5565 Max. :0.8620
## NA's :1
## IDHM_Renda IDHM_Longevidade IDHM_Educacao LONG
## Min. :0.4000 Min. :0.1870 Min. :0.2070 Min. :-72.92
## 1st Qu.:0.5720 1st Qu.:0.7690 1st Qu.:0.4900 1st Qu.:-50.87
## Median :0.6540 Median :0.8080 Median :0.5600 Median :-46.52
## Mean :0.6429 Mean :0.8015 Mean :0.5591 Mean :-46.23
## 3rd Qu.:0.7070 3rd Qu.:0.8360 3rd Qu.:0.6310 3rd Qu.:-41.41
## Max. :0.8910 Max. :0.8940 Max. :0.8250 Max. :-32.44
##
## LAT ALT PAY_TV FIXED_PHONES
## Min. :-33.688 Min. : 0.0 Min. : 1 Min. : 3
## 1st Qu.:-22.839 1st Qu.: 169.7 1st Qu.: 88 1st Qu.: 119
## Median :-18.090 Median : 406.5 Median : 247 Median : 328
## Mean :-16.446 Mean : 894.0 Mean : 3097 Mean : 6574
## 3rd Qu.: -8.490 3rd Qu.: 629.0 3rd Qu.: 816 3rd Qu.: 1151
## Max. : 4.585 Max. :874579.0 Max. :2047668 Max. :5543127
## NA's :2 NA's :1 NA's :1
## AREA REGIAO_TUR CATEGORIA_TUR
## Min. : 3.57 Corredores Das Águas: 59 A : 51
## 1st Qu.: 204.46 Vale Do Contestado : 45 B : 168
## Median : 415.92 Amazônia Atlântica : 40 C : 521
## Mean : 1515.13 Araguaia-Tocantins : 39 D :1891
## 3rd Qu.: 1025.52 Cariri : 37 E : 648
## Max. :159533.33 (Other) :3059 NA's:2286
## NA's :2286
## ESTIMATED_POP RURAL_URBAN GVA_AGROPEC GVA_INDUSTRY
## Min. : 786 Length:5565 Min. : 0 Min. : 1
## 1st Qu.: 5453 Class :character 1st Qu.: 4193 1st Qu.: 1725
## Median : 11591 Mode :character Median : 20430 Median : 7425
## Mean : 37462 Mean : 47263 Mean : 176049
## 3rd Qu.: 25306 3rd Qu.: 51238 3rd Qu.: 41011
## Max. :12176866 Max. :1402282 Max. :63306755
## NA's :1
## GVA_SERVICES GVA_PUBLIC GVA_TOTAL TAXES
## Min. : 2 Min. : 7 Min. : 17 Min. : -14159
## 1st Qu.: 10113 1st Qu.: 17260 1st Qu.: 42254 1st Qu.: 1303
## Median : 31212 Median : 35809 Median : 119481 Median : 5107
## Mean : 489855 Mean : 123844 Mean : 833592 Mean : 118962
## 3rd Qu.: 115521 3rd Qu.: 89316 3rd Qu.: 313988 3rd Qu.: 22209
## Max. :464656988 Max. :41902893 Max. :569910503 Max. :117125387
##
## GDP POP_GDP GDP_CAPITA GVA_MAIN
## Min. : 15 Min. : 815 Min. : 3191 Length:5565
## 1st Qu.: 43706 1st Qu.: 5488 1st Qu.: 9062 Class :character
## Median : 125111 Median : 11584 Median : 15866 Mode :character
## Mean : 955266 Mean : 37023 Mean : 21119
## 3rd Qu.: 329717 3rd Qu.: 25102 3rd Qu.: 26155
## Max. :687035890 Max. :12038175 Max. :314638
##
## MUN_EXPENDIT COMP_TOT COMP_A COMP_B
## Min. :1.421e+06 Min. : 6.0 Min. : 0.00 Min. : 0.000
## 1st Qu.:1.574e+07 1st Qu.: 68.0 1st Qu.: 1.00 1st Qu.: 0.000
## Median :2.746e+07 Median : 162.0 Median : 2.00 Median : 0.000
## Mean :1.044e+08 Mean : 907.5 Mean : 18.27 Mean : 1.853
## 3rd Qu.:5.679e+07 3rd Qu.: 449.0 3rd Qu.: 8.00 3rd Qu.: 2.000
## Max. :4.577e+10 Max. :530446.0 Max. :1948.00 Max. :274.000
## NA's :1490
## COMP_C COMP_D COMP_E COMP_F
## Min. : 0.0 Min. : 0.0000 Min. : 0.00 Min. : 0.00
## 1st Qu.: 3.0 1st Qu.: 0.0000 1st Qu.: 0.00 1st Qu.: 1.00
## Median : 11.0 Median : 0.0000 Median : 0.00 Median : 4.00
## Mean : 73.5 Mean : 0.4264 Mean : 2.03 Mean : 43.29
## 3rd Qu.: 39.0 3rd Qu.: 0.0000 3rd Qu.: 1.00 3rd Qu.: 15.00
## Max. :31566.0 Max. :332.0000 Max. :657.00 Max. :25222.00
##
## COMP_G COMP_H COMP_I COMP_J
## Min. : 1.0 Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 32.0 1st Qu.: 1.00 1st Qu.: 2.00 1st Qu.: 0.00
## Median : 75.0 Median : 7.00 Median : 7.00 Median : 1.00
## Mean : 348.2 Mean : 41.02 Mean : 55.92 Mean : 24.76
## 3rd Qu.: 200.0 3rd Qu.: 25.00 3rd Qu.: 24.00 3rd Qu.: 5.00
## Max. :150633.0 Max. :19515.00 Max. :29290.00 Max. :38720.00
##
## COMP_K COMP_L COMP_M COMP_N
## Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 1.00 1st Qu.: 1.00
## Median : 0.00 Median : 0.00 Median : 4.00 Median : 4.00
## Mean : 15.56 Mean : 15.15 Mean : 51.34 Mean : 83.77
## 3rd Qu.: 2.00 3rd Qu.: 3.00 3rd Qu.: 13.00 3rd Qu.: 14.00
## Max. :23738.00 Max. :14003.00 Max. :49181.00 Max. :76757.00
##
## COMP_O COMP_P COMP_Q COMP_R
## Min. : 1.000 Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 2.000 1st Qu.: 2.00 1st Qu.: 1.00 1st Qu.: 0.00
## Median : 2.000 Median : 6.00 Median : 3.00 Median : 2.00
## Mean : 3.271 Mean : 30.98 Mean : 34.18 Mean : 12.19
## 3rd Qu.: 3.000 3rd Qu.: 17.00 3rd Qu.: 12.00 3rd Qu.: 6.00
## Max. :204.000 Max. :16030.00 Max. :22248.00 Max. :6687.00
##
## COMP_S COMP_T COMP_U HOTELS
## Min. : 0.00 Min. :0 Min. : 0.00000 Min. : 1.000
## 1st Qu.: 5.00 1st Qu.:0 1st Qu.: 0.00000 1st Qu.: 1.000
## Median : 12.00 Median :0 Median : 0.00000 Median : 1.000
## Mean : 51.65 Mean :0 Mean : 0.05031 Mean : 3.131
## 3rd Qu.: 31.00 3rd Qu.:0 3rd Qu.: 0.00000 3rd Qu.: 3.000
## Max. :24832.00 Max. :0 Max. :123.00000 Max. :97.000
## NA's :4678
## BEDS Pr_Agencies Pu_Agencies Pr_Bank
## Min. : 2.0 Min. : 0.000 Min. : 0.00 Min. : 0.000
## 1st Qu.: 40.0 1st Qu.: 0.000 1st Qu.: 1.00 1st Qu.: 0.000
## Median : 82.0 Median : 1.000 Median : 2.00 Median : 1.000
## Mean : 257.5 Mean : 3.384 Mean : 2.83 Mean : 1.312
## 3rd Qu.: 200.0 3rd Qu.: 2.000 3rd Qu.: 2.00 3rd Qu.: 2.000
## Max. :13247.0 Max. :1693.000 Max. :626.00 Max. :83.000
## NA's :4678 NA's :2224 NA's :2224 NA's :2224
## Pu_Bank Pr_Assets Pu_Assets Cars
## Min. :0.00 Min. :0.000e+00 Min. :0.000e+00 Min. : 2
## 1st Qu.:1.00 1st Qu.:0.000e+00 1st Qu.:4.048e+07 1st Qu.: 602
## Median :2.00 Median :3.234e+07 Median :1.339e+08 Median : 1440
## Mean :1.58 Mean :9.190e+09 Mean :6.007e+09 Mean : 9869
## 3rd Qu.:2.00 3rd Qu.:1.149e+08 3rd Qu.:4.976e+08 3rd Qu.: 4091
## Max. :8.00 Max. :1.950e+13 Max. :8.020e+12 Max. :5740995
## NA's :2224 NA's :2224 NA's :2224 NA's :9
## Motorcycles Wheeled_tractor UBER MAC
## Min. : 4 Min. : 0.000 1 : 125 Min. : 1.000
## 1st Qu.: 591 1st Qu.: 0.000 NA's:5440 1st Qu.: 1.000
## Median : 1286 Median : 0.000 Median : 2.000
## Mean : 4883 Mean : 5.759 Mean : 4.277
## 3rd Qu.: 3299 3rd Qu.: 1.000 3rd Qu.: 3.000
## Max. :1134570 Max. :3236.000 Max. :130.000
## NA's :9 NA's :9 NA's :5399
## WAL-MART POST_OFFICES
## Min. : 1.000 Min. : 1.000
## 1st Qu.: 1.000 1st Qu.: 1.000
## Median : 1.000 Median : 1.000
## Mean : 2.059 Mean : 2.081
## 3rd Qu.: 1.750 3rd Qu.: 2.000
## Max. :26.000 Max. :225.000
## NA's :5463 NA's :117
# Drop the municipalities that have no Cars value.
municipality <- drop_na(municipality, Cars)

# Re-check the per-column summary after the removal.
municipality %>%
  summary()
## CITY STATE CAPITAL IBGE_RES_POP
## BOM JESUS : 5 MG : 852 0:5529 Min. : 805
## BONITO : 4 SP : 645 1: 27 1st Qu.: 5238
## PLANALTO : 4 RS : 496 Median : 10942
## SANTA HELENA: 4 BA : 416 Mean : 34312
## SANTA INÊS : 4 PR : 398 3rd Qu.: 23540
## SANTA LUZIA : 4 SC : 293 Max. :11253503
## (Other) :5531 (Other):2456
## IBGE_RES_POP_BRAS IBGE_RES_POP_ESTR IBGE_DU IBGE_DU_URBAN
## Min. : 805 Min. : 0.00 Min. : 239 Min. : 60
## 1st Qu.: 5232 1st Qu.: 0.00 1st Qu.: 1574 1st Qu.: 874
## Median : 10937 Median : 0.00 Median : 3178 Median : 1851
## Mean : 34234 Mean : 77.63 Mean : 10313 Mean : 8869
## 3rd Qu.: 23421 3rd Qu.: 10.00 3rd Qu.: 6727 3rd Qu.: 4626
## Max. :11133776 Max. :119727.00 Max. :3576148 Max. :3548433
## NA's :2 NA's :2
## IBGE_DU_RURAL IBGE_POP IBGE_1 IBGE_1-4
## Min. : 3 Min. : 174 Min. : 0.0 Min. : 5.0
## 1st Qu.: 487 1st Qu.: 2803 1st Qu.: 38.0 1st Qu.: 158.0
## Median : 931 Median : 6178 Median : 92.0 Median : 377.0
## Mean : 1463 Mean : 27625 Mean : 383.7 Mean : 1546.1
## 3rd Qu.: 1833 3rd Qu.: 15303 3rd Qu.: 232.0 3rd Qu.: 951.2
## Max. :33809 Max. :10463636 Max. :129464.0 Max. :514794.0
## NA's :73
## IBGE_5-9 IBGE_10-14 IBGE_15-59 IBGE_60+
## Min. : 7.0 Min. : 12 Min. : 94 Min. : 29.0
## 1st Qu.: 220.0 1st Qu.: 259 1st Qu.: 1735 1st Qu.: 341.0
## Median : 516.5 Median : 589 Median : 3845 Median : 723.5
## Mean : 2071.5 Mean : 2384 Mean : 18232 Mean : 3007.6
## 3rd Qu.: 1300.2 3rd Qu.: 1478 3rd Qu.: 9629 3rd Qu.: 1725.2
## Max. :684443.0 Max. :783702 Max. :7058221 Max. :1293012.0
##
## IBGE_PLANTED_AREA IBGE_CROP_PRODUCTION_$ IDHM Ranking 2010 IDHM
## Min. : 0.0 Min. : 0 Min. : 1 Min. :0.4180
## 1st Qu.: 912.8 1st Qu.: 2332 1st Qu.:1390 1st Qu.:0.5990
## Median : 3477.5 Median : 13859 Median :2780 Median :0.6650
## Mean : 14163.8 Mean : 57323 Mean :2782 Mean :0.6592
## 3rd Qu.: 11180.8 3rd Qu.: 55612 3rd Qu.:4173 3rd Qu.:0.7200
## Max. :1205669.0 Max. :3274885 Max. :5565 Max. :0.8620
##
## IDHM_Renda IDHM_Longevidade IDHM_Educacao LONG
## Min. :0.4000 Min. :0.6720 Min. :0.2070 Min. :-72.92
## 1st Qu.:0.5720 1st Qu.:0.7690 1st Qu.:0.4900 1st Qu.:-50.87
## Median :0.6540 Median :0.8080 Median :0.5600 Median :-46.52
## Mean :0.6429 Mean :0.8016 Mean :0.5592 Mean :-46.23
## 3rd Qu.:0.7070 3rd Qu.:0.8360 3rd Qu.:0.6310 3rd Qu.:-41.42
## Max. :0.8910 Max. :0.8940 Max. :0.8250 Max. :-32.44
##
## LAT ALT PAY_TV FIXED_PHONES
## Min. :-33.688 Min. : 0.0 Min. : 1.0 Min. : 3
## 1st Qu.:-22.841 1st Qu.: 170.1 1st Qu.: 88.0 1st Qu.: 119
## Median :-18.094 Median : 406.6 Median : 247.5 Median : 328
## Mean :-16.453 Mean : 894.8 Mean : 3100.8 Mean : 6582
## 3rd Qu.: -8.498 3rd Qu.: 629.2 3rd Qu.: 816.2 3rd Qu.: 1152
## Max. : 4.585 Max. :874579.0 Max. :2047668.0 Max. :5543127
## NA's :1
## AREA REGIAO_TUR CATEGORIA_TUR
## Min. : 3.57 Corredores Das Águas: 59 A : 51
## 1st Qu.: 204.64 Vale Do Contestado : 45 B : 168
## Median : 415.87 Amazônia Atlântica : 40 C : 521
## Mean : 1516.24 Araguaia-Tocantins : 39 D :1890
## 3rd Qu.: 1025.17 Cariri : 37 E : 648
## Max. :159533.33 (Other) :3058 NA's:2278
## NA's :2278
## ESTIMATED_POP RURAL_URBAN GVA_AGROPEC GVA_INDUSTRY
## Min. : 786 Length:5556 Min. : 0 Min. : 1
## 1st Qu.: 5455 Class :character 1st Qu.: 4193 1st Qu.: 1728
## Median : 11596 Mode :character Median : 20436 Median : 7442
## Mean : 37494 Mean : 47265 Mean : 176301
## 3rd Qu.: 25315 3rd Qu.: 51243 3rd Qu.: 41015
## Max. :12176866 Max. :1402282 Max. :63306755
##
## GVA_SERVICES GVA_PUBLIC GVA_TOTAL TAXES
## Min. : 2 Min. : 7 Min. : 17 Min. : -14159
## 1st Qu.: 10110 1st Qu.: 17258 1st Qu.: 42447 1st Qu.: 1302
## Median : 31224 Median : 35837 Median : 119676 Median : 5108
## Mean : 490521 Mean : 123952 Mean : 834753 Mean : 119131
## 3rd Qu.: 115552 3rd Qu.: 89328 3rd Qu.: 314235 3rd Qu.: 22219
## Max. :464656988 Max. :41902893 Max. :569910503 Max. :117125387
##
## GDP POP_GDP GDP_CAPITA GVA_MAIN
## Min. : 15 Min. : 815 Min. : 3191 Length:5556
## 1st Qu.: 43691 1st Qu.: 5492 1st Qu.: 9064 Class :character
## Median : 125473 Median : 11586 Median : 15877 Mode :character
## Mean : 956593 Mean : 37060 Mean : 21131
## 3rd Qu.: 329899 3rd Qu.: 25115 3rd Qu.: 26159
## Max. :687035890 Max. :12038175 Max. :314638
##
## MUN_EXPENDIT COMP_TOT COMP_A COMP_B
## Min. :1.421e+06 Min. : 6.0 Min. : 0.00 Min. : 0.000
## 1st Qu.:1.575e+07 1st Qu.: 68.0 1st Qu.: 1.00 1st Qu.: 0.000
## Median :2.747e+07 Median : 163.0 Median : 2.00 Median : 0.000
## Mean :1.045e+08 Mean : 908.6 Mean : 18.29 Mean : 1.855
## 3rd Qu.:5.675e+07 3rd Qu.: 449.2 3rd Qu.: 8.00 3rd Qu.: 2.000
## Max. :4.577e+10 Max. :530446.0 Max. :1948.00 Max. :274.000
## NA's :1486
## COMP_C COMP_D COMP_E COMP_F
## Min. : 0.00 Min. : 0.0000 Min. : 0.000 Min. : 0.00
## 1st Qu.: 3.00 1st Qu.: 0.0000 1st Qu.: 0.000 1st Qu.: 1.00
## Median : 11.00 Median : 0.0000 Median : 0.000 Median : 4.00
## Mean : 73.60 Mean : 0.4271 Mean : 2.031 Mean : 43.34
## 3rd Qu.: 39.25 3rd Qu.: 0.0000 3rd Qu.: 1.000 3rd Qu.: 15.00
## Max. :31566.00 Max. :332.0000 Max. :657.000 Max. :25222.00
##
## COMP_G COMP_H COMP_I COMP_J
## Min. : 1.0 Min. : 0.00 Min. : 0.00 Min. : 0.0
## 1st Qu.: 32.0 1st Qu.: 1.00 1st Qu.: 2.00 1st Qu.: 0.0
## Median : 75.0 Median : 7.00 Median : 7.00 Median : 1.0
## Mean : 348.6 Mean : 41.07 Mean : 55.99 Mean : 24.8
## 3rd Qu.: 200.0 3rd Qu.: 25.00 3rd Qu.: 24.00 3rd Qu.: 5.0
## Max. :150633.0 Max. :19515.00 Max. :29290.00 Max. :38720.0
##
## COMP_K COMP_L COMP_M COMP_N
## Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 1.00 1st Qu.: 1.00
## Median : 0.00 Median : 0.00 Median : 4.00 Median : 4.00
## Mean : 15.59 Mean : 15.18 Mean : 51.41 Mean : 83.89
## 3rd Qu.: 2.00 3rd Qu.: 3.00 3rd Qu.: 13.00 3rd Qu.: 14.00
## Max. :23738.00 Max. :14003.00 Max. :49181.00 Max. :76757.00
##
## COMP_O COMP_P COMP_Q COMP_R
## Min. : 1.000 Min. : 0.00 Min. : 0.00 Min. : 0.0
## 1st Qu.: 2.000 1st Qu.: 2.00 1st Qu.: 1.00 1st Qu.: 0.0
## Median : 2.000 Median : 6.00 Median : 3.00 Median : 2.0
## Mean : 3.272 Mean : 31.01 Mean : 34.23 Mean : 12.2
## 3rd Qu.: 3.000 3rd Qu.: 17.00 3rd Qu.: 12.00 3rd Qu.: 6.0
## Max. :204.000 Max. :16030.00 Max. :22248.00 Max. :6687.0
##
## COMP_S COMP_T COMP_U HOTELS
## Min. : 0.00 Min. :0 Min. : 0.0000 Min. : 1.000
## 1st Qu.: 5.00 1st Qu.:0 1st Qu.: 0.0000 1st Qu.: 1.000
## Median : 12.00 Median :0 Median : 0.0000 Median : 1.000
## Mean : 51.71 Mean :0 Mean : 0.0504 Mean : 3.134
## 3rd Qu.: 31.00 3rd Qu.:0 3rd Qu.: 0.0000 3rd Qu.: 3.000
## Max. :24832.00 Max. :0 Max. :123.0000 Max. :97.000
## NA's :4671
## BEDS Pr_Agencies Pu_Agencies Pr_Bank
## Min. : 2.0 Min. : 0.000 Min. : 0.00 Min. : 0.000
## 1st Qu.: 40.0 1st Qu.: 0.000 1st Qu.: 1.00 1st Qu.: 0.000
## Median : 82.0 Median : 1.000 Median : 2.00 Median : 1.000
## Mean : 257.9 Mean : 3.386 Mean : 2.83 Mean : 1.313
## 3rd Qu.: 200.0 3rd Qu.: 2.000 3rd Qu.: 2.00 3rd Qu.: 2.000
## Max. :13247.0 Max. :1693.000 Max. :626.00 Max. :83.000
## NA's :4671 NA's :2218 NA's :2218 NA's :2218
## Pu_Bank Pr_Assets Pu_Assets Cars
## Min. :0.00 Min. :0.000e+00 Min. :0.000e+00 Min. : 2
## 1st Qu.:1.00 1st Qu.:0.000e+00 1st Qu.:4.047e+07 1st Qu.: 602
## Median :2.00 Median :3.231e+07 Median :1.339e+08 Median : 1440
## Mean :1.58 Mean :9.198e+09 Mean :6.012e+09 Mean : 9869
## 3rd Qu.:2.00 3rd Qu.:1.150e+08 3rd Qu.:4.950e+08 3rd Qu.: 4091
## Max. :8.00 Max. :1.950e+13 Max. :8.020e+12 Max. :5740995
## NA's :2218 NA's :2218 NA's :2218
## Motorcycles Wheeled_tractor UBER MAC
## Min. : 4 Min. : 0.000 1 : 125 Min. : 1.000
## 1st Qu.: 591 1st Qu.: 0.000 NA's:5431 1st Qu.: 1.000
## Median : 1286 Median : 0.000 Median : 2.000
## Mean : 4883 Mean : 5.759 Mean : 4.277
## 3rd Qu.: 3299 3rd Qu.: 1.000 3rd Qu.: 3.000
## Max. :1134570 Max. :3236.000 Max. :130.000
## NA's :5390
## WAL-MART POST_OFFICES
## Min. : 1.000 Min. : 1.000
## 1st Qu.: 1.000 1st Qu.: 1.000
## Median : 1.000 Median : 1.000
## Mean : 2.059 Mean : 2.081
## 3rd Qu.: 1.750 3rd Qu.: 2.000
## Max. :26.000 Max. :225.000
## NA's :5454 NA's :113
It seems that these 9 municipalities have missing Cars, Motorcycles and Wheeled_tractor data. In order to have an accurate analysis of the various factors affecting the GDP per capita at the municipality level, I have decided to remove them from the analysis.
# Derive DENSITY for every municipality as POP_GDP divided by AREA.
# The division is element-wise (one value per row), so no grouping is needed.
# The previous group_by(CITY, STATE) was never undone and left `municipality`
# as a grouped tibble for every later step; it is dropped here so the result
# stays an ordinary, ungrouped tibble with the same DENSITY values.
municipality <- municipality %>%
  mutate(DENSITY = POP_GDP / AREA)
# Preview the new DENSITY column next to the city and state identifiers.
municipality %>%
  select(CITY, STATE, DENSITY) %>%
  head(n = 10)
## # A tibble: 10 x 3
## # Groups: CITY, STATE [10]
## CITY STATE DENSITY
## <fct> <fct> <dbl>
## 1 ABADIA DE GOIÁS GO 54.7
## 2 ABADIA DOS DOURADOS MG 7.99
## 3 ABADIÂNIA GO 17.6
## 4 ABAETÉ MG 13.0
## 5 ABAETETUBA PA 94.3
## 6 ABAIARA CE 63.8
## 7 ABAÍRA BA 17.1
## 8 ABARÉ BA 12.4
## 9 ABATIÁ PR 34.1
## 10 ABDON BATISTA SC 11.0
# Per-column summary statistics of the data after DENSITY has been added.
municipality %>%
  summary()
## CITY STATE CAPITAL IBGE_RES_POP
## BOM JESUS : 5 MG : 852 0:5529 Min. : 805
## BONITO : 4 SP : 645 1: 27 1st Qu.: 5238
## PLANALTO : 4 RS : 496 Median : 10942
## SANTA HELENA: 4 BA : 416 Mean : 34312
## SANTA INÊS : 4 PR : 398 3rd Qu.: 23540
## SANTA LUZIA : 4 SC : 293 Max. :11253503
## (Other) :5531 (Other):2456
## IBGE_RES_POP_BRAS IBGE_RES_POP_ESTR IBGE_DU IBGE_DU_URBAN
## Min. : 805 Min. : 0.00 Min. : 239 Min. : 60
## 1st Qu.: 5232 1st Qu.: 0.00 1st Qu.: 1574 1st Qu.: 874
## Median : 10937 Median : 0.00 Median : 3178 Median : 1851
## Mean : 34234 Mean : 77.63 Mean : 10313 Mean : 8869
## 3rd Qu.: 23421 3rd Qu.: 10.00 3rd Qu.: 6727 3rd Qu.: 4626
## Max. :11133776 Max. :119727.00 Max. :3576148 Max. :3548433
## NA's :2 NA's :2
## IBGE_DU_RURAL IBGE_POP IBGE_1 IBGE_1-4
## Min. : 3 Min. : 174 Min. : 0.0 Min. : 5.0
## 1st Qu.: 487 1st Qu.: 2803 1st Qu.: 38.0 1st Qu.: 158.0
## Median : 931 Median : 6178 Median : 92.0 Median : 377.0
## Mean : 1463 Mean : 27625 Mean : 383.7 Mean : 1546.1
## 3rd Qu.: 1833 3rd Qu.: 15303 3rd Qu.: 232.0 3rd Qu.: 951.2
## Max. :33809 Max. :10463636 Max. :129464.0 Max. :514794.0
## NA's :73
## IBGE_5-9 IBGE_10-14 IBGE_15-59 IBGE_60+
## Min. : 7.0 Min. : 12 Min. : 94 Min. : 29.0
## 1st Qu.: 220.0 1st Qu.: 259 1st Qu.: 1735 1st Qu.: 341.0
## Median : 516.5 Median : 589 Median : 3845 Median : 723.5
## Mean : 2071.5 Mean : 2384 Mean : 18232 Mean : 3007.6
## 3rd Qu.: 1300.2 3rd Qu.: 1478 3rd Qu.: 9629 3rd Qu.: 1725.2
## Max. :684443.0 Max. :783702 Max. :7058221 Max. :1293012.0
##
## IBGE_PLANTED_AREA IBGE_CROP_PRODUCTION_$ IDHM Ranking 2010 IDHM
## Min. : 0.0 Min. : 0 Min. : 1 Min. :0.4180
## 1st Qu.: 912.8 1st Qu.: 2332 1st Qu.:1390 1st Qu.:0.5990
## Median : 3477.5 Median : 13859 Median :2780 Median :0.6650
## Mean : 14163.8 Mean : 57323 Mean :2782 Mean :0.6592
## 3rd Qu.: 11180.8 3rd Qu.: 55612 3rd Qu.:4173 3rd Qu.:0.7200
## Max. :1205669.0 Max. :3274885 Max. :5565 Max. :0.8620
##
## IDHM_Renda IDHM_Longevidade IDHM_Educacao LONG
## Min. :0.4000 Min. :0.6720 Min. :0.2070 Min. :-72.92
## 1st Qu.:0.5720 1st Qu.:0.7690 1st Qu.:0.4900 1st Qu.:-50.87
## Median :0.6540 Median :0.8080 Median :0.5600 Median :-46.52
## Mean :0.6429 Mean :0.8016 Mean :0.5592 Mean :-46.23
## 3rd Qu.:0.7070 3rd Qu.:0.8360 3rd Qu.:0.6310 3rd Qu.:-41.42
## Max. :0.8910 Max. :0.8940 Max. :0.8250 Max. :-32.44
##
## LAT ALT PAY_TV FIXED_PHONES
## Min. :-33.688 Min. : 0.0 Min. : 1.0 Min. : 3
## 1st Qu.:-22.841 1st Qu.: 170.1 1st Qu.: 88.0 1st Qu.: 119
## Median :-18.094 Median : 406.6 Median : 247.5 Median : 328
## Mean :-16.453 Mean : 894.8 Mean : 3100.8 Mean : 6582
## 3rd Qu.: -8.498 3rd Qu.: 629.2 3rd Qu.: 816.2 3rd Qu.: 1152
## Max. : 4.585 Max. :874579.0 Max. :2047668.0 Max. :5543127
## NA's :1
## AREA REGIAO_TUR CATEGORIA_TUR
## Min. : 3.57 Corredores Das Águas: 59 A : 51
## 1st Qu.: 204.64 Vale Do Contestado : 45 B : 168
## Median : 415.87 Amazônia Atlântica : 40 C : 521
## Mean : 1516.24 Araguaia-Tocantins : 39 D :1890
## 3rd Qu.: 1025.17 Cariri : 37 E : 648
## Max. :159533.33 (Other) :3058 NA's:2278
## NA's :2278
## ESTIMATED_POP RURAL_URBAN GVA_AGROPEC GVA_INDUSTRY
## Min. : 786 Length:5556 Min. : 0 Min. : 1
## 1st Qu.: 5455 Class :character 1st Qu.: 4193 1st Qu.: 1728
## Median : 11596 Mode :character Median : 20436 Median : 7442
## Mean : 37494 Mean : 47265 Mean : 176301
## 3rd Qu.: 25315 3rd Qu.: 51243 3rd Qu.: 41015
## Max. :12176866 Max. :1402282 Max. :63306755
##
## GVA_SERVICES GVA_PUBLIC GVA_TOTAL TAXES
## Min. : 2 Min. : 7 Min. : 17 Min. : -14159
## 1st Qu.: 10110 1st Qu.: 17258 1st Qu.: 42447 1st Qu.: 1302
## Median : 31224 Median : 35837 Median : 119676 Median : 5108
## Mean : 490521 Mean : 123952 Mean : 834753 Mean : 119131
## 3rd Qu.: 115552 3rd Qu.: 89328 3rd Qu.: 314235 3rd Qu.: 22219
## Max. :464656988 Max. :41902893 Max. :569910503 Max. :117125387
##
## GDP POP_GDP GDP_CAPITA GVA_MAIN
## Min. : 15 Min. : 815 Min. : 3191 Length:5556
## 1st Qu.: 43691 1st Qu.: 5492 1st Qu.: 9064 Class :character
## Median : 125473 Median : 11586 Median : 15877 Mode :character
## Mean : 956593 Mean : 37060 Mean : 21131
## 3rd Qu.: 329899 3rd Qu.: 25115 3rd Qu.: 26159
## Max. :687035890 Max. :12038175 Max. :314638
##
## MUN_EXPENDIT COMP_TOT COMP_A COMP_B
## Min. :1.421e+06 Min. : 6.0 Min. : 0.00 Min. : 0.000
## 1st Qu.:1.575e+07 1st Qu.: 68.0 1st Qu.: 1.00 1st Qu.: 0.000
## Median :2.747e+07 Median : 163.0 Median : 2.00 Median : 0.000
## Mean :1.045e+08 Mean : 908.6 Mean : 18.29 Mean : 1.855
## 3rd Qu.:5.675e+07 3rd Qu.: 449.2 3rd Qu.: 8.00 3rd Qu.: 2.000
## Max. :4.577e+10 Max. :530446.0 Max. :1948.00 Max. :274.000
## NA's :1486
## COMP_C COMP_D COMP_E COMP_F
## Min. : 0.00 Min. : 0.0000 Min. : 0.000 Min. : 0.00
## 1st Qu.: 3.00 1st Qu.: 0.0000 1st Qu.: 0.000 1st Qu.: 1.00
## Median : 11.00 Median : 0.0000 Median : 0.000 Median : 4.00
## Mean : 73.60 Mean : 0.4271 Mean : 2.031 Mean : 43.34
## 3rd Qu.: 39.25 3rd Qu.: 0.0000 3rd Qu.: 1.000 3rd Qu.: 15.00
## Max. :31566.00 Max. :332.0000 Max. :657.000 Max. :25222.00
##
## COMP_G COMP_H COMP_I COMP_J
## Min. : 1.0 Min. : 0.00 Min. : 0.00 Min. : 0.0
## 1st Qu.: 32.0 1st Qu.: 1.00 1st Qu.: 2.00 1st Qu.: 0.0
## Median : 75.0 Median : 7.00 Median : 7.00 Median : 1.0
## Mean : 348.6 Mean : 41.07 Mean : 55.99 Mean : 24.8
## 3rd Qu.: 200.0 3rd Qu.: 25.00 3rd Qu.: 24.00 3rd Qu.: 5.0
## Max. :150633.0 Max. :19515.00 Max. :29290.00 Max. :38720.0
##
## COMP_K COMP_L COMP_M COMP_N
## Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 1.00 1st Qu.: 1.00
## Median : 0.00 Median : 0.00 Median : 4.00 Median : 4.00
## Mean : 15.59 Mean : 15.18 Mean : 51.41 Mean : 83.89
## 3rd Qu.: 2.00 3rd Qu.: 3.00 3rd Qu.: 13.00 3rd Qu.: 14.00
## Max. :23738.00 Max. :14003.00 Max. :49181.00 Max. :76757.00
##
## COMP_O COMP_P COMP_Q COMP_R
## Min. : 1.000 Min. : 0.00 Min. : 0.00 Min. : 0.0
## 1st Qu.: 2.000 1st Qu.: 2.00 1st Qu.: 1.00 1st Qu.: 0.0
## Median : 2.000 Median : 6.00 Median : 3.00 Median : 2.0
## Mean : 3.272 Mean : 31.01 Mean : 34.23 Mean : 12.2
## 3rd Qu.: 3.000 3rd Qu.: 17.00 3rd Qu.: 12.00 3rd Qu.: 6.0
## Max. :204.000 Max. :16030.00 Max. :22248.00 Max. :6687.0
##
## COMP_S COMP_T COMP_U HOTELS
## Min. : 0.00 Min. :0 Min. : 0.0000 Min. : 1.000
## 1st Qu.: 5.00 1st Qu.:0 1st Qu.: 0.0000 1st Qu.: 1.000
## Median : 12.00 Median :0 Median : 0.0000 Median : 1.000
## Mean : 51.71 Mean :0 Mean : 0.0504 Mean : 3.134
## 3rd Qu.: 31.00 3rd Qu.:0 3rd Qu.: 0.0000 3rd Qu.: 3.000
## Max. :24832.00 Max. :0 Max. :123.0000 Max. :97.000
## NA's :4671
## BEDS Pr_Agencies Pu_Agencies Pr_Bank
## Min. : 2.0 Min. : 0.000 Min. : 0.00 Min. : 0.000
## 1st Qu.: 40.0 1st Qu.: 0.000 1st Qu.: 1.00 1st Qu.: 0.000
## Median : 82.0 Median : 1.000 Median : 2.00 Median : 1.000
## Mean : 257.9 Mean : 3.386 Mean : 2.83 Mean : 1.313
## 3rd Qu.: 200.0 3rd Qu.: 2.000 3rd Qu.: 2.00 3rd Qu.: 2.000
## Max. :13247.0 Max. :1693.000 Max. :626.00 Max. :83.000
## NA's :4671 NA's :2218 NA's :2218 NA's :2218
## Pu_Bank Pr_Assets Pu_Assets Cars
## Min. :0.00 Min. :0.000e+00 Min. :0.000e+00 Min. : 2
## 1st Qu.:1.00 1st Qu.:0.000e+00 1st Qu.:4.047e+07 1st Qu.: 602
## Median :2.00 Median :3.231e+07 Median :1.339e+08 Median : 1440
## Mean :1.58 Mean :9.198e+09 Mean :6.012e+09 Mean : 9869
## 3rd Qu.:2.00 3rd Qu.:1.150e+08 3rd Qu.:4.950e+08 3rd Qu.: 4091
## Max. :8.00 Max. :1.950e+13 Max. :8.020e+12 Max. :5740995
## NA's :2218 NA's :2218 NA's :2218
## Motorcycles Wheeled_tractor UBER MAC
## Min. : 4 Min. : 0.000 1 : 125 Min. : 1.000
## 1st Qu.: 591 1st Qu.: 0.000 NA's:5431 1st Qu.: 1.000
## Median : 1286 Median : 0.000 Median : 2.000
## Mean : 4883 Mean : 5.759 Mean : 4.277
## 3rd Qu.: 3299 3rd Qu.: 1.000 3rd Qu.: 3.000
## Max. :1134570 Max. :3236.000 Max. :130.000
## NA's :5390
## WAL-MART POST_OFFICES DENSITY
## Min. : 1.000 Min. : 1.000 Min. : 0.166
## 1st Qu.: 1.000 1st Qu.: 1.000 1st Qu.: 11.946
## Median : 1.000 Median : 1.000 Median : 25.309
## Mean : 2.059 Mean : 2.081 Mean : 117.416
## 3rd Qu.: 1.750 3rd Qu.: 2.000 3rd Qu.: 55.585
## Max. :26.000 Max. :225.000 Max. :13533.497
## NA's :5454 NA's :113
Although some variables still have missing data, I am not going to use them as independent variables in the later analysis of the factors affecting GDP per capita at the municipality level. Let’s proceed to view the distribution of the variables.
# Distribution of raw GDP per capita across municipalities (20 bins).
ggplot(municipality, aes(x = GDP_CAPITA)) +
  geom_histogram(bins = 20, colour = "black", fill = "light blue")
The plot reveals that GDP_CAPITA has a right-skewed distribution: most Brazilian municipalities have a low GDP per capita while a few have a very high one, which indicates a vast difference between the richest and the poorest municipalities.
# Add the natural log of GDP per capita as a new column, then plot its
# distribution with the same histogram settings as the raw variable.
municipality <- mutate(municipality, LOG_GDP_CAPITA = log(GDP_CAPITA))
ggplot(municipality, aes(x = LOG_GDP_CAPITA)) +
  geom_histogram(bins = 20, colour = "black", fill = "light blue")
Now the GDP_CAPITA is less skewed after applying the log transformation.
# One histogram per candidate explanatory variable, arranged in a 4 x 4 grid.
# The histogram layer is defined once and reused so that every panel is
# guaranteed to share the same binning and colours.
hist_layer <- geom_histogram(bins = 20, color = "black", fill = "light blue")

Active           <- ggplot(data = municipality, aes(x = `IBGE_15-59`)) + hist_layer
CropProd         <- ggplot(data = municipality, aes(x = `IBGE_CROP_PRODUCTION_$`)) + hist_layer
IDHM_Renda       <- ggplot(data = municipality, aes(x = `IDHM_Renda`)) + hist_layer
IDHM_Longevidade <- ggplot(data = municipality, aes(x = `IDHM_Longevidade`)) + hist_layer
IDHM_Educacao    <- ggplot(data = municipality, aes(x = `IDHM_Educacao`)) + hist_layer
TAXES            <- ggplot(data = municipality, aes(x = `TAXES`)) + hist_layer
# MUN_EXPENDIT, Pr_Assets and Pu_Assets contain NA values, which is what
# triggers the "Removed ... rows" warnings when the grid is drawn.
MUN_EXPENDIT     <- ggplot(data = municipality, aes(x = `MUN_EXPENDIT`)) + hist_layer
GVA_AGROPEC      <- ggplot(data = municipality, aes(x = `GVA_AGROPEC`)) + hist_layer
GVA_INDUSTRY     <- ggplot(data = municipality, aes(x = `GVA_INDUSTRY`)) + hist_layer
GVA_SERVICES     <- ggplot(data = municipality, aes(x = `GVA_SERVICES`)) + hist_layer
GVA_PUBLIC       <- ggplot(data = municipality, aes(x = `GVA_PUBLIC`)) + hist_layer
Pr_Assets        <- ggplot(data = municipality, aes(x = `Pr_Assets`)) + hist_layer
Pu_Assets        <- ggplot(data = municipality, aes(x = `Pu_Assets`)) + hist_layer
Cars             <- ggplot(data = municipality, aes(x = `Cars`)) + hist_layer
Motorcycles      <- ggplot(data = municipality, aes(x = `Motorcycles`)) + hist_layer
Wheeled_tractor  <- ggplot(data = municipality, aes(x = `Wheeled_tractor`)) + hist_layer

ggarrange(
  Active, CropProd, IDHM_Renda, IDHM_Longevidade,
  IDHM_Educacao, TAXES, MUN_EXPENDIT, GVA_AGROPEC,
  GVA_INDUSTRY, GVA_SERVICES, GVA_PUBLIC, Pr_Assets,
  Pu_Assets, Cars, Motorcycles, Wheeled_tractor,
  ncol = 4, nrow = 4
)
## Warning: Removed 1486 rows containing non-finite values (stat_bin).
## Warning: Removed 2218 rows containing non-finite values (stat_bin).
## Warning: Removed 2218 rows containing non-finite values (stat_bin).
The formula to calculate GDP is Consumption + Investment + Government + Net Exports (exports - imports).
The candidate variables for the multiple linear regression are IBGE_15-59, IBGE_CROP_PRODUCTION_$, IDHM_Renda, IDHM_Longevidade, IDHM_Educacao, TAXES, MUN_EXPENDIT, GVA_AGROPEC, GVA_INDUSTRY, GVA_SERVICES, GVA_PUBLIC, Pr_Assets, Pu_Assets, Cars, Motorcycles, Wheeled_tractor and DENSITY. MUN_EXPENDIT, Pr_Assets and Pu_Assets contain missing values (see the warnings above), so they are left out of the selection below.
# Keep the identifiers, coordinates, the denominators used later (POP_GDP,
# GDP, AREA), the log-transformed response and the candidate predictors.
filter_municipality <- select(
  municipality,
  CITY, STATE, LONG, LAT, POP_GDP, GDP, AREA, LOG_GDP_CAPITA,
  `IBGE_15-59`, `IBGE_CROP_PRODUCTION_$`,
  IDHM_Renda, IDHM_Longevidade, IDHM_Educacao,
  TAXES, GVA_AGROPEC, GVA_INDUSTRY, GVA_SERVICES, GVA_PUBLIC,
  Cars, Motorcycles, Wheeled_tractor, DENSITY
)
summary(filter_municipality)
## CITY STATE LONG LAT
## BOM JESUS : 5 MG : 852 Min. :-72.92 Min. :-33.688
## BONITO : 4 SP : 645 1st Qu.:-50.87 1st Qu.:-22.841
## PLANALTO : 4 RS : 496 Median :-46.52 Median :-18.094
## SANTA HELENA: 4 BA : 416 Mean :-46.23 Mean :-16.453
## SANTA INÊS : 4 PR : 398 3rd Qu.:-41.42 3rd Qu.: -8.498
## SANTA LUZIA : 4 SC : 293 Max. :-32.44 Max. : 4.585
## (Other) :5531 (Other):2456
## POP_GDP GDP AREA LOG_GDP_CAPITA
## Min. : 815 Min. : 15 Min. : 3.57 Min. : 8.068
## 1st Qu.: 5492 1st Qu.: 43691 1st Qu.: 204.64 1st Qu.: 9.112
## Median : 11586 Median : 125473 Median : 415.87 Median : 9.673
## Mean : 37060 Mean : 956593 Mean : 1516.24 Mean : 9.697
## 3rd Qu.: 25115 3rd Qu.: 329899 3rd Qu.: 1025.17 3rd Qu.:10.172
## Max. :12038175 Max. :687035890 Max. :159533.33 Max. :12.659
##
## IBGE_15-59 IBGE_CROP_PRODUCTION_$ IDHM_Renda IDHM_Longevidade
## Min. : 94 Min. : 0 Min. :0.4000 Min. :0.6720
## 1st Qu.: 1735 1st Qu.: 2332 1st Qu.:0.5720 1st Qu.:0.7690
## Median : 3845 Median : 13859 Median :0.6540 Median :0.8080
## Mean : 18232 Mean : 57323 Mean :0.6429 Mean :0.8016
## 3rd Qu.: 9629 3rd Qu.: 55612 3rd Qu.:0.7070 3rd Qu.:0.8360
## Max. :7058221 Max. :3274885 Max. :0.8910 Max. :0.8940
##
## IDHM_Educacao TAXES GVA_AGROPEC GVA_INDUSTRY
## Min. :0.2070 Min. : -14159 Min. : 0 Min. : 1
## 1st Qu.:0.4900 1st Qu.: 1302 1st Qu.: 4193 1st Qu.: 1728
## Median :0.5600 Median : 5108 Median : 20436 Median : 7442
## Mean :0.5592 Mean : 119131 Mean : 47265 Mean : 176301
## 3rd Qu.:0.6310 3rd Qu.: 22219 3rd Qu.: 51243 3rd Qu.: 41015
## Max. :0.8250 Max. :117125387 Max. :1402282 Max. :63306755
##
## GVA_SERVICES GVA_PUBLIC Cars Motorcycles
## Min. : 2 Min. : 7 Min. : 2 Min. : 4
## 1st Qu.: 10110 1st Qu.: 17258 1st Qu.: 602 1st Qu.: 591
## Median : 31224 Median : 35837 Median : 1440 Median : 1286
## Mean : 490521 Mean : 123952 Mean : 9869 Mean : 4883
## 3rd Qu.: 115552 3rd Qu.: 89328 3rd Qu.: 4091 3rd Qu.: 3299
## Max. :464656988 Max. :41902893 Max. :5740995 Max. :1134570
##
## Wheeled_tractor DENSITY
## Min. : 0.000 Min. : 0.166
## 1st Qu.: 0.000 1st Qu.: 11.946
## Median : 0.000 Median : 25.309
## Mean : 5.759 Mean : 117.416
## 3rd Qu.: 1.000 3rd Qu.: 55.585
## Max. :3236.000 Max. :13533.497
##
# Turn raw totals into ratios so municipalities of different size are
# comparable:
#   * population and vehicle counts are divided by POP_GDP,
#   * taxes are divided by GDP,
#   * the GVA components are divided by AREA.
# The raw totals are then dropped by the final select().
filter_municipality <- filter_municipality %>%
  mutate(
    `IBGE_15-59_p` = `IBGE_15-59` / POP_GDP,
    Tax_to_GDP = TAXES / GDP,
    GVA_AGROPEC_p = GVA_AGROPEC / AREA,
    GVA_INDUSTRY_p = GVA_INDUSTRY / AREA,
    GVA_SERVICES_p = GVA_SERVICES / AREA,
    GVA_PUBLIC_p = GVA_PUBLIC / AREA,
    Cars_p = Cars / POP_GDP,
    Motorcycles_p = Motorcycles / POP_GDP,
    Wheeled_tractor_p = Wheeled_tractor / POP_GDP
  ) %>%
  select(
    CITY, STATE, LONG, LAT, POP_GDP, GDP, AREA, LOG_GDP_CAPITA,
    `IBGE_15-59_p`, `IBGE_CROP_PRODUCTION_$`,
    IDHM_Renda, IDHM_Longevidade, IDHM_Educacao,
    Tax_to_GDP, GVA_AGROPEC_p, GVA_INDUSTRY_p, GVA_SERVICES_p, GVA_PUBLIC_p,
    Cars_p, Motorcycles_p, Wheeled_tractor_p, DENSITY
  )
summary(filter_municipality)
## CITY STATE LONG LAT
## BOM JESUS : 5 MG : 852 Min. :-72.92 Min. :-33.688
## BONITO : 4 SP : 645 1st Qu.:-50.87 1st Qu.:-22.841
## PLANALTO : 4 RS : 496 Median :-46.52 Median :-18.094
## SANTA HELENA: 4 BA : 416 Mean :-46.23 Mean :-16.453
## SANTA INÊS : 4 PR : 398 3rd Qu.:-41.42 3rd Qu.: -8.498
## SANTA LUZIA : 4 SC : 293 Max. :-32.44 Max. : 4.585
## (Other) :5531 (Other):2456
## POP_GDP GDP AREA LOG_GDP_CAPITA
## Min. : 815 Min. : 15 Min. : 3.57 Min. : 8.068
## 1st Qu.: 5492 1st Qu.: 43691 1st Qu.: 204.64 1st Qu.: 9.112
## Median : 11586 Median : 125473 Median : 415.87 Median : 9.673
## Mean : 37060 Mean : 956593 Mean : 1516.24 Mean : 9.697
## 3rd Qu.: 25115 3rd Qu.: 329899 3rd Qu.: 1025.17 3rd Qu.:10.172
## Max. :12038175 Max. :687035890 Max. :159533.33 Max. :12.659
##
## IBGE_15-59_p IBGE_CROP_PRODUCTION_$ IDHM_Renda IDHM_Longevidade
## Min. :0.0214 Min. : 0 Min. :0.4000 Min. :0.6720
## 1st Qu.:0.2687 1st Qu.: 2332 1st Qu.:0.5720 1st Qu.:0.7690
## Median :0.3760 Median : 13859 Median :0.6540 Median :0.8080
## Mean :0.3744 Mean : 57323 Mean :0.6429 Mean :0.8016
## 3rd Qu.:0.4872 3rd Qu.: 55612 3rd Qu.:0.7070 3rd Qu.:0.8360
## Max. :0.7868 Max. :3274885 Max. :0.8910 Max. :0.8940
##
## IDHM_Educacao Tax_to_GDP GVA_AGROPEC_p GVA_INDUSTRY_p
## Min. :0.2070 Min. : -2.0920 Min. : 0.000 Min. : 0.00
## 1st Qu.:0.4900 1st Qu.: 0.0297 1st Qu.: 8.549 1st Qu.: 2.79
## Median :0.5600 Median : 0.0527 Median : 37.543 Median : 17.11
## Mean :0.5592 Mean : 8.5615 Mean : 87.860 Mean : 557.29
## 3rd Qu.:0.6310 3rd Qu.: 0.0973 3rd Qu.: 127.626 3rd Qu.: 88.98
## Max. :0.8250 Max. :324.6684 Max. :4817.285 Max. :115429.11
##
## GVA_SERVICES_p GVA_PUBLIC_p Cars_p Motorcycles_p
## Min. : 0.0 Min. : 0.00 Min. :0.0000529 Min. :0.0001059
## 1st Qu.: 13.6 1st Qu.: 23.58 1st Qu.:0.0627150 1st Qu.:0.0872258
## Median : 75.9 Median : 83.88 Median :0.1728957 Median :0.1234463
## Mean : 1497.2 Mean : 422.78 Mean :0.1918069 Mean :0.1340709
## 3rd Qu.: 256.2 3rd Qu.: 190.16 3rd Qu.:0.3116654 3rd Qu.:0.1692269
## Max. :817728.0 Max. :76334.53 Max. :0.6440065 Max. :0.5500341
##
## Wheeled_tractor_p DENSITY
## Min. :0.000e+00 Min. : 0.166
## 1st Qu.:0.000e+00 1st Qu.: 11.946
## Median :0.000e+00 Median : 25.309
## Mean :1.326e-04 Mean : 117.416
## 3rd Qu.:7.844e-05 3rd Qu.: 55.585
## Max. :1.022e-02 Max. :13533.497
##
Among the selected variables, the value ranges of IBGE_CROP_PRODUCTION_$, Tax_to_GDP, the GVA variables and DENSITY are still very large even after the transformation. Therefore, I will apply min-max standardisation to these variables.
# Rescale the table with normalize(), then keep only the join keys and the
# wide-ranging variables that should be used in their rescaled form.
# NOTE(review): normalize() is presumably heatmaply::normalize (min-max
# rescaling of numeric columns) -- confirm which package provides it here.
normalised_all <- normalize(filter_municipality)
filter_municipality.norm <- select(
  normalised_all,
  CITY, STATE,
  `IBGE_CROP_PRODUCTION_$`, Tax_to_GDP,
  GVA_AGROPEC_p, GVA_INDUSTRY_p, GVA_SERVICES_p, GVA_PUBLIC_p,
  DENSITY
)
summary(filter_municipality.norm)
## CITY STATE IBGE_CROP_PRODUCTION_$ Tax_to_GDP
## BOM JESUS : 5 MG : 852 Min. :0.0000000 Min. :0.000000
## BONITO : 4 SP : 645 1st Qu.:0.0007119 1st Qu.:0.006493
## PLANALTO : 4 RS : 496 Median :0.0042319 Median :0.006563
## SANTA HELENA: 4 BA : 416 Mean :0.0175038 Mean :0.032603
## SANTA INÊS : 4 PR : 398 3rd Qu.:0.0169813 3rd Qu.:0.006700
## SANTA LUZIA : 4 SC : 293 Max. :1.0000000 Max. :1.000000
## (Other) :5531 (Other):2456
## GVA_AGROPEC_p GVA_INDUSTRY_p GVA_SERVICES_p GVA_PUBLIC_p
## Min. :0.000000 Min. :0.0000000 Min. :0.0000000 Min. :0.0000000
## 1st Qu.:0.001775 1st Qu.:0.0000241 1st Qu.:0.0000166 1st Qu.:0.0003089
## Median :0.007793 Median :0.0001483 Median :0.0000928 Median :0.0010988
## Mean :0.018239 Mean :0.0048279 Mean :0.0018310 Mean :0.0055385
## 3rd Qu.:0.026493 3rd Qu.:0.0007708 3rd Qu.:0.0003133 3rd Qu.:0.0024912
## Max. :1.000000 Max. :1.0000000 Max. :1.0000000 Max. :1.0000000
##
## DENSITY
## Min. :0.0000000
## 1st Qu.:0.0008705
## Median :0.0018578
## Mean :0.0086638
## 3rd Qu.:0.0040950
## Max. :1.0000000
##
The min-max standardised variables now range from 0 to 1.
# Re-attach the rescaled columns to the ratio table by CITY and STATE.
# Columns present in both tables get ".x" (original) / ".y" (rescaled)
# suffixes; the ".y" versions are kept and given their final names directly
# inside select().
norm_municipality <- filter_municipality %>%
  left_join(filter_municipality.norm, by = c("CITY", "STATE")) %>%
  select(
    CITY, STATE, LONG, LAT, LOG_GDP_CAPITA,
    ACTIVE = `IBGE_15-59_p`,
    CROP_PROD = `IBGE_CROP_PRODUCTION_$.y`,
    IDHM_Renda, IDHM_Longevidade, IDHM_Educacao,
    Tax_to_GDP = Tax_to_GDP.y,
    GVA_AGROPEC_p = GVA_AGROPEC_p.y,
    GVA_INDUSTRY_p = GVA_INDUSTRY_p.y,
    GVA_SERVICES_p = GVA_SERVICES_p.y,
    GVA_PUBLIC_p = GVA_PUBLIC_p.y,
    Cars_p, Motorcycles_p, Wheeled_tractor_p,
    DENSITY = DENSITY.y
  )
summary(norm_municipality)
## CITY STATE LONG LAT
## BOM JESUS : 5 MG : 852 Min. :-72.92 Min. :-33.688
## BONITO : 4 SP : 645 1st Qu.:-50.87 1st Qu.:-22.841
## PLANALTO : 4 RS : 496 Median :-46.52 Median :-18.094
## SANTA HELENA: 4 BA : 416 Mean :-46.23 Mean :-16.453
## SANTA INÊS : 4 PR : 398 3rd Qu.:-41.42 3rd Qu.: -8.498
## SANTA LUZIA : 4 SC : 293 Max. :-32.44 Max. : 4.585
## (Other) :5531 (Other):2456
## LOG_GDP_CAPITA ACTIVE CROP_PROD IDHM_Renda
## Min. : 8.068 Min. :0.0214 Min. :0.0000000 Min. :0.4000
## 1st Qu.: 9.112 1st Qu.:0.2687 1st Qu.:0.0007119 1st Qu.:0.5720
## Median : 9.673 Median :0.3760 Median :0.0042319 Median :0.6540
## Mean : 9.697 Mean :0.3744 Mean :0.0175038 Mean :0.6429
## 3rd Qu.:10.172 3rd Qu.:0.4872 3rd Qu.:0.0169813 3rd Qu.:0.7070
## Max. :12.659 Max. :0.7868 Max. :1.0000000 Max. :0.8910
##
## IDHM_Longevidade IDHM_Educacao Tax_to_GDP GVA_AGROPEC_p
## Min. :0.6720 Min. :0.2070 Min. :0.000000 Min. :0.000000
## 1st Qu.:0.7690 1st Qu.:0.4900 1st Qu.:0.006493 1st Qu.:0.001775
## Median :0.8080 Median :0.5600 Median :0.006563 Median :0.007793
## Mean :0.8016 Mean :0.5592 Mean :0.032603 Mean :0.018239
## 3rd Qu.:0.8360 3rd Qu.:0.6310 3rd Qu.:0.006700 3rd Qu.:0.026493
## Max. :0.8940 Max. :0.8250 Max. :1.000000 Max. :1.000000
##
## GVA_INDUSTRY_p GVA_SERVICES_p GVA_PUBLIC_p
## Min. :0.0000000 Min. :0.0000000 Min. :0.0000000
## 1st Qu.:0.0000241 1st Qu.:0.0000166 1st Qu.:0.0003089
## Median :0.0001483 Median :0.0000928 Median :0.0010988
## Mean :0.0048279 Mean :0.0018310 Mean :0.0055385
## 3rd Qu.:0.0007708 3rd Qu.:0.0003133 3rd Qu.:0.0024912
## Max. :1.0000000 Max. :1.0000000 Max. :1.0000000
##
## Cars_p Motorcycles_p Wheeled_tractor_p
## Min. :0.0000529 Min. :0.0001059 Min. :0.000e+00
## 1st Qu.:0.0627150 1st Qu.:0.0872258 1st Qu.:0.000e+00
## Median :0.1728957 Median :0.1234463 Median :0.000e+00
## Mean :0.1918069 Mean :0.1340709 Mean :1.326e-04
## 3rd Qu.:0.3116654 3rd Qu.:0.1692269 3rd Qu.:7.844e-05
## Max. :0.6440065 Max. :0.5500341 Max. :1.022e-02
##
## DENSITY
## Min. :0.0000000
## 1st Qu.:0.0008705
## Median :0.0018578
## Mean :0.0086638
## 3rd Qu.:0.0040950
## Max. :1.0000000
##
After the transformation and standardisation, the differences in value range between the variables are much smaller than before. Let’s proceed with the analysis.
# TRUE only if every municipality polygon is a valid geometry.
brazil_sp %>% st_is_valid() %>% all()
## [1] FALSE
# Repair the invalid polygons in place, then confirm that all are now valid.
brazil_sp <- brazil_sp %>% st_make_valid()
brazil_sp %>% st_is_valid() %>% all()
## [1] TRUE
# Attach the municipality attributes to the polygons, keeping every row of
# `municipality` (right join), matched on municipality name and state code.
brazil <- brazil_sp %>%
  right_join(municipality, by = c(name_muni = "CITY", abbrev_state = "STATE"))
## Warning: Column `name_muni`/`CITY` joining factors with different levels,
## coercing to character vector
## Warning: Column `abbrev_state`/`STATE` joining factors with different levels,
## coercing to character vector
# Quick choropleth of GDP per capita; polygon borders are switched off.
qtm(
  brazil,
  fill = "GDP_CAPITA",
  borders = NULL,
  title = "Distribution of Brazil Municipality GDP per capita, 2016",
  asp = 1
)
It seems that municipalities in the central and southern parts of Brazil have a higher GDP per capita than those in the north-east and north-west.
# Build a point layer from the LONG/LAT columns (declared as EPSG:4674) and
# reproject it to EPSG:5641.
municipality.sf <- st_transform(
  st_as_sf(municipality, coords = c("LONG", "LAT"), crs = 4674),
  crs = 5641
)
It’s important to detect multicollinearity among the independent variables to ensure the quality of the model. A correlation matrix is commonly used to visualise the relationships between the independent variables.
# Pairwise correlations of the response and all candidate predictors
# (columns 5 to 19 of norm_municipality, i.e. LOG_GDP_CAPITA through DENSITY),
# drawn as numbers in the upper triangle.
cor_matrix <- cor(select(norm_municipality, LOG_GDP_CAPITA:DENSITY))
corrplot(
  cor_matrix,
  diag = FALSE,
  order = "FPC",
  tl.pos = "td",
  tl.cex = 0.5,
  number.cex = 0.5,
  method = "number",
  type = "upper"
)
The correlation matrix shows that DENSITY is highly correlated with GVA_PUBLIC_p, and IDHM_Renda with Cars_p. Therefore, I will exclude Cars_p and GVA_PUBLIC_p from the independent variables.
# Full OLS model: log GDP per capita on every candidate predictor that is
# left after Cars_p and GVA_PUBLIC_p have been excluded.
municipality.mlr <- lm(
  formula = LOG_GDP_CAPITA ~ ACTIVE + CROP_PROD + IDHM_Renda +
    IDHM_Longevidade + IDHM_Educacao + Tax_to_GDP + GVA_AGROPEC_p +
    GVA_INDUSTRY_p + GVA_SERVICES_p + Motorcycles_p + Wheeled_tractor_p +
    DENSITY,
  data = norm_municipality
)
summary(municipality.mlr)
##
## Call:
## lm(formula = LOG_GDP_CAPITA ~ ACTIVE + CROP_PROD + IDHM_Renda +
## IDHM_Longevidade + IDHM_Educacao + Tax_to_GDP + GVA_AGROPEC_p +
## GVA_INDUSTRY_p + GVA_SERVICES_p + Motorcycles_p + Wheeled_tractor_p +
## DENSITY, data = norm_municipality)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.06059 -0.23145 -0.05797 0.16014 2.82933
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.18124 0.11544 44.881 < 2e-16 ***
## ACTIVE -0.23805 0.05079 -4.687 2.85e-06 ***
## CROP_PROD 2.60376 0.11839 21.992 < 2e-16 ***
## IDHM_Renda 5.53641 0.15356 36.053 < 2e-16 ***
## IDHM_Longevidade 1.06778 0.21111 5.058 4.38e-07 ***
## IDHM_Educacao 0.29022 0.10368 2.799 0.00514 **
## Tax_to_GDP 0.27128 0.06560 4.135 3.60e-05 ***
## GVA_AGROPEC_p 2.44277 0.17926 13.627 < 2e-16 ***
## GVA_INDUSTRY_p 2.96123 0.19973 14.826 < 2e-16 ***
## GVA_SERVICES_p 2.69281 0.36978 7.282 3.74e-13 ***
## Motorcycles_p -0.52673 0.08078 -6.521 7.61e-11 ***
## Wheeled_tractor_p -8.91532 11.75163 -0.759 0.44810
## DENSITY -2.17155 0.17955 -12.095 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3839 on 5543 degrees of freedom
## Multiple R-squared: 0.6851, Adjusted R-squared: 0.6844
## F-statistic: 1005 on 12 and 5543 DF, p-value: < 2.2e-16
The MLR report shows that not all independent variables are statistically significant; Wheeled_tractor_p, for example, has a p-value of 0.448. The model is therefore revised to keep only statistically significant variables. Note that the revised model below also leaves out IDHM_Longevidade.
# Revised OLS model without Wheeled_tractor_p.
# NOTE(review): IDHM_Longevidade is left out as well, although it was
# statistically significant in the full model -- confirm this is intended.
municipality.mlr1 <- lm(
  formula = LOG_GDP_CAPITA ~ ACTIVE + CROP_PROD + IDHM_Renda +
    IDHM_Educacao + Tax_to_GDP + GVA_AGROPEC_p + GVA_INDUSTRY_p +
    GVA_SERVICES_p + Motorcycles_p + DENSITY,
  data = norm_municipality
)
ols_regress(municipality.mlr1)
## Model Summary
## -------------------------------------------------------------
## R 0.827 RMSE 0.385
## R-Squared 0.684 Coef. Var 3.967
## Adj. R-Squared 0.683 MSE 0.148
## Pred R-Squared 0.679 MAE 0.273
## -------------------------------------------------------------
## RMSE: Root Mean Square Error
## MSE: Mean Square Error
## MAE: Mean Absolute Error
##
## ANOVA
## ------------------------------------------------------------------------
## Sum of
## Squares DF Mean Square F Sig.
## ------------------------------------------------------------------------
## Regression 1772.804 10 177.280 1197.982 0.0000
## Residual 820.563 5545 0.148
## Total 2593.367 5555
## ------------------------------------------------------------------------
##
## Parameter Estimates
## --------------------------------------------------------------------------------------------
## model Beta Std. Error Std. Beta t Sig lower upper
## --------------------------------------------------------------------------------------------
## (Intercept) 5.723 0.045 126.716 0.000 5.634 5.812
## ACTIVE -0.254 0.050 -0.050 -5.088 0.000 -0.352 -0.156
## CROP_PROD 2.604 0.118 0.172 22.011 0.000 2.372 2.836
## IDHM_Renda 5.991 0.117 0.707 51.024 0.000 5.761 6.221
## IDHM_Educacao 0.346 0.103 0.047 3.347 0.001 0.143 0.548
## Tax_to_GDP 0.264 0.066 0.031 4.020 0.000 0.135 0.393
## GVA_AGROPEC_p 2.403 0.179 0.107 13.395 0.000 2.051 2.755
## GVA_INDUSTRY_p 2.948 0.200 0.143 14.734 0.000 2.556 3.340
## GVA_SERVICES_p 2.637 0.370 0.073 7.119 0.000 1.911 3.363
## Motorcycles_p -0.549 0.081 -0.054 -6.803 0.000 -0.708 -0.391
## DENSITY -2.178 0.180 -0.144 -12.104 0.000 -2.530 -1.825
## --------------------------------------------------------------------------------------------
# Tolerance and variance inflation factor (VIF) for each predictor of the
# revised model, used as a multicollinearity check.
ols_vif_tol(municipality.mlr1)
## Variables Tolerance VIF
## 1 ACTIVE 0.5959226 1.678070
## 2 CROP_PROD 0.9302282 1.075005
## 3 IDHM_Renda 0.2968490 3.368717
## 4 IDHM_Educacao 0.2866369 3.488734
## 5 Tax_to_GDP 0.9813735 1.018980
## 6 GVA_AGROPEC_p 0.9014675 1.109302
## 7 GVA_INDUSTRY_p 0.6024097 1.660000
## 8 GVA_SERVICES_p 0.5478887 1.825188
## 9 Motorcycles_p 0.9172266 1.090243
## 10 DENSITY 0.4006574 2.495898
All VIF values are less than 10. Therefore, I can conclude that there is no sign of multicollinearity among the independent variables.
The relationship between dependent and independent variables
# Residuals-versus-fitted-values plot of the revised model (linearity check).
ols_plot_resid_fit(municipality.mlr1)
Most of the data points are scattered around 0. Therefore, I can safely conclude that the relationships between the dependent variable and independent variables are linear.
# Histogram of the revised model's residuals (visual normality check).
ols_plot_resid_hist(municipality.mlr1)
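The residuals of the multiple linear regression roughly resemble a normal distribution.
The p-values from the four normality tests are far smaller than the alpha value of 0.05. Therefore, I reject the null hypothesis that the residuals are normally distributed: although the histogram looks roughly bell-shaped, the residuals deviate significantly from a normal distribution.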
# Rebuild both spatial layers from the model data (norm_municipality):
#  * municipality.sf -- points from LONG/LAT (EPSG:4674) reprojected to
#    EPSG:5641;
#  * brazil -- municipality polygons joined on name and state code, keeping
#    every row of norm_municipality.
municipality.sf <- st_transform(
  st_as_sf(norm_municipality, coords = c("LONG", "LAT"), crs = 4674),
  crs = 5641
)
brazil <- brazil_sp %>%
  right_join(norm_municipality, by = c(name_muni = "CITY", abbrev_state = "STATE"))
## Warning: Column `name_muni`/`CITY` joining factors with different levels,
## coercing to character vector
## Warning: Column `abbrev_state`/`STATE` joining factors with different levels,
## coercing to character vector
# Attach the residuals of the revised OLS model to both spatial layers.
mlr_res <- unname(municipality.mlr1$residuals)
# Positional binding is only safe if lm() kept every row of the model data.
stopifnot(length(mlr_res) == nrow(norm_municipality))

# Point layer: municipality.sf is built row-for-row from norm_municipality,
# so the residuals line up by position.
municipality.res.sf <- cbind(municipality.sf, municipality.mlr1$residuals) %>%
  rename(MLR_RES = municipality.mlr1.residuals)

# Polygon layer: `brazil` is the result of a join, so its row order is not
# guaranteed to follow norm_municipality. Attach the residuals by the
# CITY/STATE key instead of by position to avoid mapping a residual onto the
# wrong municipality.
res_by_muni <- norm_municipality %>%
  transmute(CITY, STATE, MLR_RES = mlr_res)
plot_municipality.res.sf <- left_join(
  brazil, res_by_muni,
  by = c("name_muni" = "CITY", "abbrev_state" = "STATE")
)
# TRUE would mean at least one point feature has an NA geometry dimension.
any(is.na(st_dimension(municipality.res.sf)))
## [1] FALSE
# Same check for the polygon layer: TRUE would mean at least one feature has
# an NA geometry dimension.
any(is.na(st_dimension(plot_municipality.res.sf)))
## [1] FALSE
# Convert the sf point layer to an sp object (printed below as a
# SpatialPointsDataFrame); coordinates() is applied to it further on.
municipality.sp <- as_Spatial(municipality.res.sf)
municipality.sp
## class : SpatialPointsDataFrame
## features : 5556
## extent : 1671725, 6175358, 6039171, 10507274 (xmin, xmax, ymin, ymax)
## crs : +proj=merc +lat_ts=-2 +lon_0=-43 +x_0=5000000 +y_0=10000000 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs
## variables : 18
## names : CITY, STATE, LOG_GDP_CAPITA, ACTIVE, CROP_PROD, IDHM_Renda, IDHM_Longevidade, IDHM_Educacao, Tax_to_GDP, GVA_AGROPEC_p, GVA_INDUSTRY_p, GVA_SERVICES_p, GVA_PUBLIC_p, Cars_p, Motorcycles_p, ...
## min values : ABADIA DE GOIÁS, AC, 8.06795486320249, 0.0214045886961388, 0, 0.4, 0.672, 0.207, 0, 0, 0, 0, 0, 5.2940865053735e-05, 0.00010588173010747, ...
## max values : ZORTÉA, TO, 12.6591770653549, 0.786835582484164, 1, 0.891, 0.894, 0.825, 1, 1, 1, 1, 1, 0.644006508768758, 0.550034078939216, ...
# Quick choropleth of the OLS residuals; polygon borders are switched off.
qtm(
  plot_municipality.res.sf,
  fill = "MLR_RES",
  borders = NULL,
  title = "Distribution of the MLR residual of the Brazil Municipality GDP per capita, 2016",
  asp = 1
)
## Variable(s) "MLR_RES" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
This map shows signs of spatial autocorrelation. To confirm this observation, Moran’s I test will be performed.
# Distance from every municipality point to its first nearest neighbour
# (knearneigh() is called with its default k), in projected map units.
coords <- coordinates(municipality.sp)
k1 <- coords %>%
  knearneigh() %>%
  knn2nb()
k1dists <- nbdists(k1, coords, longlat = FALSE) %>%
  unlist()
summary(k1dists)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 645.7 9667.7 13936.5 17772.1 20606.1 363945.4
The summary report shows that the largest first nearest neighbour distance is 363945.4 metres, so using this as the upper threshold gives certainty that all units will have at least one neighbour.
# Fixed-distance-band neighbours and binary (style "B") spatial weights.
# The upper bound is the largest first-nearest-neighbour distance, taken
# straight from k1dists (about 363945.4 m in this run) rather than typed in
# by hand: a rounded constant could fall just below the true maximum and
# leave a municipality without any neighbour, and it would silently go stale
# if the input data changed.
nb <- dnearneigh(coords, 0, max(k1dists), longlat = FALSE)
nb_lw <- nb2listw(nb, style = "B")
summary(nb_lw)
## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 5556
## Number of nonzero links: 2685926
## Percentage nonzero weights: 8.701008
## Average number of links: 483.428
## Link number distribution:
##
## 1 7 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
## 1 1 2 5 4 7 5 18 6 4 11 1 1 4 4 5 4 4 2 4
## 27 28 29 30 31 32 33 34 35 36 38 39 40 41 43 44 45 46 47 48
## 7 7 3 9 3 1 8 7 8 5 10 2 2 2 4 1 4 7 9 3
## 49 50 51 52 53 54 55 56 57 58 59 60 61 63 64 65 66 67 68 69
## 4 8 10 11 12 2 7 7 3 7 5 5 1 2 3 6 4 3 2 4
## 71 72 73 74 75 76 77 78 79 80 81 83 84 86 87 88 90 92 94 95
## 6 2 7 4 3 1 2 9 2 3 6 1 2 1 2 1 1 1 1 1
## 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115
## 2 2 6 6 1 6 2 1 4 1 4 3 3 4 4 2 3 1 4 2
## 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135
## 3 1 3 2 3 3 4 4 5 4 1 6 4 5 4 1 5 6 8 7
## 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 156
## 7 5 6 7 8 12 8 4 6 6 8 3 4 4 3 5 7 2 6 2
## 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 176 177
## 5 5 4 4 6 5 4 5 5 1 5 8 5 4 1 5 4 1 6 3
## 178 179 180 181 182 183 184 185 186 187 189 190 191 192 193 194 195 196 197 198
## 7 7 3 1 3 2 2 1 3 2 2 1 5 5 3 4 5 2 2 10
## 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218
## 4 3 2 2 6 3 1 2 6 6 6 5 2 5 3 8 7 5 4 5
## 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238
## 5 6 7 8 3 8 5 4 3 4 9 6 5 9 2 6 5 3 5 7
## 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258
## 6 7 7 9 6 4 6 8 11 5 6 3 3 8 4 2 14 5 9 8
## 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278
## 12 8 8 10 7 3 4 10 5 13 3 8 3 12 4 5 9 3 4 1
## 279 280 281 282 283 284 285 286 287 288 289 290 291 292 294 295 296 297 298 299
## 4 3 7 2 3 6 4 3 3 6 5 4 9 5 2 8 4 8 4 8
## 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319
## 6 11 4 4 4 4 4 8 6 9 3 10 3 2 5 5 11 9 14 5
## 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339
## 8 7 6 4 11 7 4 7 4 8 6 3 11 4 2 9 6 8 6 12
## 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359
## 9 7 5 8 7 10 7 6 10 11 3 10 8 7 2 5 8 6 11 10
## 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379
## 6 6 13 8 13 16 7 13 7 12 10 12 9 11 11 6 9 1 6 5
## 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399
## 7 12 8 5 6 5 4 7 7 8 4 8 8 4 7 4 8 5 6 3
## 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419
## 5 10 5 12 11 11 3 6 4 5 9 3 5 8 4 6 4 7 8 4
## 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439
## 3 4 7 4 2 8 2 6 6 1 8 4 4 5 4 3 6 4 2 4
## 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459
## 3 5 2 6 7 4 4 5 7 5 4 6 6 5 7 3 7 4 10 5
## 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479
## 8 7 5 6 6 6 3 4 7 5 10 2 8 8 9 2 10 8 6 5
## 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499
## 2 1 3 11 7 4 5 3 6 8 5 4 6 4 5 9 6 4 6 5
## 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519
## 6 6 8 7 9 6 8 9 3 6 5 8 7 8 7 5 10 5 3 5
## 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539
## 3 12 5 4 4 7 9 5 5 6 10 9 11 6 8 4 7 7 13 3
## 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559
## 4 9 6 4 7 6 12 12 8 10 7 12 6 10 15 13 12 10 5 8
## 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579
## 4 11 13 12 8 13 12 8 9 7 10 13 6 6 7 7 11 9 4 7
## 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599
## 8 9 10 9 13 15 9 7 11 8 9 9 13 13 6 8 5 6 9 14
## 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619
## 9 7 11 7 10 15 11 11 11 9 13 10 13 13 10 12 6 13 14 8
## 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639
## 17 14 6 12 10 16 4 12 8 13 10 12 11 14 10 9 14 7 9 8
## 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659
## 18 9 9 7 8 14 14 15 6 9 16 11 10 7 10 10 9 13 15 6
## 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679
## 8 7 14 9 15 8 7 13 12 8 9 12 15 4 14 4 15 12 19 12
## 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699
## 11 11 11 5 9 10 13 13 6 14 12 7 5 8 12 7 5 10 7 19
## 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719
## 15 9 2 12 7 7 6 11 8 7 10 13 5 9 9 18 7 15 4 11
## 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739
## 13 6 13 8 6 10 8 13 10 9 9 6 7 6 13 11 9 9 5 11
## 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759
## 11 9 6 5 10 14 7 9 6 3 7 8 11 8 12 9 7 8 10 3
## 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779
## 6 11 11 14 6 7 6 12 7 5 9 11 9 5 11 2 10 8 8 8
## 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799
## 16 8 8 11 9 8 1 7 8 11 8 6 8 8 8 4 10 9 5 7
## 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819
## 15 10 8 7 5 5 4 8 8 6 7 6 6 4 14 6 5 6 6 8
## 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839
## 6 5 4 4 3 4 5 1 9 5 4 6 7 5 2 4 3 1 2 4
## 840 841 843 844 846 847 848 853
## 1 1 3 1 1 1 3 1
## 1 least connected region:
## 1769 with 1 link
## 1 most connected region:
## 727 with 853 links
##
## Weights style: B
## Weights constants summary:
## n nn S0 S1 S2
## B 5556 30869136 2685926 5371852 6323372784
Null Hypothesis: The residuals of the explanatory regression model are randomly distributed.
Alternative Hypothesis: The residuals of the explanatory regression model are not randomly distributed.
A 95% confidence level (alpha = 0.05) will be used.
# Global Moran's I test on the residuals of the fitted regression model,
# using the spatial weights object nb_lw.
lm.morantest(model = municipality.mlr1, listw = nb_lw)
##
## Global Moran I for regression residuals
##
## data:
## model: lm(formula = LOG_GDP_CAPITA ~ ACTIVE + CROP_PROD + IDHM_Renda +
## IDHM_Educacao + Tax_to_GDP + GVA_AGROPEC_p + GVA_INDUSTRY_p +
## GVA_SERVICES_p + Motorcycles_p + DENSITY, data = norm_municipality)
## weights: nb_lw
##
## Moran I statistic standard deviate = 57.895, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 4.088429e-02 -4.296324e-04 5.092249e-07
Since the p-value is less than the alpha value of 0.05, I reject the null hypothesis that the residuals of the explanatory regression model are randomly distributed.
Since the observed Global Moran's I statistic is greater than 0, I can infer that the residuals of the explanatory regression model show a clustered distribution.
Both the fixed and adaptive bandwidth schemes are used to determine which scheme is more suitable for the explanatory model.
Two possible approaches can be used to determine the stopping rule: the cross-validation (CV) score approach and the corrected Akaike Information Criterion (AICc) approach.
We can distinguish continuous kernels, which weight all the observations, from kernels with compact support, for which the weight of observations is zero beyond a certain distance.
Therefore, I decided to use the Gaussian, Exponential, Tricube, and Boxcar kernels, leaving the bisquare kernel out because the explanatory model isn’t about time.
Setting the adaptive argument to FALSE indicates that we are interested in computing the fixed bandwidth.
The CV score approach was defined as the stopping rule.

#### Gaussian kernel

The gaussian kernel is selected to determine the optimal fixed bandwidth to use in the explanatory model.
# Fixed-bandwidth search for the GWR model: gaussian kernel, with the
# cross-validation (CV) score as the selection criterion.
bw.fixed.CV_G <- bw.gwr(
  formula = LOG_GDP_CAPITA ~ ACTIVE + CROP_PROD + IDHM_Renda + IDHM_Educacao +
    Tax_to_GDP + GVA_AGROPEC_p + GVA_INDUSTRY_p + GVA_SERVICES_p +
    Motorcycles_p + DENSITY,
  data = municipality.sp,
  approach = "CV",
  kernel = "gaussian",
  adaptive = FALSE,
  longlat = FALSE
)
## Take a cup of tea and have a break, it will take a few minutes.
## -----A kind suggestion from GWmodel development group
## Fixed bandwidth: 3921308 CV score: 828.5185
## Fixed bandwidth: 2423986 CV score: 823.9807
## Fixed bandwidth: 1498590 CV score: 813.2785
## Fixed bandwidth: 926664.5 CV score: 791.7091
## Fixed bandwidth: 573194.8 CV score: 765.0163
## Fixed bandwidth: 354738.5 CV score: 758.1873
## Fixed bandwidth: 219725.1 CV score: 838.9721
## Fixed bandwidth: 438181.4 CV score: 750.6512
## Fixed bandwidth: 489751.9 CV score: 756.1913
## Fixed bandwidth: 406309 CV score: 748.7573
## Fixed bandwidth: 386610.8 CV score: 749.3409
## Fixed bandwidth: 418483.2 CV score: 749.208
## Fixed bandwidth: 398785 CV score: 748.74
## Fixed bandwidth: 394134.9 CV score: 748.8633
## Fixed bandwidth: 401658.9 CV score: 748.7179
## Fixed bandwidth: 403435.1 CV score: 748.7226
## Fixed bandwidth: 400561.2 CV score: 748.7218
## Fixed bandwidth: 402337.4 CV score: 748.7181
## Fixed bandwidth: 401239.6 CV score: 748.7188
## Fixed bandwidth: 401918.1 CV score: 748.7177
## Fixed bandwidth: 402078.2 CV score: 748.7178
The recommended bandwidth is 401918.1 metres. SIRGAS 2000, EPSG 5641 projects in metres.
Kernel is selected as exponential to determine the optimal fixed bandwidth to use in explanatory model.
# Fixed-bandwidth search for the GWR model: exponential kernel, with the
# cross-validation (CV) score as the selection criterion.
bw.fixed.CV_E <- bw.gwr(
  formula = LOG_GDP_CAPITA ~ ACTIVE + CROP_PROD + IDHM_Renda + IDHM_Educacao +
    Tax_to_GDP + GVA_AGROPEC_p + GVA_INDUSTRY_p + GVA_SERVICES_p +
    Motorcycles_p + DENSITY,
  data = municipality.sp,
  approach = "CV",
  kernel = "exponential",
  adaptive = FALSE,
  longlat = FALSE
)
## Take a cup of tea and have a break, it will take a few minutes.
## -----A kind suggestion from GWmodel development group
## Fixed bandwidth: 3921308 CV score: 821.6475
## Fixed bandwidth: 2423986 CV score: 815.9319
## Fixed bandwidth: 1498590 CV score: 807.0082
## Fixed bandwidth: 926664.5 CV score: 793.0969
## Fixed bandwidth: 573194.8 CV score: 773.0634
## Fixed bandwidth: 354738.5 CV score: 748.9576
## Fixed bandwidth: 219725.1 CV score: 727.9797
## Fixed bandwidth: 136282.2 CV score: 758.5732
## Fixed bandwidth: 271295.6 CV score: 735.9561
## Fixed bandwidth: 187852.7 CV score: 723.1758
## Fixed bandwidth: 168154.6 CV score: 722.6557
## Fixed bandwidth: 155980.4 CV score: 727.2693
## Fixed bandwidth: 175678.6 CV score: 722.2196
## Fixed bandwidth: 180328.7 CV score: 722.4227
## Fixed bandwidth: 172804.7 CV score: 722.2501
## Fixed bandwidth: 177454.8 CV score: 722.2652
## Fixed bandwidth: 174580.9 CV score: 722.2147
## Fixed bandwidth: 173902.4 CV score: 722.2216
## Fixed bandwidth: 175000.2 CV score: 722.2143
## Fixed bandwidth: 175259.3 CV score: 722.2155
## Fixed bandwidth: 174840 CV score: 722.2141
## Fixed bandwidth: 174741 CV score: 722.2142
The recommended bandwidth is 174840 metres. SIRGAS 2000, EPSG 5641 projects in metres.
Kernel is selected as tricube to determine the optimal fixed bandwidth to use in explanatory model.
# Fixed-bandwidth search for the GWR model: tricube kernel, with the
# cross-validation (CV) score as the selection criterion.
bw.fixed.CV_T <- bw.gwr(
  formula = LOG_GDP_CAPITA ~ ACTIVE + CROP_PROD + IDHM_Renda + IDHM_Educacao +
    Tax_to_GDP + GVA_AGROPEC_p + GVA_INDUSTRY_p + GVA_SERVICES_p +
    Motorcycles_p + DENSITY,
  data = municipality.sp,
  approach = "CV",
  kernel = "tricube",
  adaptive = FALSE,
  longlat = FALSE
)
## Take a cup of tea and have a break, it will take a few minutes.
## -----A kind suggestion from GWmodel development group
## Fixed bandwidth: 3921308 CV score: 820.8845
## Fixed bandwidth: 2423986 CV score: 796.9854
## Fixed bandwidth: 1498590 CV score: 768.9411
## Fixed bandwidth: 926664.5 CV score: 751.5087
## Fixed bandwidth: 573194.8 CV score: 1413.193
## Fixed bandwidth: 1145121 CV score: 781.8078
## Fixed bandwidth: 791651.1 CV score: 835.6445
## Fixed bandwidth: 1010107 CV score: 759.8688
## Fixed bandwidth: 875093.9 CV score: 756.8009
## Fixed bandwidth: 958536.8 CV score: 753.6546
## Fixed bandwidth: 906966.3 CV score: 751.8146
## Fixed bandwidth: 938838.6 CV score: 752.0208
## Fixed bandwidth: 919140.4 CV score: 751.4305
## Fixed bandwidth: 914490.3 CV score: 751.4925
## Fixed bandwidth: 922014.4 CV score: 751.4366
## Fixed bandwidth: 917364.2 CV score: 751.4431
## Fixed bandwidth: 920238.2 CV score: 751.4291
## Fixed bandwidth: 920916.6 CV score: 751.4306
## Fixed bandwidth: 919818.9 CV score: 751.4291
The recommended bandwidth is 919818.9 metres. SIRGAS 2000, EPSG 5641 projects in metres.
Kernel is selected as boxcar to determine the optimal fixed bandwidth to use in explanatory model.
# Fixed-bandwidth search for the GWR model: boxcar kernel, with the
# cross-validation (CV) score as the selection criterion.
bw.fixed.CV_B <- bw.gwr(
  formula = LOG_GDP_CAPITA ~ ACTIVE + CROP_PROD + IDHM_Renda + IDHM_Educacao +
    Tax_to_GDP + GVA_AGROPEC_p + GVA_INDUSTRY_p + GVA_SERVICES_p +
    Motorcycles_p + DENSITY,
  data = municipality.sp,
  approach = "CV",
  kernel = "boxcar",
  adaptive = FALSE,
  longlat = FALSE
)
## Take a cup of tea and have a break, it will take a few minutes.
## -----A kind suggestion from GWmodel development group
## Fixed bandwidth: 3921308 CV score: 831.6411
## Fixed bandwidth: 2423986 CV score: 826.3529
## Fixed bandwidth: 1498590 CV score: 795.8339
## Fixed bandwidth: 926664.5 CV score: 862.7637
## Fixed bandwidth: 1852060 CV score: 815.5966
## Fixed bandwidth: 1280134 CV score: 785.4436
## Fixed bandwidth: 1145121 CV score: 856.6601
## Fixed bandwidth: 1363577 CV score: 791.6162
## Fixed bandwidth: 1228564 CV score: 784.0101
## Fixed bandwidth: 1196691 CV score: 784.3704
## Fixed bandwidth: 1248262 CV score: 784.7651
## Fixed bandwidth: 1216389 CV score: 783.1303
## Fixed bandwidth: 1208865 CV score: 784.6228
## Fixed bandwidth: 1221040 CV score: 783.432
## Fixed bandwidth: 1213516 CV score: 783.8824
## Fixed bandwidth: 1218166 CV score: 783.4497
## Fixed bandwidth: 1215292 CV score: 783.7936
## Fixed bandwidth: 1217068 CV score: 783.3919
## Fixed bandwidth: 1215970 CV score: 783.0702
## Fixed bandwidth: 1215711 CV score: 783.8288
## Fixed bandwidth: 1216130 CV score: 783.0849
## Fixed bandwidth: 1215871 CV score: 783.0412
## Fixed bandwidth: 1215810 CV score: 783.0579
## Fixed bandwidth: 1215909 CV score: 783.0566
## Fixed bandwidth: 1215848 CV score: 783.0521
## Fixed bandwidth: 1215886 CV score: 783.0513
## Fixed bandwidth: 1215862 CV score: 783.0446
## Fixed bandwidth: 1215877 CV score: 783.0459
## Fixed bandwidth: 1215868 CV score: 783.0416
## Fixed bandwidth: 1215873 CV score: 783.0471
## Fixed bandwidth: 1215870 CV score: 783.0411
The recommended bandwidth is 1215870 metres. SIRGAS 2000, EPSG 5641 projects in metres.
Calibrate the gwr model using fixed bandwidth CV approach and gaussian kernel
# Calibrate the basic GWR model with the CV-selected bandwidth and a
# gaussian kernel, then print the fitted model report.
gwr.fixed.CV_G <- gwr.basic(
  formula = LOG_GDP_CAPITA ~ ACTIVE + CROP_PROD + IDHM_Renda + IDHM_Educacao +
    Tax_to_GDP + GVA_AGROPEC_p + GVA_INDUSTRY_p + GVA_SERVICES_p +
    Motorcycles_p + DENSITY,
  data = municipality.sp,
  bw = bw.fixed.CV_G,
  kernel = "gaussian",
  longlat = FALSE
)
gwr.fixed.CV_G
## ***********************************************************************
## * Package GWmodel *
## ***********************************************************************
## Program starts at: 2020-06-01 00:22:50
## Call:
## gwr.basic(formula = LOG_GDP_CAPITA ~ ACTIVE + CROP_PROD + IDHM_Renda +
## IDHM_Educacao + Tax_to_GDP + GVA_AGROPEC_p + GVA_INDUSTRY_p +
## GVA_SERVICES_p + Motorcycles_p + DENSITY, data = municipality.sp,
## bw = bw.fixed.CV_G, kernel = "gaussian", longlat = FALSE)
##
## Dependent (y) variable: LOG_GDP_CAPITA
## Independent variables: ACTIVE CROP_PROD IDHM_Renda IDHM_Educacao Tax_to_GDP GVA_AGROPEC_p GVA_INDUSTRY_p GVA_SERVICES_p Motorcycles_p DENSITY
## Number of data points: 5556
## ***********************************************************************
## * Results of Global Regression *
## ***********************************************************************
##
## Call:
## lm(formula = formula, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.02479 -0.23134 -0.05998 0.16161 2.84433
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.72299 0.04516 126.716 < 2e-16 ***
## ACTIVE -0.25430 0.04998 -5.088 3.73e-07 ***
## CROP_PROD 2.60383 0.11829 22.011 < 2e-16 ***
## IDHM_Renda 5.99105 0.11742 51.024 < 2e-16 ***
## IDHM_Educacao 0.34560 0.10327 3.347 0.000823 ***
## Tax_to_GDP 0.26422 0.06573 4.020 5.90e-05 ***
## GVA_AGROPEC_p 2.40308 0.17939 13.395 < 2e-16 ***
## GVA_INDUSTRY_p 2.94822 0.20010 14.734 < 2e-16 ***
## GVA_SERVICES_p 2.63698 0.37041 7.119 1.23e-12 ***
## Motorcycles_p -0.54948 0.08077 -6.803 1.14e-11 ***
## DENSITY -2.17765 0.17992 -12.104 < 2e-16 ***
##
## ---Significance stars
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Residual standard error: 0.3847 on 5545 degrees of freedom
## Multiple R-squared: 0.6836
## Adjusted R-squared: 0.683
## F-statistic: 1198 on 10 and 5545 DF, p-value: < 2.2e-16
## ***Extra Diagnostic information
## Residual sum of squares: 820.5633
## Sigma(hat): 0.3843732
## AIC: 5164.603
## AICc: 5164.659
## ***********************************************************************
## * Results of Geographically Weighted Regression *
## ***********************************************************************
##
## *********************Model calibration information*********************
## Kernel function: gaussian
## Fixed bandwidth: 401918.1
## Regression points: the same locations as observations are used.
## Distance metric: Euclidean distance metric is used.
##
## ****************Summary of GWR coefficient estimates:******************
## Min. 1st Qu. Median 3rd Qu. Max.
## Intercept 5.265084 5.797682 6.461716 6.877838 8.3785
## ACTIVE -1.406758 -0.243319 -0.138034 0.055753 0.3291
## CROP_PROD 0.364832 2.626697 2.826030 2.988650 6.3465
## IDHM_Renda 1.217225 3.802496 4.561548 5.547635 7.0873
## IDHM_Educacao -1.058786 0.123465 0.366746 0.734702 1.9798
## Tax_to_GDP -0.539751 0.195392 0.317037 0.364099 0.8434
## GVA_AGROPEC_p -1.679836 1.615586 2.302621 3.743916 51.8273
## GVA_INDUSTRY_p 2.266221 2.732653 3.654443 14.196330 478.1091
## GVA_SERVICES_p -24.493220 -4.333014 2.717253 3.558280 3494.3192
## Motorcycles_p -1.990709 -0.878039 -0.516541 -0.035091 2.4642
## DENSITY -614.073572 -3.125840 -2.165609 -1.671781 0.6530
## ************************Diagnostic information*************************
## Number of data points: 5556
## Effective number of parameters (2trace(S) - trace(S'S)): 122.4976
## Effective degrees of freedom (n-2trace(S) + trace(S'S)): 5433.502
## AICc (GWR book, Fotheringham, et al. 2002, p. 61, eq 2.33): 4420.078
## AIC (GWR book, Fotheringham, et al. 2002,GWR p. 96, eq. 4.22): 4324.954
## Residual sum of squares: 697.1337
## R-square value: 0.7311859
## Adjusted R-square value: 0.7251244
##
## ***********************************************************************
## Program stops at: 2020-06-01 00:23:20
The fixed bandwidth computed with the CV approach is 401918.1 metres. Geographically Weighted Regression performs better than the global model: its adjusted R-square value is 72.51%, compared with 68.3% for the global model.
Calibrate the gwr model using fixed bandwidth CV approach and exponential kernel
# Calibrate the basic GWR model with the CV-selected bandwidth and an
# exponential kernel, then print the fitted model report.
gwr.fixed.CV_E <- gwr.basic(
  formula = LOG_GDP_CAPITA ~ ACTIVE + CROP_PROD + IDHM_Renda + IDHM_Educacao +
    Tax_to_GDP + GVA_AGROPEC_p + GVA_INDUSTRY_p + GVA_SERVICES_p +
    Motorcycles_p + DENSITY,
  data = municipality.sp,
  bw = bw.fixed.CV_E,
  kernel = "exponential",
  longlat = FALSE
)
gwr.fixed.CV_E
## ***********************************************************************
## * Package GWmodel *
## ***********************************************************************
## Program starts at: 2020-06-01 00:23:20
## Call:
## gwr.basic(formula = LOG_GDP_CAPITA ~ ACTIVE + CROP_PROD + IDHM_Renda +
## IDHM_Educacao + Tax_to_GDP + GVA_AGROPEC_p + GVA_INDUSTRY_p +
## GVA_SERVICES_p + Motorcycles_p + DENSITY, data = municipality.sp,
## bw = bw.fixed.CV_E, kernel = "exponential", longlat = FALSE)
##
## Dependent (y) variable: LOG_GDP_CAPITA
## Independent variables: ACTIVE CROP_PROD IDHM_Renda IDHM_Educacao Tax_to_GDP GVA_AGROPEC_p GVA_INDUSTRY_p GVA_SERVICES_p Motorcycles_p DENSITY
## Number of data points: 5556
## ***********************************************************************
## * Results of Global Regression *
## ***********************************************************************
##
## Call:
## lm(formula = formula, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.02479 -0.23134 -0.05998 0.16161 2.84433
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.72299 0.04516 126.716 < 2e-16 ***
## ACTIVE -0.25430 0.04998 -5.088 3.73e-07 ***
## CROP_PROD 2.60383 0.11829 22.011 < 2e-16 ***
## IDHM_Renda 5.99105 0.11742 51.024 < 2e-16 ***
## IDHM_Educacao 0.34560 0.10327 3.347 0.000823 ***
## Tax_to_GDP 0.26422 0.06573 4.020 5.90e-05 ***
## GVA_AGROPEC_p 2.40308 0.17939 13.395 < 2e-16 ***
## GVA_INDUSTRY_p 2.94822 0.20010 14.734 < 2e-16 ***
## GVA_SERVICES_p 2.63698 0.37041 7.119 1.23e-12 ***
## Motorcycles_p -0.54948 0.08077 -6.803 1.14e-11 ***
## DENSITY -2.17765 0.17992 -12.104 < 2e-16 ***
##
## ---Significance stars
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Residual standard error: 0.3847 on 5545 degrees of freedom
## Multiple R-squared: 0.6836
## Adjusted R-squared: 0.683
## F-statistic: 1198 on 10 and 5545 DF, p-value: < 2.2e-16
## ***Extra Diagnostic information
## Residual sum of squares: 820.5633
## Sigma(hat): 0.3843732
## AIC: 5164.603
## AICc: 5164.659
## ***********************************************************************
## * Results of Geographically Weighted Regression *
## ***********************************************************************
##
## *********************Model calibration information*********************
## Kernel function: exponential
## Fixed bandwidth: 174840
## Regression points: the same locations as observations are used.
## Distance metric: Euclidean distance metric is used.
##
## ****************Summary of GWR coefficient estimates:******************
## Min. 1st Qu. Median 3rd Qu. Max.
## Intercept 5.176310 5.951734 6.658053 7.202706 8.9568
## ACTIVE -1.668626 -0.251338 -0.085436 0.139949 1.1271
## CROP_PROD 0.033343 2.445772 2.901713 3.441034 9.7218
## IDHM_Renda 0.213223 3.343640 4.191685 5.279799 8.5153
## IDHM_Educacao -2.398352 0.159412 0.470445 0.782195 2.3238
## Tax_to_GDP -1.025798 0.134425 0.295558 0.418167 1.4777
## GVA_AGROPEC_p -3.685029 1.865751 2.616245 3.909798 52.9926
## GVA_INDUSTRY_p 1.918322 3.269334 5.327727 14.181813 347.1924
## GVA_SERVICES_p -35.308270 -1.164288 2.994232 4.802045 991.9113
## Motorcycles_p -2.760046 -0.930877 -0.326533 0.069421 3.1566
## DENSITY -546.450640 -4.468837 -2.719354 -1.701557 4.3855
## ************************Diagnostic information*************************
## Number of data points: 5556
## Effective number of parameters (2trace(S) - trace(S'S)): 461.7452
## Effective degrees of freedom (n-2trace(S) + trace(S'S)): 5094.255
## AICc (GWR book, Fotheringham, et al. 2002, p. 61, eq 2.33): 4139.218
## AIC (GWR book, Fotheringham, et al. 2002,GWR p. 96, eq. 4.22): 3803.393
## Residual sum of squares: 611.2036
## R-square value: 0.7643205
## Adjusted R-square value: 0.7429542
##
## ***********************************************************************
## Program stops at: 2020-06-01 00:23:52
The fixed bandwidth computed with the CV approach is 174840 metres. Geographically Weighted Regression performs much better than the global model: its adjusted R-square value is 74.30%, compared with 68.3% for the global model.
Calibrate the gwr model using fixed bandwidth CV approach and tricube kernel
# Calibrate the basic GWR model with the CV-selected bandwidth and a
# tricube kernel, then print the fitted model report.
gwr.fixed.CV_T <- gwr.basic(
  formula = LOG_GDP_CAPITA ~ ACTIVE + CROP_PROD + IDHM_Renda + IDHM_Educacao +
    Tax_to_GDP + GVA_AGROPEC_p + GVA_INDUSTRY_p + GVA_SERVICES_p +
    Motorcycles_p + DENSITY,
  data = municipality.sp,
  bw = bw.fixed.CV_T,
  kernel = "tricube",
  longlat = FALSE
)
gwr.fixed.CV_T
## ***********************************************************************
## * Package GWmodel *
## ***********************************************************************
## Program starts at: 2020-06-01 00:23:53
## Call:
## gwr.basic(formula = LOG_GDP_CAPITA ~ ACTIVE + CROP_PROD + IDHM_Renda +
## IDHM_Educacao + Tax_to_GDP + GVA_AGROPEC_p + GVA_INDUSTRY_p +
## GVA_SERVICES_p + Motorcycles_p + DENSITY, data = municipality.sp,
## bw = bw.fixed.CV_T, kernel = "tricube", longlat = FALSE)
##
## Dependent (y) variable: LOG_GDP_CAPITA
## Independent variables: ACTIVE CROP_PROD IDHM_Renda IDHM_Educacao Tax_to_GDP GVA_AGROPEC_p GVA_INDUSTRY_p GVA_SERVICES_p Motorcycles_p DENSITY
## Number of data points: 5556
## ***********************************************************************
## * Results of Global Regression *
## ***********************************************************************
##
## Call:
## lm(formula = formula, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.02479 -0.23134 -0.05998 0.16161 2.84433
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.72299 0.04516 126.716 < 2e-16 ***
## ACTIVE -0.25430 0.04998 -5.088 3.73e-07 ***
## CROP_PROD 2.60383 0.11829 22.011 < 2e-16 ***
## IDHM_Renda 5.99105 0.11742 51.024 < 2e-16 ***
## IDHM_Educacao 0.34560 0.10327 3.347 0.000823 ***
## Tax_to_GDP 0.26422 0.06573 4.020 5.90e-05 ***
## GVA_AGROPEC_p 2.40308 0.17939 13.395 < 2e-16 ***
## GVA_INDUSTRY_p 2.94822 0.20010 14.734 < 2e-16 ***
## GVA_SERVICES_p 2.63698 0.37041 7.119 1.23e-12 ***
## Motorcycles_p -0.54948 0.08077 -6.803 1.14e-11 ***
## DENSITY -2.17765 0.17992 -12.104 < 2e-16 ***
##
## ---Significance stars
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Residual standard error: 0.3847 on 5545 degrees of freedom
## Multiple R-squared: 0.6836
## Adjusted R-squared: 0.683
## F-statistic: 1198 on 10 and 5545 DF, p-value: < 2.2e-16
## ***Extra Diagnostic information
## Residual sum of squares: 820.5633
## Sigma(hat): 0.3843732
## AIC: 5164.603
## AICc: 5164.659
## ***********************************************************************
## * Results of Geographically Weighted Regression *
## ***********************************************************************
##
## *********************Model calibration information*********************
## Kernel function: tricube
## Fixed bandwidth: 919818.9
## Regression points: the same locations as observations are used.
## Distance metric: Euclidean distance metric is used.
##
## ****************Summary of GWR coefficient estimates:******************
## Min. 1st Qu. Median 3rd Qu. Max.
## Intercept 4.9730e+00 5.9860e+00 6.6411e+00 7.0872e+00 9.0284
## ACTIVE -1.5765e+00 -2.0615e-01 -9.9553e-02 7.2503e-02 1.6210
## CROP_PROD -1.4175e+00 2.5016e+00 2.7133e+00 2.9378e+00 11.8210
## IDHM_Renda -7.1716e-01 3.6167e+00 4.2948e+00 5.4358e+00 7.7656
## IDHM_Educacao -1.5918e+00 1.2904e-01 3.0675e-01 7.1644e-01 2.2535
## Tax_to_GDP -1.0157e+00 1.5356e-01 3.2041e-01 3.7791e-01 1.4675
## GVA_AGROPEC_p 4.2029e-01 1.5223e+00 2.6206e+00 4.1755e+00 94.4428
## GVA_INDUSTRY_p -2.6225e+02 2.7303e+00 6.4762e+00 1.5172e+01 1107.4919
## GVA_SERVICES_p -1.0092e+02 -1.2339e+01 2.8469e+00 6.9412e+00 8658.2546
## Motorcycles_p -2.5269e+00 -8.9511e-01 -3.8825e-01 -6.4233e-03 3.8880
## DENSITY -1.5086e+03 -5.2478e+00 -2.1602e+00 -1.5950e+00 0.8277
## ************************Diagnostic information*************************
## Number of data points: 5556
## Effective number of parameters (2trace(S) - trace(S'S)): 125.5376
## Effective degrees of freedom (n-2trace(S) + trace(S'S)): 5430.462
## AICc (GWR book, Fotheringham, et al. 2002, p. 61, eq 2.33): 4366.325
## AIC (GWR book, Fotheringham, et al. 2002,GWR p. 96, eq. 4.22): 4258.005
## Residual sum of squares: 687.2599
## R-square value: 0.7349932
## Adjusted R-square value: 0.7288659
##
## ***********************************************************************
## Program stops at: 2020-06-01 00:24:22
The fixed bandwidth computed with the CV approach is 919818.9 metres. Geographically Weighted Regression performs better than the global model: its adjusted R-square value is 72.89%, compared with 68.3% for the global model.
Calibrate the gwr model using fixed bandwidth CV approach and boxcar kernel
# Calibrate the basic GWR model with the CV-selected bandwidth and a
# boxcar kernel, then print the fitted model report.
gwr.fixed.CV_B <- gwr.basic(
  formula = LOG_GDP_CAPITA ~ ACTIVE + CROP_PROD + IDHM_Renda + IDHM_Educacao +
    Tax_to_GDP + GVA_AGROPEC_p + GVA_INDUSTRY_p + GVA_SERVICES_p +
    Motorcycles_p + DENSITY,
  data = municipality.sp,
  bw = bw.fixed.CV_B,
  kernel = "boxcar",
  longlat = FALSE
)
gwr.fixed.CV_B
## ***********************************************************************
## * Package GWmodel *
## ***********************************************************************
## Program starts at: 2020-06-01 00:24:22
## Call:
## gwr.basic(formula = LOG_GDP_CAPITA ~ ACTIVE + CROP_PROD + IDHM_Renda +
## IDHM_Educacao + Tax_to_GDP + GVA_AGROPEC_p + GVA_INDUSTRY_p +
## GVA_SERVICES_p + Motorcycles_p + DENSITY, data = municipality.sp,
## bw = bw.fixed.CV_B, kernel = "boxcar", longlat = FALSE)
##
## Dependent (y) variable: LOG_GDP_CAPITA
## Independent variables: ACTIVE CROP_PROD IDHM_Renda IDHM_Educacao Tax_to_GDP GVA_AGROPEC_p GVA_INDUSTRY_p GVA_SERVICES_p Motorcycles_p DENSITY
## Number of data points: 5556
## ***********************************************************************
## * Results of Global Regression *
## ***********************************************************************
##
## Call:
## lm(formula = formula, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.02479 -0.23134 -0.05998 0.16161 2.84433
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.72299 0.04516 126.716 < 2e-16 ***
## ACTIVE -0.25430 0.04998 -5.088 3.73e-07 ***
## CROP_PROD 2.60383 0.11829 22.011 < 2e-16 ***
## IDHM_Renda 5.99105 0.11742 51.024 < 2e-16 ***
## IDHM_Educacao 0.34560 0.10327 3.347 0.000823 ***
## Tax_to_GDP 0.26422 0.06573 4.020 5.90e-05 ***
## GVA_AGROPEC_p 2.40308 0.17939 13.395 < 2e-16 ***
## GVA_INDUSTRY_p 2.94822 0.20010 14.734 < 2e-16 ***
## GVA_SERVICES_p 2.63698 0.37041 7.119 1.23e-12 ***
## Motorcycles_p -0.54948 0.08077 -6.803 1.14e-11 ***
## DENSITY -2.17765 0.17992 -12.104 < 2e-16 ***
##
## ---Significance stars
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Residual standard error: 0.3847 on 5545 degrees of freedom
## Multiple R-squared: 0.6836
## Adjusted R-squared: 0.683
## F-statistic: 1198 on 10 and 5545 DF, p-value: < 2.2e-16
## ***Extra Diagnostic information
## Residual sum of squares: 820.5633
## Sigma(hat): 0.3843732
## AIC: 5164.603
## AICc: 5164.659
## ***********************************************************************
## * Results of Geographically Weighted Regression *
## ***********************************************************************
##
## *********************Model calibration information*********************
## Kernel function: boxcar
## Fixed bandwidth: 1215870
## Regression points: the same locations as observations are used.
## Distance metric: Euclidean distance metric is used.
##
## ****************Summary of GWR coefficient estimates:******************
## Min. 1st Qu. Median 3rd Qu. Max.
## Intercept 5.2934e+00 5.7158e+00 6.0343e+00 6.6970e+00 8.0851
## ACTIVE -7.5488e-01 -3.3754e-01 -2.3353e-01 -5.2666e-02 0.1899
## CROP_PROD -2.7472e+00 2.6198e+00 2.8172e+00 2.9654e+00 4.8954
## IDHM_Renda 1.7394e+00 4.2193e+00 5.3522e+00 5.8233e+00 7.4844
## IDHM_Educacao -5.8823e-01 1.3403e-01 4.6681e-01 6.2849e-01 1.7079
## Tax_to_GDP -3.7680e-01 2.3842e-01 2.7714e-01 3.7639e-01 0.7596
## GVA_AGROPEC_p 5.1365e-01 1.7560e+00 1.9203e+00 3.6318e+00 94.9415
## GVA_INDUSTRY_p 1.9289e+00 2.5826e+00 2.7282e+00 1.3528e+01 882.7677
## GVA_SERVICES_p -4.4551e+02 -1.4802e+01 2.5782e+00 2.7700e+00 6807.2077
## Motorcycles_p -1.3965e+00 -8.1781e-01 -5.5685e-01 -9.9723e-02 1.6870
## DENSITY -1.2338e+03 -2.0473e+00 -1.9759e+00 -1.3951e+00 0.6952
## ************************Diagnostic information*************************
## Number of data points: 5556
## Effective number of parameters (2trace(S) - trace(S'S)): 33.83365
## Effective degrees of freedom (n-2trace(S) + trace(S'S)): 5522.166
## AICc (GWR book, Fotheringham, et al. 2002, p. 61, eq 2.33): 4742.686
## AIC (GWR book, Fotheringham, et al. 2002,GWR p. 96, eq. 4.22): 4706.401
## Residual sum of squares: 754.2708
## R-square value: 0.7091539
## Adjusted R-square value: 0.7073716
##
## ***********************************************************************
## Program stops at: 2020-06-01 00:24:49
The fixed bandwidth computed with the CV approach is 1215870 metres. Geographically Weighted Regression performs slightly better than the global model: its adjusted R-square value is 70.74%, compared with 68.3% for the global model.
Setting the adaptive argument to FALSE indicates that we are interested in computing the fixed bandwidth. The corrected AIC (AICc) approach was defined as the stopping rule.
Kernel is selected as gaussian to determine the optimal fixed bandwidth to use in explanatory model.
# Fixed-bandwidth search for the GWR model: gaussian kernel, with the
# AIC approach as the selection criterion.
bw.fixed.AIC_G <- bw.gwr(
  formula = LOG_GDP_CAPITA ~ ACTIVE + CROP_PROD + IDHM_Renda + IDHM_Educacao +
    Tax_to_GDP + GVA_AGROPEC_p + GVA_INDUSTRY_p + GVA_SERVICES_p +
    Motorcycles_p + DENSITY,
  data = municipality.sp,
  approach = "AIC",
  kernel = "gaussian",
  adaptive = FALSE,
  longlat = FALSE
)
## Take a cup of tea and have a break, it will take a few minutes.
## -----A kind suggestion from GWmodel development group
## Fixed bandwidth: 3921308 AICc value: 5143.85
## Fixed bandwidth: 2423986 AICc value: 5112.478
## Fixed bandwidth: 1498590 AICc value: 5037.006
## Fixed bandwidth: 926664.5 AICc value: 4869.68
## Fixed bandwidth: 573194.8 AICc value: 4606.194
## Fixed bandwidth: 354738.5 AICc value: 4359.868
## Fixed bandwidth: 219725.1 AICc value: 4118.859
## Fixed bandwidth: 136282.2 AICc value: 3756.752
## Fixed bandwidth: 84711.68 AICc value: 3522.908
## Fixed bandwidth: 52839.34 AICc value: 4353.739
## Fixed bandwidth: 104409.9 AICc value: 3555.17
## Fixed bandwidth: 72537.53 AICc value: 3626.098
## Fixed bandwidth: 92235.72 AICc value: 3515.556
## Fixed bandwidth: 96885.83 AICc value: 3525.416
## Fixed bandwidth: 89361.79 AICc value: 3514.417
## Fixed bandwidth: 87585.61 AICc value: 3516.044
## Fixed bandwidth: 90459.53 AICc value: 3514.331
## Fixed bandwidth: 91137.98 AICc value: 3514.607
## Fixed bandwidth: 90040.23 AICc value: 3514.284
## Fixed bandwidth: 89781.09 AICc value: 3514.304
## Fixed bandwidth: 90200.39 AICc value: 3514.291
## Fixed bandwidth: 89941.25 AICc value: 3514.288
## Fixed bandwidth: 90101.41 AICc value: 3514.285
## Fixed bandwidth: 90002.43 AICc value: 3514.285
## Fixed bandwidth: 90063.6 AICc value: 3514.285
The recommended bandwidth is 90040.23 metres. SIRGAS 2000, EPSG 5641 projects in metres.
Kernel is selected as exponential to determine the optimal fixed bandwidth to use in explanatory model.
# Fixed-bandwidth search for the GWR model: exponential kernel, with the
# AIC approach as the selection criterion.
bw.fixed.AIC_E <- bw.gwr(
  formula = LOG_GDP_CAPITA ~ ACTIVE + CROP_PROD + IDHM_Renda + IDHM_Educacao +
    Tax_to_GDP + GVA_AGROPEC_p + GVA_INDUSTRY_p + GVA_SERVICES_p +
    Motorcycles_p + DENSITY,
  data = municipality.sp,
  approach = "AIC",
  kernel = "exponential",
  adaptive = FALSE,
  longlat = FALSE
)
## Take a cup of tea and have a break, it will take a few minutes.
## -----A kind suggestion from GWmodel development group
## Fixed bandwidth: 3921308 AICc value: 5094.666
## Fixed bandwidth: 2423986 AICc value: 5053.162
## Fixed bandwidth: 1498590 AICc value: 4986.454
## Fixed bandwidth: 926664.5 AICc value: 4876.304
## Fixed bandwidth: 573194.8 AICc value: 4697.228
## Fixed bandwidth: 354738.5 AICc value: 4455.492
## Fixed bandwidth: 219725.1 AICc value: 4231.835
## Fixed bandwidth: 136282.2 AICc value: 4050.145
## Fixed bandwidth: 84711.68 AICc value: 3906.213
## Fixed bandwidth: 52839.34 AICc value: 4129.374
## Fixed bandwidth: 104409.9 AICc value: 3961.999
## Fixed bandwidth: 72537.53 AICc value: 3897.558
## Fixed bandwidth: 65013.49 AICc value: 3926.057
## Fixed bandwidth: 77187.64 AICc value: 3895.783
## Fixed bandwidth: 80061.57 AICc value: 3898.286
## Fixed bandwidth: 75411.46 AICc value: 3895.476
## Fixed bandwidth: 74313.71 AICc value: 3895.858
## Fixed bandwidth: 76089.9 AICc value: 3895.466
## Fixed bandwidth: 76509.2 AICc value: 3895.54
## Fixed bandwidth: 75830.76 AICc value: 3895.451
## Fixed bandwidth: 75670.6 AICc value: 3895.453
## Fixed bandwidth: 75929.74 AICc value: 3895.454
## Fixed bandwidth: 75769.58 AICc value: 3895.45
## Fixed bandwidth: 75731.77 AICc value: 3895.451
## Fixed bandwidth: 75792.95 AICc value: 3895.45
The recommended bandwidth is 75792.95 metres; the data are projected in SIRGAS 2000 / Brazil Mercator (EPSG:5641), whose units are metres.
Next, the tricube kernel is used to determine the optimal fixed bandwidth for the explanatory model.
# Search for the fixed (distance-based) bandwidth that minimises AICc when
# the local weights come from a tricube kernel. Coordinates are projected,
# so great-circle distances are switched off.
bw.fixed.AIC_T <- bw.gwr(
  formula = LOG_GDP_CAPITA ~ ACTIVE + CROP_PROD + IDHM_Renda + IDHM_Educacao +
    Tax_to_GDP + GVA_AGROPEC_p + GVA_INDUSTRY_p + GVA_SERVICES_p +
    Motorcycles_p + DENSITY,
  data = municipality.sp,
  approach = "AIC",
  kernel = "tricube",
  adaptive = FALSE,
  longlat = FALSE
)
## Take a cup of tea and have a break, it will take a few minutes.
## -----A kind suggestion from GWmodel development group
## Fixed bandwidth: 3921308 AICc value: 5091.349
## Fixed bandwidth: 2423986 AICc value: 4898.866
## Fixed bandwidth: 1498590 AICc value: 4618.208
## Fixed bandwidth: 926664.5 AICc value: 4370.623
## Fixed bandwidth: 573194.8 AICc value: 4062.316
## Error in gw_reg(X, Y, W.i, TRUE, i) : inv(): matrix seems singular
## Fixed bandwidth: 354738.5 AICc value: Inf
## Fixed bandwidth: 708208.2 AICc value: 4227.289
## Fixed bandwidth: 489751.9 AICc value: 3930.828
## Fixed bandwidth: 438181.4 AICc value: 3816.826
## Fixed bandwidth: 406309 AICc value: 3737.548
## Fixed bandwidth: 386610.8 AICc value: 5038.342
## Fixed bandwidth: 418483.2 AICc value: 3769.375
## Fixed bandwidth: 398785 AICc value: 3714.956
## Fixed bandwidth: 394134.9 AICc value: 3701.17
## Fixed bandwidth: 391260.9 AICc value: 3694.928
## Fixed bandwidth: 389484.8 AICc value: 4108.241
## Fixed bandwidth: 392358.7 AICc value: 3696.536
## Fixed bandwidth: 390582.5 AICc value: 3885.94
## Fixed bandwidth: 391680.2 AICc value: 3694.849
## Fixed bandwidth: 391939.4 AICc value: 3695.489
## Fixed bandwidth: 391520.1 AICc value: 3694.456
## Fixed bandwidth: 391421.1 AICc value: 3694.214
## Fixed bandwidth: 391359.9 AICc value: 3694.064
## Fixed bandwidth: 391322.1 AICc value: 3693.972
## Fixed bandwidth: 391298.8 AICc value: 3693.919
## Fixed bandwidth: 391284.3 AICc value: 3693.975
## Fixed bandwidth: 391307.7 AICc value: 3693.938
## Fixed bandwidth: 391293.2 AICc value: 3693.902
## Fixed bandwidth: 391289.8 AICc value: 3693.928
## Fixed bandwidth: 391295.3 AICc value: 3693.927
## Fixed bandwidth: 391291.9 AICc value: 3693.96
## Fixed bandwidth: 391294 AICc value: 3693.908
## Fixed bandwidth: 391292.7 AICc value: 3693.911
## Fixed bandwidth: 391293.5 AICc value: 3693.909
## Fixed bandwidth: 391293.1 AICc value: 3693.907
## Fixed bandwidth: 391293.4 AICc value: 3693.911
## Fixed bandwidth: 391293.2 AICc value: 3693.938
## Fixed bandwidth: 391293.3 AICc value: 3693.903
## Fixed bandwidth: 391293.2 AICc value: 3693.904
## Fixed bandwidth: 391293.3 AICc value: 3693.902
## Fixed bandwidth: 391293.3 AICc value: 3693.896
## Fixed bandwidth: 391293.3 AICc value: 3693.947
## Fixed bandwidth: 391293.3 AICc value: 3693.945
## Fixed bandwidth: 391293.3 AICc value: 3693.908
## Fixed bandwidth: 391293.3 AICc value: 3693.888
## Fixed bandwidth: 391293.3 AICc value: 3693.906
## Fixed bandwidth: 391293.3 AICc value: 3693.912
## Fixed bandwidth: 391293.3 AICc value: 3693.916
## Fixed bandwidth: 391293.3 AICc value: 3693.906
## Fixed bandwidth: 391293.3 AICc value: 3693.893
## Fixed bandwidth: 391293.3 AICc value: 3693.962
## Fixed bandwidth: 391293.3 AICc value: 3693.917
## Fixed bandwidth: 391293.3 AICc value: 3693.912
The recommended bandwidth is 391293.3 metres; the data are projected in SIRGAS 2000 / Brazil Mercator (EPSG:5641), whose units are metres.
Next, the boxcar kernel is used to determine the optimal fixed bandwidth for the explanatory model.
# Search for the fixed (distance-based) bandwidth that minimises AICc when
# the local weights come from a boxcar kernel. Coordinates are projected,
# so great-circle distances are switched off.
bw.fixed.AIC_B <- bw.gwr(
  formula = LOG_GDP_CAPITA ~ ACTIVE + CROP_PROD + IDHM_Renda + IDHM_Educacao +
    Tax_to_GDP + GVA_AGROPEC_p + GVA_INDUSTRY_p + GVA_SERVICES_p +
    Motorcycles_p + DENSITY,
  data = municipality.sp,
  approach = "AIC",
  kernel = "boxcar",
  adaptive = FALSE,
  longlat = FALSE
)
## Take a cup of tea and have a break, it will take a few minutes.
## -----A kind suggestion from GWmodel development group
## Fixed bandwidth: 3921308 AICc value: 5165.244
## Fixed bandwidth: 2423986 AICc value: 5129.259
## Fixed bandwidth: 1498590 AICc value: 4836.09
## Fixed bandwidth: 926664.5 AICc value: 4616.234
## Fixed bandwidth: 573194.8 AICc value: 4350.017
## Error in gw_reg(X, Y, W.i, TRUE, i) : inv(): matrix seems singular
## Fixed bandwidth: 354738.5 AICc value: Inf
## Fixed bandwidth: 708208.2 AICc value: 4446.121
## Fixed bandwidth: 489751.9 AICc value: 4218.065
## Fixed bandwidth: 438181.4 AICc value: 4116.091
## Fixed bandwidth: 406309 AICc value: 4047.251
## Fixed bandwidth: 386610.8 AICc value: 6294.373
## Fixed bandwidth: 418483.2 AICc value: 4082.792
## Fixed bandwidth: 398785 AICc value: 4041.819
## Fixed bandwidth: 394134.9 AICc value: 4021.187
## Fixed bandwidth: 391260.9 AICc value: 4021.159
## Fixed bandwidth: 389484.8 AICc value: 6432.356
## Fixed bandwidth: 392358.7 AICc value: 4016.053
## Fixed bandwidth: 393037.1 AICc value: 4017.982
## Fixed bandwidth: 391939.4 AICc value: 4014.325
## Fixed bandwidth: 391680.2 AICc value: 4022.02
## Fixed bandwidth: 392099.6 AICc value: 4015.37
## Fixed bandwidth: 391840.4 AICc value: 4020.414
## Fixed bandwidth: 392000.6 AICc value: 4014.912
## Fixed bandwidth: 391901.6 AICc value: 4014.401
## Fixed bandwidth: 391962.8 AICc value: 4014.828
## Fixed bandwidth: 391925 AICc value: 4014.566
## Fixed bandwidth: 391948.3 AICc value: 4014.712
## Fixed bandwidth: 391933.9 AICc value: 4014.245
## Fixed bandwidth: 391930.5 AICc value: 4014.38
## Fixed bandwidth: 391936 AICc value: 4014.334
## Fixed bandwidth: 391932.6 AICc value: 4014.291
## Fixed bandwidth: 391934.7 AICc value: 4014.33
## Fixed bandwidth: 391933.4 AICc value: 4014.245
The recommended bandwidth is 391933.4 metres; the data are projected in SIRGAS 2000 / Brazil Mercator (EPSG:5641), whose units are metres.
Calibrate the GWR model using the fixed bandwidth from the AIC approach and the Gaussian kernel.
# Fit the basic GWR model with a Gaussian kernel, using the fixed bandwidth
# stored in bw.fixed.AIC_G, on projected (non long/lat) coordinates.
gwr.fixed.AIC_G <- gwr.basic(
  formula = LOG_GDP_CAPITA ~ ACTIVE + CROP_PROD + IDHM_Renda + IDHM_Educacao +
    Tax_to_GDP + GVA_AGROPEC_p + GVA_INDUSTRY_p + GVA_SERVICES_p +
    Motorcycles_p + DENSITY,
  data = municipality.sp,
  bw = bw.fixed.AIC_G,
  kernel = "gaussian",
  longlat = FALSE
)
# Print the global-regression and GWR summaries.
gwr.fixed.AIC_G
## ***********************************************************************
## * Package GWmodel *
## ***********************************************************************
## Program starts at: 2020-06-01 01:12:35
## Call:
## gwr.basic(formula = LOG_GDP_CAPITA ~ ACTIVE + CROP_PROD + IDHM_Renda +
## IDHM_Educacao + Tax_to_GDP + GVA_AGROPEC_p + GVA_INDUSTRY_p +
## GVA_SERVICES_p + Motorcycles_p + DENSITY, data = municipality.sp,
## bw = bw.fixed.AIC_G, kernel = "gaussian", longlat = FALSE)
##
## Dependent (y) variable: LOG_GDP_CAPITA
## Independent variables: ACTIVE CROP_PROD IDHM_Renda IDHM_Educacao Tax_to_GDP GVA_AGROPEC_p GVA_INDUSTRY_p GVA_SERVICES_p Motorcycles_p DENSITY
## Number of data points: 5556
## ***********************************************************************
## * Results of Global Regression *
## ***********************************************************************
##
## Call:
## lm(formula = formula, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.02479 -0.23134 -0.05998 0.16161 2.84433
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.72299 0.04516 126.716 < 2e-16 ***
## ACTIVE -0.25430 0.04998 -5.088 3.73e-07 ***
## CROP_PROD 2.60383 0.11829 22.011 < 2e-16 ***
## IDHM_Renda 5.99105 0.11742 51.024 < 2e-16 ***
## IDHM_Educacao 0.34560 0.10327 3.347 0.000823 ***
## Tax_to_GDP 0.26422 0.06573 4.020 5.90e-05 ***
## GVA_AGROPEC_p 2.40308 0.17939 13.395 < 2e-16 ***
## GVA_INDUSTRY_p 2.94822 0.20010 14.734 < 2e-16 ***
## GVA_SERVICES_p 2.63698 0.37041 7.119 1.23e-12 ***
## Motorcycles_p -0.54948 0.08077 -6.803 1.14e-11 ***
## DENSITY -2.17765 0.17992 -12.104 < 2e-16 ***
##
## ---Significance stars
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Residual standard error: 0.3847 on 5545 degrees of freedom
## Multiple R-squared: 0.6836
## Adjusted R-squared: 0.683
## F-statistic: 1198 on 10 and 5545 DF, p-value: < 2.2e-16
## ***Extra Diagnostic information
## Residual sum of squares: 820.5633
## Sigma(hat): 0.3843732
## AIC: 5164.603
## AICc: 5164.659
## ***********************************************************************
## * Results of Geographically Weighted Regression *
## ***********************************************************************
##
## *********************Model calibration information*********************
## Kernel function: gaussian
## Fixed bandwidth: 90040.23
## Regression points: the same locations as observations are used.
## Distance metric: Euclidean distance metric is used.
##
## ****************Summary of GWR coefficient estimates:******************
## Min. 1st Qu. Median 3rd Qu. Max.
## Intercept 2.8257e-02 6.5711e+00 7.3013e+00 8.0684e+00 1.5736e+01
## ACTIVE -3.8999e+00 -3.3639e-01 6.6353e-02 3.1430e-01 4.3735e+00
## CROP_PROD -7.7238e+01 1.8838e+00 3.5224e+00 5.8029e+00 1.1644e+02
## IDHM_Renda -1.5220e+01 1.7816e+00 2.7862e+00 4.1355e+00 1.8382e+01
## IDHM_Educacao -6.7649e+00 1.0695e-01 7.1227e-01 1.2790e+00 6.6307e+00
## Tax_to_GDP -1.8031e+01 -2.9342e-02 2.2304e-01 4.5555e-01 1.2721e+01
## GVA_AGROPEC_p -3.6435e+02 2.0092e+00 3.1908e+00 6.8620e+00 3.4522e+02
## GVA_INDUSTRY_p -1.1714e+05 1.0936e+01 2.3151e+01 6.2805e+01 6.4254e+04
## GVA_SERVICES_p -1.3769e+05 -1.0818e+01 1.5560e+01 1.5446e+02 3.1104e+05
## Motorcycles_p -1.2670e+01 -8.3082e-01 -2.6010e-01 5.2737e-01 1.9263e+01
## DENSITY -9.8356e+03 -7.6701e+01 -1.3863e+01 -3.3212e+00 3.5849e+03
## ************************Diagnostic information*************************
## Number of data points: 5556
## Effective number of parameters (2trace(S) - trace(S'S)): 1170.938
## Effective degrees of freedom (n-2trace(S) + trace(S'S)): 4385.062
## AICc (GWR book, Fotheringham, et al. 2002, p. 61, eq 2.33): 3514.284
## AIC (GWR book, Fotheringham, et al. 2002,GWR p. 96, eq. 4.22): 2255.085
## Residual sum of squares: 414.8347
## R-square value: 0.8400401
## Adjusted R-square value: 0.7973165
##
## ***********************************************************************
## Program stops at: 2020-06-01 01:13:22
The fixed bandwidth computed with the AIC approach is 90040.23 metres. The geographically weighted regression performs much better than the global model: its adjusted R-square is 79.73%, compared with 68.3% for the global model.
Calibrate the GWR model using the fixed bandwidth from the AIC approach and the exponential kernel.
# Fit the basic GWR model with an exponential kernel, using the fixed
# bandwidth stored in bw.fixed.AIC_E, on projected (non long/lat) coordinates.
gwr.fixed.AIC_E <- gwr.basic(
  formula = LOG_GDP_CAPITA ~ ACTIVE + CROP_PROD + IDHM_Renda + IDHM_Educacao +
    Tax_to_GDP + GVA_AGROPEC_p + GVA_INDUSTRY_p + GVA_SERVICES_p +
    Motorcycles_p + DENSITY,
  data = municipality.sp,
  bw = bw.fixed.AIC_E,
  kernel = "exponential",
  longlat = FALSE
)
# Print the global-regression and GWR summaries.
gwr.fixed.AIC_E
## ***********************************************************************
## * Package GWmodel *
## ***********************************************************************
## Program starts at: 2020-06-01 01:13:22
## Call:
## gwr.basic(formula = LOG_GDP_CAPITA ~ ACTIVE + CROP_PROD + IDHM_Renda +
## IDHM_Educacao + Tax_to_GDP + GVA_AGROPEC_p + GVA_INDUSTRY_p +
## GVA_SERVICES_p + Motorcycles_p + DENSITY, data = municipality.sp,
## bw = bw.fixed.AIC_E, kernel = "exponential", longlat = FALSE)
##
## Dependent (y) variable: LOG_GDP_CAPITA
## Independent variables: ACTIVE CROP_PROD IDHM_Renda IDHM_Educacao Tax_to_GDP GVA_AGROPEC_p GVA_INDUSTRY_p GVA_SERVICES_p Motorcycles_p DENSITY
## Number of data points: 5556
## ***********************************************************************
## * Results of Global Regression *
## ***********************************************************************
##
## Call:
## lm(formula = formula, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.02479 -0.23134 -0.05998 0.16161 2.84433
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.72299 0.04516 126.716 < 2e-16 ***
## ACTIVE -0.25430 0.04998 -5.088 3.73e-07 ***
## CROP_PROD 2.60383 0.11829 22.011 < 2e-16 ***
## IDHM_Renda 5.99105 0.11742 51.024 < 2e-16 ***
## IDHM_Educacao 0.34560 0.10327 3.347 0.000823 ***
## Tax_to_GDP 0.26422 0.06573 4.020 5.90e-05 ***
## GVA_AGROPEC_p 2.40308 0.17939 13.395 < 2e-16 ***
## GVA_INDUSTRY_p 2.94822 0.20010 14.734 < 2e-16 ***
## GVA_SERVICES_p 2.63698 0.37041 7.119 1.23e-12 ***
## Motorcycles_p -0.54948 0.08077 -6.803 1.14e-11 ***
## DENSITY -2.17765 0.17992 -12.104 < 2e-16 ***
##
## ---Significance stars
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Residual standard error: 0.3847 on 5545 degrees of freedom
## Multiple R-squared: 0.6836
## Adjusted R-squared: 0.683
## F-statistic: 1198 on 10 and 5545 DF, p-value: < 2.2e-16
## ***Extra Diagnostic information
## Residual sum of squares: 820.5633
## Sigma(hat): 0.3843732
## AIC: 5164.603
## AICc: 5164.659
## ***********************************************************************
## * Results of Geographically Weighted Regression *
## ***********************************************************************
##
## *********************Model calibration information*********************
## Kernel function: exponential
## Fixed bandwidth: 75792.95
## Regression points: the same locations as observations are used.
## Distance metric: Euclidean distance metric is used.
##
## ****************Summary of GWR coefficient estimates:******************
## Min. 1st Qu. Median 3rd Qu. Max.
## Intercept 2.6299e+00 6.3908e+00 7.0839e+00 7.8239e+00 12.0606
## ACTIVE -4.0205e+00 -3.6832e-01 8.1133e-03 2.4287e-01 2.6469
## CROP_PROD -7.3186e+00 2.1196e+00 3.2989e+00 5.1594e+00 39.6905
## IDHM_Renda -2.2667e+00 2.2955e+00 3.2570e+00 4.5676e+00 12.0929
## IDHM_Educacao -4.6508e+00 1.0640e-01 6.5050e-01 1.1625e+00 6.2806
## Tax_to_GDP -5.6094e+00 1.3852e-02 2.2810e-01 4.4913e-01 5.1552
## GVA_AGROPEC_p -4.1117e+01 1.8532e+00 3.0207e+00 5.1391e+00 182.6994
## GVA_INDUSTRY_p -1.4964e+03 6.4562e+00 1.4236e+01 2.1844e+01 10749.3724
## GVA_SERVICES_p -4.4531e+03 -7.9965e+00 5.7557e+00 2.8351e+01 13098.7791
## Motorcycles_p -9.8775e+00 -9.9089e-01 -3.2202e-01 3.5303e-01 3.0699
## DENSITY -2.6585e+03 -1.6648e+01 -5.4921e+00 -2.0745e+00 698.6406
## ************************Diagnostic information*************************
## Number of data points: 5556
## Effective number of parameters (2trace(S) - trace(S'S)): 1429.249
## Effective degrees of freedom (n-2trace(S) + trace(S'S)): 4126.751
## AICc (GWR book, Fotheringham, et al. 2002, p. 61, eq 2.33): 3895.45
## AIC (GWR book, Fotheringham, et al. 2002,GWR p. 96, eq. 4.22): 2483.158
## Residual sum of squares: 426.0122
## R-square value: 0.8357301
## Adjusted R-square value: 0.7788235
##
## ***********************************************************************
## Program stops at: 2020-06-01 01:13:58
The fixed bandwidth computed with the AIC approach is 75792.95 metres. The geographically weighted regression performs much better than the global model: its adjusted R-square is 77.88%, compared with 68.3% for the global model.
Calibrate the GWR model using the fixed bandwidth from the AIC approach and the tricube kernel.
# Fit the basic GWR model with a tricube kernel, using the fixed bandwidth
# stored in bw.fixed.AIC_T, on projected (non long/lat) coordinates.
gwr.fixed.AIC_T <- gwr.basic(
  formula = LOG_GDP_CAPITA ~ ACTIVE + CROP_PROD + IDHM_Renda + IDHM_Educacao +
    Tax_to_GDP + GVA_AGROPEC_p + GVA_INDUSTRY_p + GVA_SERVICES_p +
    Motorcycles_p + DENSITY,
  data = municipality.sp,
  bw = bw.fixed.AIC_T,
  kernel = "tricube",
  longlat = FALSE
)
# Print the global-regression and GWR summaries.
gwr.fixed.AIC_T
## ***********************************************************************
## * Package GWmodel *
## ***********************************************************************
## Program starts at: 2020-06-01 01:13:58
## Call:
## gwr.basic(formula = LOG_GDP_CAPITA ~ ACTIVE + CROP_PROD + IDHM_Renda +
## IDHM_Educacao + Tax_to_GDP + GVA_AGROPEC_p + GVA_INDUSTRY_p +
## GVA_SERVICES_p + Motorcycles_p + DENSITY, data = municipality.sp,
## bw = bw.fixed.AIC_T, kernel = "tricube", longlat = FALSE)
##
## Dependent (y) variable: LOG_GDP_CAPITA
## Independent variables: ACTIVE CROP_PROD IDHM_Renda IDHM_Educacao Tax_to_GDP GVA_AGROPEC_p GVA_INDUSTRY_p GVA_SERVICES_p Motorcycles_p DENSITY
## Number of data points: 5556
## ***********************************************************************
## * Results of Global Regression *
## ***********************************************************************
##
## Call:
## lm(formula = formula, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.02479 -0.23134 -0.05998 0.16161 2.84433
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.72299 0.04516 126.716 < 2e-16 ***
## ACTIVE -0.25430 0.04998 -5.088 3.73e-07 ***
## CROP_PROD 2.60383 0.11829 22.011 < 2e-16 ***
## IDHM_Renda 5.99105 0.11742 51.024 < 2e-16 ***
## IDHM_Educacao 0.34560 0.10327 3.347 0.000823 ***
## Tax_to_GDP 0.26422 0.06573 4.020 5.90e-05 ***
## GVA_AGROPEC_p 2.40308 0.17939 13.395 < 2e-16 ***
## GVA_INDUSTRY_p 2.94822 0.20010 14.734 < 2e-16 ***
## GVA_SERVICES_p 2.63698 0.37041 7.119 1.23e-12 ***
## Motorcycles_p -0.54948 0.08077 -6.803 1.14e-11 ***
## DENSITY -2.17765 0.17992 -12.104 < 2e-16 ***
##
## ---Significance stars
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Residual standard error: 0.3847 on 5545 degrees of freedom
## Multiple R-squared: 0.6836
## Adjusted R-squared: 0.683
## F-statistic: 1198 on 10 and 5545 DF, p-value: < 2.2e-16
## ***Extra Diagnostic information
## Residual sum of squares: 820.5633
## Sigma(hat): 0.3843732
## AIC: 5164.603
## AICc: 5164.659
## ***********************************************************************
## * Results of Geographically Weighted Regression *
## ***********************************************************************
##
## *********************Model calibration information*********************
## Kernel function: tricube
## Fixed bandwidth: 391293.3
## Regression points: the same locations as observations are used.
## Distance metric: Euclidean distance metric is used.
##
## ****************Summary of GWR coefficient estimates:******************
## Min. 1st Qu. Median 3rd Qu. Max.
## Intercept -6.4151e+01 6.2967e+00 7.1773e+00 7.9077e+00 2.6443e+01
## ACTIVE -1.7650e+01 -2.4833e-01 1.5415e-02 2.1114e-01 9.3344e+00
## CROP_PROD -8.8237e+02 1.9792e+00 3.1569e+00 5.2889e+00 1.3133e+02
## IDHM_Renda -3.0635e+00 2.1923e+00 3.1902e+00 4.5380e+00 4.5434e+01
## IDHM_Educacao -8.9792e+01 2.0206e-01 6.2083e-01 1.1179e+00 4.4608e+00
## Tax_to_GDP -8.7908e+00 2.1355e-02 2.6398e-01 4.4643e-01 1.1498e+04
## GVA_AGROPEC_p -1.7388e+03 2.2401e+00 3.1236e+00 6.1020e+00 4.5311e+02
## GVA_INDUSTRY_p -1.7361e+05 8.4900e+00 1.7267e+01 5.1963e+01 7.4706e+04
## GVA_SERVICES_p -7.4641e+04 -1.2482e+01 7.1216e+00 9.4441e+01 7.9844e+05
## Motorcycles_p -1.7687e+01 -9.2755e-01 -2.5149e-01 3.7870e-01 7.9117e+01
## DENSITY -1.2026e+04 -4.4272e+01 -8.1272e+00 -1.9993e+00 6.5203e+03
## ************************Diagnostic information*************************
## Number of data points: 5556
## Effective number of parameters (2trace(S) - trace(S'S)): 498.7027
## Effective degrees of freedom (n-2trace(S) + trace(S'S)): 5057.297
## AICc (GWR book, Fotheringham, et al. 2002, p. 61, eq 2.33): 3693.888
## AIC (GWR book, Fotheringham, et al. 2002,GWR p. 96, eq. 4.22): 3205.566
## Residual sum of squares: 537.2729
## R-square value: 0.7928281
## Adjusted R-square value: 0.7723947
##
## ***********************************************************************
## Program stops at: 2020-06-01 01:14:37
The fixed bandwidth computed with the AIC approach is 391293.3 metres. The geographically weighted regression performs better than the global model: its adjusted R-square is 77.23%, compared with 68.3% for the global model.
Calibrate the GWR model using the fixed bandwidth from the AIC approach and the boxcar kernel.
# Fit the basic GWR model with a boxcar kernel, using the fixed bandwidth
# stored in bw.fixed.AIC_B, on projected (non long/lat) coordinates.
# The formula lists each of the ten predictors exactly once, matching the
# formula passed to bw.gwr() when bw.fixed.AIC_B was selected.
gwr.fixed.AIC_B <- gwr.basic(
  formula = LOG_GDP_CAPITA ~ ACTIVE + CROP_PROD + IDHM_Renda + IDHM_Educacao +
    Tax_to_GDP + GVA_AGROPEC_p + GVA_INDUSTRY_p + GVA_SERVICES_p +
    Motorcycles_p + DENSITY,
  data = municipality.sp,
  bw = bw.fixed.AIC_B,
  kernel = "boxcar",
  longlat = FALSE
)
# Print the global-regression and GWR summaries.
gwr.fixed.AIC_B
## ***********************************************************************
## * Package GWmodel *
## ***********************************************************************
## Program starts at: 2020-06-01 01:14:37
## Call:
## gwr.basic(formula = LOG_GDP_CAPITA ~ ACTIVE + ACTIVE + CROP_PROD +
## IDHM_Renda + IDHM_Educacao + Tax_to_GDP + GVA_AGROPEC_p +
## GVA_INDUSTRY_p + GVA_SERVICES_p + Motorcycles_p + DENSITY,
## data = municipality.sp, bw = bw.fixed.AIC_B, kernel = "boxcar",
## longlat = FALSE)
##
## Dependent (y) variable: LOG_GDP_CAPITA
## Independent variables: ACTIVE CROP_PROD IDHM_Renda IDHM_Educacao Tax_to_GDP GVA_AGROPEC_p GVA_INDUSTRY_p GVA_SERVICES_p Motorcycles_p DENSITY
## Number of data points: 5556
## ***********************************************************************
## * Results of Global Regression *
## ***********************************************************************
##
## Call:
## lm(formula = formula, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.02479 -0.23134 -0.05998 0.16161 2.84433
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.72299 0.04516 126.716 < 2e-16 ***
## ACTIVE -0.25430 0.04998 -5.088 3.73e-07 ***
## CROP_PROD 2.60383 0.11829 22.011 < 2e-16 ***
## IDHM_Renda 5.99105 0.11742 51.024 < 2e-16 ***
## IDHM_Educacao 0.34560 0.10327 3.347 0.000823 ***
## Tax_to_GDP 0.26422 0.06573 4.020 5.90e-05 ***
## GVA_AGROPEC_p 2.40308 0.17939 13.395 < 2e-16 ***
## GVA_INDUSTRY_p 2.94822 0.20010 14.734 < 2e-16 ***
## GVA_SERVICES_p 2.63698 0.37041 7.119 1.23e-12 ***
## Motorcycles_p -0.54948 0.08077 -6.803 1.14e-11 ***
## DENSITY -2.17765 0.17992 -12.104 < 2e-16 ***
##
## ---Significance stars
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Residual standard error: 0.3847 on 5545 degrees of freedom
## Multiple R-squared: 0.6836
## Adjusted R-squared: 0.683
## F-statistic: 1198 on 10 and 5545 DF, p-value: < 2.2e-16
## ***Extra Diagnostic information
## Residual sum of squares: 820.5633
## Sigma(hat): 0.3843732
## AIC: 5164.603
## AICc: 5164.659
## ***********************************************************************
## * Results of Geographically Weighted Regression *
## ***********************************************************************
##
## *********************Model calibration information*********************
## Kernel function: boxcar
## Fixed bandwidth: 391933.4
## Regression points: the same locations as observations are used.
## Distance metric: Euclidean distance metric is used.
##
## ****************Summary of GWR coefficient estimates:******************
## Min. 1st Qu. Median 3rd Qu. Max.
## Intercept -8.1169e+01 6.2361e+00 6.9853e+00 7.6813e+00 2.6341e+01
## ACTIVE -1.7650e+01 -2.0778e-01 2.7505e-03 2.0193e-01 9.2111e+00
## CROP_PROD -8.8237e+02 2.1135e+00 2.8401e+00 3.9492e+00 1.1221e+02
## IDHM_Renda -6.2392e+00 2.6604e+00 3.5832e+00 4.7895e+00 4.4701e+01
## IDHM_Educacao -8.8758e+01 1.2332e-01 4.5885e-01 8.5653e-01 4.1336e+00
## Tax_to_GDP -6.3913e+00 6.3357e-02 2.7342e-01 4.2180e-01 1.3922e+04
## GVA_AGROPEC_p -1.7388e+03 2.3063e+00 3.0772e+00 5.8059e+00 5.4343e+02
## GVA_INDUSTRY_p -1.7361e+05 4.1734e+00 1.5710e+01 3.6609e+01 9.0123e+04
## GVA_SERVICES_p -1.0941e+05 -1.6212e+01 4.1636e+00 6.8712e+01 7.9844e+05
## Motorcycles_p -1.4587e+01 -9.4441e-01 -2.5447e-01 2.2839e-01 7.9117e+01
## DENSITY -1.2026e+04 -3.0892e+01 -4.4322e+00 -1.6991e+00 6.5203e+03
## ************************Diagnostic information*************************
## Number of data points: 5556
## Effective number of parameters (2trace(S) - trace(S'S)): 223.2541
## Effective degrees of freedom (n-2trace(S) + trace(S'S)): 5332.746
## AICc (GWR book, Fotheringham, et al. 2002, p. 61, eq 2.33): 4014.245
## AIC (GWR book, Fotheringham, et al. 2002,GWR p. 96, eq. 4.22): 3770.039
## Residual sum of squares: 615.926
## R-square value: 0.7624995
## Adjusted R-square value: 0.7525547
##
## ***********************************************************************
## Program stops at: 2020-06-01 01:15:10
The fixed bandwidth computed with the AIC approach is 391933.4 metres. The geographically weighted regression performs better than the global model: its adjusted R-square is 75.25%, compared with 68.3% for the global model.
I was not able to build the adaptive-bandwidth model because the calibration stopped with an error.
| MLR global model – adjusted R-square | | |
|---|---|---|
| 68.30% | | |
| **Fixed bandwidth** | **CV approach** | **AIC approach** |
| Gaussian | 72.51% | 79.73% |
| Exponential | 74.29% | 77.88% |
| Tricube | 72.88% | 77.23% |
| Boxcar | 70.73% | 75.25% |
In conclusion, under the fixed-bandwidth scheme, the exponential kernel gave the best result for the CV approach (adjusted R-square of 74.29%), whereas the Gaussian kernel gave the best result for the AIC approach (79.73%), which is also the highest adjusted R-square across both approaches and all kernels.
Therefore, I will use the fixed-bandwidth scheme with the AIC approach and the Gaussian kernel, as it is the most suitable scheme for the explanatory model.
# Extract the fitted model's SDF slot as a plain data frame.
gwr.fixed.AIC_G.output <- as.data.frame(gwr.fixed.AIC_G$SDF)

# Append the GWR results column-wise to the municipality layer and project
# the result to EPSG:5641.
# NOTE(review): cbind() pairs rows by position, so this assumes both inputs
# are in the same municipality order -- confirm.
municipality.sf.fixed <- st_transform(
  cbind(municipality.res.sf, as.matrix(gwr.fixed.AIC_G.output)),
  crs = 5641
)

# Attach the combined attributes to the `brazil` layer for plotting.
# NOTE(review): same positional row-order assumption as above -- confirm.
plot_municipality.sf.fixed <- cbind(brazil, municipality.sf.fixed)

# Print the combined layer for inspection.
municipality.sf.fixed
## Simple feature collection with 5556 features and 59 fields
## geometry type: POINT
## dimension: XY
## bbox: xmin: 1671725 ymin: 6039171 xmax: 6175358 ymax: 10507270
## projected CRS: SIRGAS 2000 / Brazil Mercator
## First 10 features:
## CITY STATE LOG_GDP_CAPITA ACTIVE CROP_PROD IDHM_Renda
## 1 ABADIA DE GOIÁS GO 9.936176 0.4398361 0.0005627679 0.687
## 2 ABADIA DOS DOURADOS MG 10.150023 0.3849652 0.0055015672 0.693
## 3 ABADIÂNIA GO 9.656845 0.3742335 0.0101026448 0.671
## 4 ABAETÉ MG 9.811943 0.5081446 0.0022907675 0.720
## 5 ABAETETUBA PA 9.014613 0.3522319 0.2140142326 0.579
## 6 ABAIARA CE 8.759419 0.2291213 0.0015982241 0.540
## 7 ABAÍRA BA 8.851191 0.2517369 0.0012211116 0.577
## 8 ABARÉ BA 8.741424 0.2701239 0.0069501677 0.533
## 9 ABATIÁ PR 9.960510 0.4682489 0.0030361371 0.676
## 10 ABDON BATISTA SC 10.116137 0.1830340 0.0079987542 0.660
## IDHM_Longevidade IDHM_Educacao Tax_to_GDP GVA_AGROPEC_p GVA_INDUSTRY_p
## 1 0.830 0.622 0.384402097 8.739862e-06 1.646729e-03
## 2 0.839 0.563 0.225167016 1.190405e-02 2.548443e-04
## 3 0.841 0.579 0.006687133 8.508967e-06 1.386643e-04
## 4 0.848 0.556 0.006594110 1.300356e-02 1.478121e-04
## 5 0.798 0.537 0.006635263 1.810342e-02 3.152496e-04
## 6 0.748 0.612 0.006571229 5.112596e-03 2.824461e-07
## 7 0.746 0.510 0.006498189 4.782320e-06 5.528207e-05
## 8 0.776 0.460 0.006402248 1.186907e-03 3.573635e-08
## 9 0.804 0.596 0.006402202 6.656371e-02 3.348251e-04
## 10 0.812 0.625 0.006511414 2.187962e-02 1.307337e-04
## GVA_SERVICES_p GVA_PUBLIC_p Cars_p Motorcycles_p Wheeled_tractor_p
## 1 6.207535e-04 3.283947e-03 0.26797467 0.15472495 0.0000000000
## 2 8.701156e-05 4.175596e-04 0.31647009 0.16228506 0.0000000000
## 3 1.617050e-04 7.946324e-04 0.15401313 0.07738644 0.0000000000
## 4 1.156503e-07 6.205967e-04 0.29388309 0.12526512 0.0000000000
## 5 3.554306e-04 3.959969e-03 0.03473219 0.16889570 0.0000000000
## 6 1.545709e-07 2.618142e-03 0.04815815 0.14578072 0.0000000000
## 7 4.084189e-05 6.922032e-04 0.09726444 0.07555363 0.0000000000
## 8 2.813300e-05 5.264113e-07 0.03074377 0.07683434 0.0000000000
## 9 2.299050e-04 1.953311e-03 0.27812700 0.11699808 0.0000000000
## 10 8.255990e-05 9.855805e-04 0.37294612 0.13183034 0.0007642339
## DENSITY MLR_RES Intercept ACTIVE.1 CROP_PROD.1 IDHM_Renda.1
## 1 0.0040285371 -0.021525607 6.004270 -1.4556088 3.2186464 5.664373
## 2 0.0005778995 0.165590675 9.329219 -2.0352341 1.6386020 2.222952
## 3 0.0012905348 -0.174664331 7.006757 -2.0239588 1.7923173 3.880532
## 4 0.0009463723 -0.256029008 6.681357 -0.8411842 3.3884076 4.453483
## 5 0.0069579912 -0.769627532 8.252784 -0.7143963 -0.2604294 3.097369
## 6 0.0046995112 -0.279822050 7.209222 0.3502375 -0.1061308 2.783619
## 7 0.0012513547 -0.401807617 7.518919 0.9752288 8.5112189 1.999835
## 8 0.0009057347 -0.243597570 7.401603 0.9001395 1.0963542 2.125483
## 9 0.0025060281 -0.000728362 8.206344 0.1303839 3.7448812 1.316491
## 10 0.0008031046 0.268061833 8.647440 -0.5833374 4.4054582 1.878267
## IDHM_Educacao.1 Tax_to_GDP.1 GVA_AGROPEC_p.1 GVA_INDUSTRY_p.1
## 1 1.1868414 -0.01840324 3.265623 21.01094
## 2 0.6127015 -0.33016473 7.843243 91.47929
## 3 1.6171108 0.26290475 5.928779 14.94256
## 4 1.1289073 -0.20769164 4.174620 12.70081
## 5 -2.2295975 -0.09460073 15.673502 71.17725
## 6 0.1422196 0.18314450 9.728982 92.50568
## 7 -0.2909252 0.54847231 10.439417 14.94238
## 8 0.1366724 0.26601534 10.457195 86.51482
## 9 1.1020167 0.31570059 1.421343 23.83867
## 10 1.0252986 0.25341843 1.970881 17.05609
## GVA_SERVICES_p.1 Motorcycles_p.1 DENSITY.1 y yhat residual
## 1 52.87972 -0.7881302 -18.424911 9.936176 9.859702 0.076473813
## 2 53.02909 -1.8126429 -49.243822 10.150023 10.164525 -0.014501894
## 3 32.03506 0.0837554 -9.707707 9.656845 9.810590 -0.153744809
## 4 -13.84041 -1.5178639 -3.197355 9.811943 9.957486 -0.145542558
## 5 188.72356 1.7664503 -13.401707 9.014613 9.119228 -0.104614988
## 6 248.04474 0.2840500 -36.220973 8.759419 8.801689 -0.042269765
## 7 237.81273 1.4378743 -49.204201 8.851191 8.841564 0.009626918
## 8 245.66447 0.7866376 -44.125433 8.741424 8.889627 -0.148203219
## 9 36.45285 0.7325809 -24.698302 9.960510 9.962325 -0.001814586
## 10 29.15162 -1.8216733 -20.424039 10.116137 10.249230 -0.133092807
## CV_Score Stud_residual Intercept_SE ACTIVE_SE CROP_PROD_SE IDHM_Renda_SE
## 1 0 0.293902944 0.8532545 0.4216349 0.4924783 1.3424691
## 2 0 -0.072324106 1.0741316 0.6600426 0.5150670 1.6752342
## 3 0 -0.530961662 0.7926921 0.4079757 0.4670149 1.3272212
## 4 0 -0.501361348 0.5166165 0.4049914 1.0049737 0.9192358
## 5 0 -0.449799247 1.0049186 0.6903946 0.5303559 2.2401706
## 6 0 -0.150186515 0.4869485 0.3180481 2.7848161 0.9940732
## 7 0 0.033584851 0.7725896 0.4279241 2.2736486 1.5271234
## 8 0 -0.511303511 0.6093465 0.4813629 0.8416859 1.3435190
## 9 0 -0.006105785 0.5232439 0.3193145 0.7370620 0.9866189
## 10 0 -0.456726383 0.3517349 0.2114982 0.9641323 0.6201882
## IDHM_Educacao_SE Tax_to_GDP_SE GVA_AGROPEC_p_SE GVA_INDUSTRY_p_SE
## 1 0.7783307 0.3995442 2.7158503 5.028017
## 2 0.9200140 0.4960820 2.5749588 10.776400
## 3 0.8484875 0.4091672 3.1085952 5.064700
## 4 0.6967534 0.3189980 1.2245434 2.329391
## 5 0.8365106 0.8043059 3.3834675 15.703312
## 6 0.5228890 0.4731880 3.8195944 37.790003
## 7 0.8346191 0.9223246 6.6949332 5.999319
## 8 0.5426785 0.5630112 2.7926083 20.913609
## 9 0.5099078 0.3060778 0.6983001 3.298068
## 10 0.5233711 0.2426113 0.8848133 1.918717
## GVA_SERVICES_p_SE Motorcycles_p_SE DENSITY_SE Intercept_TV ACTIVE_TV
## 1 14.804972 0.5787158 3.785806 7.036905 -3.4522966
## 2 38.401531 0.8042715 7.706704 8.685360 -3.0834889
## 3 12.584413 0.5829810 3.007633 8.839191 -4.9609791
## 4 5.064343 0.5986356 1.445846 12.932913 -2.0770423
## 5 38.955268 1.6719096 4.006860 8.212390 -1.0347653
## 6 105.627312 0.3904421 16.511910 14.804895 1.1012094
## 7 129.132502 0.8639726 28.225046 9.732099 2.2789762
## 8 100.596726 0.6348841 16.417360 12.146787 1.8699812
## 9 9.425280 0.5833704 4.358072 15.683591 0.4083243
## 10 8.894951 0.4195394 2.967113 24.585104 -2.7581198
## CROP_PROD_TV IDHM_Renda_TV IDHM_Educacao_TV Tax_to_GDP_TV GVA_AGROPEC_p_TV
## 1 6.53561053 4.219369 1.5248548 -0.04606059 1.202431
## 2 3.18133779 1.326950 0.6659697 -0.66554469 3.045968
## 3 3.83781632 2.923802 1.9058746 0.64253618 1.907221
## 4 3.37163797 4.844767 1.6202395 -0.65107498 3.409124
## 5 -0.49104645 1.382649 -2.6653548 -0.11761785 4.632379
## 6 -0.03811052 2.800216 0.2719881 0.38704381 2.547125
## 7 3.74341875 1.309544 -0.3485724 0.59466301 1.559301
## 8 1.30256930 1.582027 0.2518478 0.47248675 3.744598
## 9 5.08082281 1.334346 2.1612077 1.03143892 2.035433
## 10 4.56935037 3.028544 1.9590278 1.04454485 2.227454
## GVA_INDUSTRY_p_TV GVA_SERVICES_p_TV Motorcycles_p_TV DENSITY_TV Local_R2
## 1 4.178773 3.571754 -1.3618605 -4.866840 0.6734432
## 2 8.488854 1.380911 -2.2537699 -6.389739 0.7952409
## 3 2.950334 2.545614 0.1436675 -3.227690 0.6449241
## 4 5.452418 -2.732914 -2.5355392 -2.211408 0.5416894
## 5 4.532627 4.844622 1.0565465 -3.344690 0.9357458
## 6 2.447888 2.348301 0.7275087 -2.193627 0.9699173
## 7 2.490680 1.841618 1.6642591 -1.743281 0.9594300
## 8 4.136772 2.442072 1.2390256 -2.687730 0.9528288
## 9 7.228072 3.867561 1.2557731 -5.667254 0.7270545
## 10 8.889321 3.277322 -4.3420789 -6.883471 0.7999090
## coords.x1 coords.x2 geometry
## 1 4283475 8120684 POINT (4283475 8120684)
## 2 4510843 7920105 POINT (4510843 7920105)
## 3 4363770 8187113 POINT (4363770 8187113)
## 4 4727856 7842028 POINT (4727856 7842028)
## 5 4345348 9809515 POINT (4345348 9809515)
## 6 5439719 9184727 POINT (5439719 9184727)
## 7 5148899 8521972 POINT (5148899 8521972)
## 8 5432038 9032202 POINT (5432038 9032202)
## 9 4186466 7350101 POINT (4186466 7350101)
## 10 4107171 6821956 POINT (4107171 6821956)
Check for any empty geometry after joining
# TRUE when at least one feature reports an NA dimension, i.e. the join left
# an empty geometry behind.
anyNA(st_dimension(municipality.sf.fixed))
## [1] FALSE
# Same empty-geometry check for the polygon layer used for mapping.
anyNA(st_dimension(plot_municipality.sf.fixed))
## [1] FALSE
# Distribution summary of the fitted values (yhat) held in the model's SDF.
gwr.fixed.AIC_G[["SDF"]]$yhat %>%
  summary()
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 8.422 9.132 9.739 9.694 10.164 12.973
# Choropleth of the Local_R2 column: quantile classes on a red ramp, polygon
# borders fully transparent, with a compass in the lower-left corner.
tm_shape(shp = plot_municipality.sf.fixed) +
  tm_polygons(
    col = "Local_R2",
    style = "quantile",
    palette = "Reds",
    border.alpha = 0
  ) +
  tm_compass(type = "4star", position = c("left", "bottom")) +
  tm_layout(
    main.title = "Distribution of the Local R2 values of the regression explainatory model",
    main.title.size = 0.65,
    frame = FALSE
  )
The local R2 values range from 0.460 to 1. Local R2 shows how well the local regression model fits the observed y values; low values mean that the local model is performing poorly. Mapping the local R2 values onto the Brazil municipality polygons therefore shows where GWR predicts well and where it does not, which can provide insight into which variables affect the regression model.
From the choropleth map, the municipalities in the outer part of Brazil have larger local R2 values than the municipalities in the inner part of Brazil. This means that, for the outer municipalities, the selected independent variables in the explanatory model predict GDP per capita better. The municipalities in the inner part of Brazil, however, would perhaps need other independent variables to explain the factors affecting GDP per capita.
# Choropleth of the local residuals, classed by quantiles.
# Residuals are signed, so a diverging ramp centred on zero is used instead of
# a sequential one: the colour then shows the sign of the residual as well as
# its magnitude. "-RdBu" draws negative residuals in blue and positive
# residuals in red.
tm_shape(plot_municipality.sf.fixed) +
  tm_polygons(
    col = "residual",
    style = "quantile",
    palette = "-RdBu",
    midpoint = 0,
    border.alpha = 0
  ) +
  tm_compass(type = "4star", position = c("left", "bottom")) +
  tm_layout(
    main.title = "Distribution of the local residual of the regression explainatory model",
    main.title.size = 0.65,
    frame = FALSE
  )
The residual values are obtained by subtracting the fitted y values from the observed y values. The plotted residuals are mixed: some municipalities fall in the range -0.859 to 0.026, and others in the range 0.026 to 2.120. Therefore, the regression explanatory model needs some revision of the selected variables that affect the municipality GDP per capita.
# Choropleth of the fitted values (yhat column): quantile classes on a red
# ramp, polygon borders fully transparent, compass in the lower-left corner.
tm_shape(shp = plot_municipality.sf.fixed) +
  tm_polygons(
    col = "yhat",
    style = "quantile",
    palette = "Reds",
    border.alpha = 0
  ) +
  tm_compass(type = "4star", position = c("left", "bottom")) +
  tm_layout(
    main.title = "Distribution of the yhat values of the regression explainatory model",
    main.title.size = 0.65,
    frame = FALSE
  )
The plotted fitted (yhat) values are higher in most of the central municipalities than in some of the outer parts of Brazil. Therefore, the regression explanatory model needs some revision of the selected variables that affect the municipality GDP per capita.
# Distance-band neighbour list: points in `coords` are linked when they lie
# within the 0 to 363945.4 distance band. longlat = FALSE means distances are
# planar, in the units of the coordinates.
# NOTE(review): the 363945.4 threshold is hard-coded; presumably it comes from
# an earlier step of the analysis, confirm its origin.
dnb <- dnearneigh(x = coords, d1 = 0, d2 = 363945.4, longlat = FALSE)
print(dnb)
## Neighbour list object:
## Number of regions: 5556
## Number of nonzero links: 2685926
## Percentage nonzero weights: 8.701008
## Average number of links: 483.428
The unit of measurement for the distance threshold is metres.
# Turn the distance-band neighbour list into a spatial weights object using
# the binary ("B") coding scheme, then print its characteristics.
dnb_lw <- nb2listw(neighbours = dnb, style = "B")
summary(dnb_lw)
## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 5556
## Number of nonzero links: 2685926
## Percentage nonzero weights: 8.701008
## Average number of links: 483.428
## Link number distribution:
##
## 1 7 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
## 1 1 2 5 4 7 5 18 6 4 11 1 1 4 4 5 4 4 2 4
## 27 28 29 30 31 32 33 34 35 36 38 39 40 41 43 44 45 46 47 48
## 7 7 3 9 3 1 8 7 8 5 10 2 2 2 4 1 4 7 9 3
## 49 50 51 52 53 54 55 56 57 58 59 60 61 63 64 65 66 67 68 69
## 4 8 10 11 12 2 7 7 3 7 5 5 1 2 3 6 4 3 2 4
## 71 72 73 74 75 76 77 78 79 80 81 83 84 86 87 88 90 92 94 95
## 6 2 7 4 3 1 2 9 2 3 6 1 2 1 2 1 1 1 1 1
## 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115
## 2 2 6 6 1 6 2 1 4 1 4 3 3 4 4 2 3 1 4 2
## 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135
## 3 1 3 2 3 3 4 4 5 4 1 6 4 5 4 1 5 6 8 7
## 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 156
## 7 5 6 7 8 12 8 4 6 6 8 3 4 4 3 5 7 2 6 2
## 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 176 177
## 5 5 4 4 6 5 4 5 5 1 5 8 5 4 1 5 4 1 6 3
## 178 179 180 181 182 183 184 185 186 187 189 190 191 192 193 194 195 196 197 198
## 7 7 3 1 3 2 2 1 3 2 2 1 5 5 3 4 5 2 2 10
## 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218
## 4 3 2 2 6 3 1 2 6 6 6 5 2 5 3 8 7 5 4 5
## 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238
## 5 6 7 8 3 8 5 4 3 4 9 6 5 9 2 6 5 3 5 7
## 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258
## 6 7 7 9 6 4 6 8 11 5 6 3 3 8 4 2 14 5 9 8
## 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278
## 12 8 8 10 7 3 4 10 5 13 3 8 3 12 4 5 9 3 4 1
## 279 280 281 282 283 284 285 286 287 288 289 290 291 292 294 295 296 297 298 299
## 4 3 7 2 3 6 4 3 3 6 5 4 9 5 2 8 4 8 4 8
## 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319
## 6 11 4 4 4 4 4 8 6 9 3 10 3 2 5 5 11 9 14 5
## 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339
## 8 7 6 4 11 7 4 7 4 8 6 3 11 4 2 9 6 8 6 12
## 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359
## 9 7 5 8 7 10 7 6 10 11 3 10 8 7 2 5 8 6 11 10
## 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379
## 6 6 13 8 13 16 7 13 7 12 10 12 9 11 11 6 9 1 6 5
## 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399
## 7 12 8 5 6 5 4 7 7 8 4 8 8 4 7 4 8 5 6 3
## 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419
## 5 10 5 12 11 11 3 6 4 5 9 3 5 8 4 6 4 7 8 4
## 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439
## 3 4 7 4 2 8 2 6 6 1 8 4 4 5 4 3 6 4 2 4
## 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459
## 3 5 2 6 7 4 4 5 7 5 4 6 6 5 7 3 7 4 10 5
## 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479
## 8 7 5 6 6 6 3 4 7 5 10 2 8 8 9 2 10 8 6 5
## 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499
## 2 1 3 11 7 4 5 3 6 8 5 4 6 4 5 9 6 4 6 5
## 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519
## 6 6 8 7 9 6 8 9 3 6 5 8 7 8 7 5 10 5 3 5
## 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539
## 3 12 5 4 4 7 9 5 5 6 10 9 11 6 8 4 7 7 13 3
## 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559
## 4 9 6 4 7 6 12 12 8 10 7 12 6 10 15 13 12 10 5 8
## 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579
## 4 11 13 12 8 13 12 8 9 7 10 13 6 6 7 7 11 9 4 7
## 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599
## 8 9 10 9 13 15 9 7 11 8 9 9 13 13 6 8 5 6 9 14
## 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619
## 9 7 11 7 10 15 11 11 11 9 13 10 13 13 10 12 6 13 14 8
## 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639
## 17 14 6 12 10 16 4 12 8 13 10 12 11 14 10 9 14 7 9 8
## 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659
## 18 9 9 7 8 14 14 15 6 9 16 11 10 7 10 10 9 13 15 6
## 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679
## 8 7 14 9 15 8 7 13 12 8 9 12 15 4 14 4 15 12 19 12
## 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699
## 11 11 11 5 9 10 13 13 6 14 12 7 5 8 12 7 5 10 7 19
## 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719
## 15 9 2 12 7 7 6 11 8 7 10 13 5 9 9 18 7 15 4 11
## 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739
## 13 6 13 8 6 10 8 13 10 9 9 6 7 6 13 11 9 9 5 11
## 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759
## 11 9 6 5 10 14 7 9 6 3 7 8 11 8 12 9 7 8 10 3
## 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779
## 6 11 11 14 6 7 6 12 7 5 9 11 9 5 11 2 10 8 8 8
## 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799
## 16 8 8 11 9 8 1 7 8 11 8 6 8 8 8 4 10 9 5 7
## 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819
## 15 10 8 7 5 5 4 8 8 6 7 6 6 4 14 6 5 6 6 8
## 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839
## 6 5 4 4 3 4 5 1 9 5 4 6 7 5 2 4 3 1 2 4
## 840 841 843 844 846 847 848 853
## 1 1 3 1 1 1 3 1
## 1 least connected region:
## 1769 with 1 link
## 1 most connected region:
## 727 with 853 links
##
## Weights style: B
## Weights constants summary:
## n nn S0 S1 S2
## B 5556 30869136 2685926 5371852 6323372784
# Ordering of the municipalities by name. It is not used by the statements
# below. NOTE(review): confirm whether later code relies on it.
fips <- order(plot_municipality.sf.fixed$name_muni)

# Local G statistic of the Local_R2 values under the distance-band binary
# weights built above.
gi.fixed <- localG(plot_municipality.sf.fixed$Local_R2, dnb_lw)

# Bind the statistic to the polygons as a column that is named "gstat" at
# creation time. Naming it here, rather than renaming a hard-coded column
# position afterwards, keeps the result correct if the number of columns in
# plot_municipality.sf.fixed changes.
municipality.gi <- cbind(
  plot_municipality.sf.fixed,
  gstat = as.numeric(gi.fixed)
)
# Map of the gstat column on a reversed red-blue diverging ramp with "pretty"
# class breaks, overlaid with faint polygon borders.
tm_shape(shp = municipality.gi) +
  tm_fill(
    col = "gstat",
    style = "pretty",
    palette = "-RdBu",
    title = "local Gi"
  ) +
  tm_borders(alpha = 0.1)
## Variable(s) "gstat" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
In the hot and cold spot analysis of the local R2 values, the hot spots in the municipalities from the upper right to the left outer part of Brazil show that the regression explanatory model, using GDP per capita and the selected independent variables, predicts better in those municipalities. In contrast, the cold spots in the inner municipalities of Brazil would need other independent variables to explain the factors affecting GDP per capita.