Note: I set every chunk that displays a tmap to eval=FALSE, because knitting the file to HTML was so slow that it could not be completed. All the code chunks work fine when executed individually.
In this take-home exercise, you are tasked to determine factors affecting the unequal development of Brazil at the municipality level by using the data provided. The specific tasks of the analysis are as follows: * Prepare a choropleth map showing the distribution of GDP per capita, 2016 at municipality level. * Calibrate an explanatory model to explain factors affecting the GDP per capita at the municipality level by using multiple linear regression method. * Prepare a choropleth map showing the distribution of the residual of the GDP per capita. * Calibrate an explanatory model to explain factors affecting the GDP per capita at the municipality level by using geographically weighted regression method. * Prepare a series of choropleth maps showing the outputs of the geographically weighted regression model.
# Packages attached for this analysis.
packages <- c(
  "olsrr", "corrplot", "ggpubr", "sf", "spdep", "GWmodel",
  "tmap", "tidyverse", "geobr", "heatmaply"
)
# Install whichever packages are missing, then attach every package.
for (p in packages) {
  # requireNamespace() only tests availability; library() below does the
  # attaching and stops with an error if the package still cannot be loaded.
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p)
  }
  library(p, character.only = TRUE)
}
# Fetch the 2016 municipality boundaries through geobr.
library("geobr")
muni <- read_municipality(year = 2016)
Note: I ran into errors when trying to use the above geobr functions (“SSL certificate problem: certificate has expired/Error in read.table(file = file, header = header, sep = sep, quote = quote, : no lines available in input”). According to the user guide, this seems to be a problem with the macOS system that I am using, so I installed gdal using Homebrew as instructed and tried to debug, but the error persisted. Therefore, I went to the data source (IBGE) provided by geobr’s developer, downloaded the municipality shapefile, and manually imported it in the code chunk below.
# Read the municipality boundary layer from the local geospatial folder.
muni <- st_read(
  dsn = "data/geospatial",
  layer = "BRMUE250GC_SIR"
)
## Reading layer `BRMUE250GC_SIR' from data source `/Users/tianyunzhao/Desktop/IS415/Take-home exercise/Take-home_Ex04/Take-home_Ex04/data/geospatial' using driver `ESRI Shapefile'
## Simple feature collection with 5572 features and 2 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -73.99045 ymin: -33.75118 xmax: -28.83591 ymax: 5.271841
## CRS: 4674
Checking CRS:
# Print the coordinate reference system recorded on the municipality layer.
st_crs(muni)
## Coordinate Reference System:
## User input: 4674
## wkt:
## GEOGCS["SIRGAS 2000",
## DATUM["Sistema_de_Referencia_Geocentrico_para_las_AmericaS_2000",
## SPHEROID["GRS 1980",6378137,298.257222101,
## AUTHORITY["EPSG","7019"]],
## TOWGS84[0,0,0,0,0,0,0],
## AUTHORITY["EPSG","6674"]],
## PRIMEM["Greenwich",0,
## AUTHORITY["EPSG","8901"]],
## UNIT["degree",0.0174532925199433,
## AUTHORITY["EPSG","9122"]],
## AUTHORITY["EPSG","4674"]]
CRS: 4674
Make sure all geometry is valid:
# TRUE only when every municipality geometry passes the validity check.
muni %>%
  st_is_valid() %>%
  all()
## [1] TRUE
Check the extent of muni:
# Print the bounding box (xmin, ymin, xmax, ymax) of the municipality layer.
st_bbox(muni)
## xmin ymin xmax ymax
## -73.990450 -33.751178 -28.835908 5.271841
# Load the semicolon-delimited city attribute table.
cities <- read_delim(
  file = "data/aspatial/BRAZIL_CITIES.csv",
  delim = ";"
)
## Parsed with column specification:
## cols(
## .default = col_double(),
## CITY = col_character(),
## STATE = col_character(),
## AREA = col_number(),
## REGIAO_TUR = col_character(),
## CATEGORIA_TUR = col_character(),
## RURAL_URBAN = col_character(),
## GVA_MAIN = col_character()
## )
## See spec(...) for full column specifications.
read_csv2() uses ; for the field separator and , for the decimal point. This is common in some European countries.
# Load the data dictionary that describes the columns of the city table.
dict <- read_csv2(file = "data/aspatial/Data_Dictionary.csv")
## Using ',' as decimal and '.' as grouping mark. Use read_delim() for more control.
## Warning: Missing column names filled in: 'X6' [6]
## Parsed with column specification:
## cols(
## FIELD = col_character(),
## DESCRIPTION = col_character(),
## REFERENCE = col_character(),
## UNIT = col_character(),
## SOURCE = col_character(),
## X6 = col_character()
## )
Check the dictionary to get an understanding of the column headers:
# Open the data dictionary in the data viewer.
# NOTE(review): View() is intended for interactive sessions; confirm it behaves
# as wanted when the document is knitted non-interactively.
View(dict)
Checking for NA:
# Per-column summary of the city table; the NA's rows expose missing values.
summary(cities)
## CITY STATE CAPITAL IBGE_RES_POP
## Length:5573 Length:5573 Min. :0.000000 Min. : 805
## Class :character Class :character 1st Qu.:0.000000 1st Qu.: 5235
## Mode :character Mode :character Median :0.000000 Median : 10934
## Mean :0.004845 Mean : 34278
## 3rd Qu.:0.000000 3rd Qu.: 23424
## Max. :1.000000 Max. :11253503
## NA's :8
## IBGE_RES_POP_BRAS IBGE_RES_POP_ESTR IBGE_DU IBGE_DU_URBAN
## Min. : 805 Min. : 0.0 Min. : 239 Min. : 60
## 1st Qu.: 5230 1st Qu.: 0.0 1st Qu.: 1572 1st Qu.: 874
## Median : 10926 Median : 0.0 Median : 3174 Median : 1846
## Mean : 34200 Mean : 77.5 Mean : 10303 Mean : 8859
## 3rd Qu.: 23390 3rd Qu.: 10.0 3rd Qu.: 6726 3rd Qu.: 4624
## Max. :11133776 Max. :119727.0 Max. :3576148 Max. :3548433
## NA's :8 NA's :8 NA's :10 NA's :10
## IBGE_DU_RURAL IBGE_POP IBGE_1 IBGE_1-4
## Min. : 3 Min. : 174 Min. : 0.0 Min. : 5
## 1st Qu.: 487 1st Qu.: 2801 1st Qu.: 38.0 1st Qu.: 158
## Median : 931 Median : 6170 Median : 92.0 Median : 376
## Mean : 1463 Mean : 27595 Mean : 383.3 Mean : 1544
## 3rd Qu.: 1832 3rd Qu.: 15302 3rd Qu.: 232.0 3rd Qu.: 951
## Max. :33809 Max. :10463636 Max. :129464.0 Max. :514794
## NA's :81 NA's :8 NA's :8 NA's :8
## IBGE_5-9 IBGE_10-14 IBGE_15-59 IBGE_60+
## Min. : 7 Min. : 12 Min. : 94 Min. : 29
## 1st Qu.: 220 1st Qu.: 259 1st Qu.: 1734 1st Qu.: 341
## Median : 516 Median : 588 Median : 3841 Median : 722
## Mean : 2069 Mean : 2381 Mean : 18212 Mean : 3004
## 3rd Qu.: 1300 3rd Qu.: 1478 3rd Qu.: 9628 3rd Qu.: 1724
## Max. :684443 Max. :783702 Max. :7058221 Max. :1293012
## NA's :8 NA's :8 NA's :8 NA's :8
## IBGE_PLANTED_AREA IBGE_CROP_PRODUCTION_$ IDHM Ranking 2010 IDHM
## Min. : 0.0 Min. : 0 Min. : 1 Min. :0.4180
## 1st Qu.: 910.2 1st Qu.: 2326 1st Qu.:1392 1st Qu.:0.5990
## Median : 3471.5 Median : 13846 Median :2783 Median :0.6650
## Mean : 14179.9 Mean : 57384 Mean :2783 Mean :0.6592
## 3rd Qu.: 11194.2 3rd Qu.: 55619 3rd Qu.:4174 3rd Qu.:0.7180
## Max. :1205669.0 Max. :3274885 Max. :5565 Max. :0.8620
## NA's :3 NA's :3 NA's :8 NA's :8
## IDHM_Renda IDHM_Longevidade IDHM_Educacao LONG
## Min. :0.4000 Min. :0.6720 Min. :0.2070 Min. :-72.92
## 1st Qu.:0.5720 1st Qu.:0.7690 1st Qu.:0.4900 1st Qu.:-50.87
## Median :0.6540 Median :0.8080 Median :0.5600 Median :-46.52
## Mean :0.6429 Mean :0.8016 Mean :0.5591 Mean :-46.23
## 3rd Qu.:0.7070 3rd Qu.:0.8360 3rd Qu.:0.6310 3rd Qu.:-41.40
## Max. :0.8910 Max. :0.8940 Max. :0.8250 Max. :-32.44
## NA's :8 NA's :8 NA's :8 NA's :9
## LAT ALT PAY_TV FIXED_PHONES
## Min. :-33.688 Min. : 0.0 Min. : 1 Min. : 3
## 1st Qu.:-22.838 1st Qu.: 169.8 1st Qu.: 88 1st Qu.: 119
## Median :-18.089 Median : 406.5 Median : 247 Median : 327
## Mean :-16.444 Mean : 893.8 Mean : 3094 Mean : 6567
## 3rd Qu.: -8.489 3rd Qu.: 628.9 3rd Qu.: 815 3rd Qu.: 1151
## Max. : 4.585 Max. :874579.0 Max. :2047668 Max. :5543127
## NA's :9 NA's :9 NA's :3 NA's :3
## AREA REGIAO_TUR CATEGORIA_TUR ESTIMATED_POP
## Min. : 3.57 Length:5573 Length:5573 Min. : 786
## 1st Qu.: 204.44 Class :character Class :character 1st Qu.: 5454
## Median : 416.59 Mode :character Mode :character Median : 11590
## Mean : 1517.44 Mean : 37432
## 3rd Qu.: 1026.57 3rd Qu.: 25296
## Max. :159533.33 Max. :12176866
## NA's :3 NA's :3
## RURAL_URBAN GVA_AGROPEC GVA_INDUSTRY GVA_SERVICES
## Length:5573 Min. : 0 Min. : 1 Min. : 2
## Class :character 1st Qu.: 4189 1st Qu.: 1726 1st Qu.: 10112
## Mode :character Median : 20426 Median : 7424 Median : 31211
## Mean : 47271 Mean : 175928 Mean : 489451
## 3rd Qu.: 51227 3rd Qu.: 41022 3rd Qu.: 115406
## Max. :1402282 Max. :63306755 Max. :464656988
## NA's :3 NA's :3 NA's :3
## GVA_PUBLIC GVA_TOTAL TAXES GDP
## Min. : 7 Min. : 17 Min. : -14159 Min. : 15
## 1st Qu.: 17267 1st Qu.: 42253 1st Qu.: 1305 1st Qu.: 43709
## Median : 35866 Median : 119492 Median : 5100 Median : 125153
## Mean : 123768 Mean : 832987 Mean : 118864 Mean : 954584
## 3rd Qu.: 89245 3rd Qu.: 313963 3rd Qu.: 22197 3rd Qu.: 329539
## Max. :41902893 Max. :569910503 Max. :117125387 Max. :687035890
## NA's :3 NA's :3 NA's :3 NA's :3
## POP_GDP GDP_CAPITA GVA_MAIN MUN_EXPENDIT
## Min. : 815 Min. : 3191 Length:5573 Min. :1.421e+06
## 1st Qu.: 5483 1st Qu.: 9058 Class :character 1st Qu.:1.573e+07
## Median : 11578 Median : 15870 Mode :character Median :2.746e+07
## Mean : 36998 Mean : 21126 Mean :1.043e+08
## 3rd Qu.: 25085 3rd Qu.: 26155 3rd Qu.:5.666e+07
## Max. :12038175 Max. :314638 Max. :4.577e+10
## NA's :3 NA's :3 NA's :1492
## COMP_TOT COMP_A COMP_B COMP_C
## Min. : 6.0 Min. : 0.00 Min. : 0.000 Min. : 0.00
## 1st Qu.: 68.0 1st Qu.: 1.00 1st Qu.: 0.000 1st Qu.: 3.00
## Median : 162.0 Median : 2.00 Median : 0.000 Median : 11.00
## Mean : 906.8 Mean : 18.25 Mean : 1.852 Mean : 73.44
## 3rd Qu.: 448.0 3rd Qu.: 8.00 3rd Qu.: 2.000 3rd Qu.: 39.00
## Max. :530446.0 Max. :1948.00 Max. :274.000 Max. :31566.00
## NA's :3 NA's :3 NA's :3 NA's :3
## COMP_D COMP_E COMP_F COMP_G
## Min. : 0.0000 Min. : 0.000 Min. : 0.00 Min. : 1.0
## 1st Qu.: 0.0000 1st Qu.: 0.000 1st Qu.: 1.00 1st Qu.: 32.0
## Median : 0.0000 Median : 0.000 Median : 4.00 Median : 74.5
## Mean : 0.4262 Mean : 2.029 Mean : 43.26 Mean : 348.0
## 3rd Qu.: 0.0000 3rd Qu.: 1.000 3rd Qu.: 15.00 3rd Qu.: 199.0
## Max. :332.0000 Max. :657.000 Max. :25222.00 Max. :150633.0
## NA's :3 NA's :3 NA's :3 NA's :3
## COMP_H COMP_I COMP_J COMP_K
## Min. : 0 Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 1 1st Qu.: 2.00 1st Qu.: 0.00 1st Qu.: 0.00
## Median : 7 Median : 7.00 Median : 1.00 Median : 0.00
## Mean : 41 Mean : 55.88 Mean : 24.74 Mean : 15.55
## 3rd Qu.: 25 3rd Qu.: 24.00 3rd Qu.: 5.00 3rd Qu.: 2.00
## Max. :19515 Max. :29290.00 Max. :38720.00 Max. :23738.00
## NA's :3 NA's :3 NA's :3 NA's :3
## COMP_L COMP_M COMP_N COMP_O
## Min. : 0.00 Min. : 0.00 Min. : 0.0 Min. : 0.000
## 1st Qu.: 0.00 1st Qu.: 1.00 1st Qu.: 1.0 1st Qu.: 2.000
## Median : 0.00 Median : 4.00 Median : 4.0 Median : 2.000
## Mean : 15.14 Mean : 51.29 Mean : 83.7 Mean : 3.269
## 3rd Qu.: 3.00 3rd Qu.: 13.00 3rd Qu.: 14.0 3rd Qu.: 3.000
## Max. :14003.00 Max. :49181.00 Max. :76757.0 Max. :204.000
## NA's :3 NA's :3 NA's :3 NA's :3
## COMP_P COMP_Q COMP_R COMP_S
## Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 2.00 1st Qu.: 1.00 1st Qu.: 0.00 1st Qu.: 5.00
## Median : 6.00 Median : 3.00 Median : 2.00 Median : 12.00
## Mean : 30.96 Mean : 34.15 Mean : 12.18 Mean : 51.61
## 3rd Qu.: 17.00 3rd Qu.: 12.00 3rd Qu.: 6.00 3rd Qu.: 31.00
## Max. :16030.00 Max. :22248.00 Max. :6687.00 Max. :24832.00
## NA's :3 NA's :3 NA's :3 NA's :3
## COMP_T COMP_U HOTELS BEDS
## Min. :0 Min. : 0.00000 Min. : 1.000 Min. : 2.0
## 1st Qu.:0 1st Qu.: 0.00000 1st Qu.: 1.000 1st Qu.: 40.0
## Median :0 Median : 0.00000 Median : 1.000 Median : 82.0
## Mean :0 Mean : 0.05027 Mean : 3.131 Mean : 257.5
## 3rd Qu.:0 3rd Qu.: 0.00000 3rd Qu.: 3.000 3rd Qu.: 200.0
## Max. :0 Max. :123.00000 Max. :97.000 Max. :13247.0
## NA's :3 NA's :3 NA's :4686 NA's :4686
## Pr_Agencies Pu_Agencies Pr_Bank Pu_Bank
## Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. :0.00
## 1st Qu.: 0.000 1st Qu.: 1.000 1st Qu.: 0.000 1st Qu.:1.00
## Median : 1.000 Median : 2.000 Median : 1.000 Median :2.00
## Mean : 3.383 Mean : 2.829 Mean : 1.312 Mean :1.58
## 3rd Qu.: 2.000 3rd Qu.: 2.000 3rd Qu.: 2.000 3rd Qu.:2.00
## Max. :1693.000 Max. :626.000 Max. :83.000 Max. :8.00
## NA's :2231 NA's :2231 NA's :2231 NA's :2231
## Pr_Assets Pu_Assets Cars Motorcycles
## Min. :0.000e+00 Min. :0.000e+00 Min. : 2 Min. : 4
## 1st Qu.:0.000e+00 1st Qu.:4.047e+07 1st Qu.: 602 1st Qu.: 591
## Median :3.231e+07 Median :1.339e+08 Median : 1438 Median : 1285
## Mean :9.180e+09 Mean :6.005e+09 Mean : 9859 Mean : 4879
## 3rd Qu.:1.148e+08 3rd Qu.:4.970e+08 3rd Qu.: 4086 3rd Qu.: 3294
## Max. :1.947e+13 Max. :8.016e+12 Max. :5740995 Max. :1134570
## NA's :2231 NA's :2231 NA's :11 NA's :11
## Wheeled_tractor UBER MAC WAL-MART
## Min. : 0.000 Min. :1 Min. : 1.000 Min. : 1.000
## 1st Qu.: 0.000 1st Qu.:1 1st Qu.: 1.000 1st Qu.: 1.000
## Median : 0.000 Median :1 Median : 2.000 Median : 1.000
## Mean : 5.754 Mean :1 Mean : 4.277 Mean : 2.059
## 3rd Qu.: 1.000 3rd Qu.:1 3rd Qu.: 3.000 3rd Qu.: 1.750
## Max. :3236.000 Max. :1 Max. :130.000 Max. :26.000
## NA's :11 NA's :5448 NA's :5407 NA's :5471
## POST_OFFICES
## Min. : 1.000
## 1st Qu.: 1.000
## Median : 1.000
## Mean : 2.081
## 3rd Qu.: 2.000
## Max. :225.000
## NA's :120
We can see that there are 3 NA values for GDP_CAPITA, which is crucial for our analysis. Therefore, we proceed to drop them.
# Keep only the rows that have a GDP per capita value, then summarise again.
cities <- filter(cities, !is.na(GDP_CAPITA))
summary(cities)
## CITY STATE CAPITAL IBGE_RES_POP
## Length:5570 Length:5570 Min. :0.000000 Min. : 805
## Class :character Class :character 1st Qu.:0.000000 1st Qu.: 5235
## Mode :character Mode :character Median :0.000000 Median : 10934
## Mean :0.004847 Mean : 34278
## 3rd Qu.:0.000000 3rd Qu.: 23424
## Max. :1.000000 Max. :11253503
## NA's :5
## IBGE_RES_POP_BRAS IBGE_RES_POP_ESTR IBGE_DU IBGE_DU_URBAN
## Min. : 805 Min. : 0.0 Min. : 239 Min. : 60
## 1st Qu.: 5230 1st Qu.: 0.0 1st Qu.: 1572 1st Qu.: 874
## Median : 10926 Median : 0.0 Median : 3174 Median : 1846
## Mean : 34200 Mean : 77.5 Mean : 10303 Mean : 8859
## 3rd Qu.: 23390 3rd Qu.: 10.0 3rd Qu.: 6726 3rd Qu.: 4624
## Max. :11133776 Max. :119727.0 Max. :3576148 Max. :3548433
## NA's :5 NA's :5 NA's :7 NA's :7
## IBGE_DU_RURAL IBGE_POP IBGE_1 IBGE_1-4
## Min. : 3 Min. : 174 Min. : 0.0 Min. : 5
## 1st Qu.: 487 1st Qu.: 2801 1st Qu.: 38.0 1st Qu.: 158
## Median : 931 Median : 6170 Median : 92.0 Median : 376
## Mean : 1463 Mean : 27595 Mean : 383.3 Mean : 1544
## 3rd Qu.: 1832 3rd Qu.: 15302 3rd Qu.: 232.0 3rd Qu.: 951
## Max. :33809 Max. :10463636 Max. :129464.0 Max. :514794
## NA's :78 NA's :5 NA's :5 NA's :5
## IBGE_5-9 IBGE_10-14 IBGE_15-59 IBGE_60+
## Min. : 7 Min. : 12 Min. : 94 Min. : 29
## 1st Qu.: 220 1st Qu.: 259 1st Qu.: 1734 1st Qu.: 341
## Median : 516 Median : 588 Median : 3841 Median : 722
## Mean : 2069 Mean : 2381 Mean : 18212 Mean : 3004
## 3rd Qu.: 1300 3rd Qu.: 1478 3rd Qu.: 9628 3rd Qu.: 1724
## Max. :684443 Max. :783702 Max. :7058221 Max. :1293012
## NA's :5 NA's :5 NA's :5 NA's :5
## IBGE_PLANTED_AREA IBGE_CROP_PRODUCTION_$ IDHM Ranking 2010 IDHM
## Min. : 0.0 Min. : 0 Min. : 1 Min. :0.4180
## 1st Qu.: 910.2 1st Qu.: 2326 1st Qu.:1392 1st Qu.:0.5990
## Median : 3471.5 Median : 13846 Median :2782 Median :0.6650
## Mean : 14179.9 Mean : 57384 Mean :2783 Mean :0.6592
## 3rd Qu.: 11194.2 3rd Qu.: 55619 3rd Qu.:4173 3rd Qu.:0.7180
## Max. :1205669.0 Max. :3274885 Max. :5565 Max. :0.8620
## NA's :6 NA's :6
## IDHM_Renda IDHM_Longevidade IDHM_Educacao LONG
## Min. :0.4000 Min. :0.6720 Min. :0.2070 Min. :-72.92
## 1st Qu.:0.5720 1st Qu.:0.7690 1st Qu.:0.4900 1st Qu.:-50.87
## Median :0.6540 Median :0.8080 Median :0.5600 Median :-46.52
## Mean :0.6429 Mean :0.8016 Mean :0.5591 Mean :-46.23
## 3rd Qu.:0.7070 3rd Qu.:0.8360 3rd Qu.:0.6310 3rd Qu.:-41.41
## Max. :0.8910 Max. :0.8940 Max. :0.8250 Max. :-32.44
## NA's :6 NA's :6 NA's :6 NA's :7
## LAT ALT PAY_TV FIXED_PHONES
## Min. :-33.688 Min. : 0.0 Min. : 1 Min. : 3
## 1st Qu.:-22.838 1st Qu.: 169.7 1st Qu.: 88 1st Qu.: 119
## Median :-18.090 Median : 406.5 Median : 247 Median : 327
## Mean :-16.445 Mean : 894.0 Mean : 3095 Mean : 6568
## 3rd Qu.: -8.489 3rd Qu.: 629.0 3rd Qu.: 815 3rd Qu.: 1151
## Max. : 4.585 Max. :874579.0 Max. :2047668 Max. :5543127
## NA's :7 NA's :7 NA's :1 NA's :1
## AREA REGIAO_TUR CATEGORIA_TUR ESTIMATED_POP
## Min. : 3.57 Length:5570 Length:5570 Min. : 786
## 1st Qu.: 204.43 Class :character Class :character 1st Qu.: 5454
## Median : 415.92 Mode :character Mode :character Median : 11591
## Mean : 1515.89 Mean : 37437
## 3rd Qu.: 1026.38 3rd Qu.: 25297
## Max. :159533.33 Max. :12176866
## NA's :1 NA's :1
## RURAL_URBAN GVA_AGROPEC GVA_INDUSTRY GVA_SERVICES
## Length:5570 Min. : 0 Min. : 1 Min. : 2
## Class :character 1st Qu.: 4189 1st Qu.: 1726 1st Qu.: 10112
## Mode :character Median : 20426 Median : 7424 Median : 31211
## Mean : 47271 Mean : 175928 Mean : 489451
## 3rd Qu.: 51227 3rd Qu.: 41022 3rd Qu.: 115406
## Max. :1402282 Max. :63306755 Max. :464656988
##
## GVA_PUBLIC GVA_TOTAL TAXES GDP
## Min. : 7 Min. : 17 Min. : -14159 Min. : 15
## 1st Qu.: 17267 1st Qu.: 42253 1st Qu.: 1305 1st Qu.: 43709
## Median : 35866 Median : 119492 Median : 5100 Median : 125153
## Mean : 123768 Mean : 832987 Mean : 118864 Mean : 954584
## 3rd Qu.: 89245 3rd Qu.: 313963 3rd Qu.: 22197 3rd Qu.: 329539
## Max. :41902893 Max. :569910503 Max. :117125387 Max. :687035890
##
## POP_GDP GDP_CAPITA GVA_MAIN MUN_EXPENDIT
## Min. : 815 Min. : 3191 Length:5570 Min. :1.421e+06
## 1st Qu.: 5483 1st Qu.: 9058 Class :character 1st Qu.:1.573e+07
## Median : 11578 Median : 15870 Mode :character Median :2.746e+07
## Mean : 36998 Mean : 21126 Mean :1.043e+08
## 3rd Qu.: 25085 3rd Qu.: 26155 3rd Qu.:5.672e+07
## Max. :12038175 Max. :314638 Max. :4.577e+10
## NA's :1491
## COMP_TOT COMP_A COMP_B COMP_C
## Min. : 6.0 Min. : 0.00 Min. : 0.000 Min. : 0.00
## 1st Qu.: 68.0 1st Qu.: 1.00 1st Qu.: 0.000 1st Qu.: 3.00
## Median : 162.0 Median : 2.00 Median : 0.000 Median : 11.00
## Mean : 906.8 Mean : 18.25 Mean : 1.852 Mean : 73.44
## 3rd Qu.: 448.0 3rd Qu.: 8.00 3rd Qu.: 2.000 3rd Qu.: 39.00
## Max. :530446.0 Max. :1948.00 Max. :274.000 Max. :31566.00
##
## COMP_D COMP_E COMP_F COMP_G
## Min. : 0.0000 Min. : 0.000 Min. : 0.00 Min. : 1.0
## 1st Qu.: 0.0000 1st Qu.: 0.000 1st Qu.: 1.00 1st Qu.: 32.0
## Median : 0.0000 Median : 0.000 Median : 4.00 Median : 74.5
## Mean : 0.4262 Mean : 2.029 Mean : 43.26 Mean : 348.0
## 3rd Qu.: 0.0000 3rd Qu.: 1.000 3rd Qu.: 15.00 3rd Qu.: 199.0
## Max. :332.0000 Max. :657.000 Max. :25222.00 Max. :150633.0
##
## COMP_H COMP_I COMP_J COMP_K
## Min. : 0 Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 1 1st Qu.: 2.00 1st Qu.: 0.00 1st Qu.: 0.00
## Median : 7 Median : 7.00 Median : 1.00 Median : 0.00
## Mean : 41 Mean : 55.88 Mean : 24.74 Mean : 15.55
## 3rd Qu.: 25 3rd Qu.: 24.00 3rd Qu.: 5.00 3rd Qu.: 2.00
## Max. :19515 Max. :29290.00 Max. :38720.00 Max. :23738.00
##
## COMP_L COMP_M COMP_N COMP_O
## Min. : 0.00 Min. : 0.00 Min. : 0.0 Min. : 0.000
## 1st Qu.: 0.00 1st Qu.: 1.00 1st Qu.: 1.0 1st Qu.: 2.000
## Median : 0.00 Median : 4.00 Median : 4.0 Median : 2.000
## Mean : 15.14 Mean : 51.29 Mean : 83.7 Mean : 3.269
## 3rd Qu.: 3.00 3rd Qu.: 13.00 3rd Qu.: 14.0 3rd Qu.: 3.000
## Max. :14003.00 Max. :49181.00 Max. :76757.0 Max. :204.000
##
## COMP_P COMP_Q COMP_R COMP_S
## Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 2.00 1st Qu.: 1.00 1st Qu.: 0.00 1st Qu.: 5.00
## Median : 6.00 Median : 3.00 Median : 2.00 Median : 12.00
## Mean : 30.96 Mean : 34.15 Mean : 12.18 Mean : 51.61
## 3rd Qu.: 17.00 3rd Qu.: 12.00 3rd Qu.: 6.00 3rd Qu.: 31.00
## Max. :16030.00 Max. :22248.00 Max. :6687.00 Max. :24832.00
##
## COMP_T COMP_U HOTELS BEDS
## Min. :0 Min. : 0.00000 Min. : 1.000 Min. : 2.0
## 1st Qu.:0 1st Qu.: 0.00000 1st Qu.: 1.000 1st Qu.: 40.0
## Median :0 Median : 0.00000 Median : 1.000 Median : 82.0
## Mean :0 Mean : 0.05027 Mean : 3.131 Mean : 257.5
## 3rd Qu.:0 3rd Qu.: 0.00000 3rd Qu.: 3.000 3rd Qu.: 200.0
## Max. :0 Max. :123.00000 Max. :97.000 Max. :13247.0
## NA's :4683 NA's :4683
## Pr_Agencies Pu_Agencies Pr_Bank Pu_Bank
## Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. :0.00
## 1st Qu.: 0.000 1st Qu.: 1.000 1st Qu.: 0.000 1st Qu.:1.00
## Median : 1.000 Median : 2.000 Median : 1.000 Median :2.00
## Mean : 3.383 Mean : 2.829 Mean : 1.312 Mean :1.58
## 3rd Qu.: 2.000 3rd Qu.: 2.000 3rd Qu.: 2.000 3rd Qu.:2.00
## Max. :1693.000 Max. :626.000 Max. :83.000 Max. :8.00
## NA's :2228 NA's :2228 NA's :2228 NA's :2228
## Pr_Assets Pu_Assets Cars Motorcycles
## Min. :0.000e+00 Min. :0.000e+00 Min. : 2 Min. : 4
## 1st Qu.:0.000e+00 1st Qu.:4.047e+07 1st Qu.: 602 1st Qu.: 591
## Median :3.231e+07 Median :1.339e+08 Median : 1440 Median : 1285
## Mean :9.180e+09 Mean :6.005e+09 Mean : 9861 Mean : 4879
## 3rd Qu.:1.148e+08 3rd Qu.:4.970e+08 3rd Qu.: 4086 3rd Qu.: 3295
## Max. :1.947e+13 Max. :8.016e+12 Max. :5740995 Max. :1134570
## NA's :2228 NA's :2228 NA's :9 NA's :9
## Wheeled_tractor UBER MAC WAL-MART
## Min. : 0.000 Min. :1 Min. : 1.000 Min. : 1.000
## 1st Qu.: 0.000 1st Qu.:1 1st Qu.: 1.000 1st Qu.: 1.000
## Median : 0.000 Median :1 Median : 2.000 Median : 1.000
## Mean : 5.755 Mean :1 Mean : 4.277 Mean : 2.059
## 3rd Qu.: 1.000 3rd Qu.:1 3rd Qu.: 3.000 3rd Qu.: 1.750
## Max. :3236.000 Max. :1 Max. :130.000 Max. :26.000
## NA's :9 NA's :5445 NA's :5404 NA's :5468
## POST_OFFICES
## Min. : 1.000
## 1st Qu.: 1.000
## Median : 1.000
## Mean : 2.081
## 3rd Qu.: 2.000
## Max. :225.000
## NA's :117
Then, we can see that there are 7 rows without LONG/LAT data. We proceed to look at the city names of these occurrences.
# List the rows whose longitude is missing.
cities %>%
  filter(is.na(LONG))
## # A tibble: 7 x 81
## CITY STATE CAPITAL IBGE_RES_POP IBGE_RES_POP_BR… IBGE_RES_POP_ES… IBGE_DU
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Baln… SC 0 NA NA NA NA
## 2 Moju… PA 0 NA NA NA NA
## 3 Para… MS 0 NA NA NA NA
## 4 Pesc… SC 0 NA NA NA NA
## 5 Pinh… RS 0 2130 2130 0 745
## 6 Pint… RS 0 NA NA NA NA
## 7 Sant… BA 0 9648 9648 0 2891
## # … with 74 more variables: IBGE_DU_URBAN <dbl>, IBGE_DU_RURAL <dbl>,
## # IBGE_POP <dbl>, IBGE_1 <dbl>, `IBGE_1-4` <dbl>, `IBGE_5-9` <dbl>,
## # `IBGE_10-14` <dbl>, `IBGE_15-59` <dbl>, `IBGE_60+` <dbl>,
## # IBGE_PLANTED_AREA <dbl>, `IBGE_CROP_PRODUCTION_$` <dbl>, `IDHM Ranking
## # 2010` <dbl>, IDHM <dbl>, IDHM_Renda <dbl>, IDHM_Longevidade <dbl>,
## # IDHM_Educacao <dbl>, LONG <dbl>, LAT <dbl>, ALT <dbl>, PAY_TV <dbl>,
## # FIXED_PHONES <dbl>, AREA <dbl>, REGIAO_TUR <chr>, CATEGORIA_TUR <chr>,
## # ESTIMATED_POP <dbl>, RURAL_URBAN <chr>, GVA_AGROPEC <dbl>,
## # GVA_INDUSTRY <dbl>, GVA_SERVICES <dbl>, GVA_PUBLIC <dbl>, ` GVA_TOTAL
## # ` <dbl>, TAXES <dbl>, GDP <dbl>, POP_GDP <dbl>, GDP_CAPITA <dbl>,
## # GVA_MAIN <chr>, MUN_EXPENDIT <dbl>, COMP_TOT <dbl>, COMP_A <dbl>,
## # COMP_B <dbl>, COMP_C <dbl>, COMP_D <dbl>, COMP_E <dbl>, COMP_F <dbl>,
## # COMP_G <dbl>, COMP_H <dbl>, COMP_I <dbl>, COMP_J <dbl>, COMP_K <dbl>,
## # COMP_L <dbl>, COMP_M <dbl>, COMP_N <dbl>, COMP_O <dbl>, COMP_P <dbl>,
## # COMP_Q <dbl>, COMP_R <dbl>, COMP_S <dbl>, COMP_T <dbl>, COMP_U <dbl>,
## # HOTELS <dbl>, BEDS <dbl>, Pr_Agencies <dbl>, Pu_Agencies <dbl>,
## # Pr_Bank <dbl>, Pu_Bank <dbl>, Pr_Assets <dbl>, Pu_Assets <dbl>, Cars <dbl>,
## # Motorcycles <dbl>, Wheeled_tractor <dbl>, UBER <dbl>, MAC <dbl>,
## # `WAL-MART` <dbl>, POST_OFFICES <dbl>
Then, using the city names, I am able to obtain the LONGLAT data from https://www.latlong.net/, and then manually fill in the coordinates:
# Manually looked-up coordinates for the rows that lack LONG/LAT.
# Each fix is keyed on CITY *and* STATE (states taken from the listing of the
# rows with missing LONG) because a municipality name can repeat across
# states; matching on the name alone would overwrite the valid coordinates of
# any same-named municipality elsewhere.
coord_fixes <- data.frame(
  CITY = c(
    "Balneário Rincão", "Mojuí Dos Campos", "Paraíso Das Águas",
    "Pescaria Brava", "Pinhal Da Serra", "Pinto Bandeira",
    "Santa Terezinha"
  ),
  STATE = c("SC", "PA", "MS", "SC", "RS", "RS", "BA"),
  LONG = c(
    -49.221458, -54.643299, -53.012230,
    -48.886219, -51.170410, -51.450291,
    -50.510399
  ),
  LAT = c(
    -28.819550, -2.682650, -19.023310,
    -28.396391, -27.872141, -29.099859,
    -10.475950
  ),
  stringsAsFactors = FALSE
)
# NOTE(review): the "Santa Terezinha" pair (-50.510399, -10.475950) looks like
# it may be the location of a same-named municipality outside BA -- verify
# against the source before relying on it.
for (i in seq_len(nrow(coord_fixes))) {
  # Only touch rows of this city/state that are still missing a coordinate;
  # which() drops NA comparisons so they can never select a row.
  needs_fix <- which(
    cities$CITY == coord_fixes$CITY[i] &
      cities$STATE == coord_fixes$STATE[i] &
      (is.na(cities$LONG) | is.na(cities$LAT))
  )
  cities$LONG[needs_fix] <- coord_fixes$LONG[i]
  cities$LAT[needs_fix] <- coord_fixes$LAT[i]
}
Checking again:
# Re-run the summary to confirm LONG and LAT no longer report NA's.
summary(cities)
## CITY STATE CAPITAL IBGE_RES_POP
## Length:5570 Length:5570 Min. :0.000000 Min. : 805
## Class :character Class :character 1st Qu.:0.000000 1st Qu.: 5235
## Mode :character Mode :character Median :0.000000 Median : 10934
## Mean :0.004847 Mean : 34278
## 3rd Qu.:0.000000 3rd Qu.: 23424
## Max. :1.000000 Max. :11253503
## NA's :5
## IBGE_RES_POP_BRAS IBGE_RES_POP_ESTR IBGE_DU IBGE_DU_URBAN
## Min. : 805 Min. : 0.0 Min. : 239 Min. : 60
## 1st Qu.: 5230 1st Qu.: 0.0 1st Qu.: 1572 1st Qu.: 874
## Median : 10926 Median : 0.0 Median : 3174 Median : 1846
## Mean : 34200 Mean : 77.5 Mean : 10303 Mean : 8859
## 3rd Qu.: 23390 3rd Qu.: 10.0 3rd Qu.: 6726 3rd Qu.: 4624
## Max. :11133776 Max. :119727.0 Max. :3576148 Max. :3548433
## NA's :5 NA's :5 NA's :7 NA's :7
## IBGE_DU_RURAL IBGE_POP IBGE_1 IBGE_1-4
## Min. : 3 Min. : 174 Min. : 0.0 Min. : 5
## 1st Qu.: 487 1st Qu.: 2801 1st Qu.: 38.0 1st Qu.: 158
## Median : 931 Median : 6170 Median : 92.0 Median : 376
## Mean : 1463 Mean : 27595 Mean : 383.3 Mean : 1544
## 3rd Qu.: 1832 3rd Qu.: 15302 3rd Qu.: 232.0 3rd Qu.: 951
## Max. :33809 Max. :10463636 Max. :129464.0 Max. :514794
## NA's :78 NA's :5 NA's :5 NA's :5
## IBGE_5-9 IBGE_10-14 IBGE_15-59 IBGE_60+
## Min. : 7 Min. : 12 Min. : 94 Min. : 29
## 1st Qu.: 220 1st Qu.: 259 1st Qu.: 1734 1st Qu.: 341
## Median : 516 Median : 588 Median : 3841 Median : 722
## Mean : 2069 Mean : 2381 Mean : 18212 Mean : 3004
## 3rd Qu.: 1300 3rd Qu.: 1478 3rd Qu.: 9628 3rd Qu.: 1724
## Max. :684443 Max. :783702 Max. :7058221 Max. :1293012
## NA's :5 NA's :5 NA's :5 NA's :5
## IBGE_PLANTED_AREA IBGE_CROP_PRODUCTION_$ IDHM Ranking 2010 IDHM
## Min. : 0.0 Min. : 0 Min. : 1 Min. :0.4180
## 1st Qu.: 910.2 1st Qu.: 2326 1st Qu.:1392 1st Qu.:0.5990
## Median : 3471.5 Median : 13846 Median :2782 Median :0.6650
## Mean : 14179.9 Mean : 57384 Mean :2783 Mean :0.6592
## 3rd Qu.: 11194.2 3rd Qu.: 55619 3rd Qu.:4173 3rd Qu.:0.7180
## Max. :1205669.0 Max. :3274885 Max. :5565 Max. :0.8620
## NA's :6 NA's :6
## IDHM_Renda IDHM_Longevidade IDHM_Educacao LONG
## Min. :0.4000 Min. :0.6720 Min. :0.2070 Min. :-72.92
## 1st Qu.:0.5720 1st Qu.:0.7690 1st Qu.:0.4900 1st Qu.:-50.87
## Median :0.6540 Median :0.8080 Median :0.5600 Median :-46.53
## Mean :0.6429 Mean :0.8016 Mean :0.5591 Mean :-46.24
## 3rd Qu.:0.7070 3rd Qu.:0.8360 3rd Qu.:0.6310 3rd Qu.:-41.42
## Max. :0.8910 Max. :0.8940 Max. :0.8250 Max. :-32.44
## NA's :6 NA's :6 NA's :6
## LAT ALT PAY_TV FIXED_PHONES
## Min. :-33.688 Min. : 0.0 Min. : 1 Min. : 3
## 1st Qu.:-22.840 1st Qu.: 169.7 1st Qu.: 88 1st Qu.: 119
## Median :-18.091 Median : 406.5 Median : 247 Median : 327
## Mean :-16.448 Mean : 894.0 Mean : 3095 Mean : 6568
## 3rd Qu.: -8.497 3rd Qu.: 629.0 3rd Qu.: 815 3rd Qu.: 1151
## Max. : 4.585 Max. :874579.0 Max. :2047668 Max. :5543127
## NA's :7 NA's :1 NA's :1
## AREA REGIAO_TUR CATEGORIA_TUR ESTIMATED_POP
## Min. : 3.57 Length:5570 Length:5570 Min. : 786
## 1st Qu.: 204.43 Class :character Class :character 1st Qu.: 5454
## Median : 415.92 Mode :character Mode :character Median : 11591
## Mean : 1515.89 Mean : 37437
## 3rd Qu.: 1026.38 3rd Qu.: 25297
## Max. :159533.33 Max. :12176866
## NA's :1 NA's :1
## RURAL_URBAN GVA_AGROPEC GVA_INDUSTRY GVA_SERVICES
## Length:5570 Min. : 0 Min. : 1 Min. : 2
## Class :character 1st Qu.: 4189 1st Qu.: 1726 1st Qu.: 10112
## Mode :character Median : 20426 Median : 7424 Median : 31211
## Mean : 47271 Mean : 175928 Mean : 489451
## 3rd Qu.: 51227 3rd Qu.: 41022 3rd Qu.: 115406
## Max. :1402282 Max. :63306755 Max. :464656988
##
## GVA_PUBLIC GVA_TOTAL TAXES GDP
## Min. : 7 Min. : 17 Min. : -14159 Min. : 15
## 1st Qu.: 17267 1st Qu.: 42253 1st Qu.: 1305 1st Qu.: 43709
## Median : 35866 Median : 119492 Median : 5100 Median : 125153
## Mean : 123768 Mean : 832987 Mean : 118864 Mean : 954584
## 3rd Qu.: 89245 3rd Qu.: 313963 3rd Qu.: 22197 3rd Qu.: 329539
## Max. :41902893 Max. :569910503 Max. :117125387 Max. :687035890
##
## POP_GDP GDP_CAPITA GVA_MAIN MUN_EXPENDIT
## Min. : 815 Min. : 3191 Length:5570 Min. :1.421e+06
## 1st Qu.: 5483 1st Qu.: 9058 Class :character 1st Qu.:1.573e+07
## Median : 11578 Median : 15870 Mode :character Median :2.746e+07
## Mean : 36998 Mean : 21126 Mean :1.043e+08
## 3rd Qu.: 25085 3rd Qu.: 26155 3rd Qu.:5.672e+07
## Max. :12038175 Max. :314638 Max. :4.577e+10
## NA's :1491
## COMP_TOT COMP_A COMP_B COMP_C
## Min. : 6.0 Min. : 0.00 Min. : 0.000 Min. : 0.00
## 1st Qu.: 68.0 1st Qu.: 1.00 1st Qu.: 0.000 1st Qu.: 3.00
## Median : 162.0 Median : 2.00 Median : 0.000 Median : 11.00
## Mean : 906.8 Mean : 18.25 Mean : 1.852 Mean : 73.44
## 3rd Qu.: 448.0 3rd Qu.: 8.00 3rd Qu.: 2.000 3rd Qu.: 39.00
## Max. :530446.0 Max. :1948.00 Max. :274.000 Max. :31566.00
##
## COMP_D COMP_E COMP_F COMP_G
## Min. : 0.0000 Min. : 0.000 Min. : 0.00 Min. : 1.0
## 1st Qu.: 0.0000 1st Qu.: 0.000 1st Qu.: 1.00 1st Qu.: 32.0
## Median : 0.0000 Median : 0.000 Median : 4.00 Median : 74.5
## Mean : 0.4262 Mean : 2.029 Mean : 43.26 Mean : 348.0
## 3rd Qu.: 0.0000 3rd Qu.: 1.000 3rd Qu.: 15.00 3rd Qu.: 199.0
## Max. :332.0000 Max. :657.000 Max. :25222.00 Max. :150633.0
##
## COMP_H COMP_I COMP_J COMP_K
## Min. : 0 Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 1 1st Qu.: 2.00 1st Qu.: 0.00 1st Qu.: 0.00
## Median : 7 Median : 7.00 Median : 1.00 Median : 0.00
## Mean : 41 Mean : 55.88 Mean : 24.74 Mean : 15.55
## 3rd Qu.: 25 3rd Qu.: 24.00 3rd Qu.: 5.00 3rd Qu.: 2.00
## Max. :19515 Max. :29290.00 Max. :38720.00 Max. :23738.00
##
## COMP_L COMP_M COMP_N COMP_O
## Min. : 0.00 Min. : 0.00 Min. : 0.0 Min. : 0.000
## 1st Qu.: 0.00 1st Qu.: 1.00 1st Qu.: 1.0 1st Qu.: 2.000
## Median : 0.00 Median : 4.00 Median : 4.0 Median : 2.000
## Mean : 15.14 Mean : 51.29 Mean : 83.7 Mean : 3.269
## 3rd Qu.: 3.00 3rd Qu.: 13.00 3rd Qu.: 14.0 3rd Qu.: 3.000
## Max. :14003.00 Max. :49181.00 Max. :76757.0 Max. :204.000
##
## COMP_P COMP_Q COMP_R COMP_S
## Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 2.00 1st Qu.: 1.00 1st Qu.: 0.00 1st Qu.: 5.00
## Median : 6.00 Median : 3.00 Median : 2.00 Median : 12.00
## Mean : 30.96 Mean : 34.15 Mean : 12.18 Mean : 51.61
## 3rd Qu.: 17.00 3rd Qu.: 12.00 3rd Qu.: 6.00 3rd Qu.: 31.00
## Max. :16030.00 Max. :22248.00 Max. :6687.00 Max. :24832.00
##
## COMP_T COMP_U HOTELS BEDS
## Min. :0 Min. : 0.00000 Min. : 1.000 Min. : 2.0
## 1st Qu.:0 1st Qu.: 0.00000 1st Qu.: 1.000 1st Qu.: 40.0
## Median :0 Median : 0.00000 Median : 1.000 Median : 82.0
## Mean :0 Mean : 0.05027 Mean : 3.131 Mean : 257.5
## 3rd Qu.:0 3rd Qu.: 0.00000 3rd Qu.: 3.000 3rd Qu.: 200.0
## Max. :0 Max. :123.00000 Max. :97.000 Max. :13247.0
## NA's :4683 NA's :4683
## Pr_Agencies Pu_Agencies Pr_Bank Pu_Bank
## Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. :0.00
## 1st Qu.: 0.000 1st Qu.: 1.000 1st Qu.: 0.000 1st Qu.:1.00
## Median : 1.000 Median : 2.000 Median : 1.000 Median :2.00
## Mean : 3.383 Mean : 2.829 Mean : 1.312 Mean :1.58
## 3rd Qu.: 2.000 3rd Qu.: 2.000 3rd Qu.: 2.000 3rd Qu.:2.00
## Max. :1693.000 Max. :626.000 Max. :83.000 Max. :8.00
## NA's :2228 NA's :2228 NA's :2228 NA's :2228
## Pr_Assets Pu_Assets Cars Motorcycles
## Min. :0.000e+00 Min. :0.000e+00 Min. : 2 Min. : 4
## 1st Qu.:0.000e+00 1st Qu.:4.047e+07 1st Qu.: 602 1st Qu.: 591
## Median :3.231e+07 Median :1.339e+08 Median : 1440 Median : 1285
## Mean :9.180e+09 Mean :6.005e+09 Mean : 9861 Mean : 4879
## 3rd Qu.:1.148e+08 3rd Qu.:4.970e+08 3rd Qu.: 4086 3rd Qu.: 3295
## Max. :1.947e+13 Max. :8.016e+12 Max. :5740995 Max. :1134570
## NA's :2228 NA's :2228 NA's :9 NA's :9
## Wheeled_tractor UBER MAC WAL-MART
## Min. : 0.000 Min. :1 Min. : 1.000 Min. : 1.000
## 1st Qu.: 0.000 1st Qu.:1 1st Qu.: 1.000 1st Qu.: 1.000
## Median : 0.000 Median :1 Median : 2.000 Median : 1.000
## Mean : 5.755 Mean :1 Mean : 4.277 Mean : 2.059
## 3rd Qu.: 1.000 3rd Qu.:1 3rd Qu.: 3.000 3rd Qu.: 1.750
## Max. :3236.000 Max. :1 Max. :130.000 Max. :26.000
## NA's :9 NA's :5445 NA's :5404 NA's :5468
## POST_OFFICES
## Min. : 1.000
## 1st Qu.: 1.000
## Median : 1.000
## Mean : 2.081
## 3rd Qu.: 2.000
## Max. :225.000
## NA's :117
Now, we can see that all LONGLAT and GDPPC are valid.
# Turn the LONG/LAT columns into point geometries declared as EPSG:4326,
# then transform them to EPSG:4674, the CRS reported for the muni layer.
cities.sf <- st_as_sf(cities, coords = c("LONG", "LAT"), crs = 4326)
cities.sf <- st_transform(cities.sf, crs = 4674)
# Print the resulting CRS as a check.
st_crs(cities.sf)
## Coordinate Reference System:
## User input: EPSG:4674
## wkt:
## GEOGCS["SIRGAS 2000",
## DATUM["Sistema_de_Referencia_Geocentrico_para_las_AmericaS_2000",
## SPHEROID["GRS 1980",6378137,298.257222101,
## AUTHORITY["EPSG","7019"]],
## TOWGS84[0,0,0,0,0,0,0],
## AUTHORITY["EPSG","6674"]],
## PRIMEM["Greenwich",0,
## AUTHORITY["EPSG","8901"]],
## UNIT["degree",0.0174532925199433,
## AUTHORITY["EPSG","9122"]],
## AUTHORITY["EPSG","4674"]]
# Attach the attributes of each city point to the municipality polygon that
# contains it, then repair any invalid geometries in the joined result.
muni_cities.sf <- st_make_valid(
  st_join(muni, cities.sf, join = st_contains)
)
## although coordinates are longitude/latitude, st_contains assumes that they are planar
# Quick choropleth of GDP per capita, drawn without polygon borders.
qtm(shp = muni_cities.sf, fill = "GDP_CAPITA", borders = NULL, scale = 0.4)
# Histogram of GDP per capita using 20 bins.
ggplot(muni_cities.sf, aes(x = GDP_CAPITA)) +
  geom_histogram(bins = 20, colour = "black", fill = "light blue")
## Warning: Removed 5 rows containing non-finite values (stat_bin).
The figure above reveals a right-skewed distribution. This means that most cities have a relatively low GDPPC. Statistically, a skewed distribution can be normalised by using a log transformation. The code chunk below is used to derive a new variable called LOG_GDPPC by applying a log transformation to the variable GDP_CAPITA.
# Add LOG_GDPPC, the natural logarithm of GDP_CAPITA, to the city points.
cities.sf <- mutate(cities.sf, LOG_GDPPC = log(GDP_CAPITA))
Plotting LOG_GDPPC:
# Histogram of the log-transformed GDP per capita using 20 bins.
ggplot(cities.sf, aes(x = LOG_GDPPC)) +
  geom_histogram(bins = 20, colour = "black", fill = "light blue")
Notice that the distribution is relatively less skewed after the transformation.
I have identified the following 10 variables (nine taken directly from the data and one derived) that I feel might be relevant to the distribution of GDPPC in Brazil, which I will be using in my regression analysis: * IBGE_15-59: Resident Population Regular Urban Planning - from 15 to 59 y.o (economy active population) * IDHM: HDI Human Development Index * IDHM_Renda: HDI GNI Index * IDHM_Educacao: HDI Education index * IDHM_Longevidade: HDI Life Expectancy index * GVA_AGROPEC: Gross Added Value - Agropecuary * GVA_INDUSTRY: Gross Added Value - Industry * GVA_SERVICES: Gross Added Value - Services * GVA_PUBLIC: Gross Added Value - Public Services And one derived variable: * URBANIZATION_INDEX: URBANIZATION_INDEX = IBGE_DU_URBAN(Domestic Units Urban) / IBGE_DU(Domestic Units Total)
# Derive the share of urban domestic units among all domestic units.
muni_cities.sf <- mutate(
  muni_cities.sf,
  URBANIZATION_INDEX = IBGE_DU_URBAN / IBGE_DU
)
# Keep only the identifier columns and the variables used in the analysis.
muni_cities_analysis.sf <- muni_cities.sf %>%
  select(
    NM_MUNICIP, CITY, STATE,
    `IBGE_15-59`,
    IDHM, IDHM_Renda, IDHM_Educacao, IDHM_Longevidade,
    GVA_AGROPEC, GVA_INDUSTRY, GVA_SERVICES, GVA_PUBLIC,
    URBANIZATION_INDEX, GDP_CAPITA
  )
# One 25-bin histogram per explanatory variable. The histogram layer is the
# same for every plot, so it is created once and added to each plot.
hist_layer <- geom_histogram(bins = 25, colour = "black", fill = "light blue")

Pop <- ggplot(muni_cities_analysis.sf, aes(x = `IBGE_15-59`)) + hist_layer
HDI <- ggplot(muni_cities_analysis.sf, aes(x = IDHM)) + hist_layer
GNI <- ggplot(muni_cities_analysis.sf, aes(x = IDHM_Renda)) + hist_layer
Edu <- ggplot(muni_cities_analysis.sf, aes(x = IDHM_Educacao)) + hist_layer
Life <- ggplot(muni_cities_analysis.sf, aes(x = IDHM_Longevidade)) + hist_layer
Agr <- ggplot(muni_cities_analysis.sf, aes(x = GVA_AGROPEC)) + hist_layer
Ind <- ggplot(muni_cities_analysis.sf, aes(x = GVA_INDUSTRY)) + hist_layer
Ser <- ggplot(muni_cities_analysis.sf, aes(x = GVA_SERVICES)) + hist_layer
Pub <- ggplot(muni_cities_analysis.sf, aes(x = GVA_PUBLIC)) + hist_layer
Urb <- ggplot(muni_cities_analysis.sf, aes(x = URBANIZATION_INDEX)) + hist_layer
Display plots:
# Lay the ten histograms out on a grid with 3 columns and 4 rows.
ggarrange(
  plotlist = list(Pop, HDI, GNI, Edu, Life, Agr, Ind, Ser, Pub, Urb),
  ncol = 3,
  nrow = 4
)
## Warning: Removed 10 rows containing non-finite values (stat_bin).
## Warning: Removed 11 rows containing non-finite values (stat_bin).
## Warning: Removed 11 rows containing non-finite values (stat_bin).
## Warning: Removed 11 rows containing non-finite values (stat_bin).
## Warning: Removed 11 rows containing non-finite values (stat_bin).
## Warning: Removed 5 rows containing non-finite values (stat_bin).
## Warning: Removed 5 rows containing non-finite values (stat_bin).
## Warning: Removed 5 rows containing non-finite values (stat_bin).
## Warning: Removed 5 rows containing non-finite values (stat_bin).
## Warning: Removed 12 rows containing non-finite values (stat_bin).
## Data standardization:
# Inspect value ranges and NA counts of the analysis variables.
muni_cities_analysis.sf %>%
  summary()
## NM_MUNICIP CITY STATE IBGE_15-59
## SANTA TEREZINHA: 7 Length:5575 Length:5575 Min. : 94
## BOM JESUS : 5 Class :character Class :character 1st Qu.: 1734
## SÃO DOMINGOS : 5 Mode :character Mode :character Median : 3841
## BONITO : 4 Mean : 18212
## PLANALTO : 4 3rd Qu.: 9628
## SANTA HELENA : 4 Max. :7058221
## (Other) :5546 NA's :10
## IDHM IDHM_Renda IDHM_Educacao IDHM_Longevidade
## Min. :0.4180 Min. :0.4000 Min. :0.2070 Min. :0.6720
## 1st Qu.:0.5990 1st Qu.:0.5720 1st Qu.:0.4900 1st Qu.:0.7690
## Median :0.6650 Median :0.6540 Median :0.5600 Median :0.8080
## Mean :0.6592 Mean :0.6429 Mean :0.5591 Mean :0.8016
## 3rd Qu.:0.7180 3rd Qu.:0.7070 3rd Qu.:0.6310 3rd Qu.:0.8360
## Max. :0.8620 Max. :0.8910 Max. :0.8250 Max. :0.8940
## NA's :11 NA's :11 NA's :11 NA's :11
## GVA_AGROPEC GVA_INDUSTRY GVA_SERVICES GVA_PUBLIC
## Min. : 0 Min. : 1 Min. : 2 Min. : 7
## 1st Qu.: 4189 1st Qu.: 1726 1st Qu.: 10112 1st Qu.: 17267
## Median : 20426 Median : 7424 Median : 31211 Median : 35866
## Mean : 47271 Mean : 175928 Mean : 489451 Mean : 123768
## 3rd Qu.: 51227 3rd Qu.: 41022 3rd Qu.: 115406 3rd Qu.: 89245
## Max. :1402282 Max. :63306755 Max. :464656988 Max. :41902893
## NA's :5 NA's :5 NA's :5 NA's :5
## URBANIZATION_INDEX GDP_CAPITA geometry
## Min. :0.04553 Min. : 3191 MULTIPOLYGON :5575
## 1st Qu.:0.49157 1st Qu.: 9058 epsg:4674 : 0
## Median :0.66277 Median : 15870 +proj=long...: 0
## Mean :0.65212 Mean : 21126
## 3rd Qu.:0.83043 3rd Qu.: 26155
## Max. :1.00000 Max. :314638
## NA's :12 NA's :5
As we can see, the range for some variables is very large, We will be standardising the following variables which did not fall into the range of 0 to 1: * IBGE_15-59 * GVA_AGROPEC * GVA_INDUSTRY * GVA_SERVICES * GVA_PUBLIC * GDP_CAPITA
# Rescale each variable whose range lies outside 0-1 with normalize(),
# overwriting the original column in place.
vars_to_rescale <- c(
  "IBGE_15-59", "GVA_AGROPEC", "GVA_INDUSTRY",
  "GVA_SERVICES", "GVA_PUBLIC", "GDP_CAPITA"
)
for (var_name in vars_to_rescale) {
  muni_cities_analysis.sf[[var_name]] <-
    normalize(muni_cities_analysis.sf[[var_name]])
}
Check again to see if they are normalized:
# Confirm that the rescaled variables now range from 0 to 1.
muni_cities_analysis.sf %>%
  summary()
## NM_MUNICIP CITY STATE
## SANTA TEREZINHA: 7 Length:5575 Length:5575
## BOM JESUS : 5 Class :character Class :character
## SÃO DOMINGOS : 5 Mode :character Mode :character
## BONITO : 4
## PLANALTO : 4
## SANTA HELENA : 4
## (Other) :5546
## IBGE_15-59 IDHM IDHM_Renda IDHM_Educacao
## Min. :0.000000 Min. :0.4180 Min. :0.4000 Min. :0.2070
## 1st Qu.:0.000232 1st Qu.:0.5990 1st Qu.:0.5720 1st Qu.:0.4900
## Median :0.000531 Median :0.6650 Median :0.6540 Median :0.5600
## Mean :0.002567 Mean :0.6592 Mean :0.6429 Mean :0.5591
## 3rd Qu.:0.001351 3rd Qu.:0.7180 3rd Qu.:0.7070 3rd Qu.:0.6310
## Max. :1.000000 Max. :0.8620 Max. :0.8910 Max. :0.8250
## NA's :10 NA's :11 NA's :11 NA's :11
## IDHM_Longevidade GVA_AGROPEC GVA_INDUSTRY GVA_SERVICES
## Min. :0.6720 Min. :0.000000 Min. :0.000000 Min. :0.000000
## 1st Qu.:0.7690 1st Qu.:0.002987 1st Qu.:0.000027 1st Qu.:0.000022
## Median :0.8080 Median :0.014566 Median :0.000117 Median :0.000067
## Mean :0.8016 Mean :0.033710 Mean :0.002779 Mean :0.001053
## 3rd Qu.:0.8360 3rd Qu.:0.036531 3rd Qu.:0.000648 3rd Qu.:0.000248
## Max. :0.8940 Max. :1.000000 Max. :1.000000 Max. :1.000000
## NA's :11 NA's :5 NA's :5 NA's :5
## GVA_PUBLIC URBANIZATION_INDEX GDP_CAPITA geometry
## Min. :0.000000 Min. :0.04553 Min. :0.00000 MULTIPOLYGON :5575
## 1st Qu.:0.000412 1st Qu.:0.49157 1st Qu.:0.01884 epsg:4674 : 0
## Median :0.000856 Median :0.66277 Median :0.04071 +proj=long...: 0
## Mean :0.002954 Mean :0.65212 Mean :0.05759
## 3rd Qu.:0.002130 3rd Qu.:0.83043 3rd Qu.:0.07373
## Max. :1.000000 Max. :1.00000 Max. :1.00000
## NA's :5 NA's :12 NA's :5
Notice that there are some NA values, we will proceed to remove them from analysis, as they would affect our analysis
# Keep only rows in which every analysis variable is present; conditions
# passed to one filter() call are combined with a logical AND.
muni_cities_analysis.sf <- filter(
  muni_cities_analysis.sf,
  !is.na(`IBGE_15-59`),
  !is.na(IDHM),
  !is.na(IDHM_Renda),
  !is.na(IDHM_Educacao),
  !is.na(IDHM_Longevidade),
  !is.na(GVA_AGROPEC),
  !is.na(GVA_INDUSTRY),
  !is.na(GVA_SERVICES),
  !is.na(GVA_PUBLIC),
  !is.na(URBANIZATION_INDEX),
  !is.na(GDP_CAPITA)
)
# Re-check the summary to verify that no NA counts remain.
summary(muni_cities_analysis.sf)
## NM_MUNICIP CITY STATE IBGE_15-59
## BOM JESUS : 5 Length:5562 Length:5562 Min. :0.0000000
## SÃO DOMINGOS: 5 Class :character Class :character 1st Qu.:0.0002325
## BONITO : 4 Mode :character Mode :character Median :0.0005312
## PLANALTO : 4 Mean :0.0025683
## SANTA HELENA: 4 3rd Qu.:0.0013513
## SANTA INÊS : 4 Max. :1.0000000
## (Other) :5536
## IDHM IDHM_Renda IDHM_Educacao IDHM_Longevidade
## Min. :0.4180 Min. :0.4000 Min. :0.2070 Min. :0.6720
## 1st Qu.:0.5990 1st Qu.:0.5720 1st Qu.:0.4900 1st Qu.:0.7690
## Median :0.6650 Median :0.6540 Median :0.5600 Median :0.8080
## Mean :0.6593 Mean :0.6429 Mean :0.5592 Mean :0.8016
## 3rd Qu.:0.7180 3rd Qu.:0.7070 3rd Qu.:0.6310 3rd Qu.:0.8360
## Max. :0.8620 Max. :0.8910 Max. :0.8250 Max. :0.8940
##
## GVA_AGROPEC GVA_INDUSTRY GVA_SERVICES GVA_PUBLIC
## Min. :0.000000 Min. :0.0000000 Min. :0.0000000 Min. :0.0000000
## 1st Qu.:0.002987 1st Qu.:0.0000273 1st Qu.:0.0000218 1st Qu.:0.0004119
## Median :0.014570 Median :0.0001174 Median :0.0000672 Median :0.0008551
## Mean :0.033717 Mean :0.0027824 Mean :0.0010548 Mean :0.0029566
## 3rd Qu.:0.036541 3rd Qu.:0.0006480 3rd Qu.:0.0002488 3rd Qu.:0.0021322
## Max. :1.000000 Max. :1.0000000 Max. :1.0000000 Max. :1.0000000
##
## URBANIZATION_INDEX GDP_CAPITA geometry
## Min. :0.04553 Min. :0.00000 MULTIPOLYGON :5562
## 1st Qu.:0.49166 1st Qu.:0.01885 epsg:4674 : 0
## Median :0.66284 Median :0.04073 +proj=long...: 0
## Mean :0.65219 Mean :0.05759
## 3rd Qu.:0.83046 3rd Qu.:0.07374
## Max. :1.00000 Max. :1.00000
##
We can see that all NA values are removed.
Lastly, we want to reveal the geospatial distribution of GDP per capita. The map will be prepared by using tmap package.
# Grey municipality polygons as the base layer, overlaid with dots coloured
# by GDP_CAPITA in five quantile classes.
tm_shape(shp = muni_cities_analysis.sf) +
  tm_polygons(col = "grey", border.col = "white", border.alpha = 0.5) +
  tm_dots(col = "GDP_CAPITA", alpha = 0.6, style = "quantile", n = 5)
# Correlation matrix of all numeric analysis variables.
# Columns are selected by type rather than by a hard-coded position range, so
# the plot stays correct if columns are added, removed or reordered upstream.
numeric_vars <- Filter(is.numeric, st_drop_geometry(muni_cities_analysis.sf))
corrplot(cor(numeric_vars), diag = FALSE, order = "AOE",
         tl.pos = "td", tl.cex = 0.5, method = "number", type = "upper")
From the matrix, we can see that some variables are highly correlated. In view of this, I have decided to remove the following variables from my analysis: * GVA_SERVICES * IDHM_Renda * IDHM * IBGE_15-59
# Remove the four variables judged to be highly correlated with others.
muni_cities_analysis.sf <- select(
  muni_cities_analysis.sf,
  -c(GVA_SERVICES, IDHM_Renda, `IBGE_15-59`, IDHM)
)
Plot again to check:
# Correlation matrix of the numeric variables that remain in the analysis set.
# Columns are selected by type rather than by a hard-coded position range, so
# the plot stays correct if columns are added, removed or reordered upstream.
numeric_vars <- Filter(is.numeric, st_drop_geometry(muni_cities_analysis.sf))
corrplot(cor(numeric_vars), diag = FALSE, order = "AOE",
         tl.pos = "td", tl.cex = 0.5, method = "number", type = "upper")
## Building multiple linear regression model The code below uses lm() to build a linear regression model:
# Fit the global multiple linear regression of GDP_CAPITA on the six
# retained explanatory variables, then print the model report.
muni_cities.mlr <- lm(
  formula = GDP_CAPITA ~ IDHM_Longevidade + IDHM_Educacao + GVA_AGROPEC +
    GVA_INDUSTRY + GVA_PUBLIC + URBANIZATION_INDEX,
  data = muni_cities_analysis.sf
)
summary(muni_cities.mlr)
##
## Call:
## lm(formula = GDP_CAPITA ~ IDHM_Longevidade + IDHM_Educacao +
## GVA_AGROPEC + GVA_INDUSTRY + GVA_PUBLIC + URBANIZATION_INDEX,
## data = muni_cities_analysis.sf)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.51619 -0.02253 -0.00861 0.00784 0.85222
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.297620 0.014894 -19.98 <2e-16 ***
## IDHM_Longevidade 0.316803 0.023170 13.67 <2e-16 ***
## IDHM_Educacao 0.178091 0.012365 14.40 <2e-16 ***
## GVA_AGROPEC 0.217097 0.011724 18.52 <2e-16 ***
## GVA_INDUSTRY 1.011883 0.055672 18.18 <2e-16 ***
## GVA_PUBLIC -0.816760 0.066970 -12.20 <2e-16 ***
## URBANIZATION_INDEX -0.009272 0.004177 -2.22 0.0265 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.05416 on 5555 degrees of freedom
## Multiple R-squared: 0.3117, Adjusted R-squared: 0.3109
## F-statistic: 419.2 on 6 and 5555 DF, p-value: < 2.2e-16
With reference to the report above, all the independent variables are statistically significant at the 5% level (p < 0.05), although URBANIZATION_INDEX is only weakly so (p = 0.0265).
# Tolerance and variance inflation factor of each predictor.
ols_vif_tol(model = muni_cities.mlr)
## Variables Tolerance VIF
## 1 IDHM_Longevidade 0.4925621 2.030201
## 2 IDHM_Educacao 0.3968377 2.519922
## 3 GVA_AGROPEC 0.9596783 1.042016
## 4 GVA_INDUSTRY 0.4268338 2.342832
## 5 GVA_PUBLIC 0.4292711 2.329530
## 6 URBANIZATION_INDEX 0.6586024 1.518367
Since the VIFs of all the independent variables are less than 10, we can safely conclude that there is no sign of multicollinearity among the independent variables.
# Residuals-versus-fitted-values plot of the regression model.
ols_plot_resid_fit(model = muni_cities.mlr)
The figure above reveals that most of the data points are scattered around the 0 line; hence we can conclude that the relationships between the dependent variable and the independent variables are approximately linear.
# Histogram of the regression residuals.
ols_plot_resid_hist(model = muni_cities.mlr)
The figure reveals that the residuals of the multiple linear regression model (i.e. muni_cities.mlr) resemble a normal distribution.
First, export the residuals of the multiple linear regression model as a data frame:
# Copy the model residuals into a one-column data frame.
# NOTE(review): mlr.output is not referenced again in the visible part of the
# file; the join below reads muni_cities.mlr$residuals directly.
mlr.output <- as.data.frame(muni_cities.mlr$residuals)
Next, we will append the residuals to the muni_cities_analysis.sf object.
# Append the residuals as a new column and give it the name MLR_RES.
# cbind() names the column after the expression (muni_cities.mlr.residuals),
# which is why the rename refers to that generated name.
muni_cities.res.sf <- rename(
  cbind(muni_cities_analysis.sf, muni_cities.mlr$residuals),
  MLR_RES = muni_cities.mlr.residuals
)
Next, we will convert the muni_cities.res.sf simple feature object into a SpatialPolygonsDataFrame
# Convert the sf object to its sp (Spatial*) equivalent and print it.
muni_cities_res.sp <- muni_cities.res.sf %>%
  as_Spatial()
muni_cities_res.sp
## class : SpatialPolygonsDataFrame
## features : 5562
## extent : -73.99045, -28.83591, -33.75118, 4.884623 (xmin, xmax, ymin, ymax)
## Warning in proj4string(x): CRS object has comment, which is lost in output
## crs : +proj=longlat +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +no_defs
## variables : 11
## names : NM_MUNICIP, CITY, STATE, IDHM_Educacao, IDHM_Longevidade, GVA_AGROPEC, GVA_INDUSTRY, GVA_PUBLIC, URBANIZATION_INDEX, GDP_CAPITA, MLR_RES
## min values : ABADIA DE GOIÁS, Abadia De Goiás, AC, 0.207, 0.672, 0, 0, 0, 0.0455261775520925, 0, -0.516188191386672
## max values : ZORTÉA, Zortéa, TO, 0.825, 0.894, 1, 1, 1, 1, 1, 0.852215604469209
Next, we will use tmap package to display the distribution of the residuals.
# Map of the regression residuals: borderless municipality polygons as the
# base layer, with the polygons filled by MLR_RES in quantile classes on top.
base_layer <- tm_shape(shp = muni_cities_analysis.sf) +
  tm_polygons(border.col = NULL)
residual_layer <- tm_shape(shp = muni_cities_res.sp) +
  tm_fill(col = "MLR_RES", alpha = 0.6, style = "quantile")
base_layer +
  residual_layer +
  tm_layout(legend.outside = TRUE)
The figure above reveals signs of spatial autocorrelation. To confirm this observation, Moran’s I test will be performed. ### Compute the distance-based weight matrix First, get summary statistics of the nearest-neighbour distances to determine a suitable distance range:
# One coordinate pair per municipality feature.
coords <- coordinates(muni_cities_res.sp)
# Nearest neighbour of every feature. The coordinates are longitude/latitude,
# so longlat = TRUE makes the search use great-circle distance, consistent
# with the nbdists(..., longlat = TRUE) call that measures these links.
k <- knn2nb(knearneigh(coords, longlat = TRUE))
## Warning in knearneigh(coords): knearneigh: identical points found
# Distance from each feature to its nearest neighbour, flattened to a plain
# numeric vector, followed by its summary statistics.
k_dist <- k %>%
  nbdists(coords, longlat = TRUE) %>%
  unlist()
summary(k_dist)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 11.74 15.88 21.35 23.96 369.63
As the summary statistics show, the largest nearest-neighbour distance is 369.63, so we will use a range of 0 to 370.
# Distance-band neighbours. The upper bound is the largest nearest-neighbour
# distance rounded up (370 for the distances summarised above) instead of a
# hard-coded constant, so it follows the data if the inputs change.
upper_bound <- ceiling(max(k_dist))
nb <- dnearneigh(coords, 0, upper_bound, longlat = TRUE)
summary(nb)
## Neighbour list object:
## Number of regions: 5562
## Number of nonzero links: 3030086
## Percentage nonzero weights: 9.794742
## Average number of links: 544.7835
## Link number distribution:
##
## 1 4 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## 1 1 1 4 1 5 3 8 10 5 8 5 8 8 6 3 2 7 6 8
## 26 27 28 29 30 31 32 33 34 36 37 38 39 40 41 42 43 44 45 46
## 5 5 7 6 3 4 4 1 2 1 1 2 3 4 4 3 3 2 5 3
## 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
## 8 2 3 8 8 2 8 8 9 8 4 13 7 4 2 1 2 2 3 2
## 67 70 71 72 73 74 75 76 77 78 80 82 83 84 85 86 87 88 89 90
## 5 3 3 3 6 2 4 6 5 1 5 3 4 2 4 1 3 1 3 6
## 91 92 94 95 97 98 100 101 102 103 104 105 106 107 108 109 110 111 112 113
## 1 1 2 1 3 3 2 6 2 1 1 1 2 6 1 1 2 4 2 3
## 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 133 134
## 6 4 4 1 2 3 1 3 1 2 1 4 1 2 2 2 1 4 5 2
## 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154
## 2 2 7 3 3 2 4 2 4 4 9 10 5 3 4 11 9 7 4 7
## 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174
## 6 7 5 4 3 1 6 12 6 4 10 3 8 3 5 6 5 3 6 5
## 175 176 177 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195
## 2 3 2 1 5 1 2 3 1 1 4 1 1 2 6 7 5 3 3 1
## 196 198 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217
## 1 4 2 3 4 1 2 1 2 1 6 4 2 3 4 5 7 4 5 1
## 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237
## 2 7 4 2 8 2 3 5 4 4 6 5 4 1 4 4 1 5 7 4
## 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257
## 4 6 5 4 3 3 9 4 9 6 7 4 5 3 7 3 6 7 7 7
## 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277
## 1 5 6 2 3 9 8 5 3 5 8 6 4 4 10 9 4 10 5 9
## 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297
## 4 6 12 5 7 4 7 5 5 4 5 8 7 4 5 5 3 7 3 3
## 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317
## 3 6 2 7 6 7 3 3 8 5 5 6 2 4 8 5 4 3 8 7
## 318 319 320 321 322 323 324 325 326 327 329 330 331 332 333 334 335 336 337 338
## 3 8 4 10 8 5 5 6 5 5 5 11 4 5 9 3 5 7 5 5
## 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358
## 7 4 8 4 3 3 8 5 2 7 2 10 8 4 4 1 3 3 6 6
## 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378
## 4 11 6 3 6 3 9 9 4 7 8 2 6 6 6 6 5 7 10 5
## 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398
## 9 9 9 8 6 6 12 16 14 8 4 11 5 8 6 3 7 4 12 3
## 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418
## 10 5 4 8 6 10 3 4 7 1 3 4 5 4 6 6 8 1 3 5
## 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438
## 5 4 8 6 8 8 1 2 3 8 3 5 4 3 2 4 3 7 2 5
## 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459
## 5 3 4 7 4 2 2 4 3 3 3 3 2 3 9 3 5 3 2 5
## 460 461 462 463 464 465 466 467 468 469 470 472 473 474 475 476 478 479 480 481
## 7 5 4 3 1 6 4 5 3 1 5 3 1 3 3 5 6 7 1 4
## 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501
## 2 2 4 4 4 4 11 4 5 3 1 7 4 3 2 5 7 2 5 3
## 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521
## 4 1 3 4 4 3 6 5 1 5 9 4 8 2 9 2 7 4 3 2
## 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541
## 5 7 3 5 4 2 6 2 4 4 7 5 4 11 2 7 4 1 6 7
## 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 561 562
## 3 2 7 4 3 8 5 4 7 7 5 7 6 6 7 11 7 7 7 13
## 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582
## 3 8 9 5 10 3 7 8 8 7 8 6 5 9 9 11 6 9 8 9
## 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602
## 6 6 8 6 4 6 15 4 5 6 10 8 5 4 9 4 12 5 8 10
## 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622
## 8 8 16 4 8 5 6 6 6 5 8 3 10 3 2 2 5 11 15 3
## 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642
## 5 14 7 11 10 5 10 9 9 5 7 10 5 10 9 9 10 9 11 11
## 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662
## 6 7 8 8 13 2 6 9 9 13 8 8 11 11 7 9 14 10 9 8
## 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682
## 3 15 11 7 10 7 7 9 7 8 6 6 8 13 11 11 10 6 19 9
## 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702
## 8 15 8 6 5 10 12 9 13 13 4 10 11 8 5 17 9 5 11 10
## 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722
## 14 12 11 6 11 7 9 7 10 11 10 6 10 13 13 13 8 15 3 8
## 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742
## 8 11 11 7 9 9 8 6 11 12 15 8 13 8 11 12 10 4 3 9
## 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762
## 7 11 11 8 7 14 9 15 7 11 8 10 14 9 9 5 9 5 8 5
## 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782
## 10 14 5 5 15 7 16 4 9 9 5 12 12 7 12 6 6 6 7 9
## 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802
## 7 12 6 7 7 9 9 6 6 11 7 8 10 5 8 6 2 6 13 7
## 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822
## 6 10 11 6 7 7 7 10 12 9 6 10 5 4 9 11 8 6 3 13
## 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842
## 12 8 10 13 10 8 10 8 10 9 2 7 5 7 7 7 5 7 6 9
## 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862
## 7 8 10 9 5 4 9 11 7 9 10 9 13 7 9 8 8 3 8 10
## 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882
## 6 12 4 7 6 13 6 10 13 5 4 6 7 6 10 10 11 12 4 4
## 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902
## 3 7 9 9 11 6 5 5 3 4 7 9 7 4 3 10 3 5 7 8
## 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922
## 7 8 5 4 15 5 7 4 5 8 7 8 3 9 5 7 7 5 3 4
## 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942
## 2 10 6 5 8 4 7 6 1 9 5 4 5 6 3 3 7 2 1 2
## 943 944 945 946 947 948 951 952 954 955 956 959 963 969 970 972
## 1 2 1 1 1 1 3 1 1 1 1 1 1 1 1 1
## 1 least connected region:
## 1523 with 1 link
## 1 most connected region:
## 4154 with 972 links
Convert the output neighbours lists (i.e. nb) into a spatial weights.
# Turn the neighbour list into row-standardised (style "W") spatial weights
# and print their summary.
nb_lw <- nb %>%
  nb2listw(style = "W")
summary(nb_lw)
## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 5562
## Number of nonzero links: 3030086
## Percentage nonzero weights: 9.794742
## Average number of links: 544.7835
## Link number distribution:
##
## 1 4 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## 1 1 1 4 1 5 3 8 10 5 8 5 8 8 6 3 2 7 6 8
## 26 27 28 29 30 31 32 33 34 36 37 38 39 40 41 42 43 44 45 46
## 5 5 7 6 3 4 4 1 2 1 1 2 3 4 4 3 3 2 5 3
## 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
## 8 2 3 8 8 2 8 8 9 8 4 13 7 4 2 1 2 2 3 2
## 67 70 71 72 73 74 75 76 77 78 80 82 83 84 85 86 87 88 89 90
## 5 3 3 3 6 2 4 6 5 1 5 3 4 2 4 1 3 1 3 6
## 91 92 94 95 97 98 100 101 102 103 104 105 106 107 108 109 110 111 112 113
## 1 1 2 1 3 3 2 6 2 1 1 1 2 6 1 1 2 4 2 3
## 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 133 134
## 6 4 4 1 2 3 1 3 1 2 1 4 1 2 2 2 1 4 5 2
## 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154
## 2 2 7 3 3 2 4 2 4 4 9 10 5 3 4 11 9 7 4 7
## 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174
## 6 7 5 4 3 1 6 12 6 4 10 3 8 3 5 6 5 3 6 5
## 175 176 177 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195
## 2 3 2 1 5 1 2 3 1 1 4 1 1 2 6 7 5 3 3 1
## 196 198 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217
## 1 4 2 3 4 1 2 1 2 1 6 4 2 3 4 5 7 4 5 1
## 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237
## 2 7 4 2 8 2 3 5 4 4 6 5 4 1 4 4 1 5 7 4
## 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257
## 4 6 5 4 3 3 9 4 9 6 7 4 5 3 7 3 6 7 7 7
## 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277
## 1 5 6 2 3 9 8 5 3 5 8 6 4 4 10 9 4 10 5 9
## 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297
## 4 6 12 5 7 4 7 5 5 4 5 8 7 4 5 5 3 7 3 3
## 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317
## 3 6 2 7 6 7 3 3 8 5 5 6 2 4 8 5 4 3 8 7
## 318 319 320 321 322 323 324 325 326 327 329 330 331 332 333 334 335 336 337 338
## 3 8 4 10 8 5 5 6 5 5 5 11 4 5 9 3 5 7 5 5
## 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358
## 7 4 8 4 3 3 8 5 2 7 2 10 8 4 4 1 3 3 6 6
## 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378
## 4 11 6 3 6 3 9 9 4 7 8 2 6 6 6 6 5 7 10 5
## 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398
## 9 9 9 8 6 6 12 16 14 8 4 11 5 8 6 3 7 4 12 3
## 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418
## 10 5 4 8 6 10 3 4 7 1 3 4 5 4 6 6 8 1 3 5
## 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438
## 5 4 8 6 8 8 1 2 3 8 3 5 4 3 2 4 3 7 2 5
## 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459
## 5 3 4 7 4 2 2 4 3 3 3 3 2 3 9 3 5 3 2 5
## 460 461 462 463 464 465 466 467 468 469 470 472 473 474 475 476 478 479 480 481
## 7 5 4 3 1 6 4 5 3 1 5 3 1 3 3 5 6 7 1 4
## 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501
## 2 2 4 4 4 4 11 4 5 3 1 7 4 3 2 5 7 2 5 3
## 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521
## 4 1 3 4 4 3 6 5 1 5 9 4 8 2 9 2 7 4 3 2
## 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541
## 5 7 3 5 4 2 6 2 4 4 7 5 4 11 2 7 4 1 6 7
## 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 561 562
## 3 2 7 4 3 8 5 4 7 7 5 7 6 6 7 11 7 7 7 13
## 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582
## 3 8 9 5 10 3 7 8 8 7 8 6 5 9 9 11 6 9 8 9
## 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602
## 6 6 8 6 4 6 15 4 5 6 10 8 5 4 9 4 12 5 8 10
## 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622
## 8 8 16 4 8 5 6 6 6 5 8 3 10 3 2 2 5 11 15 3
## 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642
## 5 14 7 11 10 5 10 9 9 5 7 10 5 10 9 9 10 9 11 11
## 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662
## 6 7 8 8 13 2 6 9 9 13 8 8 11 11 7 9 14 10 9 8
## 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682
## 3 15 11 7 10 7 7 9 7 8 6 6 8 13 11 11 10 6 19 9
## 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702
## 8 15 8 6 5 10 12 9 13 13 4 10 11 8 5 17 9 5 11 10
## 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722
## 14 12 11 6 11 7 9 7 10 11 10 6 10 13 13 13 8 15 3 8
## 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742
## 8 11 11 7 9 9 8 6 11 12 15 8 13 8 11 12 10 4 3 9
## 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762
## 7 11 11 8 7 14 9 15 7 11 8 10 14 9 9 5 9 5 8 5
## 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782
## 10 14 5 5 15 7 16 4 9 9 5 12 12 7 12 6 6 6 7 9
## 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802
## 7 12 6 7 7 9 9 6 6 11 7 8 10 5 8 6 2 6 13 7
## 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822
## 6 10 11 6 7 7 7 10 12 9 6 10 5 4 9 11 8 6 3 13
## 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842
## 12 8 10 13 10 8 10 8 10 9 2 7 5 7 7 7 5 7 6 9
## 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862
## 7 8 10 9 5 4 9 11 7 9 10 9 13 7 9 8 8 3 8 10
## 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882
## 6 12 4 7 6 13 6 10 13 5 4 6 7 6 10 10 11 12 4 4
## 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902
## 3 7 9 9 11 6 5 5 3 4 7 9 7 4 3 10 3 5 7 8
## 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922
## 7 8 5 4 15 5 7 4 5 8 7 8 3 9 5 7 7 5 3 4
## 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942
## 2 10 6 5 8 4 7 6 1 9 5 4 5 6 3 3 7 2 1 2
## 943 944 945 946 947 948 951 952 954 955 956 959 963 969 970 972
## 1 2 1 1 1 1 3 1 1 1 1 1 1 1 1 1
## 1 least connected region:
## 1523 with 1 link
## 1 most connected region:
## 4154 with 972 links
##
## Weights style: W
## Weights constants summary:
## n nn S0 S1 S2
## W 5562 30935844 5562 45.46846 22375.69
# Global Moran's I test on the residuals of the regression model.
lm.morantest(model = muni_cities.mlr, listw = nb_lw)
##
## Global Moran I for regression residuals
##
## data:
## model: lm(formula = GDP_CAPITA ~ IDHM_Longevidade + IDHM_Educacao +
## GVA_AGROPEC + GVA_INDUSTRY + GVA_PUBLIC + URBANIZATION_INDEX, data =
## muni_cities_analysis.sf)
## weights: nb_lw
##
## Moran I statistic standard deviate = 27.359, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I Expectation Variance
## 3.088069e-02 -3.794817e-04 1.305473e-06
The Global Moran’s I test for residual spatial autocorrelation shows that its p-value is less than 2.2e-16, which is less than the alpha value of 0.05. Hence, we reject the null hypothesis that the residuals are randomly distributed.
Since the observed Global Moran’s I = 3.088069e-02, which is greater than 0, we can infer that the residuals exhibit a clustered spatial pattern.
# Calibrate a fixed-bandwidth GWR model with a Gaussian kernel.
# The data are in a longitude/latitude CRS (the object prints as
# +proj=longlat), so longlat = TRUE is passed to both calls: distances are
# then great-circle distances rather than degrees treated as planar units.
# With longlat = FALSE the selected bandwidth (about 55.6 degrees) was wider
# than the whole study area, which made the local coefficients almost
# identical to the global ones.
# bw.gwr() and gwr.basic() must use the same distance setting so that the
# bandwidth is expressed in the units the model fit expects.
# NOTE(review): the bandwidth trace and the bandwidth quoted below were
# produced with longlat = FALSE; re-run this chunk to regenerate them.
bw.fixed <- bw.gwr(
  formula = GDP_CAPITA ~ IDHM_Longevidade + IDHM_Educacao + GVA_AGROPEC +
    GVA_INDUSTRY + GVA_PUBLIC + URBANIZATION_INDEX,
  data = muni_cities_res.sp,
  approach = "CV",
  kernel = "gaussian",
  adaptive = FALSE,
  longlat = TRUE
)
## Take a cup of tea and have a break, it will take a few minutes.
## -----A kind suggestion from GWmodel development group
## Fixed bandwidth: 34.48404 CV score: 17.01473
## Fixed bandwidth: 21.31657 CV score: 17.07734
## Fixed bandwidth: 42.62198 CV score: 17.00009
## Fixed bandwidth: 47.65151 CV score: 16.99452
## Fixed bandwidth: 50.75993 CV score: 16.99187
## Fixed bandwidth: 52.68103 CV score: 16.99046
## Fixed bandwidth: 53.86834 CV score: 16.98967
## Fixed bandwidth: 54.60214 CV score: 16.9892
## Fixed bandwidth: 55.05565 CV score: 16.98892
## Fixed bandwidth: 55.33594 CV score: 16.98876
## Fixed bandwidth: 55.50917 CV score: 16.98865
## Fixed bandwidth: 55.61623 CV score: 16.98859
The result shows that the recommended bandwidth is 55.61623.
# Fit the GWR model with the bandwidth selected above.
gwr.fixed <- gwr.basic(
  formula = GDP_CAPITA ~ IDHM_Longevidade + IDHM_Educacao + GVA_AGROPEC +
    GVA_INDUSTRY + GVA_PUBLIC + URBANIZATION_INDEX,
  data = muni_cities_res.sp,
  bw = bw.fixed,
  kernel = "gaussian",
  longlat = TRUE
)
## Warning in proj4string(data): CRS object has comment, which is lost in output
Display the model output:
# Print the full GWR report (global regression plus local coefficient summary).
print(gwr.fixed)
## ***********************************************************************
## * Package GWmodel *
## ***********************************************************************
## Program starts at: 2020-05-31 21:28:58
## Call:
## gwr.basic(formula = GDP_CAPITA ~ IDHM_Longevidade + IDHM_Educacao +
## GVA_AGROPEC + GVA_INDUSTRY + GVA_PUBLIC + URBANIZATION_INDEX,
## data = muni_cities_res.sp, bw = bw.fixed, kernel = "gaussian",
## longlat = FALSE)
##
## Dependent (y) variable: GDP_CAPITA
## Independent variables: IDHM_Longevidade IDHM_Educacao GVA_AGROPEC GVA_INDUSTRY GVA_PUBLIC URBANIZATION_INDEX
## Number of data points: 5562
## ***********************************************************************
## * Results of Global Regression *
## ***********************************************************************
##
## Call:
## lm(formula = formula, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.51619 -0.02253 -0.00861 0.00784 0.85222
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.297620 0.014894 -19.98 <2e-16 ***
## IDHM_Longevidade 0.316803 0.023170 13.67 <2e-16 ***
## IDHM_Educacao 0.178091 0.012365 14.40 <2e-16 ***
## GVA_AGROPEC 0.217097 0.011724 18.52 <2e-16 ***
## GVA_INDUSTRY 1.011883 0.055672 18.18 <2e-16 ***
## GVA_PUBLIC -0.816760 0.066970 -12.20 <2e-16 ***
## URBANIZATION_INDEX -0.009272 0.004177 -2.22 0.0265 *
##
## ---Significance stars
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Residual standard error: 0.05416 on 5555 degrees of freedom
## Multiple R-squared: 0.3117
## Adjusted R-squared: 0.3109
## F-statistic: 419.2 on 6 and 5555 DF, p-value: < 2.2e-16
## ***Extra Diagnostic information
## Residual sum of squares: 16.29655
## Sigma(hat): 0.05413902
## AIC: -16641.54
## AICc: -16641.51
## ***********************************************************************
## * Results of Geographically Weighted Regression *
## ***********************************************************************
##
## *********************Model calibration information*********************
## Kernel function: gaussian
## Fixed bandwidth: 55.61623
## Regression points: the same locations as observations are used.
## Distance metric: Euclidean distance metric is used.
##
## ****************Summary of GWR coefficient estimates:******************
## Min. 1st Qu. Median 3rd Qu. Max.
## Intercept -0.3004568 -0.2975073 -0.2968356 -0.2960530 -0.2946
## IDHM_Longevidade 0.3130635 0.3143717 0.3152692 0.3159543 0.3212
## IDHM_Educacao 0.1750872 0.1769845 0.1788888 0.1798482 0.1819
## GVA_AGROPEC 0.2167823 0.2174414 0.2175341 0.2175978 0.2177
## GVA_INDUSTRY 0.9927010 1.0153373 1.0193995 1.0224757 1.0276
## GVA_PUBLIC -0.8346534 -0.8275840 -0.8242750 -0.8182970 -0.7994
## URBANIZATION_INDEX -0.0104319 -0.0096507 -0.0092897 -0.0085478 -0.0080
## ************************Diagnostic information*************************
## Number of data points: 5562
## Effective number of parameters (2trace(S) - trace(S'S)): 7.39543
## Effective degrees of freedom (n-2trace(S) + trace(S'S)): 5554.605
## AICc (GWR book, Fotheringham, et al. 2002, p. 61, eq 2.33): -16649.16
## AIC (GWR book, Fotheringham, et al. 2002,GWR p. 96, eq. 4.22): -16658.39
## Residual sum of squares: 16.27297
## R-square value: 0.3126563
## Adjusted R-square value: 0.311741
##
## ***********************************************************************
## Program stops at: 2020-05-31 21:29:11
# Pre-compute the distance matrix between the observation locations so the
# bandwidth search can reuse it through `dMat`.
dist <- gw.dist(
  dp.locat = coordinates(muni_cities_res.sp)
)

# Search for the adaptive bandwidth (a number of nearest neighbours) by
# cross-validation, using a Gaussian kernel.
bw.adaptive <- bw.gwr(
  formula = GDP_CAPITA ~ IDHM_Longevidade + IDHM_Educacao + GVA_AGROPEC +
    GVA_INDUSTRY + GVA_PUBLIC + URBANIZATION_INDEX,
  data = muni_cities_res.sp,
  approach = "CV",
  kernel = "gaussian",
  dMat = dist,
  adaptive = TRUE,
  longlat = FALSE
)
## Take a cup of tea and have a break, it will take a few minutes.
## -----A kind suggestion from GWmodel development group
## Adaptive bandwidth: 3445 CV score: 17.23047
## Adaptive bandwidth: 2137 CV score: 17.11392
## Adaptive bandwidth: 1328 CV score: 16.77703
## Adaptive bandwidth: 828 CV score: 16.47517
## Adaptive bandwidth: 519 CV score: 16.22232
## Adaptive bandwidth: 328 CV score: 15.99215
## Adaptive bandwidth: 210 CV score: 16.0175
## Adaptive bandwidth: 401 CV score: 16.07424
## Adaptive bandwidth: 283 CV score: 15.97288
## Adaptive bandwidth: 255 CV score: 15.97195
## Adaptive bandwidth: 237 CV score: 15.99858
## Adaptive bandwidth: 265 CV score: 15.97461
## Adaptive bandwidth: 247 CV score: 15.9919
## Adaptive bandwidth: 258 CV score: 15.97075
## Adaptive bandwidth: 262 CV score: 15.97355
## Adaptive bandwidth: 257 CV score: 15.97418
## Adaptive bandwidth: 260 CV score: 15.96914
## Adaptive bandwidth: 260 CV score: 15.96914
The result shows that 260 is the recommended number of data points (nearest neighbours) to be used.
Now, we can go ahead and calibrate the GWR-based GDP per capita model by using the adaptive bandwidth and a Gaussian kernel, as shown in the code chunk below.
# Calibrate the basic GWR model with the adaptive bandwidth held in
# `bw.adaptive` (a nearest-neighbour count) and a Gaussian kernel.
# `longlat = FALSE` requests Euclidean distances.
gwr.adaptive <- gwr.basic(
  formula = GDP_CAPITA ~ IDHM_Longevidade + IDHM_Educacao + GVA_AGROPEC +
    GVA_INDUSTRY + GVA_PUBLIC + URBANIZATION_INDEX,
  data = muni_cities_res.sp,
  bw = bw.adaptive,
  kernel = "gaussian",
  adaptive = TRUE,
  longlat = FALSE
)
## Warning in proj4string(data): CRS object has comment, which is lost in output
Display the model output:
# Show the calibration report of the adaptive-bandwidth GWR model.
print(gwr.adaptive)
## ***********************************************************************
## * Package GWmodel *
## ***********************************************************************
## Program starts at: 2020-05-31 21:30:51
## Call:
## gwr.basic(formula = GDP_CAPITA ~ IDHM_Longevidade + IDHM_Educacao +
## GVA_AGROPEC + GVA_INDUSTRY + GVA_PUBLIC + URBANIZATION_INDEX,
## data = muni_cities_res.sp, bw = bw.adaptive, kernel = "gaussian",
## adaptive = TRUE, longlat = FALSE)
##
## Dependent (y) variable: GDP_CAPITA
## Independent variables: IDHM_Longevidade IDHM_Educacao GVA_AGROPEC GVA_INDUSTRY GVA_PUBLIC URBANIZATION_INDEX
## Number of data points: 5562
## ***********************************************************************
## * Results of Global Regression *
## ***********************************************************************
##
## Call:
## lm(formula = formula, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.51619 -0.02253 -0.00861 0.00784 0.85222
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.297620 0.014894 -19.98 <2e-16 ***
## IDHM_Longevidade 0.316803 0.023170 13.67 <2e-16 ***
## IDHM_Educacao 0.178091 0.012365 14.40 <2e-16 ***
## GVA_AGROPEC 0.217097 0.011724 18.52 <2e-16 ***
## GVA_INDUSTRY 1.011883 0.055672 18.18 <2e-16 ***
## GVA_PUBLIC -0.816760 0.066970 -12.20 <2e-16 ***
## URBANIZATION_INDEX -0.009272 0.004177 -2.22 0.0265 *
##
## ---Significance stars
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Residual standard error: 0.05416 on 5555 degrees of freedom
## Multiple R-squared: 0.3117
## Adjusted R-squared: 0.3109
## F-statistic: 419.2 on 6 and 5555 DF, p-value: < 2.2e-16
## ***Extra Diagnostic information
## Residual sum of squares: 16.29655
## Sigma(hat): 0.05413902
## AIC: -16641.54
## AICc: -16641.51
## ***********************************************************************
## * Results of Geographically Weighted Regression *
## ***********************************************************************
##
## *********************Model calibration information*********************
## Kernel function: gaussian
## Adaptive bandwidth: 260 (number of nearest neighbours)
## Regression points: the same locations as observations are used.
## Distance metric: Euclidean distance metric is used.
##
## ****************Summary of GWR coefficient estimates:******************
## Min. 1st Qu. Median 3rd Qu. Max.
## Intercept -0.3413540 -0.2188945 -0.1548191 -0.0523613 0.0271
## IDHM_Longevidade -0.0617984 0.0592798 0.1545290 0.2211450 0.3936
## IDHM_Educacao 0.0109626 0.0317312 0.1388882 0.1902711 0.2554
## GVA_AGROPEC 0.0025691 0.1626542 0.1995342 0.2207908 0.3837
## GVA_INDUSTRY 0.3192241 1.1012191 2.3192578 3.5080612 6.9694
## GVA_PUBLIC -4.5215898 -2.5039254 -1.6814028 -0.6841106 0.0653
## URBANIZATION_INDEX -0.0359647 -0.0148809 0.0069244 0.0156982 0.0809
## ************************Diagnostic information*************************
## Number of data points: 5562
## Effective number of parameters (2trace(S) - trace(S'S)): 101.8268
## Effective degrees of freedom (n-2trace(S) + trace(S'S)): 5460.173
## AICc (GWR book, Fotheringham, et al. 2002, p. 61, eq 2.33): -17621.96
## AIC (GWR book, Fotheringham, et al. 2002,GWR p. 96, eq. 4.22): -17698.89
## Residual sum of squares: 13.33803
## R-square value: 0.4366233
## Adjusted R-square value: 0.426115
##
## ***********************************************************************
## Program stops at: 2020-05-31 21:31:10
Here I will be using the fixed bandwidth GWR model.
# Attach the fixed-bandwidth GWR results to the municipality polygons.
#
# The earlier `st_as_sf(gwr.fixed$SDF) %>% st_transform(crs = 4674)` step was
# removed: its result was overwritten by the `cbind()` below without ever
# being read, so it only cost a reprojection of every feature.
gwr.fixed.output <- as.data.frame(gwr.fixed$SDF)

# `cbind()` joins by row position, not by key, so both tables must describe
# the same municipalities in the same order. Fail loudly if the counts differ.
stopifnot(nrow(muni_cities.res.sf) == nrow(gwr.fixed.output))

# Columns of the GWR output whose names clash with columns already present in
# `muni_cities.res.sf` get a ".1" suffix (e.g. `IDHM_Educacao.1`).
# NOTE(review): those ".1" columns appear to be the local coefficient
# estimates - confirm before mapping them.
muni_cities.sf.fixed <- cbind(muni_cities.res.sf, as.matrix(gwr.fixed.output))

# Inspect the combined simple-feature table.
muni_cities.sf.fixed
## Simple feature collection with 5562 features and 38 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -73.99045 ymin: -33.75118 xmax: -28.83591 ymax: 4.884623
## CRS: 4674
## First 10 features:
## NM_MUNICIP CITY STATE IDHM_Educacao
## 1 ALTA FLORESTA D'OESTE Alta Floresta D'Oeste RO 0.526
## 2 ARIQUEMES Ariquemes RO 0.600
## 3 CABIXI Cabixi RO 0.559
## 4 CACOAL Cacoal RO 0.620
## 5 CEREJEIRAS Cerejeiras RO 0.602
## 6 COLORADO DO OESTE Colorado Do Oeste RO 0.584
## 7 CORUMBIARA Corumbiara RO 0.473
## 8 COSTA MARQUES Costa Marques RO 0.493
## 9 ESPIGÃO D'OESTE Espigão D'Oeste RO 0.536
## 10 GUAJARÁ-MIRIM Guajará-Mirim RO 0.519
## IDHM_Longevidade GVA_AGROPEC GVA_INDUSTRY GVA_PUBLIC URBANIZATION_INDEX
## 1 0.763 0.11848071 4.855722e-07 3.405978e-03 0.5918087
## 2 0.806 0.10345193 5.578592e-03 1.405826e-02 0.8434058
## 3 0.757 0.04213210 7.302128e-05 9.554075e-04 0.4482062
## 4 0.821 0.13407054 3.634820e-03 1.157999e-02 0.8072780
## 5 0.799 0.03771807 3.545427e-04 2.374261e-03 0.8531612
## 6 0.814 0.05183320 4.397635e-07 2.405381e-03 0.7448843
## 7 0.774 0.09425845 1.584682e-04 1.322334e-03 0.3141243
## 8 0.751 0.04461417 1.045577e-04 1.998908e-06 0.5325444
## 9 0.819 0.08232194 8.978652e-04 4.074391e-03 0.7543714
## 10 0.823 0.03560563 4.618831e-04 5.811294e-06 0.9295309
## GDP_CAPITA MLR_RES Intercept IDHM_Longevidade.1 IDHM_Educacao.1
## 1 0.04990125 -0.005328596 -0.2989687 0.3186823 0.1782066
## 2 0.05595688 -0.017422847 -0.2990383 0.3189523 0.1777577
## 3 0.05783450 0.011797131 -0.2987542 0.3183033 0.1783610
## 4 0.06081357 -0.027919487 -0.2988266 0.3185545 0.1779754
## 5 0.06270891 0.001294792 -0.2988367 0.3184414 0.1783226
## 6 0.04244695 -0.024198121 -0.2987387 0.3183021 0.1783016
## 7 0.07657810 0.028124509 -0.2988108 0.3184223 0.1782689
## 8 0.02861561 -0.004334422 -0.2992103 0.3190553 0.1781789
## 9 0.04321665 -0.022540017 -0.2987526 0.3184439 0.1779746
## 10 0.04026828 -0.014843935 -0.2992684 0.3191960 0.1780334
## GVA_AGROPEC.1 GVA_INDUSTRY.1 GVA_PUBLIC.1 URBANIZATION_INDEX.1 y
## 1 0.2171666 1.007059 -0.8143037 -0.009482365 0.04990125
## 2 0.2171032 1.004586 -0.8118081 -0.009355818 0.05595688
## 3 0.2172140 1.008736 -0.8157526 -0.009487759 0.05783450
## 4 0.2171588 1.006584 -0.8135944 -0.009381812 0.06081357
## 5 0.2171983 1.008193 -0.8153023 -0.009492257 0.06270891
## 6 0.2172092 1.008522 -0.8155051 -0.009466126 0.04244695
## 7 0.2171956 1.008052 -0.8151155 -0.009470285 0.07657810
## 8 0.2171301 1.005854 -0.8133930 -0.009522025 0.02861561
## 9 0.2171688 1.006909 -0.8138275 -0.009366906 0.04321665
## 10 0.2171047 1.004888 -0.8124650 -0.009487992 0.04026828
## yhat residual CV_Score Stud_residual Intercept_SE
## 1 0.05526785 -0.005366602 0 -0.09919428 0.01488882
## 2 0.07345246 -0.017495581 0 -0.32336196 0.01488893
## 3 0.04609872 0.011735787 0 0.21692666 0.01488812
## 4 0.08882957 -0.028016004 0 -0.51783397 0.01488812
## 5 0.06146372 0.001245191 0 0.02301080 0.01488840
## 6 0.06673362 -0.024286669 0 -0.44876555 0.01488800
## 7 0.04854880 0.028029301 0 0.51811036 0.01488824
## 8 0.03296209 -0.004346480 0 -0.08032619 0.01488989
## 9 0.06584716 -0.022630514 0 -0.41822424 0.01488786
## 10 0.05519942 -0.014931140 0 -0.27604559 0.01489004
## IDHM_Longevidade_SE IDHM_Educacao_SE GVA_AGROPEC_SE GVA_INDUSTRY_SE
## 1 0.02316136 0.01236128 0.01172019 0.05565055
## 2 0.02316156 0.01236166 0.01172100 0.05566089
## 3 0.02316028 0.01236045 0.01171934 0.05564431
## 4 0.02316023 0.01236058 0.01171989 0.05565101
## 5 0.02316071 0.01236076 0.01171963 0.05564623
## 6 0.02316007 0.01236033 0.01171932 0.05564468
## 7 0.02316045 0.01236060 0.01171958 0.05564639
## 8 0.02316307 0.01236254 0.01172118 0.05565704
## 9 0.02315981 0.01236027 0.01171964 0.05564937
## 10 0.02316332 0.01236283 0.01172159 0.05566163
## GVA_PUBLIC_SE URBANIZATION_INDEX_SE Intercept_TV IDHM_Longevidade_TV
## 1 0.06694256 0.004175245 -20.08008 13.75922
## 2 0.06694974 0.004175247 -20.08460 13.77076
## 3 0.06693731 0.004175017 -20.06661 13.74350
## 4 0.06694169 0.004174976 -20.07148 13.75437
## 5 0.06693901 0.004175109 -20.07178 13.74921
## 6 0.06693734 0.004174967 -20.06574 13.74357
## 7 0.06693885 0.004175047 -20.07025 13.74854
## 8 0.06694866 0.004175625 -20.09487 13.77431
## 9 0.06694013 0.004174883 -20.06687 13.74985
## 10 0.06695205 0.004175666 -20.09856 13.78024
## IDHM_Educacao_TV GVA_AGROPEC_TV GVA_INDUSTRY_TV GVA_PUBLIC_TV
## 1 14.41652 18.52928 18.09612 -12.16422
## 2 14.37975 18.52259 18.04833 -12.12564
## 3 14.42998 18.53467 18.12828 -12.18681
## 4 14.39863 18.52907 18.08743 -12.15378
## 5 14.42651 18.53286 18.11790 -12.17978
## 6 14.42531 18.53429 18.12433 -12.18311
## 7 14.42235 18.53272 18.11531 -12.17702
## 8 14.41281 18.52459 18.07236 -12.14950
## 9 14.39893 18.53034 18.09381 -12.15754
## 10 14.40069 18.52178 18.05352 -12.13503
## URBANIZATION_INDEX_TV Local_R2 geometry
## 1 -2.271092 0.3111511 MULTIPOLYGON (((-62.19465 -...
## 2 -2.240782 0.3115452 MULTIPOLYGON (((-62.53648 -...
## 3 -2.272508 0.3111230 MULTIPOLYGON (((-60.37075 -...
## 4 -2.247153 0.3114546 MULTIPOLYGON (((-61.0008 -1...
## 5 -2.273535 0.3111137 MULTIPOLYGON (((-61.49976 -...
## 6 -2.267354 0.3111890 MULTIPOLYGON (((-60.50475 -...
## 7 -2.268306 0.3111801 MULTIPOLYGON (((-61.34273 -...
## 8 -2.280383 0.3110411 MULTIPOLYGON (((-63.71199 -...
## 9 -2.243633 0.3114970 MULTIPOLYGON (((-60.94827 -...
## 10 -2.272210 0.3111482 MULTIPOLYGON (((-65.37724 -...
# Five-number summary (plus mean) of the fitted values from the fixed GWR.
summary(gwr.fixed$SDF[["yhat"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.02797 0.03088 0.05791 0.05763 0.07949 0.67577
# Map the local R2 of the fixed-bandwidth GWR: municipality polygons are
# drawn as the base layer, then `Local_R2` is plotted on top as dots.
# NOTE(review): `muni_cities.sf.fixed` has MULTIPOLYGON geometry, so the dots
# are presumably placed one per polygon; `tm_fill()` would give a choropleth
# instead - confirm which is intended.
tm_shape(shp = muni_cities.res.sf) +
  tm_polygons() +
  tm_shape(shp = muni_cities.sf.fixed) +
  tm_dots(
    col = "Local_R2",
    border.col = "gray60",
    border.lwd = 1
  )
# Quick choropleth maps of the fixed-bandwidth GWR outputs, drawn without
# polygon borders. The arguments are spelled out in full: the original
# `border = NULL` only worked through partial matching of `borders`.

# Standard error of the local intercept.
qtm(muni_cities.sf.fixed, fill = "Intercept_SE", borders = NULL)

# Local residuals.
qtm(muni_cities.sf.fixed, fill = "residual", borders = NULL)

# Fitted values.
qtm(muni_cities.sf.fixed, fill = "yhat", borders = NULL)