Note: All chunks that display tmap maps are set to eval=FALSE, because knitting the file to HTML with them enabled was so slow that it could not be completed. All the code chunks work fine when executed individually.

1 Getting started

1.1 The Task

In this take-home exercise, you are tasked to determine the factors affecting the unequal development of Brazil at the municipality level by using the data provided. The specific tasks of the analysis are as follows:

* Prepare a choropleth map showing the distribution of GDP per capita, 2016, at the municipality level.
* Calibrate an explanatory model to explain the factors affecting GDP per capita at the municipality level by using the multiple linear regression method.
* Prepare a choropleth map showing the distribution of the residuals of the GDP per capita model.
* Calibrate an explanatory model to explain the factors affecting GDP per capita at the municipality level by using the geographically weighted regression method.
* Prepare a series of choropleth maps showing the outputs of the geographically weighted regression model.

1.2 Loading required packages

# Packages required by this analysis; each one is installed on first use and
# then attached.
packages <- c("olsrr", "corrplot", "ggpubr", "sf", "spdep", "GWmodel", "tmap",
              "tidyverse", "geobr", "heatmaply")
for (p in packages) {
  # requireNamespace() only checks availability (no attaching, no warning
  # noise), so the package is attached exactly once by library() below.
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p)
  }
  # library() raises an error if the package still cannot be loaded.
  library(p, character.only = TRUE)
}

2 Geospatial Data Wrangling

2.1 Importing municipality geospatial data

# Attach geobr and request the municipality boundaries for 2016.
library(geobr)
muni <- geobr::read_municipality(year = 2016)

Note: I ran into errors when trying to use the geobr function above (“SSL certificate problem: certificate has expired / Error in read.table(file = file, header = header, sep = sep, quote = quote, : no lines available in input”). According to the user guide, this appears to be a problem with macOS, which I am using. I installed GDAL using Homebrew as instructed and tried to debug, but the error persisted. Therefore, I went to the data source (IBGE) provided by geobr’s developers, downloaded the municipality shapefile, and imported it manually in the code chunk below.

# Read the municipality layer from the locally downloaded shapefile.
muni <- st_read(
  dsn = "data/geospatial",
  layer = "BRMUE250GC_SIR"
)
## Reading layer `BRMUE250GC_SIR' from data source `/Users/tianyunzhao/Desktop/IS415/Take-home exercise/Take-home_Ex04/Take-home_Ex04/data/geospatial' using driver `ESRI Shapefile'
## Simple feature collection with 5572 features and 2 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -73.99045 ymin: -33.75118 xmax: -28.83591 ymax: 5.271841
## CRS:            4674

Checking CRS:

# Print the coordinate reference system attached to muni.
st_crs(muni)
## Coordinate Reference System:
##   User input: 4674 
##   wkt:
## GEOGCS["SIRGAS 2000",
##     DATUM["Sistema_de_Referencia_Geocentrico_para_las_AmericaS_2000",
##         SPHEROID["GRS 1980",6378137,298.257222101,
##             AUTHORITY["EPSG","7019"]],
##         TOWGS84[0,0,0,0,0,0,0],
##         AUTHORITY["EPSG","6674"]],
##     PRIMEM["Greenwich",0,
##         AUTHORITY["EPSG","8901"]],
##     UNIT["degree",0.0174532925199433,
##         AUTHORITY["EPSG","9122"]],
##     AUTHORITY["EPSG","4674"]]

CRS: 4674

Make sure all geometry is valid:

# TRUE only if every geometry in muni passes st_is_valid().
all(st_is_valid(muni))
## [1] TRUE

Check the extent of muni:

# Print the bounding box (xmin, ymin, xmax, ymax) of muni.
st_bbox(muni)
##       xmin       ymin       xmax       ymax 
## -73.990450 -33.751178 -28.835908   5.271841

3 Aspatial Data Wrangling

3.1 Importing aspatial data

# Load the semicolon-delimited city attribute table.
cities <- read_delim(
  file = "data/aspatial/BRAZIL_CITIES.csv",
  delim = ";"
)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   CITY = col_character(),
##   STATE = col_character(),
##   AREA = col_number(),
##   REGIAO_TUR = col_character(),
##   CATEGORIA_TUR = col_character(),
##   RURAL_URBAN = col_character(),
##   GVA_MAIN = col_character()
## )
## See spec(...) for full column specifications.

read_csv2() uses ; for the field separator and , for the decimal point. This is common in some European countries.

# Load the data dictionary describing the columns of the city table.
dict <- read_csv2(
  file = "data/aspatial/Data_Dictionary.csv"
)
## Using ',' as decimal and '.' as grouping mark. Use read_delim() for more control.
## Warning: Missing column names filled in: 'X6' [6]
## Parsed with column specification:
## cols(
##   FIELD = col_character(),
##   DESCRIPTION = col_character(),
##   REFERENCE = col_character(),
##   UNIT = col_character(),
##   SOURCE = col_character(),
##   X6 = col_character()
## )

Check the dictionary to get an understanding of the column headers:

# View() opens a data viewer and needs an interactive session, so skip it
# when the document is knitted or run as a script.
if (interactive()) {
  View(dict)
}

3.2 Validating aspatial data

Checking for NA:

# Column-wise summary; the "NA's" entries report missing values per column.
summary(cities)
##      CITY              STATE              CAPITAL          IBGE_RES_POP     
##  Length:5573        Length:5573        Min.   :0.000000   Min.   :     805  
##  Class :character   Class :character   1st Qu.:0.000000   1st Qu.:    5235  
##  Mode  :character   Mode  :character   Median :0.000000   Median :   10934  
##                                        Mean   :0.004845   Mean   :   34278  
##                                        3rd Qu.:0.000000   3rd Qu.:   23424  
##                                        Max.   :1.000000   Max.   :11253503  
##                                                           NA's   :8         
##  IBGE_RES_POP_BRAS  IBGE_RES_POP_ESTR     IBGE_DU        IBGE_DU_URBAN    
##  Min.   :     805   Min.   :     0.0   Min.   :    239   Min.   :     60  
##  1st Qu.:    5230   1st Qu.:     0.0   1st Qu.:   1572   1st Qu.:    874  
##  Median :   10926   Median :     0.0   Median :   3174   Median :   1846  
##  Mean   :   34200   Mean   :    77.5   Mean   :  10303   Mean   :   8859  
##  3rd Qu.:   23390   3rd Qu.:    10.0   3rd Qu.:   6726   3rd Qu.:   4624  
##  Max.   :11133776   Max.   :119727.0   Max.   :3576148   Max.   :3548433  
##  NA's   :8          NA's   :8          NA's   :10        NA's   :10       
##  IBGE_DU_RURAL      IBGE_POP            IBGE_1            IBGE_1-4     
##  Min.   :    3   Min.   :     174   Min.   :     0.0   Min.   :     5  
##  1st Qu.:  487   1st Qu.:    2801   1st Qu.:    38.0   1st Qu.:   158  
##  Median :  931   Median :    6170   Median :    92.0   Median :   376  
##  Mean   : 1463   Mean   :   27595   Mean   :   383.3   Mean   :  1544  
##  3rd Qu.: 1832   3rd Qu.:   15302   3rd Qu.:   232.0   3rd Qu.:   951  
##  Max.   :33809   Max.   :10463636   Max.   :129464.0   Max.   :514794  
##  NA's   :81      NA's   :8          NA's   :8          NA's   :8       
##     IBGE_5-9        IBGE_10-14       IBGE_15-59         IBGE_60+      
##  Min.   :     7   Min.   :    12   Min.   :     94   Min.   :     29  
##  1st Qu.:   220   1st Qu.:   259   1st Qu.:   1734   1st Qu.:    341  
##  Median :   516   Median :   588   Median :   3841   Median :    722  
##  Mean   :  2069   Mean   :  2381   Mean   :  18212   Mean   :   3004  
##  3rd Qu.:  1300   3rd Qu.:  1478   3rd Qu.:   9628   3rd Qu.:   1724  
##  Max.   :684443   Max.   :783702   Max.   :7058221   Max.   :1293012  
##  NA's   :8        NA's   :8        NA's   :8         NA's   :8        
##  IBGE_PLANTED_AREA   IBGE_CROP_PRODUCTION_$ IDHM Ranking 2010      IDHM       
##  Min.   :      0.0   Min.   :      0        Min.   :   1      Min.   :0.4180  
##  1st Qu.:    910.2   1st Qu.:   2326        1st Qu.:1392      1st Qu.:0.5990  
##  Median :   3471.5   Median :  13846        Median :2783      Median :0.6650  
##  Mean   :  14179.9   Mean   :  57384        Mean   :2783      Mean   :0.6592  
##  3rd Qu.:  11194.2   3rd Qu.:  55619        3rd Qu.:4174      3rd Qu.:0.7180  
##  Max.   :1205669.0   Max.   :3274885        Max.   :5565      Max.   :0.8620  
##  NA's   :3           NA's   :3              NA's   :8         NA's   :8       
##    IDHM_Renda     IDHM_Longevidade IDHM_Educacao         LONG       
##  Min.   :0.4000   Min.   :0.6720   Min.   :0.2070   Min.   :-72.92  
##  1st Qu.:0.5720   1st Qu.:0.7690   1st Qu.:0.4900   1st Qu.:-50.87  
##  Median :0.6540   Median :0.8080   Median :0.5600   Median :-46.52  
##  Mean   :0.6429   Mean   :0.8016   Mean   :0.5591   Mean   :-46.23  
##  3rd Qu.:0.7070   3rd Qu.:0.8360   3rd Qu.:0.6310   3rd Qu.:-41.40  
##  Max.   :0.8910   Max.   :0.8940   Max.   :0.8250   Max.   :-32.44  
##  NA's   :8        NA's   :8        NA's   :8        NA's   :9       
##       LAT               ALT               PAY_TV         FIXED_PHONES    
##  Min.   :-33.688   Min.   :     0.0   Min.   :      1   Min.   :      3  
##  1st Qu.:-22.838   1st Qu.:   169.8   1st Qu.:     88   1st Qu.:    119  
##  Median :-18.089   Median :   406.5   Median :    247   Median :    327  
##  Mean   :-16.444   Mean   :   893.8   Mean   :   3094   Mean   :   6567  
##  3rd Qu.: -8.489   3rd Qu.:   628.9   3rd Qu.:    815   3rd Qu.:   1151  
##  Max.   :  4.585   Max.   :874579.0   Max.   :2047668   Max.   :5543127  
##  NA's   :9         NA's   :9          NA's   :3         NA's   :3        
##       AREA            REGIAO_TUR        CATEGORIA_TUR      ESTIMATED_POP     
##  Min.   :     3.57   Length:5573        Length:5573        Min.   :     786  
##  1st Qu.:   204.44   Class :character   Class :character   1st Qu.:    5454  
##  Median :   416.59   Mode  :character   Mode  :character   Median :   11590  
##  Mean   :  1517.44                                         Mean   :   37432  
##  3rd Qu.:  1026.57                                         3rd Qu.:   25296  
##  Max.   :159533.33                                         Max.   :12176866  
##  NA's   :3                                                 NA's   :3         
##  RURAL_URBAN         GVA_AGROPEC       GVA_INDUSTRY       GVA_SERVICES      
##  Length:5573        Min.   :      0   Min.   :       1   Min.   :        2  
##  Class :character   1st Qu.:   4189   1st Qu.:    1726   1st Qu.:    10112  
##  Mode  :character   Median :  20426   Median :    7424   Median :    31211  
##                     Mean   :  47271   Mean   :  175928   Mean   :   489451  
##                     3rd Qu.:  51227   3rd Qu.:   41022   3rd Qu.:   115406  
##                     Max.   :1402282   Max.   :63306755   Max.   :464656988  
##                     NA's   :3         NA's   :3          NA's   :3          
##    GVA_PUBLIC         GVA_TOTAL             TAXES                GDP           
##  Min.   :       7   Min.   :       17   Min.   :   -14159   Min.   :       15  
##  1st Qu.:   17267   1st Qu.:    42253   1st Qu.:     1305   1st Qu.:    43709  
##  Median :   35866   Median :   119492   Median :     5100   Median :   125153  
##  Mean   :  123768   Mean   :   832987   Mean   :   118864   Mean   :   954584  
##  3rd Qu.:   89245   3rd Qu.:   313963   3rd Qu.:    22197   3rd Qu.:   329539  
##  Max.   :41902893   Max.   :569910503   Max.   :117125387   Max.   :687035890  
##  NA's   :3          NA's   :3           NA's   :3           NA's   :3          
##     POP_GDP           GDP_CAPITA       GVA_MAIN          MUN_EXPENDIT      
##  Min.   :     815   Min.   :  3191   Length:5573        Min.   :1.421e+06  
##  1st Qu.:    5483   1st Qu.:  9058   Class :character   1st Qu.:1.573e+07  
##  Median :   11578   Median : 15870   Mode  :character   Median :2.746e+07  
##  Mean   :   36998   Mean   : 21126                      Mean   :1.043e+08  
##  3rd Qu.:   25085   3rd Qu.: 26155                      3rd Qu.:5.666e+07  
##  Max.   :12038175   Max.   :314638                      Max.   :4.577e+10  
##  NA's   :3          NA's   :3                           NA's   :1492       
##     COMP_TOT            COMP_A            COMP_B            COMP_C        
##  Min.   :     6.0   Min.   :   0.00   Min.   :  0.000   Min.   :    0.00  
##  1st Qu.:    68.0   1st Qu.:   1.00   1st Qu.:  0.000   1st Qu.:    3.00  
##  Median :   162.0   Median :   2.00   Median :  0.000   Median :   11.00  
##  Mean   :   906.8   Mean   :  18.25   Mean   :  1.852   Mean   :   73.44  
##  3rd Qu.:   448.0   3rd Qu.:   8.00   3rd Qu.:  2.000   3rd Qu.:   39.00  
##  Max.   :530446.0   Max.   :1948.00   Max.   :274.000   Max.   :31566.00  
##  NA's   :3          NA's   :3         NA's   :3         NA's   :3         
##      COMP_D             COMP_E            COMP_F             COMP_G        
##  Min.   :  0.0000   Min.   :  0.000   Min.   :    0.00   Min.   :     1.0  
##  1st Qu.:  0.0000   1st Qu.:  0.000   1st Qu.:    1.00   1st Qu.:    32.0  
##  Median :  0.0000   Median :  0.000   Median :    4.00   Median :    74.5  
##  Mean   :  0.4262   Mean   :  2.029   Mean   :   43.26   Mean   :   348.0  
##  3rd Qu.:  0.0000   3rd Qu.:  1.000   3rd Qu.:   15.00   3rd Qu.:   199.0  
##  Max.   :332.0000   Max.   :657.000   Max.   :25222.00   Max.   :150633.0  
##  NA's   :3          NA's   :3         NA's   :3          NA's   :3         
##      COMP_H          COMP_I             COMP_J             COMP_K        
##  Min.   :    0   Min.   :    0.00   Min.   :    0.00   Min.   :    0.00  
##  1st Qu.:    1   1st Qu.:    2.00   1st Qu.:    0.00   1st Qu.:    0.00  
##  Median :    7   Median :    7.00   Median :    1.00   Median :    0.00  
##  Mean   :   41   Mean   :   55.88   Mean   :   24.74   Mean   :   15.55  
##  3rd Qu.:   25   3rd Qu.:   24.00   3rd Qu.:    5.00   3rd Qu.:    2.00  
##  Max.   :19515   Max.   :29290.00   Max.   :38720.00   Max.   :23738.00  
##  NA's   :3       NA's   :3          NA's   :3          NA's   :3         
##      COMP_L             COMP_M             COMP_N            COMP_O       
##  Min.   :    0.00   Min.   :    0.00   Min.   :    0.0   Min.   :  0.000  
##  1st Qu.:    0.00   1st Qu.:    1.00   1st Qu.:    1.0   1st Qu.:  2.000  
##  Median :    0.00   Median :    4.00   Median :    4.0   Median :  2.000  
##  Mean   :   15.14   Mean   :   51.29   Mean   :   83.7   Mean   :  3.269  
##  3rd Qu.:    3.00   3rd Qu.:   13.00   3rd Qu.:   14.0   3rd Qu.:  3.000  
##  Max.   :14003.00   Max.   :49181.00   Max.   :76757.0   Max.   :204.000  
##  NA's   :3          NA's   :3          NA's   :3         NA's   :3        
##      COMP_P             COMP_Q             COMP_R            COMP_S        
##  Min.   :    0.00   Min.   :    0.00   Min.   :   0.00   Min.   :    0.00  
##  1st Qu.:    2.00   1st Qu.:    1.00   1st Qu.:   0.00   1st Qu.:    5.00  
##  Median :    6.00   Median :    3.00   Median :   2.00   Median :   12.00  
##  Mean   :   30.96   Mean   :   34.15   Mean   :  12.18   Mean   :   51.61  
##  3rd Qu.:   17.00   3rd Qu.:   12.00   3rd Qu.:   6.00   3rd Qu.:   31.00  
##  Max.   :16030.00   Max.   :22248.00   Max.   :6687.00   Max.   :24832.00  
##  NA's   :3          NA's   :3          NA's   :3         NA's   :3         
##      COMP_T      COMP_U              HOTELS            BEDS        
##  Min.   :0   Min.   :  0.00000   Min.   : 1.000   Min.   :    2.0  
##  1st Qu.:0   1st Qu.:  0.00000   1st Qu.: 1.000   1st Qu.:   40.0  
##  Median :0   Median :  0.00000   Median : 1.000   Median :   82.0  
##  Mean   :0   Mean   :  0.05027   Mean   : 3.131   Mean   :  257.5  
##  3rd Qu.:0   3rd Qu.:  0.00000   3rd Qu.: 3.000   3rd Qu.:  200.0  
##  Max.   :0   Max.   :123.00000   Max.   :97.000   Max.   :13247.0  
##  NA's   :3   NA's   :3           NA's   :4686     NA's   :4686     
##   Pr_Agencies        Pu_Agencies         Pr_Bank          Pu_Bank    
##  Min.   :   0.000   Min.   :  0.000   Min.   : 0.000   Min.   :0.00  
##  1st Qu.:   0.000   1st Qu.:  1.000   1st Qu.: 0.000   1st Qu.:1.00  
##  Median :   1.000   Median :  2.000   Median : 1.000   Median :2.00  
##  Mean   :   3.383   Mean   :  2.829   Mean   : 1.312   Mean   :1.58  
##  3rd Qu.:   2.000   3rd Qu.:  2.000   3rd Qu.: 2.000   3rd Qu.:2.00  
##  Max.   :1693.000   Max.   :626.000   Max.   :83.000   Max.   :8.00  
##  NA's   :2231       NA's   :2231      NA's   :2231     NA's   :2231  
##    Pr_Assets           Pu_Assets              Cars          Motorcycles     
##  Min.   :0.000e+00   Min.   :0.000e+00   Min.   :      2   Min.   :      4  
##  1st Qu.:0.000e+00   1st Qu.:4.047e+07   1st Qu.:    602   1st Qu.:    591  
##  Median :3.231e+07   Median :1.339e+08   Median :   1438   Median :   1285  
##  Mean   :9.180e+09   Mean   :6.005e+09   Mean   :   9859   Mean   :   4879  
##  3rd Qu.:1.148e+08   3rd Qu.:4.970e+08   3rd Qu.:   4086   3rd Qu.:   3294  
##  Max.   :1.947e+13   Max.   :8.016e+12   Max.   :5740995   Max.   :1134570  
##  NA's   :2231        NA's   :2231        NA's   :11        NA's   :11       
##  Wheeled_tractor         UBER           MAC             WAL-MART     
##  Min.   :   0.000   Min.   :1      Min.   :  1.000   Min.   : 1.000  
##  1st Qu.:   0.000   1st Qu.:1      1st Qu.:  1.000   1st Qu.: 1.000  
##  Median :   0.000   Median :1      Median :  2.000   Median : 1.000  
##  Mean   :   5.754   Mean   :1      Mean   :  4.277   Mean   : 2.059  
##  3rd Qu.:   1.000   3rd Qu.:1      3rd Qu.:  3.000   3rd Qu.: 1.750  
##  Max.   :3236.000   Max.   :1      Max.   :130.000   Max.   :26.000  
##  NA's   :11         NA's   :5448   NA's   :5407      NA's   :5471    
##   POST_OFFICES    
##  Min.   :  1.000  
##  1st Qu.:  1.000  
##  Median :  1.000  
##  Mean   :  2.081  
##  3rd Qu.:  2.000  
##  Max.   :225.000  
##  NA's   :120

We can see that there are 3 NA values for GDP_CAPITA, which is crucial for our analysis. Therefore, we proceed to drop them.

# Keep only the rows that have a GDP_CAPITA value, then re-check the summary.
cities <- filter(cities, !is.na(GDP_CAPITA))
summary(cities)
##      CITY              STATE              CAPITAL          IBGE_RES_POP     
##  Length:5570        Length:5570        Min.   :0.000000   Min.   :     805  
##  Class :character   Class :character   1st Qu.:0.000000   1st Qu.:    5235  
##  Mode  :character   Mode  :character   Median :0.000000   Median :   10934  
##                                        Mean   :0.004847   Mean   :   34278  
##                                        3rd Qu.:0.000000   3rd Qu.:   23424  
##                                        Max.   :1.000000   Max.   :11253503  
##                                                           NA's   :5         
##  IBGE_RES_POP_BRAS  IBGE_RES_POP_ESTR     IBGE_DU        IBGE_DU_URBAN    
##  Min.   :     805   Min.   :     0.0   Min.   :    239   Min.   :     60  
##  1st Qu.:    5230   1st Qu.:     0.0   1st Qu.:   1572   1st Qu.:    874  
##  Median :   10926   Median :     0.0   Median :   3174   Median :   1846  
##  Mean   :   34200   Mean   :    77.5   Mean   :  10303   Mean   :   8859  
##  3rd Qu.:   23390   3rd Qu.:    10.0   3rd Qu.:   6726   3rd Qu.:   4624  
##  Max.   :11133776   Max.   :119727.0   Max.   :3576148   Max.   :3548433  
##  NA's   :5          NA's   :5          NA's   :7         NA's   :7        
##  IBGE_DU_RURAL      IBGE_POP            IBGE_1            IBGE_1-4     
##  Min.   :    3   Min.   :     174   Min.   :     0.0   Min.   :     5  
##  1st Qu.:  487   1st Qu.:    2801   1st Qu.:    38.0   1st Qu.:   158  
##  Median :  931   Median :    6170   Median :    92.0   Median :   376  
##  Mean   : 1463   Mean   :   27595   Mean   :   383.3   Mean   :  1544  
##  3rd Qu.: 1832   3rd Qu.:   15302   3rd Qu.:   232.0   3rd Qu.:   951  
##  Max.   :33809   Max.   :10463636   Max.   :129464.0   Max.   :514794  
##  NA's   :78      NA's   :5          NA's   :5          NA's   :5       
##     IBGE_5-9        IBGE_10-14       IBGE_15-59         IBGE_60+      
##  Min.   :     7   Min.   :    12   Min.   :     94   Min.   :     29  
##  1st Qu.:   220   1st Qu.:   259   1st Qu.:   1734   1st Qu.:    341  
##  Median :   516   Median :   588   Median :   3841   Median :    722  
##  Mean   :  2069   Mean   :  2381   Mean   :  18212   Mean   :   3004  
##  3rd Qu.:  1300   3rd Qu.:  1478   3rd Qu.:   9628   3rd Qu.:   1724  
##  Max.   :684443   Max.   :783702   Max.   :7058221   Max.   :1293012  
##  NA's   :5        NA's   :5        NA's   :5         NA's   :5        
##  IBGE_PLANTED_AREA   IBGE_CROP_PRODUCTION_$ IDHM Ranking 2010      IDHM       
##  Min.   :      0.0   Min.   :      0        Min.   :   1      Min.   :0.4180  
##  1st Qu.:    910.2   1st Qu.:   2326        1st Qu.:1392      1st Qu.:0.5990  
##  Median :   3471.5   Median :  13846        Median :2782      Median :0.6650  
##  Mean   :  14179.9   Mean   :  57384        Mean   :2783      Mean   :0.6592  
##  3rd Qu.:  11194.2   3rd Qu.:  55619        3rd Qu.:4173      3rd Qu.:0.7180  
##  Max.   :1205669.0   Max.   :3274885        Max.   :5565      Max.   :0.8620  
##                                             NA's   :6         NA's   :6       
##    IDHM_Renda     IDHM_Longevidade IDHM_Educacao         LONG       
##  Min.   :0.4000   Min.   :0.6720   Min.   :0.2070   Min.   :-72.92  
##  1st Qu.:0.5720   1st Qu.:0.7690   1st Qu.:0.4900   1st Qu.:-50.87  
##  Median :0.6540   Median :0.8080   Median :0.5600   Median :-46.52  
##  Mean   :0.6429   Mean   :0.8016   Mean   :0.5591   Mean   :-46.23  
##  3rd Qu.:0.7070   3rd Qu.:0.8360   3rd Qu.:0.6310   3rd Qu.:-41.41  
##  Max.   :0.8910   Max.   :0.8940   Max.   :0.8250   Max.   :-32.44  
##  NA's   :6        NA's   :6        NA's   :6        NA's   :7       
##       LAT               ALT               PAY_TV         FIXED_PHONES    
##  Min.   :-33.688   Min.   :     0.0   Min.   :      1   Min.   :      3  
##  1st Qu.:-22.838   1st Qu.:   169.7   1st Qu.:     88   1st Qu.:    119  
##  Median :-18.090   Median :   406.5   Median :    247   Median :    327  
##  Mean   :-16.445   Mean   :   894.0   Mean   :   3095   Mean   :   6568  
##  3rd Qu.: -8.489   3rd Qu.:   629.0   3rd Qu.:    815   3rd Qu.:   1151  
##  Max.   :  4.585   Max.   :874579.0   Max.   :2047668   Max.   :5543127  
##  NA's   :7         NA's   :7          NA's   :1         NA's   :1        
##       AREA            REGIAO_TUR        CATEGORIA_TUR      ESTIMATED_POP     
##  Min.   :     3.57   Length:5570        Length:5570        Min.   :     786  
##  1st Qu.:   204.43   Class :character   Class :character   1st Qu.:    5454  
##  Median :   415.92   Mode  :character   Mode  :character   Median :   11591  
##  Mean   :  1515.89                                         Mean   :   37437  
##  3rd Qu.:  1026.38                                         3rd Qu.:   25297  
##  Max.   :159533.33                                         Max.   :12176866  
##  NA's   :1                                                 NA's   :1         
##  RURAL_URBAN         GVA_AGROPEC       GVA_INDUSTRY       GVA_SERVICES      
##  Length:5570        Min.   :      0   Min.   :       1   Min.   :        2  
##  Class :character   1st Qu.:   4189   1st Qu.:    1726   1st Qu.:    10112  
##  Mode  :character   Median :  20426   Median :    7424   Median :    31211  
##                     Mean   :  47271   Mean   :  175928   Mean   :   489451  
##                     3rd Qu.:  51227   3rd Qu.:   41022   3rd Qu.:   115406  
##                     Max.   :1402282   Max.   :63306755   Max.   :464656988  
##                                                                             
##    GVA_PUBLIC         GVA_TOTAL             TAXES                GDP           
##  Min.   :       7   Min.   :       17   Min.   :   -14159   Min.   :       15  
##  1st Qu.:   17267   1st Qu.:    42253   1st Qu.:     1305   1st Qu.:    43709  
##  Median :   35866   Median :   119492   Median :     5100   Median :   125153  
##  Mean   :  123768   Mean   :   832987   Mean   :   118864   Mean   :   954584  
##  3rd Qu.:   89245   3rd Qu.:   313963   3rd Qu.:    22197   3rd Qu.:   329539  
##  Max.   :41902893   Max.   :569910503   Max.   :117125387   Max.   :687035890  
##                                                                                
##     POP_GDP           GDP_CAPITA       GVA_MAIN          MUN_EXPENDIT      
##  Min.   :     815   Min.   :  3191   Length:5570        Min.   :1.421e+06  
##  1st Qu.:    5483   1st Qu.:  9058   Class :character   1st Qu.:1.573e+07  
##  Median :   11578   Median : 15870   Mode  :character   Median :2.746e+07  
##  Mean   :   36998   Mean   : 21126                      Mean   :1.043e+08  
##  3rd Qu.:   25085   3rd Qu.: 26155                      3rd Qu.:5.672e+07  
##  Max.   :12038175   Max.   :314638                      Max.   :4.577e+10  
##                                                         NA's   :1491       
##     COMP_TOT            COMP_A            COMP_B            COMP_C        
##  Min.   :     6.0   Min.   :   0.00   Min.   :  0.000   Min.   :    0.00  
##  1st Qu.:    68.0   1st Qu.:   1.00   1st Qu.:  0.000   1st Qu.:    3.00  
##  Median :   162.0   Median :   2.00   Median :  0.000   Median :   11.00  
##  Mean   :   906.8   Mean   :  18.25   Mean   :  1.852   Mean   :   73.44  
##  3rd Qu.:   448.0   3rd Qu.:   8.00   3rd Qu.:  2.000   3rd Qu.:   39.00  
##  Max.   :530446.0   Max.   :1948.00   Max.   :274.000   Max.   :31566.00  
##                                                                           
##      COMP_D             COMP_E            COMP_F             COMP_G        
##  Min.   :  0.0000   Min.   :  0.000   Min.   :    0.00   Min.   :     1.0  
##  1st Qu.:  0.0000   1st Qu.:  0.000   1st Qu.:    1.00   1st Qu.:    32.0  
##  Median :  0.0000   Median :  0.000   Median :    4.00   Median :    74.5  
##  Mean   :  0.4262   Mean   :  2.029   Mean   :   43.26   Mean   :   348.0  
##  3rd Qu.:  0.0000   3rd Qu.:  1.000   3rd Qu.:   15.00   3rd Qu.:   199.0  
##  Max.   :332.0000   Max.   :657.000   Max.   :25222.00   Max.   :150633.0  
##                                                                            
##      COMP_H          COMP_I             COMP_J             COMP_K        
##  Min.   :    0   Min.   :    0.00   Min.   :    0.00   Min.   :    0.00  
##  1st Qu.:    1   1st Qu.:    2.00   1st Qu.:    0.00   1st Qu.:    0.00  
##  Median :    7   Median :    7.00   Median :    1.00   Median :    0.00  
##  Mean   :   41   Mean   :   55.88   Mean   :   24.74   Mean   :   15.55  
##  3rd Qu.:   25   3rd Qu.:   24.00   3rd Qu.:    5.00   3rd Qu.:    2.00  
##  Max.   :19515   Max.   :29290.00   Max.   :38720.00   Max.   :23738.00  
##                                                                          
##      COMP_L             COMP_M             COMP_N            COMP_O       
##  Min.   :    0.00   Min.   :    0.00   Min.   :    0.0   Min.   :  0.000  
##  1st Qu.:    0.00   1st Qu.:    1.00   1st Qu.:    1.0   1st Qu.:  2.000  
##  Median :    0.00   Median :    4.00   Median :    4.0   Median :  2.000  
##  Mean   :   15.14   Mean   :   51.29   Mean   :   83.7   Mean   :  3.269  
##  3rd Qu.:    3.00   3rd Qu.:   13.00   3rd Qu.:   14.0   3rd Qu.:  3.000  
##  Max.   :14003.00   Max.   :49181.00   Max.   :76757.0   Max.   :204.000  
##                                                                           
##      COMP_P             COMP_Q             COMP_R            COMP_S        
##  Min.   :    0.00   Min.   :    0.00   Min.   :   0.00   Min.   :    0.00  
##  1st Qu.:    2.00   1st Qu.:    1.00   1st Qu.:   0.00   1st Qu.:    5.00  
##  Median :    6.00   Median :    3.00   Median :   2.00   Median :   12.00  
##  Mean   :   30.96   Mean   :   34.15   Mean   :  12.18   Mean   :   51.61  
##  3rd Qu.:   17.00   3rd Qu.:   12.00   3rd Qu.:   6.00   3rd Qu.:   31.00  
##  Max.   :16030.00   Max.   :22248.00   Max.   :6687.00   Max.   :24832.00  
##                                                                            
##      COMP_T      COMP_U              HOTELS            BEDS        
##  Min.   :0   Min.   :  0.00000   Min.   : 1.000   Min.   :    2.0  
##  1st Qu.:0   1st Qu.:  0.00000   1st Qu.: 1.000   1st Qu.:   40.0  
##  Median :0   Median :  0.00000   Median : 1.000   Median :   82.0  
##  Mean   :0   Mean   :  0.05027   Mean   : 3.131   Mean   :  257.5  
##  3rd Qu.:0   3rd Qu.:  0.00000   3rd Qu.: 3.000   3rd Qu.:  200.0  
##  Max.   :0   Max.   :123.00000   Max.   :97.000   Max.   :13247.0  
##                                  NA's   :4683     NA's   :4683     
##   Pr_Agencies        Pu_Agencies         Pr_Bank          Pu_Bank    
##  Min.   :   0.000   Min.   :  0.000   Min.   : 0.000   Min.   :0.00  
##  1st Qu.:   0.000   1st Qu.:  1.000   1st Qu.: 0.000   1st Qu.:1.00  
##  Median :   1.000   Median :  2.000   Median : 1.000   Median :2.00  
##  Mean   :   3.383   Mean   :  2.829   Mean   : 1.312   Mean   :1.58  
##  3rd Qu.:   2.000   3rd Qu.:  2.000   3rd Qu.: 2.000   3rd Qu.:2.00  
##  Max.   :1693.000   Max.   :626.000   Max.   :83.000   Max.   :8.00  
##  NA's   :2228       NA's   :2228      NA's   :2228     NA's   :2228  
##    Pr_Assets           Pu_Assets              Cars          Motorcycles     
##  Min.   :0.000e+00   Min.   :0.000e+00   Min.   :      2   Min.   :      4  
##  1st Qu.:0.000e+00   1st Qu.:4.047e+07   1st Qu.:    602   1st Qu.:    591  
##  Median :3.231e+07   Median :1.339e+08   Median :   1440   Median :   1285  
##  Mean   :9.180e+09   Mean   :6.005e+09   Mean   :   9861   Mean   :   4879  
##  3rd Qu.:1.148e+08   3rd Qu.:4.970e+08   3rd Qu.:   4086   3rd Qu.:   3295  
##  Max.   :1.947e+13   Max.   :8.016e+12   Max.   :5740995   Max.   :1134570  
##  NA's   :2228        NA's   :2228        NA's   :9         NA's   :9        
##  Wheeled_tractor         UBER           MAC             WAL-MART     
##  Min.   :   0.000   Min.   :1      Min.   :  1.000   Min.   : 1.000  
##  1st Qu.:   0.000   1st Qu.:1      1st Qu.:  1.000   1st Qu.: 1.000  
##  Median :   0.000   Median :1      Median :  2.000   Median : 1.000  
##  Mean   :   5.755   Mean   :1      Mean   :  4.277   Mean   : 2.059  
##  3rd Qu.:   1.000   3rd Qu.:1      3rd Qu.:  3.000   3rd Qu.: 1.750  
##  Max.   :3236.000   Max.   :1      Max.   :130.000   Max.   :26.000  
##  NA's   :9          NA's   :5445   NA's   :5404      NA's   :5468    
##   POST_OFFICES    
##  Min.   :  1.000  
##  1st Qu.:  1.000  
##  Median :  1.000  
##  Mean   :  2.081  
##  3rd Qu.:  2.000  
##  Max.   :225.000  
##  NA's   :117

Next, we can see that there are 7 rows without longitude/latitude (LONG/LAT) data. We proceed to look at the city names of these occurrences.

# Show the rows whose LONG value is missing.
cities[is.na(cities$LONG), ]
## # A tibble: 7 x 81
##   CITY  STATE CAPITAL IBGE_RES_POP IBGE_RES_POP_BR… IBGE_RES_POP_ES… IBGE_DU
##   <chr> <chr>   <dbl>        <dbl>            <dbl>            <dbl>   <dbl>
## 1 Baln… SC          0           NA               NA               NA      NA
## 2 Moju… PA          0           NA               NA               NA      NA
## 3 Para… MS          0           NA               NA               NA      NA
## 4 Pesc… SC          0           NA               NA               NA      NA
## 5 Pinh… RS          0         2130             2130                0     745
## 6 Pint… RS          0           NA               NA               NA      NA
## 7 Sant… BA          0         9648             9648                0    2891
## # … with 74 more variables: IBGE_DU_URBAN <dbl>, IBGE_DU_RURAL <dbl>,
## #   IBGE_POP <dbl>, IBGE_1 <dbl>, `IBGE_1-4` <dbl>, `IBGE_5-9` <dbl>,
## #   `IBGE_10-14` <dbl>, `IBGE_15-59` <dbl>, `IBGE_60+` <dbl>,
## #   IBGE_PLANTED_AREA <dbl>, `IBGE_CROP_PRODUCTION_$` <dbl>, `IDHM Ranking
## #   2010` <dbl>, IDHM <dbl>, IDHM_Renda <dbl>, IDHM_Longevidade <dbl>,
## #   IDHM_Educacao <dbl>, LONG <dbl>, LAT <dbl>, ALT <dbl>, PAY_TV <dbl>,
## #   FIXED_PHONES <dbl>, AREA <dbl>, REGIAO_TUR <chr>, CATEGORIA_TUR <chr>,
## #   ESTIMATED_POP <dbl>, RURAL_URBAN <chr>, GVA_AGROPEC <dbl>,
## #   GVA_INDUSTRY <dbl>, GVA_SERVICES <dbl>, GVA_PUBLIC <dbl>, ` GVA_TOTAL
## #   ` <dbl>, TAXES <dbl>, GDP <dbl>, POP_GDP <dbl>, GDP_CAPITA <dbl>,
## #   GVA_MAIN <chr>, MUN_EXPENDIT <dbl>, COMP_TOT <dbl>, COMP_A <dbl>,
## #   COMP_B <dbl>, COMP_C <dbl>, COMP_D <dbl>, COMP_E <dbl>, COMP_F <dbl>,
## #   COMP_G <dbl>, COMP_H <dbl>, COMP_I <dbl>, COMP_J <dbl>, COMP_K <dbl>,
## #   COMP_L <dbl>, COMP_M <dbl>, COMP_N <dbl>, COMP_O <dbl>, COMP_P <dbl>,
## #   COMP_Q <dbl>, COMP_R <dbl>, COMP_S <dbl>, COMP_T <dbl>, COMP_U <dbl>,
## #   HOTELS <dbl>, BEDS <dbl>, Pr_Agencies <dbl>, Pu_Agencies <dbl>,
## #   Pr_Bank <dbl>, Pu_Bank <dbl>, Pr_Assets <dbl>, Pu_Assets <dbl>, Cars <dbl>,
## #   Motorcycles <dbl>, Wheeled_tractor <dbl>, UBER <dbl>, MAC <dbl>,
## #   `WAL-MART` <dbl>, POST_OFFICES <dbl>

Then, using the city names, I am able to obtain the LONGLAT data from https://www.latlong.net/, and then manually fill in the coordinates:

# Manually geocoded coordinates (looked up on https://www.latlong.net/) for
# the seven municipalities listed above.
#
# Rows are matched on CITY *and* STATE: city names are not unique across
# Brazilian states, so matching on CITY alone overwrites the coordinates of
# every same-named municipality, not only the one being repaired.
# NOTE(review): the Santa Terezinha coordinates (-50.51, -10.48) appear to lie
# outside Bahia, the state of the row flagged above -- TODO confirm the lookup.
manual_coords <- data.frame(
  CITY = c(
    "Balneário Rincão", "Mojuí Dos Campos", "Paraíso Das Águas",
    "Pescaria Brava", "Pinhal Da Serra", "Pinto Bandeira", "Santa Terezinha"
  ),
  STATE = c("SC", "PA", "MS", "SC", "RS", "RS", "BA"),
  LONG = c(
    -49.221458, -54.643299, -53.012230, -48.886219,
    -51.170410, -51.450291, -50.510399
  ),
  LAT = c(
    -28.819550, -2.682650, -19.023310, -28.396391,
    -27.872141, -29.099859, -10.475950
  ),
  stringsAsFactors = FALSE
)

for (i in seq_len(nrow(manual_coords))) {
  # which() drops NA comparisons so they can never be used as an index.
  target_rows <- which(
    cities$CITY == manual_coords$CITY[i] &
      cities$STATE == manual_coords$STATE[i]
  )
  cities$LONG[target_rows] <- manual_coords$LONG[i]
  cities$LAT[target_rows] <- manual_coords$LAT[i]
}

Checking again:

# Print per-column summaries of `cities`; the NA's rows show which columns
# still contain missing values after the manual coordinate fill.
summary(cities)
##      CITY              STATE              CAPITAL          IBGE_RES_POP     
##  Length:5570        Length:5570        Min.   :0.000000   Min.   :     805  
##  Class :character   Class :character   1st Qu.:0.000000   1st Qu.:    5235  
##  Mode  :character   Mode  :character   Median :0.000000   Median :   10934  
##                                        Mean   :0.004847   Mean   :   34278  
##                                        3rd Qu.:0.000000   3rd Qu.:   23424  
##                                        Max.   :1.000000   Max.   :11253503  
##                                                           NA's   :5         
##  IBGE_RES_POP_BRAS  IBGE_RES_POP_ESTR     IBGE_DU        IBGE_DU_URBAN    
##  Min.   :     805   Min.   :     0.0   Min.   :    239   Min.   :     60  
##  1st Qu.:    5230   1st Qu.:     0.0   1st Qu.:   1572   1st Qu.:    874  
##  Median :   10926   Median :     0.0   Median :   3174   Median :   1846  
##  Mean   :   34200   Mean   :    77.5   Mean   :  10303   Mean   :   8859  
##  3rd Qu.:   23390   3rd Qu.:    10.0   3rd Qu.:   6726   3rd Qu.:   4624  
##  Max.   :11133776   Max.   :119727.0   Max.   :3576148   Max.   :3548433  
##  NA's   :5          NA's   :5          NA's   :7         NA's   :7        
##  IBGE_DU_RURAL      IBGE_POP            IBGE_1            IBGE_1-4     
##  Min.   :    3   Min.   :     174   Min.   :     0.0   Min.   :     5  
##  1st Qu.:  487   1st Qu.:    2801   1st Qu.:    38.0   1st Qu.:   158  
##  Median :  931   Median :    6170   Median :    92.0   Median :   376  
##  Mean   : 1463   Mean   :   27595   Mean   :   383.3   Mean   :  1544  
##  3rd Qu.: 1832   3rd Qu.:   15302   3rd Qu.:   232.0   3rd Qu.:   951  
##  Max.   :33809   Max.   :10463636   Max.   :129464.0   Max.   :514794  
##  NA's   :78      NA's   :5          NA's   :5          NA's   :5       
##     IBGE_5-9        IBGE_10-14       IBGE_15-59         IBGE_60+      
##  Min.   :     7   Min.   :    12   Min.   :     94   Min.   :     29  
##  1st Qu.:   220   1st Qu.:   259   1st Qu.:   1734   1st Qu.:    341  
##  Median :   516   Median :   588   Median :   3841   Median :    722  
##  Mean   :  2069   Mean   :  2381   Mean   :  18212   Mean   :   3004  
##  3rd Qu.:  1300   3rd Qu.:  1478   3rd Qu.:   9628   3rd Qu.:   1724  
##  Max.   :684443   Max.   :783702   Max.   :7058221   Max.   :1293012  
##  NA's   :5        NA's   :5        NA's   :5         NA's   :5        
##  IBGE_PLANTED_AREA   IBGE_CROP_PRODUCTION_$ IDHM Ranking 2010      IDHM       
##  Min.   :      0.0   Min.   :      0        Min.   :   1      Min.   :0.4180  
##  1st Qu.:    910.2   1st Qu.:   2326        1st Qu.:1392      1st Qu.:0.5990  
##  Median :   3471.5   Median :  13846        Median :2782      Median :0.6650  
##  Mean   :  14179.9   Mean   :  57384        Mean   :2783      Mean   :0.6592  
##  3rd Qu.:  11194.2   3rd Qu.:  55619        3rd Qu.:4173      3rd Qu.:0.7180  
##  Max.   :1205669.0   Max.   :3274885        Max.   :5565      Max.   :0.8620  
##                                             NA's   :6         NA's   :6       
##    IDHM_Renda     IDHM_Longevidade IDHM_Educacao         LONG       
##  Min.   :0.4000   Min.   :0.6720   Min.   :0.2070   Min.   :-72.92  
##  1st Qu.:0.5720   1st Qu.:0.7690   1st Qu.:0.4900   1st Qu.:-50.87  
##  Median :0.6540   Median :0.8080   Median :0.5600   Median :-46.53  
##  Mean   :0.6429   Mean   :0.8016   Mean   :0.5591   Mean   :-46.24  
##  3rd Qu.:0.7070   3rd Qu.:0.8360   3rd Qu.:0.6310   3rd Qu.:-41.42  
##  Max.   :0.8910   Max.   :0.8940   Max.   :0.8250   Max.   :-32.44  
##  NA's   :6        NA's   :6        NA's   :6                        
##       LAT               ALT               PAY_TV         FIXED_PHONES    
##  Min.   :-33.688   Min.   :     0.0   Min.   :      1   Min.   :      3  
##  1st Qu.:-22.840   1st Qu.:   169.7   1st Qu.:     88   1st Qu.:    119  
##  Median :-18.091   Median :   406.5   Median :    247   Median :    327  
##  Mean   :-16.448   Mean   :   894.0   Mean   :   3095   Mean   :   6568  
##  3rd Qu.: -8.497   3rd Qu.:   629.0   3rd Qu.:    815   3rd Qu.:   1151  
##  Max.   :  4.585   Max.   :874579.0   Max.   :2047668   Max.   :5543127  
##                    NA's   :7          NA's   :1         NA's   :1        
##       AREA            REGIAO_TUR        CATEGORIA_TUR      ESTIMATED_POP     
##  Min.   :     3.57   Length:5570        Length:5570        Min.   :     786  
##  1st Qu.:   204.43   Class :character   Class :character   1st Qu.:    5454  
##  Median :   415.92   Mode  :character   Mode  :character   Median :   11591  
##  Mean   :  1515.89                                         Mean   :   37437  
##  3rd Qu.:  1026.38                                         3rd Qu.:   25297  
##  Max.   :159533.33                                         Max.   :12176866  
##  NA's   :1                                                 NA's   :1         
##  RURAL_URBAN         GVA_AGROPEC       GVA_INDUSTRY       GVA_SERVICES      
##  Length:5570        Min.   :      0   Min.   :       1   Min.   :        2  
##  Class :character   1st Qu.:   4189   1st Qu.:    1726   1st Qu.:    10112  
##  Mode  :character   Median :  20426   Median :    7424   Median :    31211  
##                     Mean   :  47271   Mean   :  175928   Mean   :   489451  
##                     3rd Qu.:  51227   3rd Qu.:   41022   3rd Qu.:   115406  
##                     Max.   :1402282   Max.   :63306755   Max.   :464656988  
##                                                                             
##    GVA_PUBLIC         GVA_TOTAL             TAXES                GDP           
##  Min.   :       7   Min.   :       17   Min.   :   -14159   Min.   :       15  
##  1st Qu.:   17267   1st Qu.:    42253   1st Qu.:     1305   1st Qu.:    43709  
##  Median :   35866   Median :   119492   Median :     5100   Median :   125153  
##  Mean   :  123768   Mean   :   832987   Mean   :   118864   Mean   :   954584  
##  3rd Qu.:   89245   3rd Qu.:   313963   3rd Qu.:    22197   3rd Qu.:   329539  
##  Max.   :41902893   Max.   :569910503   Max.   :117125387   Max.   :687035890  
##                                                                                
##     POP_GDP           GDP_CAPITA       GVA_MAIN          MUN_EXPENDIT      
##  Min.   :     815   Min.   :  3191   Length:5570        Min.   :1.421e+06  
##  1st Qu.:    5483   1st Qu.:  9058   Class :character   1st Qu.:1.573e+07  
##  Median :   11578   Median : 15870   Mode  :character   Median :2.746e+07  
##  Mean   :   36998   Mean   : 21126                      Mean   :1.043e+08  
##  3rd Qu.:   25085   3rd Qu.: 26155                      3rd Qu.:5.672e+07  
##  Max.   :12038175   Max.   :314638                      Max.   :4.577e+10  
##                                                         NA's   :1491       
##     COMP_TOT            COMP_A            COMP_B            COMP_C        
##  Min.   :     6.0   Min.   :   0.00   Min.   :  0.000   Min.   :    0.00  
##  1st Qu.:    68.0   1st Qu.:   1.00   1st Qu.:  0.000   1st Qu.:    3.00  
##  Median :   162.0   Median :   2.00   Median :  0.000   Median :   11.00  
##  Mean   :   906.8   Mean   :  18.25   Mean   :  1.852   Mean   :   73.44  
##  3rd Qu.:   448.0   3rd Qu.:   8.00   3rd Qu.:  2.000   3rd Qu.:   39.00  
##  Max.   :530446.0   Max.   :1948.00   Max.   :274.000   Max.   :31566.00  
##                                                                           
##      COMP_D             COMP_E            COMP_F             COMP_G        
##  Min.   :  0.0000   Min.   :  0.000   Min.   :    0.00   Min.   :     1.0  
##  1st Qu.:  0.0000   1st Qu.:  0.000   1st Qu.:    1.00   1st Qu.:    32.0  
##  Median :  0.0000   Median :  0.000   Median :    4.00   Median :    74.5  
##  Mean   :  0.4262   Mean   :  2.029   Mean   :   43.26   Mean   :   348.0  
##  3rd Qu.:  0.0000   3rd Qu.:  1.000   3rd Qu.:   15.00   3rd Qu.:   199.0  
##  Max.   :332.0000   Max.   :657.000   Max.   :25222.00   Max.   :150633.0  
##                                                                            
##      COMP_H          COMP_I             COMP_J             COMP_K        
##  Min.   :    0   Min.   :    0.00   Min.   :    0.00   Min.   :    0.00  
##  1st Qu.:    1   1st Qu.:    2.00   1st Qu.:    0.00   1st Qu.:    0.00  
##  Median :    7   Median :    7.00   Median :    1.00   Median :    0.00  
##  Mean   :   41   Mean   :   55.88   Mean   :   24.74   Mean   :   15.55  
##  3rd Qu.:   25   3rd Qu.:   24.00   3rd Qu.:    5.00   3rd Qu.:    2.00  
##  Max.   :19515   Max.   :29290.00   Max.   :38720.00   Max.   :23738.00  
##                                                                          
##      COMP_L             COMP_M             COMP_N            COMP_O       
##  Min.   :    0.00   Min.   :    0.00   Min.   :    0.0   Min.   :  0.000  
##  1st Qu.:    0.00   1st Qu.:    1.00   1st Qu.:    1.0   1st Qu.:  2.000  
##  Median :    0.00   Median :    4.00   Median :    4.0   Median :  2.000  
##  Mean   :   15.14   Mean   :   51.29   Mean   :   83.7   Mean   :  3.269  
##  3rd Qu.:    3.00   3rd Qu.:   13.00   3rd Qu.:   14.0   3rd Qu.:  3.000  
##  Max.   :14003.00   Max.   :49181.00   Max.   :76757.0   Max.   :204.000  
##                                                                           
##      COMP_P             COMP_Q             COMP_R            COMP_S        
##  Min.   :    0.00   Min.   :    0.00   Min.   :   0.00   Min.   :    0.00  
##  1st Qu.:    2.00   1st Qu.:    1.00   1st Qu.:   0.00   1st Qu.:    5.00  
##  Median :    6.00   Median :    3.00   Median :   2.00   Median :   12.00  
##  Mean   :   30.96   Mean   :   34.15   Mean   :  12.18   Mean   :   51.61  
##  3rd Qu.:   17.00   3rd Qu.:   12.00   3rd Qu.:   6.00   3rd Qu.:   31.00  
##  Max.   :16030.00   Max.   :22248.00   Max.   :6687.00   Max.   :24832.00  
##                                                                            
##      COMP_T      COMP_U              HOTELS            BEDS        
##  Min.   :0   Min.   :  0.00000   Min.   : 1.000   Min.   :    2.0  
##  1st Qu.:0   1st Qu.:  0.00000   1st Qu.: 1.000   1st Qu.:   40.0  
##  Median :0   Median :  0.00000   Median : 1.000   Median :   82.0  
##  Mean   :0   Mean   :  0.05027   Mean   : 3.131   Mean   :  257.5  
##  3rd Qu.:0   3rd Qu.:  0.00000   3rd Qu.: 3.000   3rd Qu.:  200.0  
##  Max.   :0   Max.   :123.00000   Max.   :97.000   Max.   :13247.0  
##                                  NA's   :4683     NA's   :4683     
##   Pr_Agencies        Pu_Agencies         Pr_Bank          Pu_Bank    
##  Min.   :   0.000   Min.   :  0.000   Min.   : 0.000   Min.   :0.00  
##  1st Qu.:   0.000   1st Qu.:  1.000   1st Qu.: 0.000   1st Qu.:1.00  
##  Median :   1.000   Median :  2.000   Median : 1.000   Median :2.00  
##  Mean   :   3.383   Mean   :  2.829   Mean   : 1.312   Mean   :1.58  
##  3rd Qu.:   2.000   3rd Qu.:  2.000   3rd Qu.: 2.000   3rd Qu.:2.00  
##  Max.   :1693.000   Max.   :626.000   Max.   :83.000   Max.   :8.00  
##  NA's   :2228       NA's   :2228      NA's   :2228     NA's   :2228  
##    Pr_Assets           Pu_Assets              Cars          Motorcycles     
##  Min.   :0.000e+00   Min.   :0.000e+00   Min.   :      2   Min.   :      4  
##  1st Qu.:0.000e+00   1st Qu.:4.047e+07   1st Qu.:    602   1st Qu.:    591  
##  Median :3.231e+07   Median :1.339e+08   Median :   1440   Median :   1285  
##  Mean   :9.180e+09   Mean   :6.005e+09   Mean   :   9861   Mean   :   4879  
##  3rd Qu.:1.148e+08   3rd Qu.:4.970e+08   3rd Qu.:   4086   3rd Qu.:   3295  
##  Max.   :1.947e+13   Max.   :8.016e+12   Max.   :5740995   Max.   :1134570  
##  NA's   :2228        NA's   :2228        NA's   :9         NA's   :9        
##  Wheeled_tractor         UBER           MAC             WAL-MART     
##  Min.   :   0.000   Min.   :1      Min.   :  1.000   Min.   : 1.000  
##  1st Qu.:   0.000   1st Qu.:1      1st Qu.:  1.000   1st Qu.: 1.000  
##  Median :   0.000   Median :1      Median :  2.000   Median : 1.000  
##  Mean   :   5.755   Mean   :1      Mean   :  4.277   Mean   : 2.059  
##  3rd Qu.:   1.000   3rd Qu.:1      3rd Qu.:  3.000   3rd Qu.: 1.750  
##  Max.   :3236.000   Max.   :1      Max.   :130.000   Max.   :26.000  
##  NA's   :9          NA's   :5445   NA's   :5404      NA's   :5468    
##   POST_OFFICES    
##  Min.   :  1.000  
##  1st Qu.:  1.000  
##  Median :  1.000  
##  Mean   :  2.081  
##  3rd Qu.:  2.000  
##  Max.   :225.000  
##  NA's   :117

Now, we can see that all LONGLAT and GDPPC are valid.

3.3 Converting aspatial data frame into sf object

# Build point geometries from the LONG/LAT columns, declared as EPSG:4326,
# then reproject them to EPSG:4674 and print the resulting CRS.
cities_wgs84 <- st_as_sf(cities, coords = c("LONG", "LAT"), crs = 4326)
cities.sf <- st_transform(cities_wgs84, crs = 4674)
st_crs(cities.sf)
## Coordinate Reference System:
##   User input: EPSG:4674 
##   wkt:
## GEOGCS["SIRGAS 2000",
##     DATUM["Sistema_de_Referencia_Geocentrico_para_las_AmericaS_2000",
##         SPHEROID["GRS 1980",6378137,298.257222101,
##             AUTHORITY["EPSG","7019"]],
##         TOWGS84[0,0,0,0,0,0,0],
##         AUTHORITY["EPSG","6674"]],
##     PRIMEM["Greenwich",0,
##         AUTHORITY["EPSG","8901"]],
##     UNIT["degree",0.0174532925199433,
##         AUTHORITY["EPSG","9122"]],
##     AUTHORITY["EPSG","4674"]]

4 Choropleth map showing the distribution of GDP per capita at municipality level

4.1 Joining cities.sf data frame with municipality sf

# Spatially join the city points onto the municipality polygons (a polygon is
# matched to every point it contains), then repair invalid geometries.
muni_cities.sf <- muni %>%
  st_join(cities.sf, join = st_contains) %>%
  st_make_valid()
## although coordinates are longitude/latitude, st_contains assumes that they are planar

4.2 Plotting GDP per capita using Choropleth Map

# Quick thematic map of GDP_CAPITA per municipality polygon; borders = NULL
# suppresses the polygon outlines.
qtm(muni_cities.sf, "GDP_CAPITA", borders=NULL, scale = 0.4)

5 Exploratory Data Analysis

5.1 EDA using statistical graphics

# Histogram (20 bins) of raw GDP per capita.
muni_cities.sf %>%
  ggplot(aes(x = GDP_CAPITA)) +
  geom_histogram(bins = 20, color = "black", fill = "light blue")
## Warning: Removed 5 rows containing non-finite values (stat_bin).

The figure above reveals a right-skewed distribution: most municipalities have a relatively low GDP per capita (GDPPC), while a few have very high values. Statistically, a skewed distribution can be made more symmetric by applying a log transformation. The code chunk below derives a new variable called LOG_GDPPC by applying a log transformation to the variable GDP_CAPITA.

# Add the natural log of GDP per capita to cities.sf as LOG_GDPPC.
cities.sf <- mutate(cities.sf, LOG_GDPPC = log(GDP_CAPITA))

Plotting LOG_GDPPC:

# Histogram (20 bins) of the log-transformed GDP per capita.
cities.sf %>%
  ggplot(aes(x = LOG_GDPPC)) +
  geom_histogram(bins = 20, color = "black", fill = "light blue")

Notice that the distribution is relatively less skewed after the transformation.

5.2 Identifying possible factors affecting GDPPC:

I have identified the following nine variables that I feel might be relevant to the distribution of GDPPC in Brazil, which I will be using in my regression analysis: * IBGE_15-59: Resident Population Regular Urban Planning - from 15 to 59 y.o (economically active population) * IDHM: HDI Human Development Index * IDHM_Renda: HDI GNI Index * IDHM_Educacao: HDI Education index * IDHM_Longevidade: HDI Life Expectancy index * GVA_AGROPEC: Gross Added Value - Agropecuary * GVA_INDUSTRY: Gross Added Value - Industry * GVA_SERVICES: Gross Added Value - Services * GVA_PUBLIC: Gross Added Value - Public Services. In addition, I will use one derived variable, giving ten explanatory variables in total: * URBANIZATION_INDEX: URBANIZATION_INDEX = IBGE_DU_URBAN (Domestic Units Urban) / IBGE_DU (Domestic Units Total)

5.2.1 Deriving urbanization index:

# Share of urban domestic units among all domestic units.
muni_cities.sf <- mutate(
  muni_cities.sf,
  URBANIZATION_INDEX = IBGE_DU_URBAN / IBGE_DU
)

5.3 Getting rid of columns not needed for analysis:

# Keep only the identifier columns, the candidate explanatory variables and
# the response (GDP_CAPITA).
muni_cities_analysis.sf <- muni_cities.sf %>%
  select(
    NM_MUNICIP, CITY, STATE,
    `IBGE_15-59`,
    IDHM, IDHM_Renda, IDHM_Educacao, IDHM_Longevidade,
    GVA_AGROPEC, GVA_INDUSTRY, GVA_SERVICES, GVA_PUBLIC,
    URBANIZATION_INDEX,
    GDP_CAPITA
  )

5.4 Multiple Histogram Plots distribution of variables

# One 25-bin histogram per candidate explanatory variable. Every plot uses the
# same histogram styling, so the layer is produced by a small factory.
make_hist_layer <- function() {
  geom_histogram(bins = 25, color = "black", fill = "light blue")
}

Pop <- ggplot(muni_cities_analysis.sf, aes(x = `IBGE_15-59`)) +
  make_hist_layer()
HDI <- ggplot(muni_cities_analysis.sf, aes(x = IDHM)) +
  make_hist_layer()
GNI <- ggplot(muni_cities_analysis.sf, aes(x = IDHM_Renda)) +
  make_hist_layer()
Edu <- ggplot(muni_cities_analysis.sf, aes(x = IDHM_Educacao)) +
  make_hist_layer()
Life <- ggplot(muni_cities_analysis.sf, aes(x = IDHM_Longevidade)) +
  make_hist_layer()
Agr <- ggplot(muni_cities_analysis.sf, aes(x = GVA_AGROPEC)) +
  make_hist_layer()
Ind <- ggplot(muni_cities_analysis.sf, aes(x = GVA_INDUSTRY)) +
  make_hist_layer()
Ser <- ggplot(muni_cities_analysis.sf, aes(x = GVA_SERVICES)) +
  make_hist_layer()
Pub <- ggplot(muni_cities_analysis.sf, aes(x = GVA_PUBLIC)) +
  make_hist_layer()
Urb <- ggplot(muni_cities_analysis.sf, aes(x = URBANIZATION_INDEX)) +
  make_hist_layer()

Display plots:

# Lay the ten histograms out on a 3-column by 4-row grid.
ggarrange(
  Pop, HDI, GNI,
  Edu, Life, Agr,
  Ind, Ser, Pub,
  Urb,
  ncol = 3,
  nrow = 4
)
## Warning: Removed 10 rows containing non-finite values (stat_bin).
## Warning: Removed 11 rows containing non-finite values (stat_bin).

## Warning: Removed 11 rows containing non-finite values (stat_bin).

## Warning: Removed 11 rows containing non-finite values (stat_bin).

## Warning: Removed 11 rows containing non-finite values (stat_bin).
## Warning: Removed 5 rows containing non-finite values (stat_bin).

## Warning: Removed 5 rows containing non-finite values (stat_bin).

## Warning: Removed 5 rows containing non-finite values (stat_bin).

## Warning: Removed 5 rows containing non-finite values (stat_bin).
## Warning: Removed 12 rows containing non-finite values (stat_bin).

## Data standardization:

# Inspect the value range of each analysis variable before rescaling.
summary(muni_cities_analysis.sf)
##            NM_MUNICIP       CITY              STATE             IBGE_15-59     
##  SANTA TEREZINHA:   7   Length:5575        Length:5575        Min.   :     94  
##  BOM JESUS      :   5   Class :character   Class :character   1st Qu.:   1734  
##  SÃO DOMINGOS   :   5   Mode  :character   Mode  :character   Median :   3841  
##  BONITO         :   4                                         Mean   :  18212  
##  PLANALTO       :   4                                         3rd Qu.:   9628  
##  SANTA HELENA   :   4                                         Max.   :7058221  
##  (Other)        :5546                                         NA's   :10       
##       IDHM          IDHM_Renda     IDHM_Educacao    IDHM_Longevidade
##  Min.   :0.4180   Min.   :0.4000   Min.   :0.2070   Min.   :0.6720  
##  1st Qu.:0.5990   1st Qu.:0.5720   1st Qu.:0.4900   1st Qu.:0.7690  
##  Median :0.6650   Median :0.6540   Median :0.5600   Median :0.8080  
##  Mean   :0.6592   Mean   :0.6429   Mean   :0.5591   Mean   :0.8016  
##  3rd Qu.:0.7180   3rd Qu.:0.7070   3rd Qu.:0.6310   3rd Qu.:0.8360  
##  Max.   :0.8620   Max.   :0.8910   Max.   :0.8250   Max.   :0.8940  
##  NA's   :11       NA's   :11       NA's   :11       NA's   :11      
##   GVA_AGROPEC       GVA_INDUSTRY       GVA_SERVICES         GVA_PUBLIC      
##  Min.   :      0   Min.   :       1   Min.   :        2   Min.   :       7  
##  1st Qu.:   4189   1st Qu.:    1726   1st Qu.:    10112   1st Qu.:   17267  
##  Median :  20426   Median :    7424   Median :    31211   Median :   35866  
##  Mean   :  47271   Mean   :  175928   Mean   :   489451   Mean   :  123768  
##  3rd Qu.:  51227   3rd Qu.:   41022   3rd Qu.:   115406   3rd Qu.:   89245  
##  Max.   :1402282   Max.   :63306755   Max.   :464656988   Max.   :41902893  
##  NA's   :5         NA's   :5          NA's   :5           NA's   :5         
##  URBANIZATION_INDEX   GDP_CAPITA              geometry   
##  Min.   :0.04553    Min.   :  3191   MULTIPOLYGON :5575  
##  1st Qu.:0.49157    1st Qu.:  9058   epsg:4674    :   0  
##  Median :0.66277    Median : 15870   +proj=long...:   0  
##  Mean   :0.65212    Mean   : 21126                       
##  3rd Qu.:0.83043    3rd Qu.: 26155                       
##  Max.   :1.00000    Max.   :314638                       
##  NA's   :12         NA's   :5

As we can see, the range of some variables is very large. We will rescale (min-max normalise) the following variables, which do not fall within the range of 0 to 1: * IBGE_15-59 * GVA_AGROPEC * GVA_INDUSTRY * GVA_SERVICES * GVA_PUBLIC * GDP_CAPITA

# Rescale each wide-ranging variable in place with normalize().
vars_to_normalize <- c(
  "IBGE_15-59", "GVA_AGROPEC", "GVA_INDUSTRY",
  "GVA_SERVICES", "GVA_PUBLIC", "GDP_CAPITA"
)
for (var_name in vars_to_normalize) {
  muni_cities_analysis.sf[[var_name]] <-
    normalize(muni_cities_analysis.sf[[var_name]])
}

Check again to see if they are normalized:

# Re-inspect the summaries to confirm the rescaled variables now span 0 to 1.
summary(muni_cities_analysis.sf)
##            NM_MUNICIP       CITY              STATE          
##  SANTA TEREZINHA:   7   Length:5575        Length:5575       
##  BOM JESUS      :   5   Class :character   Class :character  
##  SÃO DOMINGOS   :   5   Mode  :character   Mode  :character  
##  BONITO         :   4                                        
##  PLANALTO       :   4                                        
##  SANTA HELENA   :   4                                        
##  (Other)        :5546                                        
##    IBGE_15-59            IDHM          IDHM_Renda     IDHM_Educacao   
##  Min.   :0.000000   Min.   :0.4180   Min.   :0.4000   Min.   :0.2070  
##  1st Qu.:0.000232   1st Qu.:0.5990   1st Qu.:0.5720   1st Qu.:0.4900  
##  Median :0.000531   Median :0.6650   Median :0.6540   Median :0.5600  
##  Mean   :0.002567   Mean   :0.6592   Mean   :0.6429   Mean   :0.5591  
##  3rd Qu.:0.001351   3rd Qu.:0.7180   3rd Qu.:0.7070   3rd Qu.:0.6310  
##  Max.   :1.000000   Max.   :0.8620   Max.   :0.8910   Max.   :0.8250  
##  NA's   :10         NA's   :11       NA's   :11       NA's   :11      
##  IDHM_Longevidade  GVA_AGROPEC        GVA_INDUSTRY       GVA_SERVICES     
##  Min.   :0.6720   Min.   :0.000000   Min.   :0.000000   Min.   :0.000000  
##  1st Qu.:0.7690   1st Qu.:0.002987   1st Qu.:0.000027   1st Qu.:0.000022  
##  Median :0.8080   Median :0.014566   Median :0.000117   Median :0.000067  
##  Mean   :0.8016   Mean   :0.033710   Mean   :0.002779   Mean   :0.001053  
##  3rd Qu.:0.8360   3rd Qu.:0.036531   3rd Qu.:0.000648   3rd Qu.:0.000248  
##  Max.   :0.8940   Max.   :1.000000   Max.   :1.000000   Max.   :1.000000  
##  NA's   :11       NA's   :5          NA's   :5          NA's   :5         
##    GVA_PUBLIC       URBANIZATION_INDEX   GDP_CAPITA               geometry   
##  Min.   :0.000000   Min.   :0.04553    Min.   :0.00000   MULTIPOLYGON :5575  
##  1st Qu.:0.000412   1st Qu.:0.49157    1st Qu.:0.01884   epsg:4674    :   0  
##  Median :0.000856   Median :0.66277    Median :0.04071   +proj=long...:   0  
##  Mean   :0.002954   Mean   :0.65212    Mean   :0.05759                       
##  3rd Qu.:0.002130   3rd Qu.:0.83043    3rd Qu.:0.07373                       
##  Max.   :1.000000   Max.   :1.00000    Max.   :1.00000                       
##  NA's   :5          NA's   :12         NA's   :5

Notice that there are some NA values, we will proceed to remove them from analysis, as they would affect our analysis

5.5 Remove NA values

# Keep only the rows where every model variable is present; filter() combines
# the comma-separated conditions with a logical AND.
muni_cities_analysis.sf <- filter(
  muni_cities_analysis.sf,
  !is.na(`IBGE_15-59`),
  !is.na(IDHM),
  !is.na(IDHM_Renda),
  !is.na(IDHM_Educacao),
  !is.na(IDHM_Longevidade),
  !is.na(GVA_AGROPEC),
  !is.na(GVA_INDUSTRY),
  !is.na(GVA_SERVICES),
  !is.na(GVA_PUBLIC),
  !is.na(URBANIZATION_INDEX),
  !is.na(GDP_CAPITA)
)

# Confirm that no NA's rows remain for any analysis variable.
summary(muni_cities_analysis.sf)
##         NM_MUNICIP       CITY              STATE             IBGE_15-59       
##  BOM JESUS   :   5   Length:5562        Length:5562        Min.   :0.0000000  
##  SÃO DOMINGOS:   5   Class :character   Class :character   1st Qu.:0.0002325  
##  BONITO      :   4   Mode  :character   Mode  :character   Median :0.0005312  
##  PLANALTO    :   4                                         Mean   :0.0025683  
##  SANTA HELENA:   4                                         3rd Qu.:0.0013513  
##  SANTA INÊS  :   4                                         Max.   :1.0000000  
##  (Other)     :5536                                                            
##       IDHM          IDHM_Renda     IDHM_Educacao    IDHM_Longevidade
##  Min.   :0.4180   Min.   :0.4000   Min.   :0.2070   Min.   :0.6720  
##  1st Qu.:0.5990   1st Qu.:0.5720   1st Qu.:0.4900   1st Qu.:0.7690  
##  Median :0.6650   Median :0.6540   Median :0.5600   Median :0.8080  
##  Mean   :0.6593   Mean   :0.6429   Mean   :0.5592   Mean   :0.8016  
##  3rd Qu.:0.7180   3rd Qu.:0.7070   3rd Qu.:0.6310   3rd Qu.:0.8360  
##  Max.   :0.8620   Max.   :0.8910   Max.   :0.8250   Max.   :0.8940  
##                                                                     
##   GVA_AGROPEC        GVA_INDUSTRY        GVA_SERVICES         GVA_PUBLIC       
##  Min.   :0.000000   Min.   :0.0000000   Min.   :0.0000000   Min.   :0.0000000  
##  1st Qu.:0.002987   1st Qu.:0.0000273   1st Qu.:0.0000218   1st Qu.:0.0004119  
##  Median :0.014570   Median :0.0001174   Median :0.0000672   Median :0.0008551  
##  Mean   :0.033717   Mean   :0.0027824   Mean   :0.0010548   Mean   :0.0029566  
##  3rd Qu.:0.036541   3rd Qu.:0.0006480   3rd Qu.:0.0002488   3rd Qu.:0.0021322  
##  Max.   :1.000000   Max.   :1.0000000   Max.   :1.0000000   Max.   :1.0000000  
##                                                                                
##  URBANIZATION_INDEX   GDP_CAPITA               geometry   
##  Min.   :0.04553    Min.   :0.00000   MULTIPOLYGON :5562  
##  1st Qu.:0.49166    1st Qu.:0.01885   epsg:4674    :   0  
##  Median :0.66284    Median :0.04073   +proj=long...:   0  
##  Mean   :0.65219    Mean   :0.05759                       
##  3rd Qu.:0.83046    3rd Qu.:0.07374                       
##  Max.   :1.00000    Max.   :1.00000                       
## 

We can see that all NA values are removed.

5.6 Drawing Statistical Point Map

Lastly, we want to reveal the geospatial distribution of GDP per capita. The map will be prepared by using tmap package.

# Grey municipality polygons as the backdrop, with dots coloured by
# GDP_CAPITA in five quantile classes drawn on top.
gdp_backdrop <- tm_shape(muni_cities_analysis.sf) +
  tm_polygons(col = "grey", border.col = "white", border.alpha = 0.5)
gdp_dots <- tm_dots(col = "GDP_CAPITA", alpha = 0.6, style = "quantile", n = 5)
gdp_backdrop + gdp_dots

6 Multiple Linear Regression

6.1 Visualising the relationships of the independent variables

# Correlation matrix of every numeric analysis variable.
# Columns are selected by type rather than by position (previously `4:14`),
# so the plot stays correct if columns are added, dropped or reordered.
corr_input <- st_drop_geometry(muni_cities_analysis.sf)
corr_is_numeric <- vapply(corr_input, is.numeric, logical(1))
corrplot(
  cor(corr_input[corr_is_numeric]),
  diag = FALSE, order = "AOE",
  tl.pos = "td", tl.cex = 0.5, method = "number", type = "upper"
)

From the matrix, we can see that some variables are highly correlated. In view of this, I have decided to remove the following variables from my analysis: * GVA_SERVICES * IDHM_Renda * IDHM * IBGE_15-59

# Drop the four variables judged to be highly correlated with others.
muni_cities_analysis.sf <- select(
  muni_cities_analysis.sf,
  -GVA_SERVICES,
  -IDHM_Renda,
  -`IBGE_15-59`,
  -IDHM
)

Plot again to check:

# Correlation matrix of the numeric variables that remain in the analysis
# table. Columns are selected by type rather than by position (previously
# `4:10`), so the plot does not silently pick the wrong columns if the table
# layout changes.
corr_input <- st_drop_geometry(muni_cities_analysis.sf)
corr_is_numeric <- vapply(corr_input, is.numeric, logical(1))
corrplot(
  cor(corr_input[corr_is_numeric]),
  diag = FALSE, order = "AOE",
  tl.pos = "td", tl.cex = 0.5, method = "number", type = "upper"
)

## Building multiple linear regression model The code below uses lm() to build a linear regression model:

# Fit the multiple linear regression of GDP per capita on the six retained
# explanatory variables, then print the model summary.
muni_cities.mlr <- lm(
  formula = GDP_CAPITA ~
    IDHM_Longevidade +
    IDHM_Educacao +
    GVA_AGROPEC +
    GVA_INDUSTRY +
    GVA_PUBLIC +
    URBANIZATION_INDEX,
  data = muni_cities_analysis.sf
)

summary(muni_cities.mlr)
## 
## Call:
## lm(formula = GDP_CAPITA ~ IDHM_Longevidade + IDHM_Educacao + 
##     GVA_AGROPEC + GVA_INDUSTRY + GVA_PUBLIC + URBANIZATION_INDEX, 
##     data = muni_cities_analysis.sf)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.51619 -0.02253 -0.00861  0.00784  0.85222 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        -0.297620   0.014894  -19.98   <2e-16 ***
## IDHM_Longevidade    0.316803   0.023170   13.67   <2e-16 ***
## IDHM_Educacao       0.178091   0.012365   14.40   <2e-16 ***
## GVA_AGROPEC         0.217097   0.011724   18.52   <2e-16 ***
## GVA_INDUSTRY        1.011883   0.055672   18.18   <2e-16 ***
## GVA_PUBLIC         -0.816760   0.066970  -12.20   <2e-16 ***
## URBANIZATION_INDEX -0.009272   0.004177   -2.22   0.0265 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05416 on 5555 degrees of freedom
## Multiple R-squared:  0.3117, Adjusted R-squared:  0.3109 
## F-statistic: 419.2 on 6 and 5555 DF,  p-value: < 2.2e-16

With reference to the report above, all the independent variables are statistically significant at the 5% level (p < 0.05). URBANIZATION_INDEX is the weakest of them (p = 0.0265) and would not be significant at the 1% level.

6.2 Checking for multicolinearity

# Report the tolerance and variance inflation factor (VIF) of each predictor.
ols_vif_tol(muni_cities.mlr)
##            Variables Tolerance      VIF
## 1   IDHM_Longevidade 0.4925621 2.030201
## 2      IDHM_Educacao 0.3968377 2.519922
## 3        GVA_AGROPEC 0.9596783 1.042016
## 4       GVA_INDUSTRY 0.4268338 2.342832
## 5         GVA_PUBLIC 0.4292711 2.329530
## 6 URBANIZATION_INDEX 0.6586024 1.518367

Since the VIFs of all the independent variables are less than 10, we can safely conclude that there is no sign of multicollinearity among the independent variables.

6.3 Test for Non-Linearity

# Plot residuals against fitted values to check the linearity assumption.
ols_plot_resid_fit(muni_cities.mlr)

The figure above reveals that most of the data points are scattered around the 0 line; hence, we can conclude that the relationships between the dependent variable and the independent variables are approximately linear.

6.4 Test for Normality Assumption

# Histogram of the model residuals, used to check the normality assumption.
ols_plot_resid_hist(muni_cities.mlr)

The figure reveals that the residual of the multiple linear regression model (i.e. muni_cities.mlr) resemble normal distribution.

6.5 Testing for Spatial Autocorrelation

First, export the residuals of the multiple linear regression model as a data frame

# Copy the fitted model's residuals into a one-column data frame.
mlr.output <- as.data.frame(muni_cities.mlr$residuals)

Next, we will attach the residuals to the muni_cities_analysis.sf object as a new column named MLR_RES.

# Append the model residuals to the analysis table as column MLR_RES.
muni_cities.res.sf <- cbind(
  muni_cities_analysis.sf,
  MLR_RES = muni_cities.mlr$residuals
)

Next, we will convert the muni_cities.res.sf simple feature object into a SpatialPolygonsDataFrame

# Convert the sf object to its sp equivalent and print it to inspect the
# class, extent and variables.
muni_cities_res.sp <- as_Spatial(muni_cities.res.sf)
muni_cities_res.sp
## class       : SpatialPolygonsDataFrame 
## features    : 5562 
## extent      : -73.99045, -28.83591, -33.75118, 4.884623  (xmin, xmax, ymin, ymax)
## Warning in proj4string(x): CRS object has comment, which is lost in output
## crs         : +proj=longlat +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +no_defs 
## variables   : 11
## names       :      NM_MUNICIP,            CITY, STATE, IDHM_Educacao, IDHM_Longevidade, GVA_AGROPEC, GVA_INDUSTRY, GVA_PUBLIC, URBANIZATION_INDEX, GDP_CAPITA,            MLR_RES 
## min values  : ABADIA DE GOIÁS, Abadia De Goiás,    AC,         0.207,            0.672,           0,            0,          0, 0.0455261775520925,          0, -0.516188191386672 
## max values  :          ZORTÉA,          Zortéa,    TO,         0.825,            0.894,           1,            1,          1,                  1,          1,  0.852215604469209

Next, we will use tmap package to display the distribution of the residuals.

# Choropleth of the MLR residuals:
#   1. base layer: municipality polygons from the sf object, drawn without
#      borders (border.col = NULL);
#   2. overlay: the MLR_RES column of the sp object, filled semi-transparently
#      (alpha = 0.6) using quantile class breaks;
#   3. the legend is placed outside the map frame.
tm_shape(muni_cities_analysis.sf) +
  tm_polygons(border.col = NULL) +
  tm_shape(muni_cities_res.sp) +
  tm_fill(col="MLR_RES",
          alpha = 0.6,
          style="quantile")+
  tm_layout(legend.outside = TRUE)

The figure above reveals that there are signs of spatial autocorrelation. To confirm that this observation is indeed true, the Moran’s I test will be performed.

### Compute the distance-based weight matrix

Get summary statistics to determine the range:

# One coordinate pair per municipality polygon, as returned by coordinates().
coords <- coordinates(muni_cities_res.sp)
# Nearest-neighbour list built from knearneigh() with its default k.
# NOTE(review): knearneigh() is not told that the coordinates are long/lat,
# whereas nbdists() further down is (longlat = TRUE) — confirm that running the
# neighbour search on raw degrees is intended.
k <- knn2nb(knearneigh(coords))
## Warning in knearneigh(coords): knearneigh: identical points found
# Distance from each municipality to its nearest neighbour, flattened to a
# plain numeric vector (kilometres, given longlat = TRUE).
k_dist <- unlist(nbdists(k, coords, longlat = TRUE))
# The maximum of this summary guides the upper bound of the distance band.
summary(k_dist)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   11.74   15.88   21.35   23.96  369.63

As the summary statistics show, the largest nearest-neighbour distance is 369.63 km, so we will use a distance range of 0 to 370 km.

# Distance-band neighbours: link every pair of municipalities whose
# coordinates lie between 0 and 370 apart (kilometres, given longlat = TRUE).
# 370 sits just above the largest nearest-neighbour distance reported in the
# k_dist summary (369.63).
nb <- dnearneigh(coords, 0, 370, longlat = TRUE)
# Print the number of links and the link-number distribution.
summary(nb)
## Neighbour list object:
## Number of regions: 5562 
## Number of nonzero links: 3030086 
## Percentage nonzero weights: 9.794742 
## Average number of links: 544.7835 
## Link number distribution:
## 
##   1   4   8   9  10  11  12  13  14  15  16  17  18  19  20  21  22  23  24  25 
##   1   1   1   4   1   5   3   8  10   5   8   5   8   8   6   3   2   7   6   8 
##  26  27  28  29  30  31  32  33  34  36  37  38  39  40  41  42  43  44  45  46 
##   5   5   7   6   3   4   4   1   2   1   1   2   3   4   4   3   3   2   5   3 
##  47  48  49  50  51  52  53  54  55  56  57  58  59  60  61  62  63  64  65  66 
##   8   2   3   8   8   2   8   8   9   8   4  13   7   4   2   1   2   2   3   2 
##  67  70  71  72  73  74  75  76  77  78  80  82  83  84  85  86  87  88  89  90 
##   5   3   3   3   6   2   4   6   5   1   5   3   4   2   4   1   3   1   3   6 
##  91  92  94  95  97  98 100 101 102 103 104 105 106 107 108 109 110 111 112 113 
##   1   1   2   1   3   3   2   6   2   1   1   1   2   6   1   1   2   4   2   3 
## 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 133 134 
##   6   4   4   1   2   3   1   3   1   2   1   4   1   2   2   2   1   4   5   2 
## 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 
##   2   2   7   3   3   2   4   2   4   4   9  10   5   3   4  11   9   7   4   7 
## 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 
##   6   7   5   4   3   1   6  12   6   4  10   3   8   3   5   6   5   3   6   5 
## 175 176 177 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 
##   2   3   2   1   5   1   2   3   1   1   4   1   1   2   6   7   5   3   3   1 
## 196 198 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 
##   1   4   2   3   4   1   2   1   2   1   6   4   2   3   4   5   7   4   5   1 
## 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 
##   2   7   4   2   8   2   3   5   4   4   6   5   4   1   4   4   1   5   7   4 
## 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 
##   4   6   5   4   3   3   9   4   9   6   7   4   5   3   7   3   6   7   7   7 
## 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 
##   1   5   6   2   3   9   8   5   3   5   8   6   4   4  10   9   4  10   5   9 
## 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 
##   4   6  12   5   7   4   7   5   5   4   5   8   7   4   5   5   3   7   3   3 
## 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 
##   3   6   2   7   6   7   3   3   8   5   5   6   2   4   8   5   4   3   8   7 
## 318 319 320 321 322 323 324 325 326 327 329 330 331 332 333 334 335 336 337 338 
##   3   8   4  10   8   5   5   6   5   5   5  11   4   5   9   3   5   7   5   5 
## 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 
##   7   4   8   4   3   3   8   5   2   7   2  10   8   4   4   1   3   3   6   6 
## 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 
##   4  11   6   3   6   3   9   9   4   7   8   2   6   6   6   6   5   7  10   5 
## 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 
##   9   9   9   8   6   6  12  16  14   8   4  11   5   8   6   3   7   4  12   3 
## 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 
##  10   5   4   8   6  10   3   4   7   1   3   4   5   4   6   6   8   1   3   5 
## 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 
##   5   4   8   6   8   8   1   2   3   8   3   5   4   3   2   4   3   7   2   5 
## 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 
##   5   3   4   7   4   2   2   4   3   3   3   3   2   3   9   3   5   3   2   5 
## 460 461 462 463 464 465 466 467 468 469 470 472 473 474 475 476 478 479 480 481 
##   7   5   4   3   1   6   4   5   3   1   5   3   1   3   3   5   6   7   1   4 
## 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 
##   2   2   4   4   4   4  11   4   5   3   1   7   4   3   2   5   7   2   5   3 
## 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 
##   4   1   3   4   4   3   6   5   1   5   9   4   8   2   9   2   7   4   3   2 
## 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 
##   5   7   3   5   4   2   6   2   4   4   7   5   4  11   2   7   4   1   6   7 
## 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 561 562 
##   3   2   7   4   3   8   5   4   7   7   5   7   6   6   7  11   7   7   7  13 
## 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 
##   3   8   9   5  10   3   7   8   8   7   8   6   5   9   9  11   6   9   8   9 
## 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 
##   6   6   8   6   4   6  15   4   5   6  10   8   5   4   9   4  12   5   8  10 
## 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 
##   8   8  16   4   8   5   6   6   6   5   8   3  10   3   2   2   5  11  15   3 
## 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 
##   5  14   7  11  10   5  10   9   9   5   7  10   5  10   9   9  10   9  11  11 
## 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 
##   6   7   8   8  13   2   6   9   9  13   8   8  11  11   7   9  14  10   9   8 
## 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 
##   3  15  11   7  10   7   7   9   7   8   6   6   8  13  11  11  10   6  19   9 
## 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 
##   8  15   8   6   5  10  12   9  13  13   4  10  11   8   5  17   9   5  11  10 
## 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 
##  14  12  11   6  11   7   9   7  10  11  10   6  10  13  13  13   8  15   3   8 
## 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 
##   8  11  11   7   9   9   8   6  11  12  15   8  13   8  11  12  10   4   3   9 
## 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 
##   7  11  11   8   7  14   9  15   7  11   8  10  14   9   9   5   9   5   8   5 
## 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 
##  10  14   5   5  15   7  16   4   9   9   5  12  12   7  12   6   6   6   7   9 
## 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 
##   7  12   6   7   7   9   9   6   6  11   7   8  10   5   8   6   2   6  13   7 
## 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 
##   6  10  11   6   7   7   7  10  12   9   6  10   5   4   9  11   8   6   3  13 
## 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 
##  12   8  10  13  10   8  10   8  10   9   2   7   5   7   7   7   5   7   6   9 
## 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 
##   7   8  10   9   5   4   9  11   7   9  10   9  13   7   9   8   8   3   8  10 
## 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 
##   6  12   4   7   6  13   6  10  13   5   4   6   7   6  10  10  11  12   4   4 
## 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 
##   3   7   9   9  11   6   5   5   3   4   7   9   7   4   3  10   3   5   7   8 
## 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 
##   7   8   5   4  15   5   7   4   5   8   7   8   3   9   5   7   7   5   3   4 
## 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 
##   2  10   6   5   8   4   7   6   1   9   5   4   5   6   3   3   7   2   1   2 
## 943 944 945 946 947 948 951 952 954 955 956 959 963 969 970 972 
##   1   2   1   1   1   1   3   1   1   1   1   1   1   1   1   1 
## 1 least connected region:
## 1523 with 1 link
## 1 most connected region:
## 4154 with 972 links

Convert the output neighbours list (i.e. nb) into a spatial weights object.

# Turn the neighbour list into a spatial weights list; style = 'W' requests
# row-standardised weights.
nb_lw <- nb2listw(nb, style = 'W')
# Print the neighbour summary together with the weights constants.
summary(nb_lw)
## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 5562 
## Number of nonzero links: 3030086 
## Percentage nonzero weights: 9.794742 
## Average number of links: 544.7835 
## Link number distribution:
## 
##   1   4   8   9  10  11  12  13  14  15  16  17  18  19  20  21  22  23  24  25 
##   1   1   1   4   1   5   3   8  10   5   8   5   8   8   6   3   2   7   6   8 
##  26  27  28  29  30  31  32  33  34  36  37  38  39  40  41  42  43  44  45  46 
##   5   5   7   6   3   4   4   1   2   1   1   2   3   4   4   3   3   2   5   3 
##  47  48  49  50  51  52  53  54  55  56  57  58  59  60  61  62  63  64  65  66 
##   8   2   3   8   8   2   8   8   9   8   4  13   7   4   2   1   2   2   3   2 
##  67  70  71  72  73  74  75  76  77  78  80  82  83  84  85  86  87  88  89  90 
##   5   3   3   3   6   2   4   6   5   1   5   3   4   2   4   1   3   1   3   6 
##  91  92  94  95  97  98 100 101 102 103 104 105 106 107 108 109 110 111 112 113 
##   1   1   2   1   3   3   2   6   2   1   1   1   2   6   1   1   2   4   2   3 
## 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 133 134 
##   6   4   4   1   2   3   1   3   1   2   1   4   1   2   2   2   1   4   5   2 
## 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 
##   2   2   7   3   3   2   4   2   4   4   9  10   5   3   4  11   9   7   4   7 
## 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 
##   6   7   5   4   3   1   6  12   6   4  10   3   8   3   5   6   5   3   6   5 
## 175 176 177 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 
##   2   3   2   1   5   1   2   3   1   1   4   1   1   2   6   7   5   3   3   1 
## 196 198 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 
##   1   4   2   3   4   1   2   1   2   1   6   4   2   3   4   5   7   4   5   1 
## 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 
##   2   7   4   2   8   2   3   5   4   4   6   5   4   1   4   4   1   5   7   4 
## 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 
##   4   6   5   4   3   3   9   4   9   6   7   4   5   3   7   3   6   7   7   7 
## 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 
##   1   5   6   2   3   9   8   5   3   5   8   6   4   4  10   9   4  10   5   9 
## 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 
##   4   6  12   5   7   4   7   5   5   4   5   8   7   4   5   5   3   7   3   3 
## 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 
##   3   6   2   7   6   7   3   3   8   5   5   6   2   4   8   5   4   3   8   7 
## 318 319 320 321 322 323 324 325 326 327 329 330 331 332 333 334 335 336 337 338 
##   3   8   4  10   8   5   5   6   5   5   5  11   4   5   9   3   5   7   5   5 
## 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 
##   7   4   8   4   3   3   8   5   2   7   2  10   8   4   4   1   3   3   6   6 
## 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 
##   4  11   6   3   6   3   9   9   4   7   8   2   6   6   6   6   5   7  10   5 
## 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 
##   9   9   9   8   6   6  12  16  14   8   4  11   5   8   6   3   7   4  12   3 
## 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 
##  10   5   4   8   6  10   3   4   7   1   3   4   5   4   6   6   8   1   3   5 
## 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 
##   5   4   8   6   8   8   1   2   3   8   3   5   4   3   2   4   3   7   2   5 
## 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 
##   5   3   4   7   4   2   2   4   3   3   3   3   2   3   9   3   5   3   2   5 
## 460 461 462 463 464 465 466 467 468 469 470 472 473 474 475 476 478 479 480 481 
##   7   5   4   3   1   6   4   5   3   1   5   3   1   3   3   5   6   7   1   4 
## 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 
##   2   2   4   4   4   4  11   4   5   3   1   7   4   3   2   5   7   2   5   3 
## 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 
##   4   1   3   4   4   3   6   5   1   5   9   4   8   2   9   2   7   4   3   2 
## 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 
##   5   7   3   5   4   2   6   2   4   4   7   5   4  11   2   7   4   1   6   7 
## 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 561 562 
##   3   2   7   4   3   8   5   4   7   7   5   7   6   6   7  11   7   7   7  13 
## 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 
##   3   8   9   5  10   3   7   8   8   7   8   6   5   9   9  11   6   9   8   9 
## 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 
##   6   6   8   6   4   6  15   4   5   6  10   8   5   4   9   4  12   5   8  10 
## 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 
##   8   8  16   4   8   5   6   6   6   5   8   3  10   3   2   2   5  11  15   3 
## 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 
##   5  14   7  11  10   5  10   9   9   5   7  10   5  10   9   9  10   9  11  11 
## 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 
##   6   7   8   8  13   2   6   9   9  13   8   8  11  11   7   9  14  10   9   8 
## 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 
##   3  15  11   7  10   7   7   9   7   8   6   6   8  13  11  11  10   6  19   9 
## 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 
##   8  15   8   6   5  10  12   9  13  13   4  10  11   8   5  17   9   5  11  10 
## 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 
##  14  12  11   6  11   7   9   7  10  11  10   6  10  13  13  13   8  15   3   8 
## 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 
##   8  11  11   7   9   9   8   6  11  12  15   8  13   8  11  12  10   4   3   9 
## 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 
##   7  11  11   8   7  14   9  15   7  11   8  10  14   9   9   5   9   5   8   5 
## 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 
##  10  14   5   5  15   7  16   4   9   9   5  12  12   7  12   6   6   6   7   9 
## 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 
##   7  12   6   7   7   9   9   6   6  11   7   8  10   5   8   6   2   6  13   7 
## 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 
##   6  10  11   6   7   7   7  10  12   9   6  10   5   4   9  11   8   6   3  13 
## 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 
##  12   8  10  13  10   8  10   8  10   9   2   7   5   7   7   7   5   7   6   9 
## 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 
##   7   8  10   9   5   4   9  11   7   9  10   9  13   7   9   8   8   3   8  10 
## 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 
##   6  12   4   7   6  13   6  10  13   5   4   6   7   6  10  10  11  12   4   4 
## 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 
##   3   7   9   9  11   6   5   5   3   4   7   9   7   4   3  10   3   5   7   8 
## 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 
##   7   8   5   4  15   5   7   4   5   8   7   8   3   9   5   7   7   5   3   4 
## 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 
##   2  10   6   5   8   4   7   6   1   9   5   4   5   6   3   3   7   2   1   2 
## 943 944 945 946 947 948 951 952 954 955 956 959 963 969 970 972 
##   1   2   1   1   1   1   3   1   1   1   1   1   1   1   1   1 
## 1 least connected region:
## 1523 with 1 link
## 1 most connected region:
## 4154 with 972 links
## 
## Weights style: W 
## Weights constants summary:
##      n       nn   S0       S1       S2
## W 5562 30935844 5562 45.46846 22375.69

6.5.1 Perform Moran’s I test for residual spatial autocorrelation

# Global Moran's I test on the residuals of muni_cities.mlr, using the
# distance-band spatial weights held in nb_lw.
lm.morantest(muni_cities.mlr, nb_lw)
## 
##  Global Moran I for regression residuals
## 
## data:  
## model: lm(formula = GDP_CAPITA ~ IDHM_Longevidade + IDHM_Educacao +
## GVA_AGROPEC + GVA_INDUSTRY + GVA_PUBLIC + URBANIZATION_INDEX, data =
## muni_cities_analysis.sf)
## weights: nb_lw
## 
## Moran I statistic standard deviate = 27.359, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I      Expectation         Variance 
##     3.088069e-02    -3.794817e-04     1.305473e-06

The Global Moran’s I test for residual spatial autocorrelation shows that its p-value is less than 2.2e-16, which is less than the alpha value of 0.05. Hence, we reject the null hypothesis that the residuals are randomly distributed.

Since the observed Global Moran’s I = 3.088069e-02, which is greater than 0, we can infer that the residuals resemble a clustered distribution.

7 Building GWR Models using GWmodel

7.1 Building Fixed Bandwidth GWR Model

7.1.1 Computing fixed bandwidth

# Search for the fixed (distance) bandwidth that minimises the cross-validation
# score for the GDP_CAPITA model, using a Gaussian kernel.
# NOTE(review): muni_cities_res.sp is in a long/lat CRS (+proj=longlat) but
# longlat=FALSE is passed, so distances are presumably treated as planar and
# the bandwidth is expressed in decimal degrees — confirm this is intended, or
# project the data / set longlat=TRUE and re-run.
bw.fixed <- bw.gwr(formula = GDP_CAPITA ~ IDHM_Longevidade + IDHM_Educacao + GVA_AGROPEC + GVA_INDUSTRY  + GVA_PUBLIC + URBANIZATION_INDEX, data=muni_cities_res.sp, approach="CV", kernel="gaussian", adaptive=FALSE, longlat=FALSE)
## Take a cup of tea and have a break, it will take a few minutes.
##           -----A kind suggestion from GWmodel development group
## Fixed bandwidth: 34.48404 CV score: 17.01473 
## Fixed bandwidth: 21.31657 CV score: 17.07734 
## Fixed bandwidth: 42.62198 CV score: 17.00009 
## Fixed bandwidth: 47.65151 CV score: 16.99452 
## Fixed bandwidth: 50.75993 CV score: 16.99187 
## Fixed bandwidth: 52.68103 CV score: 16.99046 
## Fixed bandwidth: 53.86834 CV score: 16.98967 
## Fixed bandwidth: 54.60214 CV score: 16.9892 
## Fixed bandwidth: 55.05565 CV score: 16.98892 
## Fixed bandwidth: 55.33594 CV score: 16.98876 
## Fixed bandwidth: 55.50917 CV score: 16.98865 
## Fixed bandwidth: 55.61623 CV score: 16.98859

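The result shows that the recommended bandwidth is 55.61623. Because the coordinates are in decimal degrees and the search is run with longlat=FALSE, this value is expressed in degrees rather than kilometres.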

7.1.2 Calibrate the gwr model using fixed bandwidth

# Calibrate the basic GWR model with the fixed bandwidth stored in bw.fixed and
# a Gaussian kernel, on the same formula as the global regression.
# NOTE(review): longlat = FALSE is used on long/lat coordinates, matching the
# bandwidth search — confirm the distance metric is the intended one.
gwr.fixed <- gwr.basic(formula = GDP_CAPITA ~ IDHM_Longevidade + IDHM_Educacao + GVA_AGROPEC + GVA_INDUSTRY  + GVA_PUBLIC + URBANIZATION_INDEX, data=muni_cities_res.sp, bw=bw.fixed, kernel = 'gaussian', longlat = FALSE)
## Warning in proj4string(data): CRS object has comment, which is lost in output

Display the model output:

# Print the global regression and GWR summaries of the fixed-bandwidth model.
gwr.fixed
##    ***********************************************************************
##    *                       Package   GWmodel                             *
##    ***********************************************************************
##    Program starts at: 2020-05-31 21:28:58 
##    Call:
##    gwr.basic(formula = GDP_CAPITA ~ IDHM_Longevidade + IDHM_Educacao + 
##     GVA_AGROPEC + GVA_INDUSTRY + GVA_PUBLIC + URBANIZATION_INDEX, 
##     data = muni_cities_res.sp, bw = bw.fixed, kernel = "gaussian", 
##     longlat = FALSE)
## 
##    Dependent (y) variable:  GDP_CAPITA
##    Independent variables:  IDHM_Longevidade IDHM_Educacao GVA_AGROPEC GVA_INDUSTRY GVA_PUBLIC URBANIZATION_INDEX
##    Number of data points: 5562
##    ***********************************************************************
##    *                    Results of Global Regression                     *
##    ***********************************************************************
## 
##    Call:
##     lm(formula = formula, data = data)
## 
##    Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.51619 -0.02253 -0.00861  0.00784  0.85222 
## 
##    Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
##    (Intercept)        -0.297620   0.014894  -19.98   <2e-16 ***
##    IDHM_Longevidade    0.316803   0.023170   13.67   <2e-16 ***
##    IDHM_Educacao       0.178091   0.012365   14.40   <2e-16 ***
##    GVA_AGROPEC         0.217097   0.011724   18.52   <2e-16 ***
##    GVA_INDUSTRY        1.011883   0.055672   18.18   <2e-16 ***
##    GVA_PUBLIC         -0.816760   0.066970  -12.20   <2e-16 ***
##    URBANIZATION_INDEX -0.009272   0.004177   -2.22   0.0265 *  
## 
##    ---Significance stars
##    Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
##    Residual standard error: 0.05416 on 5555 degrees of freedom
##    Multiple R-squared: 0.3117
##    Adjusted R-squared: 0.3109 
##    F-statistic: 419.2 on 6 and 5555 DF,  p-value: < 2.2e-16 
##    ***Extra Diagnostic information
##    Residual sum of squares: 16.29655
##    Sigma(hat): 0.05413902
##    AIC:  -16641.54
##    AICc:  -16641.51
##    ***********************************************************************
##    *          Results of Geographically Weighted Regression              *
##    ***********************************************************************
## 
##    *********************Model calibration information*********************
##    Kernel function: gaussian 
##    Fixed bandwidth: 55.61623 
##    Regression points: the same locations as observations are used.
##    Distance metric: Euclidean distance metric is used.
## 
##    ****************Summary of GWR coefficient estimates:******************
##                             Min.    1st Qu.     Median    3rd Qu.    Max.
##    Intercept          -0.3004568 -0.2975073 -0.2968356 -0.2960530 -0.2946
##    IDHM_Longevidade    0.3130635  0.3143717  0.3152692  0.3159543  0.3212
##    IDHM_Educacao       0.1750872  0.1769845  0.1788888  0.1798482  0.1819
##    GVA_AGROPEC         0.2167823  0.2174414  0.2175341  0.2175978  0.2177
##    GVA_INDUSTRY        0.9927010  1.0153373  1.0193995  1.0224757  1.0276
##    GVA_PUBLIC         -0.8346534 -0.8275840 -0.8242750 -0.8182970 -0.7994
##    URBANIZATION_INDEX -0.0104319 -0.0096507 -0.0092897 -0.0085478 -0.0080
##    ************************Diagnostic information*************************
##    Number of data points: 5562 
##    Effective number of parameters (2trace(S) - trace(S'S)): 7.39543 
##    Effective degrees of freedom (n-2trace(S) + trace(S'S)): 5554.605 
##    AICc (GWR book, Fotheringham, et al. 2002, p. 61, eq 2.33): -16649.16 
##    AIC (GWR book, Fotheringham, et al. 2002,GWR p. 96, eq. 4.22): -16658.39 
##    Residual sum of squares: 16.27297 
##    R-square value:  0.3126563 
##    Adjusted R-square value:  0.311741 
## 
##    ***********************************************************************
##    Program stops at: 2020-05-31 21:29:11

7.2 Building Adaptive Bandwidth GWR Model

7.2.1 Computing the adaptive bandwidth

# Pre-compute the pairwise distance matrix between municipality coordinates so
# that the bandwidth search can reuse it (passed below as dMat).
# NOTE(review): the variable shares its name with stats::dist(), and gw.dist()
# is called with its default distance settings on long/lat coordinates —
# confirm both are intended.
dist <- gw.dist(dp.locat = coordinates(muni_cities_res.sp))
# Search for the adaptive bandwidth (a number of nearest neighbours) that
# minimises the cross-validation score, using a Gaussian kernel.
bw.adaptive <- bw.gwr(formula = GDP_CAPITA ~ IDHM_Longevidade + IDHM_Educacao + GVA_AGROPEC + GVA_INDUSTRY  + GVA_PUBLIC + URBANIZATION_INDEX, data=muni_cities_res.sp, approach="CV", kernel="gaussian", dMat = dist,
adaptive=TRUE, longlat=FALSE)
## Take a cup of tea and have a break, it will take a few minutes.
##           -----A kind suggestion from GWmodel development group
## Adaptive bandwidth: 3445 CV score: 17.23047 
## Adaptive bandwidth: 2137 CV score: 17.11392 
## Adaptive bandwidth: 1328 CV score: 16.77703 
## Adaptive bandwidth: 828 CV score: 16.47517 
## Adaptive bandwidth: 519 CV score: 16.22232 
## Adaptive bandwidth: 328 CV score: 15.99215 
## Adaptive bandwidth: 210 CV score: 16.0175 
## Adaptive bandwidth: 401 CV score: 16.07424 
## Adaptive bandwidth: 283 CV score: 15.97288 
## Adaptive bandwidth: 255 CV score: 15.97195 
## Adaptive bandwidth: 237 CV score: 15.99858 
## Adaptive bandwidth: 265 CV score: 15.97461 
## Adaptive bandwidth: 247 CV score: 15.9919 
## Adaptive bandwidth: 258 CV score: 15.97075 
## Adaptive bandwidth: 262 CV score: 15.97355 
## Adaptive bandwidth: 257 CV score: 15.97418 
## Adaptive bandwidth: 260 CV score: 15.96914 
## Adaptive bandwidth: 260 CV score: 15.96914

The result shows that 260 is the recommended number of nearest neighbours (data points) to be used.

7.2.2 Constructing the adaptive bandwidth gwr model

Now, we can go ahead and calibrate the GWR-based GDP per capita model by using the adaptive bandwidth and a Gaussian kernel, as shown in the code chunk below.

# Calibrate the basic GWR model with the adaptive bandwidth stored in
# bw.adaptive (a nearest-neighbour count) and a Gaussian kernel.
# NOTE(review): no dMat argument is passed here, unlike in the adaptive
# bandwidth search — confirm that recomputing distances is intended.
gwr.adaptive <- gwr.basic(formula = GDP_CAPITA ~ IDHM_Longevidade + IDHM_Educacao + GVA_AGROPEC + GVA_INDUSTRY  + GVA_PUBLIC + URBANIZATION_INDEX, data=muni_cities_res.sp, bw=bw.adaptive, kernel = 'gaussian', adaptive=TRUE, longlat = FALSE)
## Warning in proj4string(data): CRS object has comment, which is lost in output

Display the model output:

# Print the global regression and GWR summaries of the adaptive-bandwidth model.
gwr.adaptive
##    ***********************************************************************
##    *                       Package   GWmodel                             *
##    ***********************************************************************
##    Program starts at: 2020-05-31 21:30:51 
##    Call:
##    gwr.basic(formula = GDP_CAPITA ~ IDHM_Longevidade + IDHM_Educacao + 
##     GVA_AGROPEC + GVA_INDUSTRY + GVA_PUBLIC + URBANIZATION_INDEX, 
##     data = muni_cities_res.sp, bw = bw.adaptive, kernel = "gaussian", 
##     adaptive = TRUE, longlat = FALSE)
## 
##    Dependent (y) variable:  GDP_CAPITA
##    Independent variables:  IDHM_Longevidade IDHM_Educacao GVA_AGROPEC GVA_INDUSTRY GVA_PUBLIC URBANIZATION_INDEX
##    Number of data points: 5562
##    ***********************************************************************
##    *                    Results of Global Regression                     *
##    ***********************************************************************
## 
##    Call:
##     lm(formula = formula, data = data)
## 
##    Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.51619 -0.02253 -0.00861  0.00784  0.85222 
## 
##    Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
##    (Intercept)        -0.297620   0.014894  -19.98   <2e-16 ***
##    IDHM_Longevidade    0.316803   0.023170   13.67   <2e-16 ***
##    IDHM_Educacao       0.178091   0.012365   14.40   <2e-16 ***
##    GVA_AGROPEC         0.217097   0.011724   18.52   <2e-16 ***
##    GVA_INDUSTRY        1.011883   0.055672   18.18   <2e-16 ***
##    GVA_PUBLIC         -0.816760   0.066970  -12.20   <2e-16 ***
##    URBANIZATION_INDEX -0.009272   0.004177   -2.22   0.0265 *  
## 
##    ---Significance stars
##    Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
##    Residual standard error: 0.05416 on 5555 degrees of freedom
##    Multiple R-squared: 0.3117
##    Adjusted R-squared: 0.3109 
##    F-statistic: 419.2 on 6 and 5555 DF,  p-value: < 2.2e-16 
##    ***Extra Diagnostic information
##    Residual sum of squares: 16.29655
##    Sigma(hat): 0.05413902
##    AIC:  -16641.54
##    AICc:  -16641.51
##    ***********************************************************************
##    *          Results of Geographically Weighted Regression              *
##    ***********************************************************************
## 
##    *********************Model calibration information*********************
##    Kernel function: gaussian 
##    Adaptive bandwidth: 260 (number of nearest neighbours)
##    Regression points: the same locations as observations are used.
##    Distance metric: Euclidean distance metric is used.
## 
##    ****************Summary of GWR coefficient estimates:******************
##                             Min.    1st Qu.     Median    3rd Qu.   Max.
##    Intercept          -0.3413540 -0.2188945 -0.1548191 -0.0523613 0.0271
##    IDHM_Longevidade   -0.0617984  0.0592798  0.1545290  0.2211450 0.3936
##    IDHM_Educacao       0.0109626  0.0317312  0.1388882  0.1902711 0.2554
##    GVA_AGROPEC         0.0025691  0.1626542  0.1995342  0.2207908 0.3837
##    GVA_INDUSTRY        0.3192241  1.1012191  2.3192578  3.5080612 6.9694
##    GVA_PUBLIC         -4.5215898 -2.5039254 -1.6814028 -0.6841106 0.0653
##    URBANIZATION_INDEX -0.0359647 -0.0148809  0.0069244  0.0156982 0.0809
##    ************************Diagnostic information*************************
##    Number of data points: 5562 
##    Effective number of parameters (2trace(S) - trace(S'S)): 101.8268 
##    Effective degrees of freedom (n-2trace(S) + trace(S'S)): 5460.173 
##    AICc (GWR book, Fotheringham, et al. 2002, p. 61, eq 2.33): -17621.96 
##    AIC (GWR book, Fotheringham, et al. 2002,GWR p. 96, eq. 4.22): -17698.89 
##    Residual sum of squares: 13.33803 
##    R-square value:  0.4366233 
##    Adjusted R-square value:  0.426115 
## 
##    ***********************************************************************
##    Program stops at: 2020-05-31 21:31:10

8 Visualising GWR Output

Here I will be using the fixed bandwidth GWR model.

8.1 Convert GWR output SDF into an sf data frame

# Build the sf layer used to map the fixed-bandwidth GWR results.
# The GWR output table (gwr.fixed$SDF) is flattened to a plain data frame and
# column-bound onto muni_cities.res.sf, which already carries the municipality
# geometry. No separate st_as_sf()/st_transform() conversion of the SDF is
# kept: its result was overwritten by the cbind() before ever being read.
gwr.fixed.output <- as.data.frame(gwr.fixed$SDF)
# GWR columns whose names clash with existing attribute columns receive a
# ".1" suffix in the combined table (e.g. IDHM_Educacao.1).
muni_cities.sf.fixed <- cbind(muni_cities.res.sf, as.matrix(gwr.fixed.output))
# Print the combined layer to inspect the appended columns.
muni_cities.sf.fixed
## Simple feature collection with 5562 features and 38 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -73.99045 ymin: -33.75118 xmax: -28.83591 ymax: 4.884623
## CRS:            4674
## First 10 features:
##               NM_MUNICIP                  CITY STATE IDHM_Educacao
## 1  ALTA FLORESTA D'OESTE Alta Floresta D'Oeste    RO         0.526
## 2              ARIQUEMES             Ariquemes    RO         0.600
## 3                 CABIXI                Cabixi    RO         0.559
## 4                 CACOAL                Cacoal    RO         0.620
## 5             CEREJEIRAS            Cerejeiras    RO         0.602
## 6      COLORADO DO OESTE     Colorado Do Oeste    RO         0.584
## 7             CORUMBIARA            Corumbiara    RO         0.473
## 8          COSTA MARQUES         Costa Marques    RO         0.493
## 9        ESPIGÃO D'OESTE       Espigão D'Oeste    RO         0.536
## 10         GUAJARÁ-MIRIM         Guajará-Mirim    RO         0.519
##    IDHM_Longevidade GVA_AGROPEC GVA_INDUSTRY   GVA_PUBLIC URBANIZATION_INDEX
## 1             0.763  0.11848071 4.855722e-07 3.405978e-03          0.5918087
## 2             0.806  0.10345193 5.578592e-03 1.405826e-02          0.8434058
## 3             0.757  0.04213210 7.302128e-05 9.554075e-04          0.4482062
## 4             0.821  0.13407054 3.634820e-03 1.157999e-02          0.8072780
## 5             0.799  0.03771807 3.545427e-04 2.374261e-03          0.8531612
## 6             0.814  0.05183320 4.397635e-07 2.405381e-03          0.7448843
## 7             0.774  0.09425845 1.584682e-04 1.322334e-03          0.3141243
## 8             0.751  0.04461417 1.045577e-04 1.998908e-06          0.5325444
## 9             0.819  0.08232194 8.978652e-04 4.074391e-03          0.7543714
## 10            0.823  0.03560563 4.618831e-04 5.811294e-06          0.9295309
##    GDP_CAPITA      MLR_RES  Intercept IDHM_Longevidade.1 IDHM_Educacao.1
## 1  0.04990125 -0.005328596 -0.2989687          0.3186823       0.1782066
## 2  0.05595688 -0.017422847 -0.2990383          0.3189523       0.1777577
## 3  0.05783450  0.011797131 -0.2987542          0.3183033       0.1783610
## 4  0.06081357 -0.027919487 -0.2988266          0.3185545       0.1779754
## 5  0.06270891  0.001294792 -0.2988367          0.3184414       0.1783226
## 6  0.04244695 -0.024198121 -0.2987387          0.3183021       0.1783016
## 7  0.07657810  0.028124509 -0.2988108          0.3184223       0.1782689
## 8  0.02861561 -0.004334422 -0.2992103          0.3190553       0.1781789
## 9  0.04321665 -0.022540017 -0.2987526          0.3184439       0.1779746
## 10 0.04026828 -0.014843935 -0.2992684          0.3191960       0.1780334
##    GVA_AGROPEC.1 GVA_INDUSTRY.1 GVA_PUBLIC.1 URBANIZATION_INDEX.1          y
## 1      0.2171666       1.007059   -0.8143037         -0.009482365 0.04990125
## 2      0.2171032       1.004586   -0.8118081         -0.009355818 0.05595688
## 3      0.2172140       1.008736   -0.8157526         -0.009487759 0.05783450
## 4      0.2171588       1.006584   -0.8135944         -0.009381812 0.06081357
## 5      0.2171983       1.008193   -0.8153023         -0.009492257 0.06270891
## 6      0.2172092       1.008522   -0.8155051         -0.009466126 0.04244695
## 7      0.2171956       1.008052   -0.8151155         -0.009470285 0.07657810
## 8      0.2171301       1.005854   -0.8133930         -0.009522025 0.02861561
## 9      0.2171688       1.006909   -0.8138275         -0.009366906 0.04321665
## 10     0.2171047       1.004888   -0.8124650         -0.009487992 0.04026828
##          yhat     residual CV_Score Stud_residual Intercept_SE
## 1  0.05526785 -0.005366602        0   -0.09919428   0.01488882
## 2  0.07345246 -0.017495581        0   -0.32336196   0.01488893
## 3  0.04609872  0.011735787        0    0.21692666   0.01488812
## 4  0.08882957 -0.028016004        0   -0.51783397   0.01488812
## 5  0.06146372  0.001245191        0    0.02301080   0.01488840
## 6  0.06673362 -0.024286669        0   -0.44876555   0.01488800
## 7  0.04854880  0.028029301        0    0.51811036   0.01488824
## 8  0.03296209 -0.004346480        0   -0.08032619   0.01488989
## 9  0.06584716 -0.022630514        0   -0.41822424   0.01488786
## 10 0.05519942 -0.014931140        0   -0.27604559   0.01489004
##    IDHM_Longevidade_SE IDHM_Educacao_SE GVA_AGROPEC_SE GVA_INDUSTRY_SE
## 1           0.02316136       0.01236128     0.01172019      0.05565055
## 2           0.02316156       0.01236166     0.01172100      0.05566089
## 3           0.02316028       0.01236045     0.01171934      0.05564431
## 4           0.02316023       0.01236058     0.01171989      0.05565101
## 5           0.02316071       0.01236076     0.01171963      0.05564623
## 6           0.02316007       0.01236033     0.01171932      0.05564468
## 7           0.02316045       0.01236060     0.01171958      0.05564639
## 8           0.02316307       0.01236254     0.01172118      0.05565704
## 9           0.02315981       0.01236027     0.01171964      0.05564937
## 10          0.02316332       0.01236283     0.01172159      0.05566163
##    GVA_PUBLIC_SE URBANIZATION_INDEX_SE Intercept_TV IDHM_Longevidade_TV
## 1     0.06694256           0.004175245    -20.08008            13.75922
## 2     0.06694974           0.004175247    -20.08460            13.77076
## 3     0.06693731           0.004175017    -20.06661            13.74350
## 4     0.06694169           0.004174976    -20.07148            13.75437
## 5     0.06693901           0.004175109    -20.07178            13.74921
## 6     0.06693734           0.004174967    -20.06574            13.74357
## 7     0.06693885           0.004175047    -20.07025            13.74854
## 8     0.06694866           0.004175625    -20.09487            13.77431
## 9     0.06694013           0.004174883    -20.06687            13.74985
## 10    0.06695205           0.004175666    -20.09856            13.78024
##    IDHM_Educacao_TV GVA_AGROPEC_TV GVA_INDUSTRY_TV GVA_PUBLIC_TV
## 1          14.41652       18.52928        18.09612     -12.16422
## 2          14.37975       18.52259        18.04833     -12.12564
## 3          14.42998       18.53467        18.12828     -12.18681
## 4          14.39863       18.52907        18.08743     -12.15378
## 5          14.42651       18.53286        18.11790     -12.17978
## 6          14.42531       18.53429        18.12433     -12.18311
## 7          14.42235       18.53272        18.11531     -12.17702
## 8          14.41281       18.52459        18.07236     -12.14950
## 9          14.39893       18.53034        18.09381     -12.15754
## 10         14.40069       18.52178        18.05352     -12.13503
##    URBANIZATION_INDEX_TV  Local_R2                       geometry
## 1              -2.271092 0.3111511 MULTIPOLYGON (((-62.19465 -...
## 2              -2.240782 0.3115452 MULTIPOLYGON (((-62.53648 -...
## 3              -2.272508 0.3111230 MULTIPOLYGON (((-60.37075 -...
## 4              -2.247153 0.3114546 MULTIPOLYGON (((-61.0008 -1...
## 5              -2.273535 0.3111137 MULTIPOLYGON (((-61.49976 -...
## 6              -2.267354 0.3111890 MULTIPOLYGON (((-60.50475 -...
## 7              -2.268306 0.3111801 MULTIPOLYGON (((-61.34273 -...
## 8              -2.280383 0.3110411 MULTIPOLYGON (((-63.71199 -...
## 9              -2.243633 0.3114970 MULTIPOLYGON (((-60.94827 -...
## 10             -2.272210 0.3111482 MULTIPOLYGON (((-65.37724 -...
# Distribution (min, quartiles, mean, max) of the fitted values from the
# fixed-bandwidth GWR model.
summary(gwr.fixed[["SDF"]][["yhat"]])
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -0.02797  0.03088  0.05791  0.05763  0.07949  0.67577

8.2 Visualising local R2

# Choropleth of the local R2 from the fixed-bandwidth GWR model.
# The layer holds MULTIPOLYGON geometries, so the polygons are filled directly
# (tm_fill) instead of being drawn as dots; this matches the polygon-filled
# maps in sections 8.3-8.5. The separate base layer is omitted because it
# would draw the same municipality polygons a second time.
tm_shape(muni_cities.sf.fixed) +
  tm_fill(col = "Local_R2") +
  tm_borders(col = "gray60", lwd = 1)

8.3 Visualising Intercept SE

# Quick choropleth of the standard error of the local intercept.
# `borders` is spelled out in full (the original relied on partial matching of
# `border`); NULL omits the polygon outlines.
qtm(muni_cities.sf.fixed, fill = "Intercept_SE", borders = NULL)

8.4 Visualising Residuals

# Quick choropleth of the GWR residuals.
# `borders` is spelled out in full (the original relied on partial matching of
# `border`); NULL omits the polygon outlines.
qtm(muni_cities.sf.fixed, fill = "residual", borders = NULL)

8.5 Visualising Y hat

# Quick choropleth of the GWR fitted values (yhat).
# `borders` is spelled out in full (the original relied on partial matching of
# `border`); NULL omits the polygon outlines.
qtm(muni_cities.sf.fixed, fill = "yhat", borders = NULL)