0 Lectura de la tabla madre

# Load the master table from its serialized RDS file.
tabla_001 <- readRDS("tabla_001.rds")

# Keep the first 100 rows as a preview sample.
r3_100 <- tabla_001[seq_len(100), ]

# Render the preview with kableExtra: bootstrap "striped"/"hover" options,
# kable_paper() applied on top, all wrapped in a fixed-height scroll box.
scroll_box(
  kable_paper(
    kable_styling(kbl(r3_100), bootstrap_options = c("striped", "hover"))
  ),
  width = "100%",
  height = "300px"
)
zona código.x Freq.x anio p comuna.x promedio_i año comuna.y personas Ingresos_expandidos Freq.y p_poblacional código.y multi_pob p_variable
10101011001 10101 60 2017 0.0055366 Puerto Montt 268732.4 2017 10101 245902 66081845388 584 0.0023749 10101 156939747 0.0002440
10101011002 10101 177 2017 0.0163329 Puerto Montt 268732.4 2017 10101 245902 66081845388 2941 0.0119600 10101 790342117 0.0007198
10101021001 10101 82 2017 0.0075667 Puerto Montt 268732.4 2017 10101 245902 66081845388 3953 0.0160755 10101 1062299350 0.0003335
10101021002 10101 77 2017 0.0071053 Puerto Montt 268732.4 2017 10101 245902 66081845388 1107 0.0045018 10101 297486815 0.0003131
10101021003 10101 70 2017 0.0064594 Puerto Montt 268732.4 2017 10101 245902 66081845388 2294 0.0093289 10101 616472226 0.0002847
10101021004 10101 99 2017 0.0091354 Puerto Montt 268732.4 2017 10101 245902 66081845388 3391 0.0137900 10101 911271717 0.0004026
10101021005 10101 171 2017 0.0157793 Puerto Montt 268732.4 2017 10101 245902 66081845388 2564 0.0104269 10101 689029986 0.0006954
10101031001 10101 133 2017 0.0122728 Puerto Montt 268732.4 2017 10101 245902 66081845388 4530 0.0184220 10101 1217357970 0.0005409
10101031002 10101 115 2017 0.0106118 Puerto Montt 268732.4 2017 10101 245902 66081845388 4740 0.0192760 10101 1273791783 0.0004677
10101031003 10101 94 2017 0.0086740 Puerto Montt 268732.4 2017 10101 245902 66081845388 4107 0.0167018 10101 1103684147 0.0003823
10101031004 10101 88 2017 0.0081203 Puerto Montt 268732.4 2017 10101 245902 66081845388 2856 0.0116144 10101 767499859 0.0003579
10101031005 10101 146 2017 0.0134724 Puerto Montt 268732.4 2017 10101 245902 66081845388 5690 0.0231393 10101 1529087605 0.0005937
10101031006 10101 94 2017 0.0086740 Puerto Montt 268732.4 2017 10101 245902 66081845388 2460 0.0100040 10101 661081812 0.0003823
10101031007 10101 39 2017 0.0035988 Puerto Montt 268732.4 2017 10101 245902 66081845388 2292 0.0093208 10101 615934761 0.0001586
10101031008 10101 54 2017 0.0049829 Puerto Montt 268732.4 2017 10101 245902 66081845388 3585 0.0145790 10101 963405811 0.0002196
10101031009 10101 166 2017 0.0153179 Puerto Montt 268732.4 2017 10101 245902 66081845388 4436 0.0180397 10101 1192097121 0.0006751
10101031010 10101 92 2017 0.0084894 Puerto Montt 268732.4 2017 10101 245902 66081845388 3566 0.0145017 10101 958299894 0.0003741
10101031011 10101 49 2017 0.0045215 Puerto Montt 268732.4 2017 10101 245902 66081845388 2757 0.0112118 10101 740895347 0.0001993
10101031012 10101 94 2017 0.0086740 Puerto Montt 268732.4 2017 10101 245902 66081845388 1849 0.0075193 10101 496886289 0.0003823
10101031013 10101 73 2017 0.0067362 Puerto Montt 268732.4 2017 10101 245902 66081845388 3945 0.0160430 10101 1060149491 0.0002969
10101031014 10101 109 2017 0.0100581 Puerto Montt 268732.4 2017 10101 245902 66081845388 2265 0.0092110 10101 608678985 0.0004433
10101031015 10101 31 2017 0.0028606 Puerto Montt 268732.4 2017 10101 245902 66081845388 1930 0.0078487 10101 518653616 0.0001261
10101031016 10101 248 2017 0.0228846 Puerto Montt 268732.4 2017 10101 245902 66081845388 3071 0.0124887 10101 825277335 0.0010085
10101031017 10101 60 2017 0.0055366 Puerto Montt 268732.4 2017 10101 245902 66081845388 3885 0.0157990 10101 1044025544 0.0002440
10101032002 10101 2 2017 0.0001846 Puerto Montt 268732.4 2017 10101 245902 66081845388 129 0.0005246 10101 34666485 0.0000081
10101032011 10101 20 2017 0.0018455 Puerto Montt 268732.4 2017 10101 245902 66081845388 426 0.0017324 10101 114480021 0.0000813
10101032019 10101 32 2017 0.0029528 Puerto Montt 268732.4 2017 10101 245902 66081845388 829 0.0033713 10101 222779196 0.0001301
10101041001 10101 70 2017 0.0064594 Puerto Montt 268732.4 2017 10101 245902 66081845388 4342 0.0176574 10101 1166836271 0.0002847
10101041002 10101 55 2017 0.0050752 Puerto Montt 268732.4 2017 10101 245902 66081845388 2169 0.0088206 10101 582880671 0.0002237
10101041003 10101 774 2017 0.0714220 Puerto Montt 268732.4 2017 10101 245902 66081845388 5202 0.0211548 10101 1397946172 0.0031476
10101051001 10101 246 2017 0.0227000 Puerto Montt 268732.4 2017 10101 245902 66081845388 2463 0.0100162 10101 661888009 0.0010004
10101051002 10101 33 2017 0.0030451 Puerto Montt 268732.4 2017 10101 245902 66081845388 1913 0.0077795 10101 514085165 0.0001342
10101051003 10101 65 2017 0.0059980 Puerto Montt 268732.4 2017 10101 245902 66081845388 3272 0.0133061 10101 879292556 0.0002643
10101051004 10101 307 2017 0.0283289 Puerto Montt 268732.4 2017 10101 245902 66081845388 3633 0.0147742 10101 976304968 0.0012485
10101061001 10101 1239 2017 0.1143305 Puerto Montt 268732.4 2017 10101 245902 66081845388 6787 0.0276004 10101 1823887096 0.0050386
10101061002 10101 329 2017 0.0303590 Puerto Montt 268732.4 2017 10101 245902 66081845388 2729 0.0110979 10101 733370839 0.0013379
10101061003 10101 160 2017 0.0147642 Puerto Montt 268732.4 2017 10101 245902 66081845388 3668 0.0149165 10101 985710604 0.0006507
10101061004 10101 110 2017 0.0101504 Puerto Montt 268732.4 2017 10101 245902 66081845388 2995 0.0121796 10101 804853669 0.0004473
10101061005 10101 312 2017 0.0287903 Puerto Montt 268732.4 2017 10101 245902 66081845388 2571 0.0104554 10101 690911113 0.0012688
10101061006 10101 401 2017 0.0370029 Puerto Montt 268732.4 2017 10101 245902 66081845388 4130 0.0167953 10101 1109864993 0.0016307
10101061007 10101 12 2017 0.0011073 Puerto Montt 268732.4 2017 10101 245902 66081845388 817 0.0033225 10101 219554407 0.0000488
10101061008 10101 388 2017 0.0358033 Puerto Montt 268732.4 2017 10101 245902 66081845388 2109 0.0085766 10101 566756724 0.0015779
10101061009 10101 1 2017 0.0000923 Puerto Montt 268732.4 2017 10101 245902 66081845388 168 0.0006832 10101 45147051 0.0000041
10101061010 10101 6 2017 0.0005537 Puerto Montt 268732.4 2017 10101 245902 66081845388 1543 0.0062749 10101 414654161 0.0000244
10101062003 10101 10 2017 0.0009228 Puerto Montt 268732.4 2017 10101 245902 66081845388 158 0.0006425 10101 42459726 0.0000407
10101062008 10101 72 2017 0.0066439 Puerto Montt 268732.4 2017 10101 245902 66081845388 581 0.0023627 10101 156133550 0.0002928
10101062013 10101 61 2017 0.0056289 Puerto Montt 268732.4 2017 10101 245902 66081845388 571 0.0023221 10101 153446225 0.0002481
10101062029 10101 1 2017 0.0000923 Puerto Montt 268732.4 2017 10101 245902 66081845388 47 0.0001911 10101 12630425 0.0000041
10101062039 10101 4 2017 0.0003691 Puerto Montt 268732.4 2017 10101 245902 66081845388 67 0.0002725 10101 18005074 0.0000163
10101071001 10101 20 2017 0.0018455 Puerto Montt 268732.4 2017 10101 245902 66081845388 2352 0.0095648 10101 632058708 0.0000813
10101071002 10101 54 2017 0.0049829 Puerto Montt 268732.4 2017 10101 245902 66081845388 3919 0.0159372 10101 1053162447 0.0002196
10101071003 10101 112 2017 0.0103350 Puerto Montt 268732.4 2017 10101 245902 66081845388 4978 0.0202438 10101 1337750105 0.0004555
10101071004 10101 75 2017 0.0069207 Puerto Montt 268732.4 2017 10101 245902 66081845388 3443 0.0140015 10101 925245804 0.0003050
10101071005 10101 61 2017 0.0056289 Puerto Montt 268732.4 2017 10101 245902 66081845388 2751 0.0111874 10101 739282953 0.0002481
10101071006 10101 60 2017 0.0055366 Puerto Montt 268732.4 2017 10101 245902 66081845388 4214 0.0171369 10101 1132438518 0.0002440
10101071007 10101 29 2017 0.0026760 Puerto Montt 268732.4 2017 10101 245902 66081845388 2345 0.0095363 10101 630177581 0.0001179
10101071008 10101 77 2017 0.0071053 Puerto Montt 268732.4 2017 10101 245902 66081845388 5480 0.0222853 10101 1472653792 0.0003131
10101071009 10101 49 2017 0.0045215 Puerto Montt 268732.4 2017 10101 245902 66081845388 3549 0.0144326 10101 953731443 0.0001993
10101071010 10101 48 2017 0.0044293 Puerto Montt 268732.4 2017 10101 245902 66081845388 3521 0.0143187 10101 946206935 0.0001952
10101071011 10101 43 2017 0.0039679 Puerto Montt 268732.4 2017 10101 245902 66081845388 3094 0.0125822 10101 831458181 0.0001749
10101071012 10101 47 2017 0.0043370 Puerto Montt 268732.4 2017 10101 245902 66081845388 2621 0.0106587 10101 704347735 0.0001911
10101071014 10101 26 2017 0.0023992 Puerto Montt 268732.4 2017 10101 245902 66081845388 875 0.0035583 10101 235140888 0.0001057
10101072014 10101 36 2017 0.0033220 Puerto Montt 268732.4 2017 10101 245902 66081845388 997 0.0040545 10101 267926246 0.0001464
10101072021 10101 4 2017 0.0003691 Puerto Montt 268732.4 2017 10101 245902 66081845388 44 0.0001789 10101 11824228 0.0000163
10101072028 10101 4 2017 0.0003691 Puerto Montt 268732.4 2017 10101 245902 66081845388 145 0.0005897 10101 38966204 0.0000163
10101072029 10101 36 2017 0.0033220 Puerto Montt 268732.4 2017 10101 245902 66081845388 1051 0.0042741 10101 282437798 0.0001464
10101072036 10101 1 2017 0.0000923 Puerto Montt 268732.4 2017 10101 245902 66081845388 118 0.0004799 10101 31710428 0.0000041
10101072045 10101 7 2017 0.0006459 Puerto Montt 268732.4 2017 10101 245902 66081845388 113 0.0004595 10101 30366766 0.0000285
10101082016 10101 13 2017 0.0011996 Puerto Montt 268732.4 2017 10101 245902 66081845388 121 0.0004921 10101 32516626 0.0000529
10101082017 10101 5 2017 0.0004614 Puerto Montt 268732.4 2017 10101 245902 66081845388 38 0.0001545 10101 10211833 0.0000203
10101082018 10101 13 2017 0.0011996 Puerto Montt 268732.4 2017 10101 245902 66081845388 623 0.0025335 10101 167420312 0.0000529
10101082030 10101 3 2017 0.0002768 Puerto Montt 268732.4 2017 10101 245902 66081845388 176 0.0007157 10101 47296910 0.0000122
10101082034 10101 5 2017 0.0004614 Puerto Montt 268732.4 2017 10101 245902 66081845388 66 0.0002684 10101 17736341 0.0000203
10101082042 10101 12 2017 0.0011073 Puerto Montt 268732.4 2017 10101 245902 66081845388 253 0.0010289 10101 67989308 0.0000488
10101082045 10101 6 2017 0.0005537 Puerto Montt 268732.4 2017 10101 245902 66081845388 123 0.0005002 10101 33054091 0.0000244
10101092004 10101 6 2017 0.0005537 Puerto Montt 268732.4 2017 10101 245902 66081845388 97 0.0003945 10101 26067047 0.0000244
10101092008 10101 83 2017 0.0076589 Puerto Montt 268732.4 2017 10101 245902 66081845388 752 0.0030581 10101 202086798 0.0003375
10101092037 10101 11 2017 0.0010150 Puerto Montt 268732.4 2017 10101 245902 66081845388 276 0.0011224 10101 74170154 0.0000447
10101092040 10101 33 2017 0.0030451 Puerto Montt 268732.4 2017 10101 245902 66081845388 509 0.0020699 10101 136784814 0.0001342
10101092041 10101 44 2017 0.0040602 Puerto Montt 268732.4 2017 10101 245902 66081845388 1683 0.0068442 10101 452276703 0.0001789
10101092044 10101 21 2017 0.0019378 Puerto Montt 268732.4 2017 10101 245902 66081845388 530 0.0021553 10101 142428195 0.0000854
10101102005 10101 1 2017 0.0000923 Puerto Montt 268732.4 2017 10101 245902 66081845388 147 0.0005978 10101 39503669 0.0000041
10101102007 10101 12 2017 0.0011073 Puerto Montt 268732.4 2017 10101 245902 66081845388 824 0.0033509 10101 221435534 0.0000488
10101102035 10101 22 2017 0.0020301 Puerto Montt 268732.4 2017 10101 245902 66081845388 940 0.0038227 10101 252608497 0.0000895
10101102037 10101 3 2017 0.0002768 Puerto Montt 268732.4 2017 10101 245902 66081845388 164 0.0006669 10101 44072121 0.0000122
10101102051 10101 1 2017 0.0000923 Puerto Montt 268732.4 2017 10101 245902 66081845388 57 0.0002318 10101 15317749 0.0000041
10101112025 10101 13 2017 0.0011996 Puerto Montt 268732.4 2017 10101 245902 66081845388 1078 0.0043839 10101 289693574 0.0000529
10101122024 10101 6 2017 0.0005537 Puerto Montt 268732.4 2017 10101 245902 66081845388 952 0.0038715 10101 255833286 0.0000244
10101131001 10101 88 2017 0.0081203 Puerto Montt 268732.4 2017 10101 245902 66081845388 604 0.0024563 10101 162314396 0.0003579
10101132022 10101 15 2017 0.0013841 Puerto Montt 268732.4 2017 10101 245902 66081845388 703 0.0028589 10101 188918908 0.0000610
10101132023 10101 12 2017 0.0011073 Puerto Montt 268732.4 2017 10101 245902 66081845388 603 0.0024522 10101 162045664 0.0000488
10101132027 10101 1 2017 0.0000923 Puerto Montt 268732.4 2017 10101 245902 66081845388 105 0.0004270 10101 28216907 0.0000041
10101132049 10101 77 2017 0.0071053 Puerto Montt 268732.4 2017 10101 245902 66081845388 1883 0.0076575 10101 506023192 0.0003131
10101142009 10101 2 2017 0.0001846 Puerto Montt 268732.4 2017 10101 245902 66081845388 59 0.0002399 10101 15855214 0.0000081
10101142015 10101 4 2017 0.0003691 Puerto Montt 268732.4 2017 10101 245902 66081845388 124 0.0005043 10101 33322823 0.0000163
10101142027 10101 4 2017 0.0003691 Puerto Montt 268732.4 2017 10101 245902 66081845388 192 0.0007808 10101 51596629 0.0000163
10101142038 10101 3 2017 0.0002768 Puerto Montt 268732.4 2017 10101 245902 66081845388 53 0.0002155 10101 14242820 0.0000122
10101142046 10101 9 2017 0.0008305 Puerto Montt 268732.4 2017 10101 245902 66081845388 317 0.0012891 10101 85188185 0.0000366
10101142047 10101 11 2017 0.0010150 Puerto Montt 268732.4 2017 10101 245902 66081845388 263 0.0010695 10101 70676633 0.0000447
10101142049 10101 61 2017 0.0056289 Puerto Montt 268732.4 2017 10101 245902 66081845388 973 0.0039569 10101 261476668 0.0002481


# Number of rows in the master table.
nrow(tabla_001)
## [1] 12671


# Per-column summary; the printed output also reports the NA count of each
# column that has missing values.
summary(tabla_001)
##           zona         código.x             Freq.x            anio          
##  10101011001:    1   Length:12671       Min.   :   1.00   Length:12671      
##  10101011002:    1   Class :character   1st Qu.:   3.00   Class :character  
##  10101021001:    1   Mode  :character   Median :  13.00   Mode  :character  
##  10101021002:    1                      Mean   :  88.55                     
##  10101021003:    1                      3rd Qu.:  99.00                     
##  10101021004:    1                      Max.   :2209.00                     
##  (Other)    :12665                                                          
##        p               comuna.x           promedio_i         año           
##  Min.   :0.0000161   Length:12671       Min.   :156649   Length:12671      
##  1st Qu.:0.0038380   Class :character   1st Qu.:221772   Class :character  
##  Median :0.0099865   Mode  :character   Median :258970   Mode  :character  
##  Mean   :0.0272275                      Mean   :263123                     
##  3rd Qu.:0.0259991                      3rd Qu.:297912                     
##  Max.   :1.0000000                      Max.   :469344                     
##                                         NA's   :134                        
##     comuna.y        personas      Ingresos_expandidos     Freq.y     
##  Min.   : 1101   Min.   :  1250   Min.   :2.925e+08   Min.   :    1  
##  1st Qu.: 6303   1st Qu.: 16394   1st Qu.:3.817e+09   1st Qu.:  100  
##  Median : 9104   Median : 38013   Median :8.862e+09   Median :  374  
##  Mean   : 9346   Mean   : 95867   Mean   :2.922e+10   Mean   : 1372  
##  3rd Qu.:13117   3rd Qu.:147041   3rd Qu.:4.259e+10   3rd Qu.: 2556  
##  Max.   :16305   Max.   :568106   Max.   :1.808e+11   Max.   :11700  
##  NA's   :134     NA's   :134      NA's   :134                        
##  p_poblacional         código.y           multi_pob           p_variable     
##  Min.   :0.0000166   Length:12671       Min.   :2.291e+05   Min.   :0.00000  
##  1st Qu.:0.0038052   Class :character   1st Qu.:2.338e+07   1st Qu.:0.00013  
##  Median :0.0103905   Mode  :character   Median :9.259e+07   Median :0.00038  
##  Mean   :0.0259502                      Mean   :4.130e+08   Mean   :0.00114  
##  3rd Qu.:0.0252357                      3rd Qu.:7.532e+08   3rd Qu.:0.00115  
##  Max.   :0.9060475                      Max.   :4.695e+09   Max.   :0.04290  
##                                         NA's   :134         NA's   :134

1 NAs y outliers

1.1 Existencia y tratamiento de NAs

Comprobemos si los campos multi_pob y p_variable contienen valores NA:

# Check whether each of the two columns contains at least one missing value.
anyNA(tabla_001[["multi_pob"]])
## [1] TRUE
anyNA(tabla_001[["p_variable"]])
## [1] TRUE

Reemplazaremos los valores NA por el promedio de cada campo, generando dos nuevas columnas:

# Mean imputation: copy each column into a new "*_mean" column, filling the
# missing entries with the mean of the observed (non-NA) values.
tabla_001$multi_pob_mean <- replace(
  tabla_001$multi_pob,
  is.na(tabla_001$multi_pob),
  mean(tabla_001$multi_pob, na.rm = TRUE)
)

tabla_001$p_variable_mean <- replace(
  tabla_001$p_variable,
  is.na(tabla_001$p_variable),
  mean(tabla_001$p_variable, na.rm = TRUE)
)

Verificamos:

# Confirm that the imputed columns no longer contain missing values.
anyNA(tabla_001[["multi_pob_mean"]])
## [1] FALSE
anyNA(tabla_001[["p_variable_mean"]])
## [1] FALSE

1.2 Outliers

A veces los outliers pueden distorsionar mucho un modelo. La manera más fácil de identificarlos es por medio del análisis de diagramas de caja y bigotes.

Observemos cómo quedaron nuestros nuevos campos:

# Refresh the 100-row preview so it includes the two newly added columns.
r3_100 <- tabla_001[seq_len(100), ]

# Render the preview with kableExtra inside a fixed-height scroll box.
scroll_box(
  kable_paper(
    kable_styling(kbl(r3_100), bootstrap_options = c("striped", "hover"))
  ),
  width = "100%",
  height = "300px"
)
zona código.x Freq.x anio p comuna.x promedio_i año comuna.y personas Ingresos_expandidos Freq.y p_poblacional código.y multi_pob p_variable multi_pob_mean p_variable_mean
10101011001 10101 60 2017 0.0055366 Puerto Montt 268732.4 2017 10101 245902 66081845388 584 0.0023749 10101 156939747 0.0002440 156939747 0.0002440
10101011002 10101 177 2017 0.0163329 Puerto Montt 268732.4 2017 10101 245902 66081845388 2941 0.0119600 10101 790342117 0.0007198 790342117 0.0007198
10101021001 10101 82 2017 0.0075667 Puerto Montt 268732.4 2017 10101 245902 66081845388 3953 0.0160755 10101 1062299350 0.0003335 1062299350 0.0003335
10101021002 10101 77 2017 0.0071053 Puerto Montt 268732.4 2017 10101 245902 66081845388 1107 0.0045018 10101 297486815 0.0003131 297486815 0.0003131
10101021003 10101 70 2017 0.0064594 Puerto Montt 268732.4 2017 10101 245902 66081845388 2294 0.0093289 10101 616472226 0.0002847 616472226 0.0002847
10101021004 10101 99 2017 0.0091354 Puerto Montt 268732.4 2017 10101 245902 66081845388 3391 0.0137900 10101 911271717 0.0004026 911271717 0.0004026
10101021005 10101 171 2017 0.0157793 Puerto Montt 268732.4 2017 10101 245902 66081845388 2564 0.0104269 10101 689029986 0.0006954 689029986 0.0006954
10101031001 10101 133 2017 0.0122728 Puerto Montt 268732.4 2017 10101 245902 66081845388 4530 0.0184220 10101 1217357970 0.0005409 1217357970 0.0005409
10101031002 10101 115 2017 0.0106118 Puerto Montt 268732.4 2017 10101 245902 66081845388 4740 0.0192760 10101 1273791783 0.0004677 1273791783 0.0004677
10101031003 10101 94 2017 0.0086740 Puerto Montt 268732.4 2017 10101 245902 66081845388 4107 0.0167018 10101 1103684147 0.0003823 1103684147 0.0003823
10101031004 10101 88 2017 0.0081203 Puerto Montt 268732.4 2017 10101 245902 66081845388 2856 0.0116144 10101 767499859 0.0003579 767499859 0.0003579
10101031005 10101 146 2017 0.0134724 Puerto Montt 268732.4 2017 10101 245902 66081845388 5690 0.0231393 10101 1529087605 0.0005937 1529087605 0.0005937
10101031006 10101 94 2017 0.0086740 Puerto Montt 268732.4 2017 10101 245902 66081845388 2460 0.0100040 10101 661081812 0.0003823 661081812 0.0003823
10101031007 10101 39 2017 0.0035988 Puerto Montt 268732.4 2017 10101 245902 66081845388 2292 0.0093208 10101 615934761 0.0001586 615934761 0.0001586
10101031008 10101 54 2017 0.0049829 Puerto Montt 268732.4 2017 10101 245902 66081845388 3585 0.0145790 10101 963405811 0.0002196 963405811 0.0002196
10101031009 10101 166 2017 0.0153179 Puerto Montt 268732.4 2017 10101 245902 66081845388 4436 0.0180397 10101 1192097121 0.0006751 1192097121 0.0006751
10101031010 10101 92 2017 0.0084894 Puerto Montt 268732.4 2017 10101 245902 66081845388 3566 0.0145017 10101 958299894 0.0003741 958299894 0.0003741
10101031011 10101 49 2017 0.0045215 Puerto Montt 268732.4 2017 10101 245902 66081845388 2757 0.0112118 10101 740895347 0.0001993 740895347 0.0001993
10101031012 10101 94 2017 0.0086740 Puerto Montt 268732.4 2017 10101 245902 66081845388 1849 0.0075193 10101 496886289 0.0003823 496886289 0.0003823
10101031013 10101 73 2017 0.0067362 Puerto Montt 268732.4 2017 10101 245902 66081845388 3945 0.0160430 10101 1060149491 0.0002969 1060149491 0.0002969
10101031014 10101 109 2017 0.0100581 Puerto Montt 268732.4 2017 10101 245902 66081845388 2265 0.0092110 10101 608678985 0.0004433 608678985 0.0004433
10101031015 10101 31 2017 0.0028606 Puerto Montt 268732.4 2017 10101 245902 66081845388 1930 0.0078487 10101 518653616 0.0001261 518653616 0.0001261
10101031016 10101 248 2017 0.0228846 Puerto Montt 268732.4 2017 10101 245902 66081845388 3071 0.0124887 10101 825277335 0.0010085 825277335 0.0010085
10101031017 10101 60 2017 0.0055366 Puerto Montt 268732.4 2017 10101 245902 66081845388 3885 0.0157990 10101 1044025544 0.0002440 1044025544 0.0002440
10101032002 10101 2 2017 0.0001846 Puerto Montt 268732.4 2017 10101 245902 66081845388 129 0.0005246 10101 34666485 0.0000081 34666485 0.0000081
10101032011 10101 20 2017 0.0018455 Puerto Montt 268732.4 2017 10101 245902 66081845388 426 0.0017324 10101 114480021 0.0000813 114480021 0.0000813
10101032019 10101 32 2017 0.0029528 Puerto Montt 268732.4 2017 10101 245902 66081845388 829 0.0033713 10101 222779196 0.0001301 222779196 0.0001301
10101041001 10101 70 2017 0.0064594 Puerto Montt 268732.4 2017 10101 245902 66081845388 4342 0.0176574 10101 1166836271 0.0002847 1166836271 0.0002847
10101041002 10101 55 2017 0.0050752 Puerto Montt 268732.4 2017 10101 245902 66081845388 2169 0.0088206 10101 582880671 0.0002237 582880671 0.0002237
10101041003 10101 774 2017 0.0714220 Puerto Montt 268732.4 2017 10101 245902 66081845388 5202 0.0211548 10101 1397946172 0.0031476 1397946172 0.0031476
10101051001 10101 246 2017 0.0227000 Puerto Montt 268732.4 2017 10101 245902 66081845388 2463 0.0100162 10101 661888009 0.0010004 661888009 0.0010004
10101051002 10101 33 2017 0.0030451 Puerto Montt 268732.4 2017 10101 245902 66081845388 1913 0.0077795 10101 514085165 0.0001342 514085165 0.0001342
10101051003 10101 65 2017 0.0059980 Puerto Montt 268732.4 2017 10101 245902 66081845388 3272 0.0133061 10101 879292556 0.0002643 879292556 0.0002643
10101051004 10101 307 2017 0.0283289 Puerto Montt 268732.4 2017 10101 245902 66081845388 3633 0.0147742 10101 976304968 0.0012485 976304968 0.0012485
10101061001 10101 1239 2017 0.1143305 Puerto Montt 268732.4 2017 10101 245902 66081845388 6787 0.0276004 10101 1823887096 0.0050386 1823887096 0.0050386
10101061002 10101 329 2017 0.0303590 Puerto Montt 268732.4 2017 10101 245902 66081845388 2729 0.0110979 10101 733370839 0.0013379 733370839 0.0013379
10101061003 10101 160 2017 0.0147642 Puerto Montt 268732.4 2017 10101 245902 66081845388 3668 0.0149165 10101 985710604 0.0006507 985710604 0.0006507
10101061004 10101 110 2017 0.0101504 Puerto Montt 268732.4 2017 10101 245902 66081845388 2995 0.0121796 10101 804853669 0.0004473 804853669 0.0004473
10101061005 10101 312 2017 0.0287903 Puerto Montt 268732.4 2017 10101 245902 66081845388 2571 0.0104554 10101 690911113 0.0012688 690911113 0.0012688
10101061006 10101 401 2017 0.0370029 Puerto Montt 268732.4 2017 10101 245902 66081845388 4130 0.0167953 10101 1109864993 0.0016307 1109864993 0.0016307
10101061007 10101 12 2017 0.0011073 Puerto Montt 268732.4 2017 10101 245902 66081845388 817 0.0033225 10101 219554407 0.0000488 219554407 0.0000488
10101061008 10101 388 2017 0.0358033 Puerto Montt 268732.4 2017 10101 245902 66081845388 2109 0.0085766 10101 566756724 0.0015779 566756724 0.0015779
10101061009 10101 1 2017 0.0000923 Puerto Montt 268732.4 2017 10101 245902 66081845388 168 0.0006832 10101 45147051 0.0000041 45147051 0.0000041
10101061010 10101 6 2017 0.0005537 Puerto Montt 268732.4 2017 10101 245902 66081845388 1543 0.0062749 10101 414654161 0.0000244 414654161 0.0000244
10101062003 10101 10 2017 0.0009228 Puerto Montt 268732.4 2017 10101 245902 66081845388 158 0.0006425 10101 42459726 0.0000407 42459726 0.0000407
10101062008 10101 72 2017 0.0066439 Puerto Montt 268732.4 2017 10101 245902 66081845388 581 0.0023627 10101 156133550 0.0002928 156133550 0.0002928
10101062013 10101 61 2017 0.0056289 Puerto Montt 268732.4 2017 10101 245902 66081845388 571 0.0023221 10101 153446225 0.0002481 153446225 0.0002481
10101062029 10101 1 2017 0.0000923 Puerto Montt 268732.4 2017 10101 245902 66081845388 47 0.0001911 10101 12630425 0.0000041 12630425 0.0000041
10101062039 10101 4 2017 0.0003691 Puerto Montt 268732.4 2017 10101 245902 66081845388 67 0.0002725 10101 18005074 0.0000163 18005074 0.0000163
10101071001 10101 20 2017 0.0018455 Puerto Montt 268732.4 2017 10101 245902 66081845388 2352 0.0095648 10101 632058708 0.0000813 632058708 0.0000813
10101071002 10101 54 2017 0.0049829 Puerto Montt 268732.4 2017 10101 245902 66081845388 3919 0.0159372 10101 1053162447 0.0002196 1053162447 0.0002196
10101071003 10101 112 2017 0.0103350 Puerto Montt 268732.4 2017 10101 245902 66081845388 4978 0.0202438 10101 1337750105 0.0004555 1337750105 0.0004555
10101071004 10101 75 2017 0.0069207 Puerto Montt 268732.4 2017 10101 245902 66081845388 3443 0.0140015 10101 925245804 0.0003050 925245804 0.0003050
10101071005 10101 61 2017 0.0056289 Puerto Montt 268732.4 2017 10101 245902 66081845388 2751 0.0111874 10101 739282953 0.0002481 739282953 0.0002481
10101071006 10101 60 2017 0.0055366 Puerto Montt 268732.4 2017 10101 245902 66081845388 4214 0.0171369 10101 1132438518 0.0002440 1132438518 0.0002440
10101071007 10101 29 2017 0.0026760 Puerto Montt 268732.4 2017 10101 245902 66081845388 2345 0.0095363 10101 630177581 0.0001179 630177581 0.0001179
10101071008 10101 77 2017 0.0071053 Puerto Montt 268732.4 2017 10101 245902 66081845388 5480 0.0222853 10101 1472653792 0.0003131 1472653792 0.0003131
10101071009 10101 49 2017 0.0045215 Puerto Montt 268732.4 2017 10101 245902 66081845388 3549 0.0144326 10101 953731443 0.0001993 953731443 0.0001993
10101071010 10101 48 2017 0.0044293 Puerto Montt 268732.4 2017 10101 245902 66081845388 3521 0.0143187 10101 946206935 0.0001952 946206935 0.0001952
10101071011 10101 43 2017 0.0039679 Puerto Montt 268732.4 2017 10101 245902 66081845388 3094 0.0125822 10101 831458181 0.0001749 831458181 0.0001749
10101071012 10101 47 2017 0.0043370 Puerto Montt 268732.4 2017 10101 245902 66081845388 2621 0.0106587 10101 704347735 0.0001911 704347735 0.0001911
10101071014 10101 26 2017 0.0023992 Puerto Montt 268732.4 2017 10101 245902 66081845388 875 0.0035583 10101 235140888 0.0001057 235140888 0.0001057
10101072014 10101 36 2017 0.0033220 Puerto Montt 268732.4 2017 10101 245902 66081845388 997 0.0040545 10101 267926246 0.0001464 267926246 0.0001464
10101072021 10101 4 2017 0.0003691 Puerto Montt 268732.4 2017 10101 245902 66081845388 44 0.0001789 10101 11824228 0.0000163 11824228 0.0000163
10101072028 10101 4 2017 0.0003691 Puerto Montt 268732.4 2017 10101 245902 66081845388 145 0.0005897 10101 38966204 0.0000163 38966204 0.0000163
10101072029 10101 36 2017 0.0033220 Puerto Montt 268732.4 2017 10101 245902 66081845388 1051 0.0042741 10101 282437798 0.0001464 282437798 0.0001464
10101072036 10101 1 2017 0.0000923 Puerto Montt 268732.4 2017 10101 245902 66081845388 118 0.0004799 10101 31710428 0.0000041 31710428 0.0000041
10101072045 10101 7 2017 0.0006459 Puerto Montt 268732.4 2017 10101 245902 66081845388 113 0.0004595 10101 30366766 0.0000285 30366766 0.0000285
10101082016 10101 13 2017 0.0011996 Puerto Montt 268732.4 2017 10101 245902 66081845388 121 0.0004921 10101 32516626 0.0000529 32516626 0.0000529
10101082017 10101 5 2017 0.0004614 Puerto Montt 268732.4 2017 10101 245902 66081845388 38 0.0001545 10101 10211833 0.0000203 10211833 0.0000203
10101082018 10101 13 2017 0.0011996 Puerto Montt 268732.4 2017 10101 245902 66081845388 623 0.0025335 10101 167420312 0.0000529 167420312 0.0000529
10101082030 10101 3 2017 0.0002768 Puerto Montt 268732.4 2017 10101 245902 66081845388 176 0.0007157 10101 47296910 0.0000122 47296910 0.0000122
10101082034 10101 5 2017 0.0004614 Puerto Montt 268732.4 2017 10101 245902 66081845388 66 0.0002684 10101 17736341 0.0000203 17736341 0.0000203
10101082042 10101 12 2017 0.0011073 Puerto Montt 268732.4 2017 10101 245902 66081845388 253 0.0010289 10101 67989308 0.0000488 67989308 0.0000488
10101082045 10101 6 2017 0.0005537 Puerto Montt 268732.4 2017 10101 245902 66081845388 123 0.0005002 10101 33054091 0.0000244 33054091 0.0000244
10101092004 10101 6 2017 0.0005537 Puerto Montt 268732.4 2017 10101 245902 66081845388 97 0.0003945 10101 26067047 0.0000244 26067047 0.0000244
10101092008 10101 83 2017 0.0076589 Puerto Montt 268732.4 2017 10101 245902 66081845388 752 0.0030581 10101 202086798 0.0003375 202086798 0.0003375
10101092037 10101 11 2017 0.0010150 Puerto Montt 268732.4 2017 10101 245902 66081845388 276 0.0011224 10101 74170154 0.0000447 74170154 0.0000447
10101092040 10101 33 2017 0.0030451 Puerto Montt 268732.4 2017 10101 245902 66081845388 509 0.0020699 10101 136784814 0.0001342 136784814 0.0001342
10101092041 10101 44 2017 0.0040602 Puerto Montt 268732.4 2017 10101 245902 66081845388 1683 0.0068442 10101 452276703 0.0001789 452276703 0.0001789
10101092044 10101 21 2017 0.0019378 Puerto Montt 268732.4 2017 10101 245902 66081845388 530 0.0021553 10101 142428195 0.0000854 142428195 0.0000854
10101102005 10101 1 2017 0.0000923 Puerto Montt 268732.4 2017 10101 245902 66081845388 147 0.0005978 10101 39503669 0.0000041 39503669 0.0000041
10101102007 10101 12 2017 0.0011073 Puerto Montt 268732.4 2017 10101 245902 66081845388 824 0.0033509 10101 221435534 0.0000488 221435534 0.0000488
10101102035 10101 22 2017 0.0020301 Puerto Montt 268732.4 2017 10101 245902 66081845388 940 0.0038227 10101 252608497 0.0000895 252608497 0.0000895
10101102037 10101 3 2017 0.0002768 Puerto Montt 268732.4 2017 10101 245902 66081845388 164 0.0006669 10101 44072121 0.0000122 44072121 0.0000122
10101102051 10101 1 2017 0.0000923 Puerto Montt 268732.4 2017 10101 245902 66081845388 57 0.0002318 10101 15317749 0.0000041 15317749 0.0000041
10101112025 10101 13 2017 0.0011996 Puerto Montt 268732.4 2017 10101 245902 66081845388 1078 0.0043839 10101 289693574 0.0000529 289693574 0.0000529
10101122024 10101 6 2017 0.0005537 Puerto Montt 268732.4 2017 10101 245902 66081845388 952 0.0038715 10101 255833286 0.0000244 255833286 0.0000244
10101131001 10101 88 2017 0.0081203 Puerto Montt 268732.4 2017 10101 245902 66081845388 604 0.0024563 10101 162314396 0.0003579 162314396 0.0003579
10101132022 10101 15 2017 0.0013841 Puerto Montt 268732.4 2017 10101 245902 66081845388 703 0.0028589 10101 188918908 0.0000610 188918908 0.0000610
10101132023 10101 12 2017 0.0011073 Puerto Montt 268732.4 2017 10101 245902 66081845388 603 0.0024522 10101 162045664 0.0000488 162045664 0.0000488
10101132027 10101 1 2017 0.0000923 Puerto Montt 268732.4 2017 10101 245902 66081845388 105 0.0004270 10101 28216907 0.0000041 28216907 0.0000041
10101132049 10101 77 2017 0.0071053 Puerto Montt 268732.4 2017 10101 245902 66081845388 1883 0.0076575 10101 506023192 0.0003131 506023192 0.0003131
10101142009 10101 2 2017 0.0001846 Puerto Montt 268732.4 2017 10101 245902 66081845388 59 0.0002399 10101 15855214 0.0000081 15855214 0.0000081
10101142015 10101 4 2017 0.0003691 Puerto Montt 268732.4 2017 10101 245902 66081845388 124 0.0005043 10101 33322823 0.0000163 33322823 0.0000163
10101142027 10101 4 2017 0.0003691 Puerto Montt 268732.4 2017 10101 245902 66081845388 192 0.0007808 10101 51596629 0.0000163 51596629 0.0000163
10101142038 10101 3 2017 0.0002768 Puerto Montt 268732.4 2017 10101 245902 66081845388 53 0.0002155 10101 14242820 0.0000122 14242820 0.0000122
10101142046 10101 9 2017 0.0008305 Puerto Montt 268732.4 2017 10101 245902 66081845388 317 0.0012891 10101 85188185 0.0000366 85188185 0.0000366
10101142047 10101 11 2017 0.0010150 Puerto Montt 268732.4 2017 10101 245902 66081845388 263 0.0010695 10101 70676633 0.0000447 70676633 0.0000447
10101142049 10101 61 2017 0.0056289 Puerto Montt 268732.4 2017 10101 245902 66081845388 973 0.0039569 10101 261476668 0.0002481 261476668 0.0002481


# Side-by-side boxplots (1 row x 2 columns) of the two variables under study,
# before any outlier treatment.
par(mfrow = c(1, 2))
boxplot(
  tabla_001$multi_pob_mean,
  main = "multi_pob_mean",
  xlab = "bottom & left box",
  col = "#FF6B00",
  boxwex = 0.9
)
boxplot(
  tabla_001$p_variable_mean,
  main = "p_variable_mean",
  xlab = "bottom & left box",
  col = "#2398AB",
  boxwex = 0.9
)

La línea central es la mediana y los extremos de la caja el primer y el tercer cuartil (rango intercuartílico), en el que cae el 50% de las observaciones.

1.3 Transformaciones sobre outliers

Podemos enmascarar outliers con transformaciones. Por ejemplo, podemos reemplazar los valores que estén por debajo del percentil 5 y los que estén por sobre el percentil 95 con el valor medio, tal como lo hicimos previamente sobre los NAs, o con la mediana.

1.3.1 Reemplazo de outliers con promedios:

sobre multi_pob_mean:

# Replace the tails of a numeric vector with its mean.
#
# Values strictly below the 5th percentile or strictly above the 95th
# percentile of `x` are overwritten with mean(x). Values inside that range
# (and NA entries) are returned unchanged.
#
# Args:
#   x:        numeric vector.
#   removeNA: forwarded as `na.rm` to quantile() and mean(). With FALSE,
#             quantile() stops with an error if `x` contains NA.
#
# Returns: a vector of the same length as `x`.
outliers <- function(x, removeNA = TRUE) {
  limits <- quantile(x, probs = c(0.05, 0.95), na.rm = removeNA)
  # Compute the replacement value and the affected positions from the
  # untouched data: replacing one tail first would shift the mean used for the
  # other tail and could flag freshly replaced values a second time.
  centre <- mean(x, na.rm = removeNA)
  in_tail <- x < limits[1] | x > limits[2]
  # which() drops NA comparisons so missing values are left as they are
  x[which(in_tail)] <- centre
  x
}

# Apply the mean-based replacement to multi_pob_mean.
outliers_data <- outliers(tabla_001$multi_pob_mean)

# Before/after boxplots. Both panels use multi_pob_mean so that the comparison
# refers to the same column that was cleaned and that the axis label names.
par(mfrow = c(1, 2))
boxplot(tabla_001$multi_pob_mean, col = "#FF6B00", xlab = "multi_pob_mean", main = "con outliers")
boxplot(outliers_data, col = "#FF6B00", xlab = "multi_pob_mean", main = "sin outliers")

1.3.2 Reemplazo de outliers con medianas:

sobre multi_pob_mean:

# Replace the tails of a numeric vector with its median.
#
# Values strictly below the 5th percentile or strictly above the 95th
# percentile of `x` are overwritten with median(x). Values inside that range
# (and NA entries) are returned unchanged.
#
# Args:
#   x:        numeric vector.
#   removeNA: forwarded as `na.rm` to quantile() and median(). With FALSE,
#             quantile() stops with an error if `x` contains NA.
#
# Returns: a vector of the same length as `x`.
outliers <- function(x, removeNA = TRUE) {
  limits <- quantile(x, probs = c(0.05, 0.95), na.rm = removeNA)
  # Compute the replacement value and the affected positions from the
  # untouched data: replacing the lower tail first can move the median that
  # would then be written into the upper tail.
  centre <- median(x, na.rm = removeNA)
  in_tail <- x < limits[1] | x > limits[2]
  # which() drops NA comparisons so missing values are left as they are
  x[which(in_tail)] <- centre
  x
}

# Apply the median-based replacement to multi_pob_mean.
outliers_data <- outliers(tabla_001$multi_pob_mean)

# Before/after boxplots in a 1 x 2 layout.
par(mfrow = c(1, 2))
boxplot(
  tabla_001$multi_pob_mean,
  main = "con outliers",
  xlab = "multi_pob_mean medianas",
  col = "#FF6B00"
)
boxplot(
  outliers_data,
  main = "sin outliers",
  xlab = "multi_pob_mean medianas",
  col = "#FF6B00"
)

1.3.3 Sustitución de outliers

Sustituimos los valores que están fuera de los bigotes con los valores del percentil 5 y el 95 respectivamente.

sobre multi_pob_mean:

# Cap values lying more than 1.5 * IQR outside the quartiles.
#
# Values below Q1 - 1.5 * IQR are set to the 5th percentile of `x`; values
# above Q3 + 1.5 * IQR are set to the 95th percentile. Everything else is
# returned unchanged.
#
# Args:
#   x:        numeric vector.
#   removeNA: forwarded as `na.rm` to quantile().
#
# Returns: a vector of the same length as `x`.
replace_outliers <- function(x, removeNA = TRUE) {
  cuts <- quantile(x, probs = c(0.05, 0.25, 0.75, 0.95), na.rm = removeNA)
  p05 <- cuts[1]
  q1 <- cuts[2]
  q3 <- cuts[3]
  p95 <- cuts[4]

  reach <- 1.5 * (q3 - q1)
  too_low <- x < q1 - reach
  too_high <- x > q3 + reach

  x[too_low] <- p05
  x[too_high] <- p95
  x
}
# Cap the outliers of multi_pob_mean.
multi_pob_capped <- replace_outliers(tabla_001$multi_pob_mean)

# Before/after boxplots in a 1 x 2 layout.
par(mfrow = c(1, 2))
boxplot(
  tabla_001$multi_pob_mean,
  main = "con outliers",
  xlab = "multi_pob_mean",
  col = "#FF6B00"
)
boxplot(
  multi_pob_capped,
  main = "sin outliers",
  xlab = "multi_pob_mean",
  col = "#FF6B00"
)

# Capping replaces values in place, so the number of observations is kept.
length(multi_pob_capped)
## [1] 12671

Vemos que resulta mucho mejor esta última técnica, por lo que la aplicamos a los campos multi_pob_mean y p_variable_mean.

# Cap the outliers of both variables with replace_outliers().
p_variable_capped_multi_pob_mean <- replace_outliers(tabla_001$multi_pob_mean)
p_variable_capped_p_variable_mean <- replace_outliers(tabla_001$p_variable_mean)

# One boxplot per capped variable, side by side.
par(mfrow = c(1, 2))
boxplot(
  p_variable_capped_multi_pob_mean,
  main = "multi_pob_mean sin outliers",
  xlab = "multi_pob_mean",
  col = "#FF6B00"
)
boxplot(
  p_variable_capped_p_variable_mean,
  main = "p_variable_mean sin outliers",
  xlab = "p_variable_mean",
  col = "#2398AB"
)

2 Histogramas y densidades

Histograma y densidad para multi_pob_mean sin outliers

# Histogram (left) and kernel density estimate (right) of the capped
# multi_pob_mean values.
par(mfrow = c(1, 2))
hist(
  p_variable_capped_multi_pob_mean,
  main = "Histograma",
  col = "#FF6B00"
)
plot(
  density(p_variable_capped_multi_pob_mean),
  main = "Densidad",
  col = "red"
)

Histograma y densidad para p_variable_mean sin outliers

# Histogram (left) and kernel density estimate (right) of the capped
# p_variable_mean values.
par(mfrow = c(1, 2))
hist(
  p_variable_capped_p_variable_mean,
  main = "Histograma",
  col = "#2398AB"
)
plot(
  density(p_variable_capped_p_variable_mean),
  main = "Densidad",
  col = "red"
)

3 Aplicación al dataframe

# Trim the data frame in two passes: first keep the rows whose multi_pob_mean
# lies within its 5th-95th percentile range, then, on that subset, keep the
# rows whose p_variable_mean lies within its own 5th-95th percentile range.
x <- quantile(tabla_001$multi_pob_mean, probs = c(0.05, 0.95))
data_clean <- tabla_001[
  tabla_001$multi_pob_mean >= x[[1]] & tabla_001$multi_pob_mean <= x[[2]],
]

y <- quantile(data_clean$p_variable_mean, probs = c(0.05, 0.95))
data_clean <- data_clean[
  data_clean$p_variable_mean >= y[[1]] & data_clean$p_variable_mean <= y[[2]],
]

head(data_clean, n = 10)
##           zona código.x Freq.x anio           p     comuna.x promedio_i  año
## 1  10101011001    10101     60 2017 0.005536588 Puerto Montt   268732.4 2017
## 2  10101011002    10101    177 2017 0.016332933 Puerto Montt   268732.4 2017
## 3  10101021001    10101     82 2017 0.007566670 Puerto Montt   268732.4 2017
## 4  10101021002    10101     77 2017 0.007105287 Puerto Montt   268732.4 2017
## 5  10101021003    10101     70 2017 0.006459352 Puerto Montt   268732.4 2017
## 6  10101021004    10101     99 2017 0.009135370 Puerto Montt   268732.4 2017
## 7  10101021005    10101    171 2017 0.015779275 Puerto Montt   268732.4 2017
## 8  10101031001    10101    133 2017 0.012272769 Puerto Montt   268732.4 2017
## 9  10101031002    10101    115 2017 0.010611793 Puerto Montt   268732.4 2017
## 10 10101031003    10101     94 2017 0.008673987 Puerto Montt   268732.4 2017
##    comuna.y personas Ingresos_expandidos Freq.y p_poblacional código.y
## 1     10101   245902         66081845388    584   0.002374930    10101
## 2     10101   245902         66081845388   2941   0.011960049    10101
## 3     10101   245902         66081845388   3953   0.016075510    10101
## 4     10101   245902         66081845388   1107   0.004501793    10101
## 5     10101   245902         66081845388   2294   0.009328920    10101
## 6     10101   245902         66081845388   3391   0.013790046    10101
## 7     10101   245902         66081845388   2564   0.010426918    10101
## 8     10101   245902         66081845388   4530   0.018421973    10101
## 9     10101   245902         66081845388   4740   0.019275972    10101
## 10    10101   245902         66081845388   4107   0.016701776    10101
##     multi_pob   p_variable multi_pob_mean p_variable_mean
## 1   156939747 0.0002439996      156939747    0.0002439996
## 2   790342117 0.0007197989      790342117    0.0007197989
## 3  1062299350 0.0003334662     1062299350    0.0003334662
## 4   297486815 0.0003131329      297486815    0.0003131329
## 5   616472226 0.0002846662      616472226    0.0002846662
## 6   911271717 0.0004025994      911271717    0.0004025994
## 7   689029986 0.0006953990      689029986    0.0006953990
## 8  1217357970 0.0005408659     1217357970    0.0005408659
## 9  1273791783 0.0004676660     1273791783    0.0004676660
## 10 1103684147 0.0003822661     1103684147    0.0003822661

4 Modelo lineal

# Scatter plot with a smoothed curve: p_variable_mean on the x axis and
# multi_pob_mean on the y axis. The title is written in formula notation
# (response ~ predictor), so the response multi_pob goes on the left.
scatter.smooth(
  x = data_clean$p_variable_mean,
  y = data_clean$multi_pob_mean,
  main = "multi_pob ~ p_variable"
)

# Same relationship drawn with ggplot2, overlaying the least-squares line.
ggplot(data_clean, aes(x = p_variable_mean, y = multi_pob_mean)) +
  geom_point() +
  stat_smooth(method = "lm", col = "red")
## `geom_smooth()` using formula 'y ~ x'

# Simple linear regression of multi_pob_mean on p_variable_mean, fitted on the
# trimmed data.
linearMod <- lm(formula = multi_pob_mean ~ p_variable_mean, data = data_clean)
summary(linearMod)
## 
## Call:
## lm(formula = multi_pob_mean ~ p_variable_mean, data = data_clean)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -1.161e+09 -2.168e+08 -1.712e+08  1.945e+08  1.279e+09 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     1.862e+08  5.119e+06   36.38   <2e-16 ***
## p_variable_mean 2.271e+11  4.537e+09   50.05   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 394700000 on 10271 degrees of freedom
## Multiple R-squared:  0.1961, Adjusted R-squared:  0.196 
## F-statistic:  2505 on 1 and 10271 DF,  p-value: < 2.2e-16

4.1 Análisis del \(R^2\) (coeficiente de determinación)

El \(R^2\) es una medida estadística de qué tan cerca están los datos de la línea de regresión ajustada.

Es el porcentaje de la variación en la variable de respuesta que es explicado por un modelo lineal. Es decir:

\[ R^2 = \frac{\text{variación explicada}}{\text{variación total}} \]

Cuanto mayor sea la varianza explicada por el modelo de regresión, más cerca estarán los puntos de los datos de la línea de regresión ajustada.

Un valor bajo de \(R^2\) no es inherentemente malo. Si el valor del \(R^2\) es bajo pero se tienen predictores estadísticamente significativos, aún se pueden obtener conclusiones importantes acerca de la asociación entre los cambios en los valores de los predictores y los cambios en el valor de respuesta. Independientemente del \(R^2\), los coeficientes significativos aún representan el cambio medio en la respuesta para una unidad de cambio en el predictor mientras se mantienen constantes los otros predictores del modelo.

4.2 Análisis de los residuos

Los residuos son los errores que se cometen en la estimación.

# Distribution of the residuals of the fitted model.
boxplot(linearMod[["residuals"]])

# Default lm diagnostic plots, arranged in a 2 x 2 grid.
par(mfrow = c(2, 2))
plot(linearMod)

4.2.1 Residuos versus valores ajustados

Corroboramos que los residuos sigan un patrón lineal. La línea es recta y horizontal, por lo que deducimos que la relación es efectivamente lineal. Se busca verificar la linealidad entre las variables de entrada y salida. Un modelo lineal nunca podrá capturar una relación no lineal.

4.2.2 Q-Q

Aquí verificamos que los errores del modelo estén normalmente distribuidos. Parece cumplirse pues los valores están muy próximos a la línea recta punteada.

4.2.3 Raíz cuadrada de los residuos estandarizados versus valores ajustados

Verificamos las condiciones de homocedasticidad, es decir, que todos los residuos posean la misma varianza, que es uno de los supuestos al realizar un análisis de regresión. El supuesto parece cumplirse pues la línea roja no parece seguir ningún patrón.

4.2.4 Residuos versus apalancamiento

Acá podemos identificar los outliers influyentes en el análisis de regresión, que pueden sesgar el modelo. Parece haber dos valores influyentes (caen fuera de la distancia de Cook).


5 Fórmulas alternativas del modelo lineal

5.1 Función cuadrática

\[ \hat Y = \beta_0 + \beta_1 X^2 \]

# Quadratic specification: Y = b0 + b1 * X^2.
# Inside a formula `^` is the formula-crossing operator, so a bare
# `p_variable_mean^2` collapses to `p_variable_mean` and refits the plain
# linear model. I() makes R square the predictor arithmetically.
# NOTE(review): any printed summary produced with the bare `^2` formula is the
# linear fit and has to be regenerated after this change.
linearMod <- lm(multi_pob_mean ~ I(p_variable_mean^2), data = data_clean)
summary(linearMod)
## 
## Call:
## lm(formula = multi_pob_mean ~ p_variable_mean^2, data = data_clean)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -1.161e+09 -2.168e+08 -1.712e+08  1.945e+08  1.279e+09 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     1.862e+08  5.119e+06   36.38   <2e-16 ***
## p_variable_mean 2.271e+11  4.537e+09   50.05   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 394700000 on 10271 degrees of freedom
## Multiple R-squared:  0.1961, Adjusted R-squared:  0.196 
## F-statistic:  2505 on 1 and 10271 DF,  p-value: < 2.2e-16

5.2 Función logarítmica

\[ \hat Y = \beta_0 + \beta_1 \log X \]

# Linear-log specification: regress multi_pob_mean on the natural logarithm of
# p_variable_mean.
linearMod <- lm(formula = multi_pob_mean ~ log(p_variable_mean), data = data_clean)
summary(linearMod)
## 
## Call:
## lm(formula = multi_pob_mean ~ log(p_variable_mean), data = data_clean)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -787823248 -263759357  -83185338  176681879 1249525349 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          1.828e+09  2.452e+07   74.54   <2e-16 ***
## log(p_variable_mean) 1.873e+08  3.076e+06   60.87   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 377400000 on 10271 degrees of freedom
## Multiple R-squared:  0.2651, Adjusted R-squared:  0.265 
## F-statistic:  3705 on 1 and 10271 DF,  p-value: < 2.2e-16

5.3 Función cúbica

\[ \hat Y = \beta_0 + \beta_1 X^3 \]

# Cubic specification: Y = b0 + b1 * X^3.
# Inside a formula `^` is the formula-crossing operator, so a bare
# `p_variable_mean^3` collapses to `p_variable_mean` and refits the plain
# linear model. I() makes R cube the predictor arithmetically.
# NOTE(review): any printed summary produced with the bare `^3` formula is the
# linear fit and has to be regenerated after this change.
linearMod <- lm(multi_pob_mean ~ I(p_variable_mean^3), data = data_clean)
summary(linearMod)
## 
## Call:
## lm(formula = multi_pob_mean ~ p_variable_mean^3, data = data_clean)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -1.161e+09 -2.168e+08 -1.712e+08  1.945e+08  1.279e+09 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     1.862e+08  5.119e+06   36.38   <2e-16 ***
## p_variable_mean 2.271e+11  4.537e+09   50.05   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 394700000 on 10271 degrees of freedom
## Multiple R-squared:  0.1961, Adjusted R-squared:  0.196 
## F-statistic:  2505 on 1 and 10271 DF,  p-value: < 2.2e-16


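Vemos que el ajuste logarítmico nos ofrece el mayor \(R^2\) (0.2651 frente a 0.1961). Nótese, sin embargo, que las salidas mostradas en 5.1 y 5.3 son idénticas a las del modelo lineal simple de la sección 4: dentro de una fórmula de R, `p_variable_mean^2` equivale a `p_variable_mean`, por lo que para ajustar realmente los términos cuadrático y cúbico hay que escribir `I(p_variable_mean^2)` e `I(p_variable_mean^3)`.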