Analizando el presupuesto provincial del programa JUNTOS

Índice

1. Carga de datos

Data de la tasa de desnutrición

# Read the child-malnutrition workbook published on Google Sheets.
# read.xlsx() is handed the URL directly, so this step needs network access.
link <- "https://docs.google.com/spreadsheets/d/e/2PACX-1vS2q_eR6argdYmBBb_y1EEM68hPzWZKCa9m0dJcTWZIPv0z2D4GdIYIlvnLxw_mpi4ALHQcnugrGUC1/pub?output=xlsx"

library(openxlsx)

# Read the first sheet starting at row 4, skipping empty rows and columns.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
Desnut <- read.xlsx(link,
                    sheet = 1,
                    startRow = 4,
                    skipEmptyRows = TRUE, skipEmptyCols = TRUE)

# Preview the first rows to check how the sheet was parsed.
head(Desnut)

##   Ubigeo          X2 Departamento,.provincia.y.distrito
## 1 000000        <NA>                               PERÚ
## 2 010000        <NA>                           AMAZONAS
## 3 010100 CHACHAPOYAS                               <NA>
## 4 010101        <NA>                        CHACHAPOYAS
## 5 010102        <NA>                           ASUNCION
## 6 010103        <NA>                             BALSAS
##   Tasa.de.Mortalidad.Infantil.1/.2007 Tasa.Global.de.Fecundidad.2/.2007
## 1                                <NA>                                NA
## 2                                20.7                              2.83
## 3                                20.4                              2.32
## 4                                19.5                              2.11
## 5                                19.5                              2.20
## 6                                21.2                              2.57
##   Desnutrición.Crónica.en.Menores.de.5.años3/.(Patrón.OMS)
## 1                                                     <NA>
## 2                                                     26.8
## 3                                                20.658649
## 4                                                9.0577125
## 5                                                     12.0
## 6                                                25.461957

Data de ingreso familiar per cápita y población con secundaria completa

# Read the workbook that holds the IDH table (population, IDH and its
# component indicators). read.xlsx() is handed the URL directly, so this step
# needs network access.
link <- "https://docs.google.com/spreadsheets/d/e/2PACX-1vQA8RBVTCzZqApfoN5J6mue1eJF-B_RQyzRGo8FVCHY0wlx1Avbz0qM7WnCVONEdfWq3ADDJehaLmRM/pub?output=xlsx"

library(openxlsx)

# Read the first sheet starting at row 5, skipping empty rows and columns.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
idh <- read.xlsx(link,
                 sheet = 1,
                 startRow = 5,
                 skipEmptyRows = TRUE, skipEmptyCols = TRUE)

# Preview the first rows to check how the sheet was parsed.
head(idh)

##       X1       X2     Distrito habitantes ranking       IDH ranking
## 1 000000 PERÚ  a/         <NA>   30135875      NA 0.5058382      NA
## 2 010000 AMAZONAS         <NA>     417508      19 0.3845648      19
## 3 010100     <NA>  CHACHAPOYAS      54783     114 0.4343753      60
## 4 010101      1.0 Chachapoyas       27737     192 0.5467212     136
## 5 010102      2.0 Asuncion            296    1825 0.1683225    1747
## 6 010103      3.0    Balsas          1590    1462 0.2633761    1214
##       años ranking          % ranking     años ranking N.S..mes ranking
## 1 74.31045      NA 67.8731952      NA 8.995900      NA 696.9370      NA
## 2 73.98588      10 53.6528783      19 6.661000      20 435.6736      18
## 3 73.95475      80 45.4878254     100 7.799699      64 599.8076      45
## 4 73.38709     918 62.7192982     476 9.968829     159 866.5725     103
## 5 73.28082     931  0.7514946    1823 6.079101    1068 341.9903     846
## 6 71.96708    1128 14.4828735    1707 5.282190    1362 340.2772     854

Data de médicos por habitantes y asistencia a secundaria

# Read the workbook that holds the IDE table (population, IDE and its
# component indicators). read.xlsx() is handed the URL directly, so this step
# needs network access.
link <- "https://docs.google.com/spreadsheets/d/e/2PACX-1vSuPexnuLI55RV1Ok5E6kkLT0gZfluCJGzSzEAnP-AuphEVChlSCiAXkREb8gngFqkWquVKTp0hgC7E/pub?output=xlsx"

library(openxlsx)

# Read the first sheet starting at row 6, skipping empty rows and columns.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
ide <- read.xlsx(link,
                 sheet = 1,
                 startRow = 6,
                 skipEmptyRows = TRUE, skipEmptyCols = TRUE)

# Preview the first rows to check how the sheet was parsed.
head(ide)

##       X1       X2     Distrito habitantes ranking       IDE ranking
## 1 000000 PERÚ  a/         <NA>   30135875       - 0.7666263       -
## 2 010000 AMAZONAS         <NA>     417508    20.0 0.6279527    20.0
## 3 010100     <NA>  CHACHAPOYAS      54783   113.0 0.7736995    26.0
## 4 010200     <NA>        BAGUA      77438    79.0 0.6622805    82.0
## 5 010300     <NA>      BONGARA      32317   145.0 0.6318249   103.0
## 6 010400     <NA> CONDORCANQUI      51802   119.0 0.4598242   191.0
##          % ranking     Razon ranking        % ranking        % ranking
## 1 98.31383       - 22.731906       - 84.98840       - 76.01278       -
## 2 94.94203    24.0 13.313368    14.0 76.32827    24.0 54.35446    17.0
## 3 98.61788    59.0 25.450024    13.0 91.49856    32.0 70.34540    54.0
## 4 94.60787   180.0 14.609121    49.0 79.79018   114.0 64.47904    67.0
## 5 97.46807   113.0  9.010207   114.0 76.42404   133.0 54.83408    96.0
## 6 86.23196   192.0  8.556959   120.0 52.21494   191.0 37.71451   156.0
##          % Ranking
## 1 86.11165       -
## 2 66.16264    24.0
## 3 83.97119    52.0
## 4 67.91462   119.0
## 5 72.16926   105.0
## 6 39.48908   191.0

Data de dinero abonado, hogares abonados, n.° de distritos y población infantil

# Read the programme workbook; sheet 2 holds the money paid, number of
# households, number of districts and child-population columns.
# read.xlsx() is handed the URL directly, so this step needs network access.
link <- "https://docs.google.com/spreadsheets/d/e/2PACX-1vTnk7ME3FuvTBgn-1C7yFCwhHJHc0MAZFuTKXGYSSyqrLMv8seSYSLbR4Juw6NtYD8r2QSGQ9jYzNAX/pub?output=xlsx"

library(openxlsx)

# Read sheet 2 from its first row, skipping empty rows and columns.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
datos <- read.xlsx(link,
                   sheet = 2,
                   startRow = 1,
                   skipEmptyRows = TRUE, skipEmptyCols = TRUE)

# Preview the first rows to check how the sheet was parsed.
head(datos)

##   DEPARTAMENTO    PROVINCIA  DineroAbo NumHog NumDis PobNinos
## 1     AMAZONAS         <NA> 6572899.36  33212     NA     <NA>
## 2         <NA>        BAGUA 1244799.95   6288      5    33.05
## 3         <NA>      BONGARA  390799.97   1962     11    28.36
## 4         <NA>  CHACHAPOYAS   440400.0   2217     20    25.97
## 5         <NA> CONDORCANQUI 1291199.53   6589      3    46.84
## 6         <NA>         LUYA  873799.99   4420     23    31.32

Data de voto a PPK en primera vuelta y relación entre regiones y provincias

# Read the programme workbook; sheet 3 holds the first-round PPK vote and the
# region/province and capital indicator columns.
# read.xlsx() is handed the URL directly, so this step needs network access.
link <- "https://docs.google.com/spreadsheets/d/e/2PACX-1vTnk7ME3FuvTBgn-1C7yFCwhHJHc0MAZFuTKXGYSSyqrLMv8seSYSLbR4Juw6NtYD8r2QSGQ9jYzNAX/pub?output=xlsx"

library(openxlsx)

# Read sheet 3 from its first row, skipping empty rows and columns.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
datos2 <- read.xlsx(link,
                    sheet = 3,
                    startRow = 1,
                    skipEmptyRows = TRUE, skipEmptyCols = TRUE)

# Preview the first rows to check how the sheet was parsed.
head(datos2)

##   DEPARTAMENTO    PROVINCIA VotoPPK1                      Reg_Prov Capital
## 1     AMAZONAS         <NA>    11.89 Sentimiento Amazonas Regional    <NA>
## 2         <NA>        BAGUA     9.77                             1       0
## 3         <NA>      BONGARA     9.63                             0       0
## 4         <NA>  CHACHAPOYAS    17.01                             0       1
## 5         <NA> CONDORCANQUI     1.40                             0       0
## 6         <NA>         LUYA    09.09                             0       0
##               X6   X7
## 1           <NA> <NA>
## 2           <NA> <NA>
## 3           <NA> <NA>
## 4           <NA> <NA>
## 5 votos anulados <NA>
## 6           <NA> <NA>

Data del SIS, analfabetismo, saneamiento, electrificación e identidad

# Read the programme workbook; sheet 4 holds the SIS, illiteracy,
# electrification, sanitation and identity columns.
# read.xlsx() is handed the URL directly, so this step needs network access.
link <- "https://docs.google.com/spreadsheets/d/e/2PACX-1vTnk7ME3FuvTBgn-1C7yFCwhHJHc0MAZFuTKXGYSSyqrLMv8seSYSLbR4Juw6NtYD8r2QSGQ9jYzNAX/pub?output=xlsx"

library(openxlsx)

# Read sheet 4 from its first row, skipping empty rows and columns.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
datos3 <- read.xlsx(link,
                    sheet = 4,
                    startRow = 1,
                    skipEmptyRows = TRUE, skipEmptyCols = TRUE)

# Preview the first rows to check how the sheet was parsed.
head(datos3)

##   DEPARTAMENTO    PROVINCIA   SIS Analf  Elec Sanea  Iden
## 1     AMAZONAS         <NA> 72.97  <NA>  <NA>  <NA>  <NA>
## 2         <NA>        BAGUA 72.99  16.6 67.64  48.3 89.77
## 3         <NA>      BONGARA 70.43 14.16 82.44 62.52 91.32
## 4         <NA>  CHACHAPOYAS  59.2  11.5 87.49 76.06 91.69
## 5         <NA> CONDORCANQUI 79.59 22.27 81.56 13.45 84.11
## 6         <NA>         LUYA 82.23 18.19 85.33 56.22 91.04

2. Limpieza de la base de datos

Data de la tasa de desnutrición

Se revisan las últimas filas de la data

# Show the last 12 rows to see where the data ends and the sheet notes begin.
tail(Desnut, n = 12)

##                                                                                                                                                                                                          Ubigeo
## 2058                                                                                                                                                                                                     250400
## 2059                                                                                                                                                                                                     250401
## 2060                                                                                                1/ Fuente: INEI. Perú: Mortalidad Infantil y sus Diferenciales por Departamento, Provincia y Distrito 2007.
## 2061                                                                                                                2/ Fuente: Perú: Fecundidad y sus Diferenciales por Departamento, Provincia y Distrito 2007
## 2062                                                                                                3/ Fuente: Mapa de Desnutrición Crónica en Niños Menores de Cinco Años a nivel Provincial y Distrital, 2009
## 2063                                                                                                      4/ Por Ley Nº 28884 se crea en el departamento de Amazonas, provincia de Bagua, el distrito de Bagua 
## 2064  y por Ley Nº 29218 del 25 de Abril del 2008, 'se ha delimitado y redelimitado la Provincia de Bagua y los distritos Bagua, Aramango, Copallin, El parco, Imaza y La Peca, en el departamento de Amazonas.
## 2065                                                                                                                              5/ Creación territorial, según Decreto Ley Nº 29558 del 15 de Julio del 2010.
## 2066                                                                                                                               6/ Creación territorial, según Decreto Ley Nº 29538 del 7 de Junio del 2010.
## 2067                                                                                                                              7/ Creación territorial, según Decreto Ley Nº 29539 del 14 de Julio del 2010.
## 2068                                                                                                                              8/ Creación territorial, según Decreto Ley Nº 29541 del 14 de Junio del 2010.
## 2069                                                                                                                                             FUENTE: Insituto Nacional de Estadística e Informática - INEI.
##         X2 Departamento,.provincia.y.distrito
## 2058 PURUS                               <NA>
## 2059  <NA>                              PURUS
## 2060  <NA>                               <NA>
## 2061  <NA>                               <NA>
## 2062  <NA>                               <NA>
## 2063  <NA>                               <NA>
## 2064  <NA>                               <NA>
## 2065  <NA>                               <NA>
## 2066  <NA>                               <NA>
## 2067  <NA>                               <NA>
## 2068  <NA>                               <NA>
## 2069  <NA>                               <NA>
##      Tasa.de.Mortalidad.Infantil.1/.2007 Tasa.Global.de.Fecundidad.2/.2007
## 2058                                31.3                              2.84
## 2059                                31.3                              2.84
## 2060                                <NA>                                NA
## 2061                                <NA>                                NA
## 2062                                <NA>                                NA
## 2063                                <NA>                                NA
## 2064                                <NA>                                NA
## 2065                                <NA>                                NA
## 2066                                <NA>                                NA
## 2067                                <NA>                                NA
## 2068                                <NA>                                NA
## 2069                                <NA>                                NA
##      Desnutrición.Crónica.en.Menores.de.5.años3/.(Patrón.OMS)
## 2058                                                66.606504
## 2059                                                66.606504
## 2060                                                     <NA>
## 2061                                                     <NA>
## 2062                                                     <NA>
## 2063                                                     <NA>
## 2064                                                     <NA>
## 2065                                                     <NA>
## 2066                                                     <NA>
## 2067                                                     <NA>
## 2068                                                     <NA>
## 2069                                                     <NA>

Se eliminan las filas que no nos interesan

# Drop the trailing footnote/source rows. Those rows carry text only in the
# first column (Ubigeo) and NA in every other column, so they are detected by
# content rather than by hard-coded row numbers, which would remove the wrong
# rows if the published sheet gained or lost a line.
Desnut <- Desnut[rowSums(!is.na(Desnut[-1])) > 0, ]

# Confirm that the table now ends on a data row.
tail(Desnut)

##      Ubigeo         X2 Departamento,.provincia.y.distrito
## 2054 250300 PADRE ABAD                               <NA>
## 2055 250301       <NA>                         PADRE ABAD
## 2056 250302       <NA>                            IRAZOLA
## 2057 250303       <NA>                           CURIMANA
## 2058 250400      PURUS                               <NA>
## 2059 250401       <NA>                              PURUS
##      Tasa.de.Mortalidad.Infantil.1/.2007 Tasa.Global.de.Fecundidad.2/.2007
## 2054                                13.2                              3.04
## 2055                                12.6                              2.98
## 2056                                13.7                              3.11
## 2057                                14.0                              3.06
## 2058                                31.3                              2.84
## 2059                                31.3                              2.84
##      Desnutrición.Crónica.en.Menores.de.5.años3/.(Patrón.OMS)
## 2054                                       33.732955999999994
## 2055                                                27.400616
## 2056                                                38.085798
## 2057                                                43.038106
## 2058                                                66.606504
## 2059                                                66.606504

Se elimina la información de los departamentos y distritos

# Keep only the rows whose X2 cell is filled in; in this sheet those are the
# province-level rows (department and district rows have NA in X2).
Desnut <- Desnut[which(!is.na(Desnut[["X2"]])), ]
head(Desnut)

##    Ubigeo                   X2 Departamento,.provincia.y.distrito
## 3  010100          CHACHAPOYAS                               <NA>
## 25 010200                BAGUA                               <NA>
## 32 010300              BONGARA                               <NA>
## 45 010400         CONDORCANQUI                               <NA>
## 49 010500                 LUYA                               <NA>
## 73 010600 RODRIGUEZ DE MENDOZA                               <NA>
##    Tasa.de.Mortalidad.Infantil.1/.2007 Tasa.Global.de.Fecundidad.2/.2007
## 3                                 20.4                              2.32
## 25                                16.9                              2.81
## 32                                21.4                              2.47
## 45                                28.5                              4.81
## 49                                24.5                              3.07
## 73                                14.3                              2.39
##    Desnutrición.Crónica.en.Menores.de.5.años3/.(Patrón.OMS)
## 3                                                 20.658649
## 25                                                40.005875
## 32                                                30.370397
## 45                                                56.755867
## 49                                                30.627197
## 73                                                 23.47929

Se resetea el índice y se eliminan las columnas que no nos interesan

# Drop columns 1, 3, 4 and 5 by position, leaving the province name (X2) and
# the chronic-malnutrition rate.
Desnut <- Desnut[-c(1, 3, 4, 5)]

# Renumber the rows 1..n now that the non-province rows are gone.
rownames(Desnut) <- NULL
head(Desnut)

##                     X2
## 1          CHACHAPOYAS
## 2                BAGUA
## 3              BONGARA
## 4         CONDORCANQUI
## 5                 LUYA
## 6 RODRIGUEZ DE MENDOZA
##   Desnutrición.Crónica.en.Menores.de.5.años3/.(Patrón.OMS)
## 1                                                20.658649
## 2                                                40.005875
## 3                                                30.370397
## 4                                                56.755867
## 5                                                30.627197
## 6                                                 23.47929

Se renombran las columnas

# Give the two remaining columns short names: province and malnutrition rate.
colnames(Desnut) <- c("PROVINCIA", "desn")
head(Desnut)

##              PROVINCIA      desn
## 1          CHACHAPOYAS 20.658649
## 2                BAGUA 40.005875
## 3              BONGARA 30.370397
## 4         CONDORCANQUI 56.755867
## 5                 LUYA 30.627197
## 6 RODRIGUEZ DE MENDOZA  23.47929

Se convierte a numérica la variable

# Convert the malnutrition rate to numeric, then check the resulting type.
Desnut[["desn"]] <- as.numeric(Desnut[["desn"]])
str(Desnut[["desn"]])

##  num [1:195] 20.7 40 30.4 56.8 30.6 ...

Data de ingreso familiar per cápita y población con secundaria completa

Se revisan las últimas filas de la data

# Show the last 20 rows to see where the data ends and the sheet notes begin.
tail(idh, n = 20)

##                                                                                                                                                                                                                      X1
## 2042                                                                                                                                                                                                             250105
## 2043                                                                                                                                                                                                             250106
## 2044                                                                                                                                                                                                             250107
## 2045                                                                                                                                                                                                             250200
## 2046                                                                                                                                                                                                             250201
## 2047                                                                                                                                                                                                             250202
## 2048                                                                                                                                                                                                             250203
## 2049                                                                                                                                                                                                             250204
## 2050                                                                                                                                                                                                             250300
## 2051                                                                                                                                                                                                             250301
## 2052                                                                                                                                                                                                             250302
## 2053                                                                                                                                                                                                             250303
## 2054                                                                                                                                                                                                             250400
## 2055                                                                                                                                                                                                             250401
## 2056 a/ Incluye las cifras estimadas del distrito de Carmen Alto en la provincia de Huamanga, departamento de Ayacucho, donde. Autoridades locales no permitieron la ejecución del Censo de Población y Vivienda 2007. 
## 2057                                                                                                       1/ Cifras estimadas. Autoridades locales no permitieron la ejecución del Censo de Población y Vivienda 2007.
## 2058                                                                                                 2/ Incluye a la población ubicada en área temporal por límites de fronteras de los distritos de Pangoa y Mazamari.
## 2059                                                                                                                                                                                    3/ Provincias de Lima y Callao.
## 2060                                                                                                                                                 Fuente: INEI. Censo de Población y Vivienda 2007. ENAHO y ENAPRES.
## 2061                                                                                                                                                                                            Elaboración: PNUD-Perú.
##        X2       Distrito habitantes ranking       IDH ranking     años
## 2042  5.0  Yarinacocha        94304      55 0.4750668     324 75.24140
## 2043  6.0 Nueva Requena        5468     818 0.3131789     897 73.34425
## 2044  7.0       Manantay      77653      70 0.4810236     309 75.14924
## 2045 <NA>        ATALAYA      50569     125 0.2611797     162 62.05999
## 2046  1.0     Raymondi        32474     167 0.2624791    1219 61.59682
## 2047  2.0      Sepahua         8037     618 0.2980093     995 61.21639
## 2048  3.0     Tahuania         7860     631 0.1919767    1659 65.56441
## 2049  4.0       Yurua          2198    1314 0.1693254    1743 62.00338
## 2050 <NA>     PADRE ABAD      56756     111 0.3728368      82 78.77254
## 2051  1.0  Padre Abad         26364     212 0.4161015     476 77.62689
## 2052  2.0     Irazola         22779     243 0.3283280     808 78.89258
## 2053  3.0     Curimana         7613     641 0.3039023     962 78.83507
## 2054 <NA>          PURUS       4251     195 0.2862257     138 68.60871
## 2055  1.0         Purus        4251     957 0.2862257    1076 68.60871
## 2056 <NA>           <NA>         NA      NA        NA      NA       NA
## 2057 <NA>           <NA>         NA      NA        NA      NA       NA
## 2058 <NA>           <NA>         NA      NA        NA      NA       NA
## 2059 <NA>           <NA>         NA      NA        NA      NA       NA
## 2060 <NA>           <NA>         NA      NA        NA      NA       NA
## 2061 <NA>           <NA>         NA      NA        NA      NA       NA
##      ranking        % ranking     años ranking N.S..mes ranking
## 2042     633 60.73808     522 9.449050     239 586.7984     357
## 2043     923 31.91933    1242 6.519032     912 323.5306     901
## 2044     651 53.19289     679 8.551766     388 687.6970     249
## 2045     187 22.99781     183 6.239831     122 300.1544     131
## 2046    1764 20.89197    1541 6.323195     976 318.2964     916
## 2047    1772 26.76953    1384 7.032598     751 379.1261     726
## 2048    1628 11.40391    1762 5.548338    1261 183.6848    1466
## 2049    1753 13.18104    1728 4.402193    1654 159.8511    1565
## 2050      17 38.73143     121 7.193236      87 406.6648      88
## 2051     326 44.32234     914 7.676127     572 507.7652     463
## 2052     200 31.03530    1268 6.657738     861 333.1001     878
## 2053     213 28.93897    1322 6.795543     823 276.6664    1053
## 2054     157 17.43073     191 7.233476      86 342.9416     105
## 2055    1473 17.43073    1635 7.233476     685 342.9416     843
## 2056      NA       NA      NA       NA      NA       NA      NA
## 2057      NA       NA      NA       NA      NA       NA      NA
## 2058      NA       NA      NA       NA      NA       NA      NA
## 2059      NA       NA      NA       NA      NA       NA      NA
## 2060      NA       NA      NA       NA      NA       NA      NA
## 2061      NA       NA      NA       NA      NA       NA      NA

Se eliminan las filas innecesarias

# Drop the trailing footnote/source rows. Those rows carry text only in the
# first column (X1) and NA in every other column, so they are detected by
# content rather than by hard-coded row numbers, which would remove the wrong
# rows if the published sheet gained or lost a line.
idh <- idh[rowSums(!is.na(idh[-1])) > 0, ]

# Confirm that the table now ends on a data row.
tail(idh)

##          X1   X2      Distrito habitantes ranking       IDH ranking
## 2050 250300 <NA>    PADRE ABAD      56756     111 0.3728368      82
## 2051 250301  1.0 Padre Abad         26364     212 0.4161015     476
## 2052 250302  2.0    Irazola         22779     243 0.3283280     808
## 2053 250303  3.0    Curimana         7613     641 0.3039023     962
## 2054 250400 <NA>         PURUS       4251     195 0.2862257     138
## 2055 250401  1.0        Purus        4251     957 0.2862257    1076
##          años ranking        % ranking     años ranking N.S..mes ranking
## 2050 78.77254      17 38.73143     121 7.193236      87 406.6648      88
## 2051 77.62689     326 44.32234     914 7.676127     572 507.7652     463
## 2052 78.89258     200 31.03530    1268 6.657738     861 333.1001     878
## 2053 78.83507     213 28.93897    1322 6.795543     823 276.6664    1053
## 2054 68.60871     157 17.43073     191 7.233476      86 342.9416     105
## 2055 68.60871    1473 17.43073    1635 7.233476     685 342.9416     843

Se elimina la información de los distritos y departamentos

# Keep only the rows whose X2 cell is empty; in this sheet those are the
# province-level rows (country/department rows carry a name in X2 and
# district rows carry a number).
idh <- idh[which(is.na(idh[["X2"]])), ]
head(idh)

##        X1   X2             Distrito habitantes ranking       IDH ranking
## 3  010100 <NA>          CHACHAPOYAS      54783     114 0.4343753      60
## 25 010200 <NA>                BAGUA      77438      80 0.3866636      75
## 32 010300 <NA>              BONGARA      32317     146 0.3497911      94
## 45 010400 <NA>         CONDORCANQUI      51802     120 0.1866263     192
## 49 010500 <NA>                 LUYA      52185     118 0.3075981     128
## 73 010600 <NA> RODRIGUEZ DE MENDOZA      30236     154 0.3310619     108
##        años ranking         % ranking     años ranking N.S..mes ranking
## 3  73.95475      80 45.487825     100 7.799699      64 599.8076      45
## 25 76.87897      37 39.843663     118 6.971553     101 467.6442      73
## 32 73.36928      95 29.456796     160 6.464213     114 455.7142      79
## 45 70.39137     144  8.012634     195 5.577313     147 180.0702     186
## 49 74.06573      79 34.403834     137 5.504034     150 327.8493     116
## 73 73.35318      97 26.603732     169 6.076635     128 427.0962      86

Se eliminan las columnas que no nos interesan

# Drop every column except positions 3, 10 and 14 (name, "%" and "N.S..mes").
# Selection is positional because several headers ("ranking", "%", "años")
# are repeated in this sheet.
idh <- idh[-c(1, 2, 4:9, 11, 12, 13, 15)]
head(idh)

##                Distrito         % N.S..mes
## 3           CHACHAPOYAS 45.487825 599.8076
## 25                BAGUA 39.843663 467.6442
## 32              BONGARA 29.456796 455.7142
## 45         CONDORCANQUI  8.012634 180.0702
## 49                 LUYA 34.403834 327.8493
## 73 RODRIGUEZ DE MENDOZA 26.603732 427.0962

Se resetea el índice y se nombra a las variables

# Name the three remaining columns: province, secondary-education percentage
# and the monthly "N.S..mes" amount.
colnames(idh) <- c("PROVINCIA", "secundaria", "percapitaf")

# Renumber the rows 1..n now that the non-province rows are gone.
rownames(idh) <- NULL
head(idh)

##              PROVINCIA secundaria percapitaf
## 1          CHACHAPOYAS  45.487825   599.8076
## 2                BAGUA  39.843663   467.6442
## 3              BONGARA  29.456796   455.7142
## 4         CONDORCANQUI   8.012634   180.0702
## 5                 LUYA  34.403834   327.8493
## 6 RODRIGUEZ DE MENDOZA  26.603732   427.0962

Se convierten a numéricas las variables

# Convert both indicator columns (positions 2 and 3) to numeric, then check
# the structure of the cleaned table.
idh[c("secundaria", "percapitaf")] <-
  lapply(idh[c("secundaria", "percapitaf")], as.numeric)
str(idh)

## 'data.frame':    195 obs. of  3 variables:
##  $ PROVINCIA : chr  "CHACHAPOYAS" "BAGUA" "BONGARA" "CONDORCANQUI" ...
##  $ secundaria: num  45.49 39.84 29.46 8.01 34.4 ...
##  $ percapitaf: num  600 468 456 180 328 ...

Data de médicos por habitantes y asistencia a secundaria

Se revisan las últimas filas de la data

# Show the last 20 rows to see where the data ends and the sheet notes begin.
tail(ide, n = 20)

##                                                                                                                                                                                                                     X1
## 205                                                                                                                                                                                                             220900
## 206                                                                                                                                                                                                             221000
## 207                                                                                                                                                                                                             230000
## 208                                                                                                                                                                                                             230100
## 209                                                                                                                                                                                                             230200
## 210                                                                                                                                                                                                             230300
## 211                                                                                                                                                                                                             230400
## 212                                                                                                                                                                                                             240000
## 213                                                                                                                                                                                                             240100
## 214                                                                                                                                                                                                             240200
## 215                                                                                                                                                                                                             240300
## 216                                                                                                                                                                                                             250000
## 217                                                                                                                                                                                                             250100
## 218                                                                                                                                                                                                             250200
## 219                                                                                                                                                                                                             250300
## 220                                                                                                                                                                                                             250400
## 221 a/ Incluye las cifras estimadas del distrito de Carmen Alto en la provincia de Huamanga, departamento de Ayacucho, donde. Autoridades locales no permitieron la ejecución del Censo de Población y Vivienda 2007. 
## 222                                                                                                 1/ Incluye a la población ubicada en área temporal por límites de fronteras de los distritos de Pangoa y Mazamari.
## 223                                                                                                                                       Fuente: INEI. Censo de Población y Vivienda 2007. ENAHO 2012 y ENAPRES 2012.
## 224                                                                                                                                                                                            Elaboración: PNUD-Perú.
##          X2              Distrito habitantes ranking       IDE ranking
## 205    <NA>            SAN MARTIN     179184    28.0 0.8285461    10.0
## 206    <NA>               TOCACHE      73460    82.0 0.6456299    91.0
## 207   TACNA                  <NA>     328915    21.0 0.8952655     1.0
## 208    <NA>                 TACNA     302852    14.0 0.9104090     1.0
## 209    <NA>             CANDARAVE       8435   189.0 0.6520944    88.0
## 210    <NA>         JORGE BASADRE       9641   187.0 0.8487091     7.0
## 211    <NA>                TARATA       7987   192.0 0.7413551    41.0
## 212  TUMBES                  <NA>     228227    23.0 0.7374755    10.0
## 213    <NA>                TUMBES     159548    32.0 0.7622368    29.0
## 214    <NA> CONTRALMIRANTE VILLAR      19180   174.0 0.6658642    77.0
## 215    <NA>             ZARUMILLA      49499   126.0 0.6849057    65.0
## 216 UCAYALI                  <NA>     477616    18.0 0.6202063    23.0
## 217    <NA>      CORONEL PORTILLO     366040    11.0 0.6670186    76.0
## 218    <NA>               ATALAYA      50569   124.0 0.4245137   194.0
## 219    <NA>            PADRE ABAD      56756   110.0 0.5022896   183.0
## 220    <NA>                 PURUS       4251   194.0 0.4473006   192.0
## 221    <NA>                  <NA>         NA    <NA>        NA    <NA>
## 222    <NA>                  <NA>         NA    <NA>        NA    <NA>
## 223    <NA>                  <NA>         NA    <NA>        NA    <NA>
## 224    <NA>                  <NA>         NA    <NA>        NA    <NA>
##            % ranking     Razon ranking        % ranking         % ranking
## 205 98.32579    77.0 21.329846    20.0 99.50000     3.0 85.393466    15.0
## 206 94.94572   176.0 10.509511    93.0 92.43233    28.0 43.607708   134.0
## 207 99.49787     1.0 42.655397     1.0 90.22830     3.0 92.681588     3.0
## 208 99.49284    14.0 44.741326     1.0 90.05181    41.0 95.876591     5.0
## 209 99.50000    11.0  7.113219   134.0 90.04666    42.0 40.111196   148.0
## 210 99.50000    12.0 30.079867     6.0 97.32764     6.0 83.975437    24.0
## 211 99.31568    24.0 22.536622    14.0 90.17304    40.0 68.725352    57.0
## 212 98.78743     9.0 14.919978    13.0 84.74258    14.0 74.758732     9.0
## 213 99.11836    37.0 18.381964    30.0 86.26609    61.0 77.677777    34.0
## 214 99.18719    34.0  6.623644   144.0 81.46419   105.0 59.791859    81.0
## 215 97.47540   112.0  6.975793   135.0 81.20866   106.0 71.022713    52.0
## 216 96.15788    23.0 10.750006    20.0 80.81996    19.0 35.612859    24.0
## 217 97.59218   109.0 12.519509    67.0 85.42861    69.0 42.851141   139.0
## 218 84.28946   193.0  4.520634   181.0 55.46519   188.0 13.242539   193.0
## 219 97.30866   119.0  4.498354   183.0 73.20130   149.0 13.243004   192.0
## 220 91.96672   187.0 15.954234    43.0 51.38777   192.0  0.235054   194.0
## 221       NA    <NA>        NA    <NA>       NA    <NA>        NA    <NA>
## 222       NA    <NA>        NA    <NA>       NA    <NA>        NA    <NA>
## 223       NA    <NA>        NA    <NA>       NA    <NA>        NA    <NA>
## 224       NA    <NA>        NA    <NA>       NA    <NA>        NA    <NA>
##            % Ranking
## 205 95.50407    10.0
## 206 74.31333    95.0
## 207 94.13266     4.0
## 208 95.21436    11.0
## 209 84.53397    50.0
## 210 93.41834    15.0
## 211 74.90245    89.0
## 212 85.58236     8.0
## 213 87.41957    37.0
## 214 81.44944    58.0
## 215 81.11974    60.0
## 216 79.59577    14.0
## 217 86.77154    43.0
## 218 51.72527   170.0
## 219 59.89457   149.0
## 220 53.47037   167.0
## 221       NA    <NA>
## 222       NA    <NA>
## 223       NA    <NA>
## 224       NA    <NA>

Se eliminan las filas que no se necesitan

# Drop rows 221 to 224, which hold the footnote/source text rather than data,
# then preview the last rows.
ide <- ide[-(221:224), ]
tail(ide)

##         X1      X2         Distrito habitantes ranking       IDE ranking
## 215 240300    <NA>        ZARUMILLA      49499   126.0 0.6849057    65.0
## 216 250000 UCAYALI             <NA>     477616    18.0 0.6202063    23.0
## 217 250100    <NA> CORONEL PORTILLO     366040    11.0 0.6670186    76.0
## 218 250200    <NA>          ATALAYA      50569   124.0 0.4245137   194.0
## 219 250300    <NA>       PADRE ABAD      56756   110.0 0.5022896   183.0
## 220 250400    <NA>            PURUS       4251   194.0 0.4473006   192.0
##            % ranking     Razon ranking        % ranking         % ranking
## 215 97.47540   112.0  6.975793   135.0 81.20866   106.0 71.022713    52.0
## 216 96.15788    23.0 10.750006    20.0 80.81996    19.0 35.612859    24.0
## 217 97.59218   109.0 12.519509    67.0 85.42861    69.0 42.851141   139.0
## 218 84.28946   193.0  4.520634   181.0 55.46519   188.0 13.242539   193.0
## 219 97.30866   119.0  4.498354   183.0 73.20130   149.0 13.243004   192.0
## 220 91.96672   187.0 15.954234    43.0 51.38777   192.0  0.235054   194.0
##            % Ranking
## 215 81.11974    60.0
## 216 79.59577    14.0
## 217 86.77154    43.0
## 218 51.72527   170.0
## 219 59.89457   149.0
## 220 53.47037   167.0

Se elimina la información de los departamentos

# Keep only rows whose Distrito cell is filled in; department rows leave it NA.
ide <- ide[!is.na(ide$Distrito), ]
head(ide)

##       X1   X2             Distrito habitantes ranking       IDE ranking
## 3 010100 <NA>          CHACHAPOYAS      54783   113.0 0.7736995    26.0
## 4 010200 <NA>                BAGUA      77438    79.0 0.6622805    82.0
## 5 010300 <NA>              BONGARA      32317   145.0 0.6318249   103.0
## 6 010400 <NA>         CONDORCANQUI      51802   119.0 0.4598242   191.0
## 7 010500 <NA>                 LUYA      52185   117.0 0.6047198   126.0
## 8 010600 <NA> RODRIGUEZ DE MENDOZA      30236   153.0 0.6312264   104.0
##          % ranking     Razon ranking        % ranking        % ranking
## 3 98.61788    59.0 25.450024    13.0 91.49856    32.0 70.34540    54.0
## 4 94.60787   180.0 14.609121    49.0 79.79018   114.0 64.47904    67.0
## 5 97.46807   113.0  9.010207   114.0 76.42404   133.0 54.83408    96.0
## 6 86.23196   192.0  8.556959   120.0 52.21494   191.0 37.71451   156.0
## 7 96.19272   155.0 12.418003    70.0 74.72597   143.0 43.34842   136.0
## 8 97.34310   117.0 14.878682    48.0 79.42439   115.0 46.50182   124.0
##          % Ranking
## 3 83.97119    52.0
## 4 67.91462   119.0
## 5 72.16926   105.0
## 6 39.48908   191.0
## 7 67.39611   123.0
## 8 67.54610   120.0

Se eliminan las columnas que no nos interesan

# Keep columns 3 (Distrito), 12 and 14 of the 17 columns; drop the rest.
# NOTE(review): columns 12 and 14 are both headed "%", while column 10 is the
# one headed "Razon". Confirm that column 12 really is the doctors-per-10,000
# indicator it is later named after ("MedxHab").
ide <- ide[c(3, 12, 14)]
head(ide)

##               Distrito        %      %.1
## 3          CHACHAPOYAS 91.49856 70.34540
## 4                BAGUA 79.79018 64.47904
## 5              BONGARA 76.42404 54.83408
## 6         CONDORCANQUI 52.21494 37.71451
## 7                 LUYA 74.72597 43.34842
## 8 RODRIGUEZ DE MENDOZA 79.42439 46.50182

Se reinicia el índice y se renombran las columnas

# Reset the row index to 1..n and give the three columns their analysis names.
rownames(ide) <- NULL
colnames(ide) <- c("PROVINCIA", "MedxHab", "AsisSec")
head(ide)

##              PROVINCIA  MedxHab  AsisSec
## 1          CHACHAPOYAS 91.49856 70.34540
## 2                BAGUA 79.79018 64.47904
## 3              BONGARA 76.42404 54.83408
## 4         CONDORCANQUI 52.21494 37.71451
## 5                 LUYA 74.72597 43.34842
## 6 RODRIGUEZ DE MENDOZA 79.42439 46.50182

Se convierten las variables a numéricas

# Coerce the two indicator columns (positions 2 and 3) to numeric.
ide[2:3] <- lapply(ide[2:3], as.numeric)
str(ide)

## 'data.frame':    195 obs. of  3 variables:
##  $ PROVINCIA: chr  "CHACHAPOYAS" "BAGUA" "BONGARA" "CONDORCANQUI" ...
##  $ MedxHab  : num  91.5 79.8 76.4 52.2 74.7 ...
##  $ AsisSec  : num  70.3 64.5 54.8 37.7 43.3 ...

Data de dinero abonado, hogares abonados, número de distritos y población infantil

Se elimina la información de los departamentos

# Keep rows that have a province name (department rows are NA there) and drop
# the first column in the same step.
datos <- datos[!is.na(datos$PROVINCIA), -1, drop = FALSE]
head(datos)

##              PROVINCIA  DineroAbo NumHog NumDis PobNinos
## 2                BAGUA 1244799.95   6288      5    33.05
## 3              BONGARA  390799.97   1962     11    28.36
## 4          CHACHAPOYAS   440400.0   2217     20    25.97
## 5         CONDORCANQUI 1291199.53   6589      3    46.84
## 6                 LUYA  873799.99   4420     23    31.32
## 7 RODRIGUEZ DE MENDOZA   362400.0   1828      8    30.79

Se convierte a numéricas las variables

# Coerce columns 2 to 5 (DineroAbo, NumHog, NumDis, PobNinos) to numeric.
datos[2:5] <- lapply(datos[2:5], as.numeric)
str(datos)

## 'data.frame':    173 obs. of  5 variables:
##  $ PROVINCIA: chr  "BAGUA" "BONGARA" "CHACHAPOYAS" "CONDORCANQUI" ...
##  $ DineroAbo: num  1244800 390800 440400 1291200 873800 ...
##  $ NumHog   : num  6288 1962 2217 6589 4420 ...
##  $ NumDis   : num  5 11 20 3 23 8 7 4 6 1 ...
##  $ PobNinos : num  33 28.4 26 46.8 31.3 ...

Data de voto a PPK en primera vuelta, relación entre movimientos regionales y provinciales, y si la provincia es capital o no

Se elimina la data de los departamentos y las columnas que no nos interesan

# Keep rows that have a province name and drop columns 1, 6 and 7.
datos2 <- datos2[!is.na(datos2$PROVINCIA), -c(1, 6, 7), drop = FALSE]
head(datos2)

##              PROVINCIA VotoPPK1 Reg_Prov Capital
## 2                BAGUA     9.77        1       0
## 3              BONGARA     9.63        0       0
## 4          CHACHAPOYAS    17.01        0       1
## 5         CONDORCANQUI     1.40        0       0
## 6                 LUYA    09.09        0       0
## 7 RODRIGUEZ DE MENDOZA    20.17        1       0

Se convierten variables a numéricas y factores según corresponde

# Vote share becomes numeric; columns 3 and 4 (Reg_Prov, Capital) become factors.
datos2[["VotoPPK1"]] <- as.numeric(datos2[["VotoPPK1"]])
datos2[3:4] <- lapply(datos2[3:4], factor)
str(datos2)

## 'data.frame':    173 obs. of  4 variables:
##  $ PROVINCIA: chr  "BAGUA" "BONGARA" "CHACHAPOYAS" "CONDORCANQUI" ...
##  $ VotoPPK1 : num  9.77 9.63 17.01 1.4 9.09 ...
##  $ Reg_Prov : Factor w/ 2 levels "0","1": 2 1 1 1 1 2 2 1 1 1 ...
##  $ Capital  : Factor w/ 2 levels "0","1": 1 1 2 1 1 1 1 1 1 1 ...

Data del SIS, analfabetismo, saneamiento, electrificación e identidad

Se elimina la información de los departamentos

# Keep rows that have a province name and drop the first column.
datos3 <- datos3[!is.na(datos3$PROVINCIA), -1, drop = FALSE]
head(datos3)

##              PROVINCIA   SIS Analf  Elec Sanea  Iden
## 2                BAGUA 72.99  16.6 67.64  48.3 89.77
## 3              BONGARA 70.43 14.16 82.44 62.52 91.32
## 4          CHACHAPOYAS  59.2  11.5 87.49 76.06 91.69
## 5         CONDORCANQUI 79.59 22.27 81.56 13.45 84.11
## 6                 LUYA 82.23 18.19 85.33 56.22 91.04
## 7 RODRIGUEZ DE MENDOZA 69.39 15.21 77.44 59.03 90.26

Se convierten las variables a numéricas

# Coerce columns 2 to 6 (SIS, Analf, Elec, Sanea, Iden) to numeric.
datos3[2:6] <- lapply(datos3[2:6], as.numeric)
str(datos3)

## 'data.frame':    173 obs. of  6 variables:
##  $ PROVINCIA: chr  "BAGUA" "BONGARA" "CHACHAPOYAS" "CONDORCANQUI" ...
##  $ SIS      : num  73 70.4 59.2 79.6 82.2 ...
##  $ Analf    : num  16.6 14.2 11.5 22.3 18.2 ...
##  $ Elec     : num  67.6 82.4 87.5 81.6 85.3 ...
##  $ Sanea    : num  48.3 62.5 76.1 13.4 56.2 ...
##  $ Iden     : num  89.8 91.3 91.7 84.1 91 ...

3.Aplicación del merge

Merge de datas

# Join every table on PROVINCIA. merge() defaults to an inner join, so only
# provinces present in all tables survive. The list order reproduces the
# original column order: datos first, then Desnut + idh + ide, then datos2
# and datos3.
JUNTOS <- Reduce(
  function(izquierda, derecha) merge(izquierda, derecha, by = "PROVINCIA"),
  list(
    datos,
    merge(merge(Desnut, idh, by = "PROVINCIA"), ide, by = "PROVINCIA"),
    datos2,
    datos3
  )
)
head(JUNTOS)

##       PROVINCIA DineroAbo NumHog NumDis PobNinos     desn secundaria
## 1       ABANCAY  918499.4   4615      8    26.85 24.42078   66.56863
## 2      ACOBAMBA  938099.6   4731      8    30.27 55.16297   36.52375
## 3       ACOMAYO  525000.0   2645      7    30.07 63.59085   43.78907
## 4          AIJA   84700.0    427      4    27.26 38.89286   34.78537
## 5 ALTO AMAZONAS 2356300.0  11927      6    38.66 31.50458   31.06007
## 6          AMBO  943296.3   4762      8    29.14 44.29124   36.00732
##   percapitaf  MedxHab  AsisSec VotoPPK1 Reg_Prov Capital   SIS Analf  Elec
## 1   532.8766 93.69086 63.00113     8.59        0       1 63.18 13.85 90.66
## 2   199.5093 77.54075 31.17210     7.38        1       0 88.12 22.98 79.90
## 3   234.7709 83.12670 48.23934     4.22        0       0 83.11 21.65 77.68
## 4   269.8365 89.49891 47.78407     9.48        0       0 71.91 12.07 78.72
## 5   342.6022 61.71118 44.49094    24.19        0       0 79.92 17.55 71.59
## 6   343.1504 74.88339 25.58357    14.06        1       0 75.51 20.37 72.22
##   Sanea  Iden
## 1 68.21 88.90
## 2 60.74 90.06
## 3 69.63 90.02
## 4 74.76 91.64
## 5 46.90 90.08
## 6 44.50 88.52

Se ponen las provincias en el índice

# Use the province name as the row label, then remove the now-redundant
# first column.
rownames(JUNTOS) <- JUNTOS$PROVINCIA
JUNTOS <- JUNTOS[-1]

Se revisan las variables de la data

# List the column names of the merged table.
colnames(JUNTOS)

##  [1] "DineroAbo"  "NumHog"     "NumDis"     "PobNinos"   "desn"      
##  [6] "secundaria" "percapitaf" "MedxHab"    "AsisSec"    "VotoPPK1"  
## [11] "Reg_Prov"   "Capital"    "SIS"        "Analf"      "Elec"      
## [16] "Sanea"      "Iden"

Se ordenan las columnas de la data

# Reorder the columns by name. This is the same order as the positional index
# c(1:7, 13:14, 8:9, 15:17, 10:12); later chunks rely on these positions.
JUNTOS <- JUNTOS[c(
  "DineroAbo", "NumHog", "NumDis", "PobNinos",
  "desn", "secundaria", "percapitaf", "SIS", "Analf",
  "MedxHab", "AsisSec", "Elec", "Sanea", "Iden",
  "VotoPPK1", "Reg_Prov", "Capital"
)]
head(JUNTOS)

##               DineroAbo NumHog NumDis PobNinos     desn secundaria
## ABANCAY        918499.4   4615      8    26.85 24.42078   66.56863
## ACOBAMBA       938099.6   4731      8    30.27 55.16297   36.52375
## ACOMAYO        525000.0   2645      7    30.07 63.59085   43.78907
## AIJA            84700.0    427      4    27.26 38.89286   34.78537
## ALTO AMAZONAS 2356300.0  11927      6    38.66 31.50458   31.06007
## AMBO           943296.3   4762      8    29.14 44.29124   36.00732
##               percapitaf   SIS Analf  MedxHab  AsisSec  Elec Sanea  Iden
## ABANCAY         532.8766 63.18 13.85 93.69086 63.00113 90.66 68.21 88.90
## ACOBAMBA        199.5093 88.12 22.98 77.54075 31.17210 79.90 60.74 90.06
## ACOMAYO         234.7709 83.11 21.65 83.12670 48.23934 77.68 69.63 90.02
## AIJA            269.8365 71.91 12.07 89.49891 47.78407 78.72 74.76 91.64
## ALTO AMAZONAS   342.6022 79.92 17.55 61.71118 44.49094 71.59 46.90 90.08
## AMBO            343.1504 75.51 20.37 74.88339 25.58357 72.22 44.50 88.52
##               VotoPPK1 Reg_Prov Capital
## ABANCAY           8.59        0       1
## ACOBAMBA          7.38        1       0
## ACOMAYO           4.22        0       0
## AIJA              9.48        0       0
## ALTO AMAZONAS    24.19        0       0
## AMBO             14.06        1       0

4.Descripción de datos

Variable Dependiente

Dinero abonado

# Quartiles and mean of the dependent variable.
summary(JUNTOS[["DineroAbo"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    3400  288500  572900  812818 1044800 5307300

# Histogram of the dependent variable (DineroAbo).
hist(JUNTOS$DineroAbo)

Variables Independientes

Variable Desnutrición

# Quartiles and mean of the malnutrition variable.
summary(JUNTOS[["desn"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    4.04   25.70   38.79   35.81   44.24   66.61

Variable Ingreso percapita familiar

# Quartiles and mean of family per-capita income.
summary(JUNTOS[["percapitaf"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   108.4   260.6   340.5   403.1   509.4  1113.7

Variable Población con secundaria completa

# Quartiles and mean of the completed-secondary-education variable.
summary(JUNTOS[["secundaria"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   8.013  31.422  41.661  44.929  57.743  85.946

Variable Médicos por cada 10 000 habitantes

# Quartiles and mean of MedxHab.
# NOTE(review): the reported range (44.03 to 99.50) looks like a percentage
# rather than a doctors-per-10,000 ratio. Confirm which source column was kept.
summary(JUNTOS[["MedxHab"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   44.03   72.59   81.51   79.61   88.41   99.50

Variable Asistencia Secundaria

# Quartiles and mean of the secondary-attendance variable.
summary(JUNTOS[["AsisSec"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.2351 39.6135 52.3377 53.1703 66.3363 98.0614

Variable Analfabetismo

# Quartiles and mean of the illiteracy variable.
summary(JUNTOS[["Analf"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    7.08   13.36   16.40   16.74   20.70   29.00

Variable saneamiento

# Quartiles and mean of the sanitation variable.
# NOTE(review): the output reports Max. 43369 and Mean 1053.89 while the
# quartiles lie between 44 and 70. Some values are probably mistyped or
# mis-parsed upstream; verify before using Sanea in the factor analysis.
summary(JUNTOS[["Sanea"]])

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##     0.29    44.50    57.26  1053.89    69.89 43369.00

Variable electrificación

# Quartiles and mean of the electrification variable.
summary(JUNTOS[["Elec"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   39.95   70.61   78.84   77.10   85.32   96.82

Variable identidad

# Quartiles and mean of the identity variable.
summary(JUNTOS[["Iden"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   73.18   87.97   89.52   89.15   91.11   99.15

Variable SIS

# Quartiles and mean of the SIS variable.
summary(JUNTOS[["SIS"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   21.05   56.35   71.25   67.19   79.56   90.63

Variable Voto a PPK en 1era vuelta

# Quartiles and mean of the first-round PPK vote variable.
summary(JUNTOS[["VotoPPK1"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.400   5.650   9.050   9.916  12.440  42.540

Variable Capital

# Frequency of each level of the Capital indicator.
table(JUNTOS[["Capital"]])

## 
##   0   1 
## 156  17

# Same distribution expressed as percentages.
100 * prop.table(table(JUNTOS[["Capital"]]))

## 
##        0        1 
## 90.17341  9.82659

Variable de relación entre movimientos regionales y provinciales

# Frequency of each level of the Reg_Prov indicator.
table(JUNTOS[["Reg_Prov"]])

## 
##   0   1 
## 115  58

# Same distribution expressed as percentages.
100 * prop.table(table(JUNTOS[["Reg_Prov"]]))

## 
##        0        1 
## 66.47399 33.52601

Variables de control

Variable de Población de niños menores a 14 años

# Quartiles and mean of the child-population control variable.
summary(JUNTOS[["PobNinos"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   17.48   26.60   29.17   29.64   32.38   46.84

Variable de número de distritos donde se ejecuta el programa JUNTOS

# Quartiles and mean of the number-of-districts control variable.
summary(JUNTOS[["NumDis"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   4.000   7.000   7.636  10.000  23.000

Variable de número de hogares abonados

# Quartiles and mean of the number-of-households control variable.
summary(JUNTOS[["NumHog"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      17    1456    2897    4099    5262   26911

5.Análisis Factorial

Latente técnica

Se revisa la correlación entre las variables observables

library(psych)
# Kaiser-Meyer-Olkin sampling adequacy for columns 5 to 9
# (desn, secundaria, percapitaf, SIS, Analf).
KMO(JUNTOS[,c(5:9)])

## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = JUNTOS[, c(5:9)])
## Overall MSA =  0.85
## MSA for each item = 
##       desn secundaria percapitaf        SIS      Analf 
##       0.94       0.83       0.83       0.80       0.90

Se realiza la prueba de esfericidad

# Bartlett's sphericity test on columns 5 to 9. Raw data is passed, so the
# function derives the correlation matrix itself; n is the number of provinces.
cortest.bartlett(JUNTOS[, 5:9], n = nrow(JUNTOS))

## R was not square, finding R from data

## $chisq
## [1] 711.4177
## 
## $p.value
## [1] 2.221694e-146
## 
## $df
## [1] 10

Se ve resultado de latente

# Fit a one-factor exploratory factor analysis on columns 5 to 9.
resultadoEFA=fa(JUNTOS[c(5:9)],
                1, 
                rotate="oblimin", scores=T)
# Print with two decimals, hiding loadings below 0.1 in absolute value.
print(resultadoEFA,digits=2,cut = 0.1)

## Factor Analysis using method =  minres
## Call: fa(r = JUNTOS[c(5:9)], nfactors = 1, rotate = "oblimin", scores = T)
## Standardized loadings (pattern matrix) based upon correlation matrix
##              MR1   h2   u2 com
## desn        0.74 0.55 0.45   1
## secundaria -0.83 0.69 0.31   1
## percapitaf -0.84 0.70 0.30   1
## SIS         0.94 0.88 0.12   1
## Analf       0.89 0.80 0.20   1
## 
##                 MR1
## SS loadings    3.63
## Proportion Var 0.73
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## The degrees of freedom for the null model are  10  and the objective function was  4.2 with Chi Square of  711.42
## The degrees of freedom for the model are 5  and the objective function was  0.18 
## 
## The root mean square of the residuals (RMSR) is  0.03 
## The df corrected root mean square of the residuals is  0.04 
## 
## The harmonic number of observations is  173 with the empirical chi square  3.12  with prob <  0.68 
## The total number of observations was  173  with Likelihood Chi Square =  31.14  with prob <  8.8e-06 
## 
## Tucker Lewis Index of factoring reliability =  0.925
## RMSEA index =  0.176  and the 90 % confidence intervals are  0.119 0.235
## BIC =  5.37
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                    MR1
## Correlation of (regression) scores with factors   0.97
## Multiple R square of scores with factors          0.95
## Minimum correlation of possible factor scores     0.90

Se guarda la latente en nueva variable

# Store the scores of the single factor as column 18, named "latTecnica".
# factor.scores()$scores is a one-column matrix (column "MR1"). Assigning it
# unchanged creates a matrix column inside the data frame, which later shows
# up as "latTecnica.MR1" in summary() and as "MR1" in aggregate() output.
# as.numeric() flattens it to a plain vector with one score per province.
JUNTOS[, 18] <- as.numeric(factor.scores(JUNTOS[, c(5:9)], resultadoEFA)$scores)

names(JUNTOS)[18] <- "latTecnica"

Latente de presencia estatal

Se revisa la correlación entre las variables observables

library(psych)
# Kaiser-Meyer-Olkin sampling adequacy for columns 10 to 14
# (MedxHab, AsisSec, Elec, Sanea, Iden).
KMO(JUNTOS[,c(10:14)])

## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = JUNTOS[, c(10:14)])
## Overall MSA =  0.66
## MSA for each item = 
## MedxHab AsisSec    Elec   Sanea    Iden 
##    0.74    0.66    0.62    0.74    0.66

Se realiza la prueba de esfericidad

# Bartlett's sphericity test on columns 10 to 14. Raw data is passed, so the
# function derives the correlation matrix itself; n is the number of provinces.
cortest.bartlett(JUNTOS[10:14], n = nrow(JUNTOS))

## R was not square, finding R from data

## $chisq
## [1] 170.384
## 
## $p.value
## [1] 2.309919e-31
## 
## $df
## [1] 10

# Fit a one-factor exploratory factor analysis on columns 10 to 14.
# This overwrites the resultadoEFA object from the previous latent variable.
resultadoEFA=fa(JUNTOS[c(10:14)],
                1, 
                rotate="oblimin", scores=T)
# Print with two decimals, hiding loadings below 0.2 in absolute value.
print(resultadoEFA,digits=2,cut = 0.2)

## Factor Analysis using method =  minres
## Call: fa(r = JUNTOS[c(10:14)], nfactors = 1, rotate = "oblimin", scores = T)
## Standardized loadings (pattern matrix) based upon correlation matrix
##           MR1    h2   u2 com
## MedxHab  0.52 0.269 0.73   1
## AsisSec  0.69 0.477 0.52   1
## Elec     0.94 0.878 0.12   1
## Sanea   -0.22 0.046 0.95   1
## Iden     0.33 0.107 0.89   1
## 
##                 MR1
## SS loadings    1.78
## Proportion Var 0.36
## 
## Mean item complexity =  1
## Test of the hypothesis that 1 factor is sufficient.
## 
## The degrees of freedom for the null model are  10  and the objective function was  1.01 with Chi Square of  170.38
## The degrees of freedom for the model are 5  and the objective function was  0.04 
## 
## The root mean square of the residuals (RMSR) is  0.05 
## The df corrected root mean square of the residuals is  0.07 
## 
## The harmonic number of observations is  173 with the empirical chi square  8.07  with prob <  0.15 
## The total number of observations was  173  with Likelihood Chi Square =  7.33  with prob <  0.2 
## 
## Tucker Lewis Index of factoring reliability =  0.971
## RMSEA index =  0.054  and the 90 % confidence intervals are  0 0.127
## BIC =  -18.44
## Fit based upon off diagonal values = 0.98
## Measures of factor score adequacy             
##                                                    MR1
## Correlation of (regression) scores with factors   0.95
## Multiple R square of scores with factors          0.90
## Minimum correlation of possible factor scores     0.79

Se guarda la latente en nueva variable

# Store the scores of the single factor as column 19, named "ide".
# factor.scores()$scores is a one-column matrix (column "MR1"). Assigning it
# unchanged creates a matrix column inside the data frame, which later shows
# up as "ide.MR1" in summary() and as "MR1" in aggregate() output.
# as.numeric() flattens it to a plain vector with one score per province.
JUNTOS[, 19] <- as.numeric(factor.scores(JUNTOS[, c(10:14)], resultadoEFA)$scores)

names(JUNTOS)[19] <- "ide"

6.Análisis de Clusters

Cluster de Variable dependiente [Dinero Abonado]

Se fija una semilla (seed) para que el análisis aleatorio dé siempre el mismo resultado

# Fix the random-number seed so the following steps are reproducible.
set.seed(seed = 123)

Se crea subset con solo la variable dependiente

# One-column data frame with only the dependent variable (first column).
ProvinciasSub <- JUNTOS[1]

Se revisa que no haya casos perdidos

# summary() would list an NA count if any value were missing.
summary(object = ProvinciasSub)

##    DineroAbo      
##  Min.   :   3400  
##  1st Qu.: 288500  
##  Median : 572900  
##  Mean   : 812818  
##  3rd Qu.:1044800  
##  Max.   :5307300

Se normaliza la escala de las variables

# Centre and scale each column (the defaults of scale(), written out).
prov_sub.scaled <- scale(ProvinciasSub, center = TRUE, scale = TRUE)
# result
head(prov_sub.scaled)

##                DineroAbo
## ABANCAY        0.1307946
## ACOBAMBA       0.1550523
## ACOMAYO       -0.3562115
## AIJA          -0.9011392
## ALTO AMAZONAS  1.9102573
## AMBO           0.1614839

Se realiza el método jerárquico para ver en cuántos grupos dividir los casos

library(NbClust)
# Let NbClust's indices vote on the number of clusters, using complete linkage.
nb <- NbClust(data = prov_sub.scaled, method = "complete")

## Warning in max(DiffLev[, 5], na.rm = TRUE): ningun argumento finito para
## max; retornando -Inf

## *** : The Hubert index is a graphical method of determining the number of clusters.
##                 In the plot of Hubert index, we seek a significant knee that corresponds to a 
##                 significant increase of the value of the measure i.e the significant peak in Hubert
##                 index second differences plot. 
##

## *** : The D index is a graphical method of determining the number of clusters. 
##                 In the plot of D index, we seek a significant knee (the significant peak in Dindex
##                 second differences plot) that corresponds to a significant increase of the value of
##                 the measure. 
##

## Warning in matrix(c(results), nrow = 2, ncol = 26): la longitud de los
## datos [51] no es un submúltiplo o múltiplo del número de filas [2] en la
## matriz

## Warning in matrix(c(results), nrow = 2, ncol = 26, dimnames =
## list(c("Number_clusters", : la longitud de los datos [51] no es un
## submúltiplo o múltiplo del número de filas [2] en la matriz

## ******************************************************************* 
## * Among all indices:                                                
## * 1 proposed 3 as the best number of clusters 
## * 2 proposed 4 as the best number of clusters 
## * 1 proposed 9 as the best number of clusters 
## * 2 proposed 15 as the best number of clusters 
## 
##                    ***** Conclusion *****                            
##  
## * According to the majority rule, the best number of clusters is  4 
##  
##  
## *******************************************************************

Crear variable nueva con la división de los grupos

library(factoextra)

## Loading required package: ggplot2

## 
## Attaching package: 'ggplot2'

## The following objects are masked from 'package:psych':
## 
##     %+%, alpha

## Welcome! Related Books: `Practical Guide To Cluster Analysis in R` at https://goo.gl/13EFCZ

# Hierarchical clustering of the scaled data into 4 groups, without plotting.
# NOTE(review): eclust() documents `hc_method` (default "ward.D2") as the
# linkage argument. Confirm that `method = "complete"` is actually honoured
# and not silently absorbed by `...`.
solucionJerarquica <- eclust(
  prov_sub.scaled,
  FUNcluster = "hclust",
  k = 4,
  method = "complete",
  graph = FALSE
)

# Attach each province's cluster label to the subset.
ProvinciasSub[["jerarquica"]] <- solucionJerarquica[["cluster"]]

Revisar la distribución de la variable nueva

# Number of provinces in each cluster.
table(ProvinciasSub[["jerarquica"]])

## 
##   1   2   3   4 
##  45 108  19   1

Revisar media de las variables según su grupo

# Mean of DineroAbo within each cluster, used to rank the groups.
aggregate(DineroAbo ~ jerarquica, data = ProvinciasSub, FUN = mean)

##   jerarquica DineroAbo
## 1          1 1145639.8
## 2          2  345707.4
## 3          3 2443157.7
## 4          4 5307299.9

Se reconfigura el orden de los grupos de la variable jerárquica, si es necesario, y se la convierte en ordinal

library(car)

## Loading required package: carData

## 
## Attaching package: 'car'

## The following object is masked from 'package:psych':
## 
##     logit

# Swap labels 1 and 2 so the labels increase with mean DineroAbo, then store
# the result as an ordered factor.
ProvinciasSub$jerarquica <- factor(
  car::recode(ProvinciasSub$jerarquica, "1=2;2=1;3=3;4=4"),
  ordered = TRUE
)

str(ProvinciasSub$jerarquica)

##  Ord.factor w/ 4 levels "1"<"2"<"3"<"4": 2 2 1 1 3 2 3 2 1 1 ...

Se revisa el nuevo orden

# Cluster sizes after relabelling.
table(ProvinciasSub[["jerarquica"]])

## 
##   1   2   3   4 
## 108  45  19   1

Se revisan las siluetas

# Silhouette widths per province; list those with a negative width.
# Cluster labels here are the original ones, before recoding.
siluetas <- solucionJerarquica[["silinfo"]][["widths"]]
siluetas[siluetas[["sil_width"]] < 0, ]

##          cluster neighbor   sil_width
## JULCAN         2        1 -0.04298952
## TAYACAJA       3        1 -0.03285671

Se carga el mapa de provincias

# Path to the shapefile: <folder>/<fileName>.
folder <- "mapas"
fileName <- "provinciasPeru.shp" # level 2 corresponds to provinces
fileToRead <- file.path(folder, fileName)

# NOTE(review): rgdal was retired from CRAN in 2023. Moving to sf::st_read()
# would also require adapting the merge and plot chunks that follow.
library(rgdal)
PeruProvs <- rgdal::readOGR(fileToRead, stringsAsFactors = FALSE)

Se crea subdata con variable jerárquica y se hace merge con el mapa

# One-column table (cluster label) whose row names are the province names.
miniData <- as.data.frame(ProvinciasSub[2])
# Join on the map's NOMBPROV against miniData's row names (by.y = 0), keeping
# every polygon even when it has no match.
PeruProvs <- merge(
  PeruProvs, miniData,
  by.x = "NOMBPROV", by.y = 0, all.x = TRUE
)

Se configura los colores, el borde y leyenda del mapa

# One colour per ordered cluster level, lowest to highest.
paleta <- c("indianred4", "lightcoral", "lightgreen", "green4") # colours
# Base layer: white polygons without borders.
plot(PeruProvs, col = "white", border = NA)
# Overlay: fill by cluster level; unmatched polygons get no fill colour.
plot(
  PeruProvs,
  col = paleta[PeruProvs$jerarquica],
  border = "gray25",
  add = TRUE
)
legend(
  "right",
  title = "JUNTOS",
  legend = c("Muy Bajo", "Bajo", "Medio", "Alto"),
  fill = paleta,
  cex = 0.9,
  bty = "n"
)

Cluster de Variable dependiente y latentes

Se crea subset con solo las variables latentes y la dependiente

# Subset with the dependent variable (column 1) and the two latent scores
# (columns 18 and 19).
provinciasSub <- JUNTOS[c(1, 18, 19)]

Se revisa que no haya casos perdidos

# summary() would list an NA count if any value were missing.
summary(object = provinciasSub)

##    DineroAbo          latTecnica.MR1          ide.MR1      
##  Min.   :   3400   Min.   :-2.5679900   Min.   :-3.537939  
##  1st Qu.: 288500   1st Qu.:-0.7195119   1st Qu.:-0.572601  
##  Median : 572900   Median : 0.0971227   Median : 0.118791  
##  Mean   : 812818   Mean   : 0.0000000   Mean   : 0.000000  
##  3rd Qu.:1044800   3rd Qu.: 0.7798755   3rd Qu.: 0.653689  
##  Max.   :5307300   Max.   : 1.5791641   Max.   : 1.833048

Se normaliza la escala de las variables

# Centre and scale each column (the defaults of scale(), written out).
# This overwrites the scaled matrix from the previous clustering.
prov_sub.scaled <- scale(provinciasSub, center = TRUE, scale = TRUE)
# result
head(prov_sub.scaled)

##                DineroAbo   latTecnica         ide
## ABANCAY        0.1307946 -0.581394062  1.21778124
## ACOBAMBA       0.1550523  1.349894979  0.05283593
## ACOMAYO       -0.3562115  1.117922818  0.04422499
## AIJA          -0.9011392  0.009583572  0.17022089
## ALTO AMAZONAS  1.9102573  0.524564219 -0.59315509
## AMBO           0.1614839  0.633341592 -0.60447639

Se realiza el método jerárquico para ver en cuántos grupos dividir los casos

library(NbClust)
# Let NbClust's indices vote on the number of clusters, using complete linkage.
nb <- NbClust(data = prov_sub.scaled, method = "complete")

## *** : The Hubert index is a graphical method of determining the number of clusters.
##                 In the plot of Hubert index, we seek a significant knee that corresponds to a 
##                 significant increase of the value of the measure i.e the significant peak in Hubert
##                 index second differences plot. 
##

## *** : The D index is a graphical method of determining the number of clusters. 
##                 In the plot of D index, we seek a significant knee (the significant peak in Dindex
##                 second differences plot) that corresponds to a significant increase of the value of
##                 the measure. 
##  
## ******************************************************************* 
## * Among all indices:                                                
## * 8 proposed 2 as the best number of clusters 
## * 5 proposed 3 as the best number of clusters 
## * 3 proposed 6 as the best number of clusters 
## * 1 proposed 8 as the best number of clusters 
## * 1 proposed 9 as the best number of clusters 
## * 3 proposed 10 as the best number of clusters 
## * 1 proposed 12 as the best number of clusters 
## * 1 proposed 14 as the best number of clusters 
## * 1 proposed 15 as the best number of clusters 
## 
##                    ***** Conclusion *****                            
##  
## * According to the majority rule, the best number of clusters is  2 
##  
##  
## *******************************************************************

Crear variable nueva con la división de los grupos

library(factoextra)

# Hierarchical clustering of the scaled data into 2 groups, without plotting.
# NOTE(review): eclust() documents `hc_method` (default "ward.D2") as the
# linkage argument. Confirm that `method = "complete"` is actually honoured.
solucionJerarquica <- eclust(
  prov_sub.scaled,
  FUNcluster = "hclust",
  k = 2,
  method = "complete",
  graph = FALSE
)

# Attach each province's cluster label to the subset.
provinciasSub[["jerarquica"]] <- solucionJerarquica[["cluster"]]

Revisar la distribución de la variable nueva

# Number of provinces in each cluster.
table(provinciasSub[["jerarquica"]])

## 
##  1  2 
## 79 94

Revisar media de las variables según su grupo

# Per-cluster means of the dependent variable and both latent scores.
aggregate(
  cbind(DineroAbo, latTecnica, ide) ~ jerarquica,
  data = provinciasSub,
  FUN = mean
)

##   jerarquica DineroAbo        MR1        MR1
## 1          1  327007.6 -0.7447732  0.4936740
## 2          2 1221105.1  0.6259264 -0.4148962

Se reconfigura el orden de los grupos de la variable jerárquica, si es necesario, y se la convierte en ordinal

library(car)
# The labels already increase with mean DineroAbo, so the recode is an
# identity mapping; the result is stored as an ordered factor.
provinciasSub$jerarquica <- factor(
  car::recode(provinciasSub$jerarquica, "1=1;2=2"),
  ordered = TRUE
)

str(provinciasSub$jerarquica)

##  Ord.factor w/ 2 levels "1"<"2": 1 2 2 1 2 2 2 2 1 2 ...

Se revisan las siluetas

# Silhouette widths per province; list those with a negative width.
siluetas <- solucionJerarquica[["silinfo"]][["widths"]]
siluetas[siluetas[["sil_width"]] < 0, ]

##               cluster neighbor    sil_width
## CHUCUITO            1        2 -0.002352550
## ESPINAR             1        2 -0.003006027
## LAMPA               1        2 -0.005090590
## MELGAR              1        2 -0.059331883
## OXAPAMPA            1        2 -0.115661064
## AYMARAES            2        1 -0.020966123
## HUANCANE            2        1 -0.038488354
## HUANUCO             2        1 -0.067722505
## LUCANAS             2        1 -0.128019868
## LA CONVENCION       2        1 -0.169375207
## HUAMANGA            2        1 -0.185233816
## LEONCIO PRADO       2        1 -0.192425117
## PUNO                2        1 -0.307774723
## SULLANA             2        1 -0.364762953

Se crea una subdata con la variable jerárquica y se hace merge con el mapa ya descargado

# Keep only the cluster column. It is selected by name rather than by
# position so the selection does not depend on provinciasSub's column order.
miniData <- as.data.frame(provinciasSub["jerarquica"])

# Attach the cluster to PeruProvs: its NOMBPROV column is matched against the
# row names of miniData (by.y = 0); all.x = TRUE keeps every row of PeruProvs,
# including those without a match.
PeruProvs <- merge(PeruProvs, miniData,
                   by.x = "NOMBPROV", by.y = 0, all.x = TRUE)

Se configuran los colores, el borde y la leyenda del mapa

# Two-colour palette indexed by the cluster value.
paleta <- c("lightcoral", "lightgreen")

# Draw a white, borderless base layer first, then overlay the same polygons
# filled according to their cluster and outlined in dark grey.
plot(PeruProvs, col = "white", border = NA)
plot(
  PeruProvs,
  col = paleta[PeruProvs$jerarquica],
  border = "gray25",
  add = TRUE
)

# Legend on the right-hand side, without a surrounding box.
legend(
  "right",
  title = "JUNTOS",
  legend = c("Bajo", "Alto"),
  fill = paleta,
  cex = 0.9,
  bty = "n"
)

7.Análisis de regresión

Se realiza un subset con las variables de interés

# Regression data set: DineroAbo (the response) first, then the predictors.
# Columns are picked by name instead of by position so the subset does not
# silently change if JUNTOS gains or reorders columns.
SubJuntos <- JUNTOS[c("DineroAbo", "NumHog", "NumDis", "PobNinos",
                      "latTecnica", "ide",
                      "VotoPPK1", "Reg_Prov", "Capital")]
names(SubJuntos)

## [1] "DineroAbo"  "NumHog"     "NumDis"     "PobNinos"   "latTecnica"
## [6] "ide"        "VotoPPK1"   "Reg_Prov"   "Capital"

Se realiza el modelo

# Gaussian GLM of DineroAbo on every other column of SubJuntos, followed by
# its coefficient summary.
modelo1 <- glm(
  formula = DineroAbo ~ .,
  family = "gaussian",
  data = SubJuntos
)
summary(modelo1)

## 
## Call:
## glm(formula = DineroAbo ~ ., family = "gaussian", data = SubJuntos)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -22776   -1653    -253    1411   15112  
## 
## Coefficients:
##               Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)  8.989e+03  2.500e+03    3.596 0.000427 ***
## NumHog       1.983e+02  8.985e-02 2206.724  < 2e-16 ***
## NumDis       6.216e+01  7.667e+01    0.811 0.418718    
## PobNinos    -2.908e+02  8.311e+01   -3.499 0.000601 ***
## latTecnica   1.519e+03  5.042e+02    3.014 0.002990 ** 
## ide          1.911e+02  4.078e+02    0.469 0.639902    
## VotoPPK1    -1.202e+01  6.140e+01   -0.196 0.845084    
## Reg_Prov1   -9.132e+02  6.571e+02   -1.390 0.166490    
## Capital1    -3.786e+03  1.202e+03   -3.148 0.001951 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 15398252)
## 
##     Null deviance: 1.1229e+14  on 172  degrees of freedom
## Residual deviance: 2.5253e+09  on 164  degrees of freedom
## AIC: 3364.8
## 
## Number of Fisher Scoring iterations: 2

8.Requisitos de la Regresión Lineal

Residuos cercanos a 0

# Average of the residuals stored in modelo1 (expected to be close to zero).
mean(modelo1[["residuals"]])

## [1] -9.791763e-10

Homocedasticidad

# Plot modelo1 with the panels arranged in a 2 x 2 grid.
# par() returns the previous settings; restoring them afterwards keeps the
# 2 x 2 layout from leaking into later plots.
op <- par(mfrow = c(2, 2))
plot(modelo1)
par(op)

Prueba de multicolinealidad

# Variance inflation factors for the predictors of modelo1.
library(car)
vif(mod = modelo1)

##     NumHog     NumDis   PobNinos latTecnica        ide   VotoPPK1 
##   1.498112   1.366002   1.829755   2.691461   1.666612   1.665415 
##   Reg_Prov    Capital 
##   1.081002   1.439447

Baja correlación entre las variables independientes y los residuos

# Keep only the rows of SubJuntos without missing values.
SubJuntos1 <- SubJuntos[complete.cases(SubJuntos), ]

# For every column except the first (the response), coerce it to numeric and
# print the p-value of its correlation test against the model residuals.
for (j in seq_along(SubJuntos1)[-1]) {
  valores <- as.numeric(SubJuntos1[[j]])
  prueba <- cor.test(modelo1$residuals, valores)
  print(prueba$p.value)
}

## [1] 1
## [1] 1
## [1] 1
## [1] 1
## [1] 1
## [1] 1
## [1] 1
## [1] 1

Normalidad de residuos

# Shapiro-Wilk normality test on the residuals of modelo1.
shapiro.test(modelo1$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  modelo1$residuals
## W = 0.86589, p-value = 2.774e-11

# Histogram of the residuals with a normal curve drawn on top.
x <- modelo1$residuals
h <- hist(x)

# Normal density (same mean and sd as x) evaluated on 40 evenly spaced points
# across the range of x, then rescaled to counts by multiplying by the
# spacing between bin midpoints and by the number of observations.
rango <- range(x)
xfit <- seq(rango[1], rango[2], length.out = 40)
yfit <- dnorm(xfit, mean = mean(x), sd = sd(x)) *
  diff(h$mids[1:2]) * length(x)
lines(xfit, yfit, col = "blue", lwd = 2)

Estadística para el análisis político 2 - POL304

Analizando el presupuesto provincial del programa JUNTOS

1.Carga de datos

2.Limpieza de la base de datos

3.Aplicación del merge

4.Descripción de datos

5.Análisis Factorial

6.Análisis de Clusters

7.Análisis de regresión

8.Requisitos de la Regresión Lineal