Pregunta 1

library(rio)
# Read the department-level indicator table from the shared Google Sheet
# and preview its first rows.
datap <- import(
  "https://docs.google.com/spreadsheets/d/1v-Kj_zP99PTsMnavtPVYhQ86saCo_rUh6gk_qUEYHFw/edit?gid=905833369#gid=905833369"
)
head(datap)
##   DEPARTAMENTO UBIGEO buenEstado contribuyentesSunat peaOcupada pobUrbana
## 1     AMAZONAS  10000       18.6               75035     130019    205976
## 2       ÁNCASH  20000       13.9              302906     387976    806065
## 3     APURÍMAC  30000        8.7              103981     140341    243354
## 4     AREQUIPA  40000       27.4              585628     645001   1383694
## 5     AYACUCHO  50000       17.0              151191     235857    444473
## 6    CAJAMARCA  60000       18.0              277457     461312    567141
##   PobRural pobTotal
## 1   211389   417365
## 2   333050  1139115
## 3   180905   424259
## 4    76739  1460433
## 5   206467   650940
## 6   860386  1427527

“buenEstado”: Porcentaje de locales escolares en buen estado

“contribuyentesSunat”: Cantidad de contribuyentes a la SUNAT (PEA)

“peaOcupada”: Cantidad de PEA ocupada

# Inspect the storage type of the response and of one raw count column.
str(datap$buenEstado)
##  num [1:25] 18.6 13.9 8.7 27.4 17 18 33.8 11.9 10.1 15.6 ...
str(datap$contribuyentesSunat)
##  int [1:25] 75035 302906 103981 585628 151191 277457 499257 466883 80353 185658 ...
# Express both counts as a percentage of each row's total population so they
# are on a scale comparable to buenEstado (itself a percentage).
datap$pcon <- datap$contribuyentesSunat / datap$pobTotal * 100
datap$ppea <- datap$peaOcupada / datap$pobTotal * 100
# Use bare column names in the formula and let `data = datap` resolve them.
# Writing `datap$col` inside the formula bypasses the `data` argument, labels
# the coefficients with the `datap$` prefix and prevents predict() from using
# `newdata`. The fitted values are the same either way.
modelo1 <- lm(buenEstado ~ pcon + ppea, data = datap)
summary(modelo1)
## 
## Call:
## lm(formula = buenEstado ~ pcon + ppea, data = datap)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.0928  -4.3610   0.2575   4.4003  11.0196 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) -22.6095    15.9617  -1.416    0.171
## pcon          0.1003     0.3121   0.321    0.751
## ppea          1.0218     0.6424   1.590    0.126
## 
## Residual standard error: 6.299 on 22 degrees of freedom
## Multiple R-squared:  0.4669, Adjusted R-squared:  0.4184 
## F-statistic: 9.633 on 2 and 22 DF,  p-value: 0.000989

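Respuesta: El p-valor global del modelo (prueba F, p = 0.000989) es menor a 0.05, por lo que se rechaza la hipótesis nula de que todos los coeficientes son cero: en conjunto, las variables explicativas tienen efecto sobre la variable dependiente (R² = 0.4669). Sin embargo, ningún coeficiente es significativo de forma individual (pcon: p = 0.751; ppea: p = 0.126), de modo que no se puede afirmar que cada variable, por separado, tenga efecto.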

Pregunta 2: Al querer probar la hipótesis de que la cantidad de PEA ocupada depende de la cantidad de contribuyentes a la SUNAT y del porcentaje de locales escolares en buen estado, se llega a comprobar que (con una significancia de 0.05):

# Regress the employed labour force (peaOcupada) on the number of SUNAT
# taxpayers and the percentage of school buildings in good condition.
# Columns are referenced by bare name and resolved through `data = datap`;
# `datap$col` terms inside the formula would label the coefficients with the
# `datap$` prefix and prevent predict() from using `newdata`.
modelo2 <- lm(peaOcupada ~ contribuyentesSunat + buenEstado, data = datap)
summary(modelo2)
## 
## Call:
## lm(formula = peaOcupada ~ contribuyentesSunat + buenEstado, data = datap)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -91867 -58573 -11166  46174 155851 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          1.155e+05  3.787e+04   3.049  0.00588 ** 
## contribuyentesSunat  9.206e-01  1.741e-02  52.872  < 2e-16 ***
## buenEstado          -1.412e+03  1.983e+03  -0.712  0.48395    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 74540 on 22 degrees of freedom
## Multiple R-squared:  0.9932, Adjusted R-squared:  0.9926 
## F-statistic:  1603 on 2 and 22 DF,  p-value: < 2.2e-16

PARTE 2:

library(rvest)
# Wikipedia article to scrape and the XPath of the ninth table inside the
# article body.
link <- "https://es.wikipedia.org/wiki/Elecciones_municipales_de_Lima_de_2022"
path <- '//*[@id="mw-content-text"]/div[1]/table[9]'
# html_nodes() returns a node set, so html_table() yields a list of tables;
# keep its first (only) element.
dataelec <- read_html(link) %>%
  html_nodes(xpath = path) %>%
  html_table() %>%
  .[[1]]
head(dataelec)
## # A tibble: 6 × 17
##   Distrito RP       RP     PP    PP    SP    SP    FE    FE    APP   APP   JP   
##   <chr>    <chr>    <chr>  <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Distrito ""       ""     ""    ""    ""    ""    ""    ""    ""    ""    ""   
## 2 Distrito "V"      "%"    "V"   "%"   "V"   "%"   "V"   "%"   "V"   "%"   "V"  
## 3 Ancón    "3,725"  "13.2… "9,3… "33.… "5,9… "21.… "2,0… "7.2… "3,5… "12.… "1,6…
## 4 Ate      "57,374" "17.4… "98,… "29.… "52,… "15.… "27,… "8.5… "26,… "7.9… "25,…
## 5 Barranco "11,604" "36.9… "5,7… "18.… "6,4… "20.… "2,7… "8.8… "2,1… "6.9… "1,6…
## 6 Breña    "22,721" "31.1… "18,… "25.… "14,… "19.… "8,2… "11.… "2,4… "3.3… "3,8…
## # ℹ 5 more variables: JP <chr>, AvP <chr>, AvP <chr>, PL <chr>, PL <chr>
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Drop rows 1 and 2 (the scraped table repeats its header there) and row 46.
# NOTE(review): row 46 is presumably a totals/footer row; confirm against the
# source table, since positional indices break if the page changes.
dataelec <- dataelec[-c(1, 2, 46), ]
head(dataelec)
## # A tibble: 6 × 17
##   Distrito   RP     RP    PP     PP    SP    SP    FE    FE    APP   APP   JP   
##   <chr>      <chr>  <chr> <chr>  <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Ancón      3,725  13.28 9,332  33.27 5,987 21.35 2,043 7.28  3,587 12.79 1,679
## 2 Ate        57,374 17.49 98,373 29.99 52,0… 15.87 27,9… 8.51  26,1… 7.97  25,1…
## 3 Barranco   11,604 36.92 5,766  18.34 6,401 20.37 2,772 8.82  2,193 6.98  1,628
## 4 Breña      22,721 31.15 18,676 25.61 14,0… 19.23 8,240 11.30 2,473 3.39  3,813
## 5 Carabayllo 30,418 19.24 43,980 27.82 33,3… 21.12 15,7… 9.94  16,4… 10.40 11,6…
## 6 Chaclacayo 8,111  28.42 6,491  22.74 5,360 18.78 3,014 10.56 1,301 4.56  1,733
## # ℹ 5 more variables: JP <chr>, AvP <chr>, AvP <chr>, PL <chr>, PL <chr>
# Read the municipal solid-waste sheet from Google Sheets and preview it.
resi <- import(
  "https://docs.google.com/spreadsheets/d/1wIkWzWfhbF_KyufWyhjeFxLNO3h6f-oa/edit?gid=1754805882#gid=1754805882"
)
head(resi)
##                                          V1    V2     V3      V4           V5
## 1 Residuos municipales generados anualmente                                  
## 2                               FECHA_CORTE N_SEC UBIGEO REG_NAT DEPARTAMENTO
## 3                                  20230614     1  10101   SELVA     AMAZONAS
## 4                                  20230614     2  10102   SELVA     AMAZONAS
## 5                                  20230614     3  10103  SIERRA     AMAZONAS
## 6                                  20230614     4  10104  SIERRA     AMAZONAS
##            V6          V7        V8         V9       V10     V11           V12
## 1                                                                             
## 2   PROVINCIA    DISTRITO POB_TOTAL POB_URBANA POB_RURAL GPC_DOM QRESIDUOS_DOM
## 3 CHACHAPOYAS CHACHAPOYAS     28423      27548       875    0,48        4857,5
## 4 CHACHAPOYAS    ASUNCION       291        151       140    0,61         33,56
## 5 CHACHAPOYAS      BALSAS      1615        299      1316    0,45         48,96
## 6 CHACHAPOYAS       CHETO       597        388       209    0,45         63,59
##                V13           V14     V15
## 1                                       
## 2 QRESIDUOS_NO_DOM QRESIDUOS_MUN PERIODO
## 3          2081,78       6939,28    2014
## 4            14,38         47,95    2014
## 5            20,98         69,95    2014
## 6            27,25         90,84    2014
# Discard the first two rows: row 1 is the sheet title and row 2 holds the
# real column names, so the data keep the generic names V1..V15.
resi <- resi[-(1:2), ]
head(resi)
##         V1 V2    V3     V4       V5          V6          V7    V8    V9  V10
## 3 20230614  1 10101  SELVA AMAZONAS CHACHAPOYAS CHACHAPOYAS 28423 27548  875
## 4 20230614  2 10102  SELVA AMAZONAS CHACHAPOYAS    ASUNCION   291   151  140
## 5 20230614  3 10103 SIERRA AMAZONAS CHACHAPOYAS      BALSAS  1615   299 1316
## 6 20230614  4 10104 SIERRA AMAZONAS CHACHAPOYAS       CHETO   597   388  209
## 7 20230614  5 10105 SIERRA AMAZONAS CHACHAPOYAS   CHILIQUIN   737   197  540
## 8 20230614  6 10106 SIERRA AMAZONAS CHACHAPOYAS CHUQUIBAMBA  2096   630 1466
##    V11    V12     V13     V14  V15
## 3 0,48 4857,5 2081,78 6939,28 2014
## 4 0,61  33,56   14,38   47,95 2014
## 5 0,45  48,96   20,98   69,95 2014
## 6 0,45  63,59   27,25   90,84 2014
## 7 0,45  32,38   13,88   46,26 2014
## 8 0,45 103,25   44,25   147,5 2014
# Keep only the records whose period (V15, the PERIODO column) is 2021.
venti <- resi[resi$V15 == 2021, ]
head(venti)
##             V1    V2    V3     V4       V5          V6          V7    V8    V9
## 13091 20230614 13089 10101  SELVA AMAZONAS CHACHAPOYAS CHACHAPOYAS 39691 38800
## 13092 20230614 13090 10102  SELVA AMAZONAS CHACHAPOYAS    ASUNCION   277   135
## 13093 20230614 13091 10103 SIERRA AMAZONAS CHACHAPOYAS      BALSAS  1169   296
## 13094 20230614 13092 10104 SIERRA AMAZONAS CHACHAPOYAS       CHETO   705   452
## 13095 20230614 13093 10105 SIERRA AMAZONAS CHACHAPOYAS   CHILIQUIN   581   156
## 13096 20230614 13094 10106 SIERRA AMAZONAS CHACHAPOYAS CHUQUIBAMBA  1922   464
##        V10  V11     V12     V13     V14  V15
## 13091  891 0,39 5523,18 2367,08 7890,26 2021
## 13092  142 0,51   25,33   10,86   36,19 2021
## 13093  873 0,44   47,54   20,37   67,91 2021
## 13094  253 0,44   72,59   31,11   103,7 2021
## 13095  425 0,44   25,05   10,74   35,79 2021
## 13096 1458 0,44   74,52   31,94  106,45 2021
# Remove the first four columns (V1..V4), leaving V5 onwards.
venti <- venti[, -(1:4)]
head(venti)
##             V5          V6          V7    V8    V9  V10  V11     V12     V13
## 13091 AMAZONAS CHACHAPOYAS CHACHAPOYAS 39691 38800  891 0,39 5523,18 2367,08
## 13092 AMAZONAS CHACHAPOYAS    ASUNCION   277   135  142 0,51   25,33   10,86
## 13093 AMAZONAS CHACHAPOYAS      BALSAS  1169   296  873 0,44   47,54   20,37
## 13094 AMAZONAS CHACHAPOYAS       CHETO   705   452  253 0,44   72,59   31,11
## 13095 AMAZONAS CHACHAPOYAS   CHILIQUIN   581   156  425 0,44   25,05   10,74
## 13096 AMAZONAS CHACHAPOYAS CHUQUIBAMBA  1922   464 1458 0,44   74,52   31,94
##           V14  V15
## 13091 7890,26 2021
## 13092   36,19 2021
## 13093   67,91 2021
## 13094   103,7 2021
## 13095   35,79 2021
## 13096  106,45 2021
# Download the Wikipedia article and parse it once.
link <- "https://es.wikipedia.org/wiki/Elecciones_municipales_de_Lima_de_2022"
webpage <- read_html(link)

# html_node() selects a single node, so html_table() returns the table itself.
distri <- html_table(
  html_node(webpage, xpath = '//*[@id="mw-content-text"]/div[1]/table[9]')
)
head(distri)
## # A tibble: 6 × 17
##   Distrito RP       RP     PP    PP    SP    SP    FE    FE    APP   APP   JP   
##   <chr>    <chr>    <chr>  <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Distrito ""       ""     ""    ""    ""    ""    ""    ""    ""    ""    ""   
## 2 Distrito "V"      "%"    "V"   "%"   "V"   "%"   "V"   "%"   "V"   "%"   "V"  
## 3 Ancón    "3,725"  "13.2… "9,3… "33.… "5,9… "21.… "2,0… "7.2… "3,5… "12.… "1,6…
## 4 Ate      "57,374" "17.4… "98,… "29.… "52,… "15.… "27,… "8.5… "26,… "7.9… "25,…
## 5 Barranco "11,604" "36.9… "5,7… "18.… "6,4… "20.… "2,7… "8.8… "2,1… "6.9… "1,6…
## 6 Breña    "22,721" "31.1… "18,… "25.… "14,… "19.… "8,2… "11.… "2,4… "3.3… "3,8…
## # ℹ 5 more variables: JP <chr>, AvP <chr>, AvP <chr>, PL <chr>, PL <chr>
# Drop rows 1 and 2 (the scraped table repeats its header there) and row 46.
# NOTE(review): row 46 is presumably a totals/footer row; confirm against the
# source table.
distri <- distri[-c(1, 2, 46), ]
head(distri)
## # A tibble: 6 × 17
##   Distrito   RP     RP    PP     PP    SP    SP    FE    FE    APP   APP   JP   
##   <chr>      <chr>  <chr> <chr>  <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Ancón      3,725  13.28 9,332  33.27 5,987 21.35 2,043 7.28  3,587 12.79 1,679
## 2 Ate        57,374 17.49 98,373 29.99 52,0… 15.87 27,9… 8.51  26,1… 7.97  25,1…
## 3 Barranco   11,604 36.92 5,766  18.34 6,401 20.37 2,772 8.82  2,193 6.98  1,628
## 4 Breña      22,721 31.15 18,676 25.61 14,0… 19.23 8,240 11.30 2,473 3.39  3,813
## 5 Carabayllo 30,418 19.24 43,980 27.82 33,3… 21.12 15,7… 9.94  16,4… 10.40 11,6…
## 6 Chaclacayo 8,111  28.42 6,491  22.74 5,360 18.78 3,014 10.56 1,301 4.56  1,733
## # ℹ 5 more variables: JP <chr>, AvP <chr>, AvP <chr>, PL <chr>, PL <chr>
# Inner-join the two scraped tables on the district name.
# NOTE(review): `dataelec` and `distri` are read from the same URL and XPath,
# so every non-key column appears twice (.x / .y); the inputs also repeat
# column names themselves, which is what triggers the warning below.
merg <- merge(dataelec, distri, by = "Distrito")
## Warning in merge.data.frame(dataelec, distri, by = "Distrito"): column names
## 'RP.x', 'PP.x', 'SP.x', 'FE.x', 'APP.x', 'JP.x', 'AvP.x', 'PL.x', 'RP.y',
## 'PP.y', 'SP.y', 'FE.y', 'APP.y', 'JP.y', 'AvP.y', 'PL.y' are duplicated in the
## result
head(merg)
##     Distrito   RP.x  RP.x   PP.x  PP.x   SP.x  SP.x   FE.x  FE.x  APP.x APP.x
## 1      Ancón  3,725 13.28  9,332 33.27  5,987 21.35  2,043  7.28  3,587 12.79
## 2        Ate 57,374 17.49 98,373 29.99 52,069 15.87 27,911  8.51 26,140  7.97
## 3   Barranco 11,604 36.92  5,766 18.34  6,401 20.37  2,772  8.82  2,193  6.98
## 4      Breña 22,721 31.15 18,676 25.61 14,022 19.23  8,240 11.30  2,473  3.39
## 5 Carabayllo 30,418 19.24 43,980 27.82 33,394 21.12 15,708  9.94 16,434 10.40
## 6 Chaclacayo  8,111 28.42  6,491 22.74  5,360 18.78  3,014 10.56  1,301  4.56
##     JP.x JP.x  AvP.x AvP.x  PL.x PL.x   RP.y  RP.y   PP.y  PP.y   SP.y  SP.y
## 1  1,679 5.99  1,228  4.38   465 1.67  3,725 13.28  9,332 33.27  5,987 21.35
## 2 25,113 7.66 32,851 10.01 8,191 2.50 57,374 17.49 98,373 29.99 52,069 15.87
## 3  1,628 5.18    845  2.69   223 0.71 11,604 36.92  5,766 18.34  6,401 20.37
## 4  3,813 5.23  2,435  3.34   552 0.76 22,721 31.15 18,676 25.61 14,022 19.23
## 5 11,655 7.37  2,868  1.81 3,636 2.30 30,418 19.24 43,980 27.82 33,394 21.12
## 6  1,733 6.07  1,675  5.87   855 3.00  8,111 28.42  6,491 22.74  5,360 18.78
##     FE.y  FE.y  APP.y APP.y   JP.y JP.y  AvP.y AvP.y  PL.y PL.y
## 1  2,043  7.28  3,587 12.79  1,679 5.99  1,228  4.38   465 1.67
## 2 27,911  8.51 26,140  7.97 25,113 7.66 32,851 10.01 8,191 2.50
## 3  2,772  8.82  2,193  6.98  1,628 5.18    845  2.69   223 0.71
## 4  8,240 11.30  2,473  3.39  3,813 5.23  2,435  3.34   552 0.76
## 5 15,708  9.94 16,434 10.40 11,655 7.37  2,868  1.81 3,636 2.30
## 6  3,014 10.56  1,301  4.56  1,733 6.07  1,675  5.87   855 3.00
# Join the election results with the 2021 waste figures by district.
# The previous call left `by =` empty, so merge() fell back to its default
# key (the column names common to both tables). `merg` and `venti` share no
# column name, and in that case merge() returns the Cartesian product of the
# two tables instead of a district-level match.
# In `venti`, V5/V6/V7 are DEPARTAMENTO/PROVINCIA/DISTRITO (per the header row
# that was dropped from `resi`) and their values are written in upper case.
# NOTE(review): assumes Lima's department and province are both coded "LIMA"
# and that district names carry no accents (with "Ñ" kept as-is); confirm.
venti_lima <- venti[which(venti$V5 == "LIMA" & venti$V6 == "LIMA"), ]
merg_clave <- merg
merg_clave$distrito_clave <- chartr(
  "ÁÉÍÓÚÜ", "AEIOUU", toupper(trimws(merg$Distrito))
)
# Report districts that fail to match rather than dropping them silently.
sin_par <- setdiff(merg_clave$distrito_clave, venti_lima$V7)
if (length(sin_par) > 0) {
  warning(
    "Distritos sin coincidencia en 'venti': ",
    paste(sin_par, collapse = ", "),
    call. = FALSE
  )
}
final <- merge(merg_clave, venti_lima, by.x = "distrito_clave", by.y = "V7")