Pregunta 1
library(rio)
# Read the indicators table from the shared Google Sheet into `datap`.
datap <- import(
  "https://docs.google.com/spreadsheets/d/1v-Kj_zP99PTsMnavtPVYhQ86saCo_rUh6gk_qUEYHFw/edit?gid=905833369#gid=905833369"
)
# Preview the first rows to check that the columns were read correctly.
head(datap)
## DEPARTAMENTO UBIGEO buenEstado contribuyentesSunat peaOcupada pobUrbana
## 1 AMAZONAS 10000 18.6 75035 130019 205976
## 2 ÁNCASH 20000 13.9 302906 387976 806065
## 3 APURÍMAC 30000 8.7 103981 140341 243354
## 4 AREQUIPA 40000 27.4 585628 645001 1383694
## 5 AYACUCHO 50000 17.0 151191 235857 444473
## 6 CAJAMARCA 60000 18.0 277457 461312 567141
## PobRural pobTotal
## 1 211389 417365
## 2 333050 1139115
## 3 180905 424259
## 4 76739 1460433
## 5 206467 650940
## 6 860386 1427527
“buenEstado”: Porcentaje de locales escolares en buen estado
“contribuyentesSunat”: Cantidad de contribuyentes a la SUNAT (PEA)
“peaOcupada”: Cantidad de PEA ocupada
# Check the storage type and first values of `buenEstado`.
str(datap[["buenEstado"]])
## num [1:25] 18.6 13.9 8.7 27.4 17 18 33.8 11.9 10.1 15.6 ...
# Check the storage type and first values of `contribuyentesSunat`.
str(datap[["contribuyentesSunat"]])
## int [1:25] 75035 302906 103981 585628 151191 277457 499257 466883 80353 185658 ...
# Express SUNAT contributors and occupied PEA as a percentage of the total
# population of each row, so both predictors are on a per-population scale.
datap$pcon <- datap$contribuyentesSunat / datap$pobTotal * 100
datap$ppea <- datap$peaOcupada / datap$pobTotal * 100

# Use bare column names in the formula and let `data = datap` resolve them.
# Writing `datap$...` inside the formula bypasses `data`, so
# predict(modelo1, newdata = ...) would ignore `newdata`. The fitted values
# are the same; coefficient labels now print as `pcon` and `ppea`.
modelo1 <- lm(buenEstado ~ pcon + ppea, data = datap)
summary(modelo1)
##
## Call:
## lm(formula = datap$buenEstado ~ datap$pcon + datap$ppea, data = datap)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.0928 -4.3610 0.2575 4.4003 11.0196
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -22.6095 15.9617 -1.416 0.171
## datap$pcon 0.1003 0.3121 0.321 0.751
## datap$ppea 1.0218 0.6424 1.590 0.126
##
## Residual standard error: 6.299 on 22 degrees of freedom
## Multiple R-squared: 0.4669, Adjusted R-squared: 0.4184
## F-statistic: 9.633 on 2 and 22 DF, p-value: 0.000989
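Respuesta: El p-valor global del modelo (prueba F) es 0.000989, menor a 0.05, por lo que el modelo en conjunto es estadísticamente significativo: se puede rechazar la hipótesis nula de que todos los coeficientes son iguales a cero y afirmar que las variables, en conjunto, tienen efecto en la variable dependiente. Sin embargo, de forma individual ni pcon (p = 0.751) ni ppea (p = 0.126) resultan significativas al 0.05.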
Pregunta 2: Al querer probar la hipótesis de que la cantidad de PEA ocupada depende de la cantidad de contribuyentes a la SUNAT y del porcentaje de locales escolares en buen estado, se llega a comprobar que (con una significancia de 0.05):
# Regress occupied PEA on the number of SUNAT contributors and the
# percentage of school buildings in good condition.
# The formula uses bare column names with `data = datap` instead of
# `datap$...` terms, so the model stays tied to the data frame and
# predict(modelo2, newdata = ...) works. The estimates are unchanged;
# coefficient labels print without the `datap$` prefix.
modelo2 <- lm(peaOcupada ~ contribuyentesSunat + buenEstado, data = datap)
summary(modelo2)
##
## Call:
## lm(formula = datap$peaOcupada ~ datap$contribuyentesSunat + datap$buenEstado)
##
## Residuals:
## Min 1Q Median 3Q Max
## -91867 -58573 -11166 46174 155851
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.155e+05 3.787e+04 3.049 0.00588 **
## datap$contribuyentesSunat 9.206e-01 1.741e-02 52.872 < 2e-16 ***
## datap$buenEstado -1.412e+03 1.983e+03 -0.712 0.48395
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 74540 on 22 degrees of freedom
## Multiple R-squared: 0.9932, Adjusted R-squared: 0.9926
## F-statistic: 1603 on 2 and 22 DF, p-value: < 2.2e-16
PARTE 2:
library(rvest)
# Wikipedia article and XPath of the table to scrape.
link <- "https://es.wikipedia.org/wiki/Elecciones_municipales_de_Lima_de_2022"
path <- '//*[@id="mw-content-text"]/div[1]/table[9]'
# Download the page, select the nodes matching the XPath, parse them as
# tables and keep the first one.
dataelec <- html_table(html_nodes(read_html(link), xpath = path))[[1]]
head(dataelec)
## # A tibble: 6 × 17
## Distrito RP RP PP PP SP SP FE FE APP APP JP
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Distrito "" "" "" "" "" "" "" "" "" "" ""
## 2 Distrito "V" "%" "V" "%" "V" "%" "V" "%" "V" "%" "V"
## 3 Ancón "3,725" "13.2… "9,3… "33.… "5,9… "21.… "2,0… "7.2… "3,5… "12.… "1,6…
## 4 Ate "57,374" "17.4… "98,… "29.… "52,… "15.… "27,… "8.5… "26,… "7.9… "25,…
## 5 Barranco "11,604" "36.9… "5,7… "18.… "6,4… "20.… "2,7… "8.8… "2,1… "6.9… "1,6…
## 6 Breña "22,721" "31.1… "18,… "25.… "14,… "19.… "8,2… "11.… "2,4… "3.3… "3,8…
## # ℹ 5 more variables: JP <chr>, AvP <chr>, AvP <chr>, PL <chr>, PL <chr>
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Drop rows 1, 2 and 46 by position. Rows 1 and 2 are the repeated header
# rows of the scraped table ("Distrito" / "V", "%").
# NOTE(review): row 46 is presumably a totals row - confirm against the page.
dataelec <- dataelec[!(seq_len(nrow(dataelec)) %in% c(1, 2, 46)), ]
head(dataelec)
## # A tibble: 6 × 17
## Distrito RP RP PP PP SP SP FE FE APP APP JP
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Ancón 3,725 13.28 9,332 33.27 5,987 21.35 2,043 7.28 3,587 12.79 1,679
## 2 Ate 57,374 17.49 98,373 29.99 52,0… 15.87 27,9… 8.51 26,1… 7.97 25,1…
## 3 Barranco 11,604 36.92 5,766 18.34 6,401 20.37 2,772 8.82 2,193 6.98 1,628
## 4 Breña 22,721 31.15 18,676 25.61 14,0… 19.23 8,240 11.30 2,473 3.39 3,813
## 5 Carabayllo 30,418 19.24 43,980 27.82 33,3… 21.12 15,7… 9.94 16,4… 10.40 11,6…
## 6 Chaclacayo 8,111 28.42 6,491 22.74 5,360 18.78 3,014 10.56 1,301 4.56 1,733
## # ℹ 5 more variables: JP <chr>, AvP <chr>, AvP <chr>, PL <chr>, PL <chr>
# Read the municipal solid-waste sheet from Google Sheets into `resi`.
resi <- import(
  "https://docs.google.com/spreadsheets/d/1wIkWzWfhbF_KyufWyhjeFxLNO3h6f-oa/edit?gid=1754805882#gid=1754805882"
)
# Preview the raw import to see where the real data starts.
head(resi)
## V1 V2 V3 V4 V5
## 1 Residuos municipales generados anualmente
## 2 FECHA_CORTE N_SEC UBIGEO REG_NAT DEPARTAMENTO
## 3 20230614 1 10101 SELVA AMAZONAS
## 4 20230614 2 10102 SELVA AMAZONAS
## 5 20230614 3 10103 SIERRA AMAZONAS
## 6 20230614 4 10104 SIERRA AMAZONAS
## V6 V7 V8 V9 V10 V11 V12
## 1
## 2 PROVINCIA DISTRITO POB_TOTAL POB_URBANA POB_RURAL GPC_DOM QRESIDUOS_DOM
## 3 CHACHAPOYAS CHACHAPOYAS 28423 27548 875 0,48 4857,5
## 4 CHACHAPOYAS ASUNCION 291 151 140 0,61 33,56
## 5 CHACHAPOYAS BALSAS 1615 299 1316 0,45 48,96
## 6 CHACHAPOYAS CHETO 597 388 209 0,45 63,59
## V13 V14 V15
## 1
## 2 QRESIDUOS_NO_DOM QRESIDUOS_MUN PERIODO
## 3 2081,78 6939,28 2014
## 4 14,38 47,95 2014
## 5 20,98 69,95 2014
## 6 27,25 90,84 2014
# Remove the first two rows: the sheet title and the row that holds the
# real column headers. Columns keep their generic V1..V15 names.
resi <- resi[!(seq_len(nrow(resi)) %in% 1:2), ]
head(resi)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10
## 3 20230614 1 10101 SELVA AMAZONAS CHACHAPOYAS CHACHAPOYAS 28423 27548 875
## 4 20230614 2 10102 SELVA AMAZONAS CHACHAPOYAS ASUNCION 291 151 140
## 5 20230614 3 10103 SIERRA AMAZONAS CHACHAPOYAS BALSAS 1615 299 1316
## 6 20230614 4 10104 SIERRA AMAZONAS CHACHAPOYAS CHETO 597 388 209
## 7 20230614 5 10105 SIERRA AMAZONAS CHACHAPOYAS CHILIQUIN 737 197 540
## 8 20230614 6 10106 SIERRA AMAZONAS CHACHAPOYAS CHUQUIBAMBA 2096 630 1466
## V11 V12 V13 V14 V15
## 3 0,48 4857,5 2081,78 6939,28 2014
## 4 0,61 33,56 14,38 47,95 2014
## 5 0,45 48,96 20,98 69,95 2014
## 6 0,45 63,59 27,25 90,84 2014
## 7 0,45 32,38 13,88 46,26 2014
## 8 0,45 103,25 44,25 147,5 2014
# Keep only the 2021 records; V15 is the PERIODO column of the sheet.
# `%in%` never yields NA, so rows with a missing V15 are dropped instead of
# coming back as all-NA rows, which is what `==` does inside a row index.
venti <- resi[resi$V15 %in% 2021, ]
head(venti)
## V1 V2 V3 V4 V5 V6 V7 V8 V9
## 13091 20230614 13089 10101 SELVA AMAZONAS CHACHAPOYAS CHACHAPOYAS 39691 38800
## 13092 20230614 13090 10102 SELVA AMAZONAS CHACHAPOYAS ASUNCION 277 135
## 13093 20230614 13091 10103 SIERRA AMAZONAS CHACHAPOYAS BALSAS 1169 296
## 13094 20230614 13092 10104 SIERRA AMAZONAS CHACHAPOYAS CHETO 705 452
## 13095 20230614 13093 10105 SIERRA AMAZONAS CHACHAPOYAS CHILIQUIN 581 156
## 13096 20230614 13094 10106 SIERRA AMAZONAS CHACHAPOYAS CHUQUIBAMBA 1922 464
## V10 V11 V12 V13 V14 V15
## 13091 891 0,39 5523,18 2367,08 7890,26 2021
## 13092 142 0,51 25,33 10,86 36,19 2021
## 13093 873 0,44 47,54 20,37 67,91 2021
## 13094 253 0,44 72,59 31,11 103,7 2021
## 13095 425 0,44 25,05 10,74 35,79 2021
## 13096 1458 0,44 74,52 31,94 106,45 2021
# Discard the first four columns (V1-V4: FECHA_CORTE, N_SEC, UBIGEO and
# REG_NAT according to the sheet's header row).
venti <- venti[, -seq_len(4)]
head(venti)
## V5 V6 V7 V8 V9 V10 V11 V12 V13
## 13091 AMAZONAS CHACHAPOYAS CHACHAPOYAS 39691 38800 891 0,39 5523,18 2367,08
## 13092 AMAZONAS CHACHAPOYAS ASUNCION 277 135 142 0,51 25,33 10,86
## 13093 AMAZONAS CHACHAPOYAS BALSAS 1169 296 873 0,44 47,54 20,37
## 13094 AMAZONAS CHACHAPOYAS CHETO 705 452 253 0,44 72,59 31,11
## 13095 AMAZONAS CHACHAPOYAS CHILIQUIN 581 156 425 0,44 25,05 10,74
## 13096 AMAZONAS CHACHAPOYAS CHUQUIBAMBA 1922 464 1458 0,44 74,52 31,94
## V14 V15
## 13091 7890,26 2021
## 13092 36,19 2021
## 13093 67,91 2021
## 13094 103,7 2021
## 13095 35,79 2021
## 13096 106,45 2021
# Fetch the Wikipedia article and parse the table located by the XPath.
link <- "https://es.wikipedia.org/wiki/Elecciones_municipales_de_Lima_de_2022"
webpage <- read_html(link)
distri <- html_table(
  html_node(webpage, xpath = '//*[@id="mw-content-text"]/div[1]/table[9]')
)
head(distri)
## # A tibble: 6 × 17
## Distrito RP RP PP PP SP SP FE FE APP APP JP
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Distrito "" "" "" "" "" "" "" "" "" "" ""
## 2 Distrito "V" "%" "V" "%" "V" "%" "V" "%" "V" "%" "V"
## 3 Ancón "3,725" "13.2… "9,3… "33.… "5,9… "21.… "2,0… "7.2… "3,5… "12.… "1,6…
## 4 Ate "57,374" "17.4… "98,… "29.… "52,… "15.… "27,… "8.5… "26,… "7.9… "25,…
## 5 Barranco "11,604" "36.9… "5,7… "18.… "6,4… "20.… "2,7… "8.8… "2,1… "6.9… "1,6…
## 6 Breña "22,721" "31.1… "18,… "25.… "14,… "19.… "8,2… "11.… "2,4… "3.3… "3,8…
## # ℹ 5 more variables: JP <chr>, AvP <chr>, AvP <chr>, PL <chr>, PL <chr>
# Drop rows 1, 2 and 46 by position. Rows 1 and 2 are the repeated header
# rows of the scraped table ("Distrito" / "V", "%").
# NOTE(review): row 46 is presumably a totals row - confirm against the page.
distri <- distri[!(seq_len(nrow(distri)) %in% c(1, 2, 46)), ]
head(distri)
## # A tibble: 6 × 17
## Distrito RP RP PP PP SP SP FE FE APP APP JP
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Ancón 3,725 13.28 9,332 33.27 5,987 21.35 2,043 7.28 3,587 12.79 1,679
## 2 Ate 57,374 17.49 98,373 29.99 52,0… 15.87 27,9… 8.51 26,1… 7.97 25,1…
## 3 Barranco 11,604 36.92 5,766 18.34 6,401 20.37 2,772 8.82 2,193 6.98 1,628
## 4 Breña 22,721 31.15 18,676 25.61 14,0… 19.23 8,240 11.30 2,473 3.39 3,813
## 5 Carabayllo 30,418 19.24 43,980 27.82 33,3… 21.12 15,7… 9.94 16,4… 10.40 11,6…
## 6 Chaclacayo 8,111 28.42 6,491 22.74 5,360 18.78 3,014 10.56 1,301 4.56 1,733
## # ℹ 5 more variables: JP <chr>, AvP <chr>, AvP <chr>, PL <chr>, PL <chr>
# Inner join of the two election tables on the district name.
# NOTE(review): `dataelec` and `distri` are scraped from the same URL and
# XPath, so this join only duplicates every column (.x / .y) - confirm that
# a different second table was not intended.
merg <- merge(x = dataelec, y = distri, by.x = "Distrito", by.y = "Distrito")
## Warning in merge.data.frame(dataelec, distri, by = "Distrito"): column names
## 'RP.x', 'PP.x', 'SP.x', 'FE.x', 'APP.x', 'JP.x', 'AvP.x', 'PL.x', 'RP.y',
## 'PP.y', 'SP.y', 'FE.y', 'APP.y', 'JP.y', 'AvP.y', 'PL.y' are duplicated in the
## result
# Preview the first six rows of the merged table.
head(merg, n = 6L)
## Distrito RP.x RP.x PP.x PP.x SP.x SP.x FE.x FE.x APP.x APP.x
## 1 Ancón 3,725 13.28 9,332 33.27 5,987 21.35 2,043 7.28 3,587 12.79
## 2 Ate 57,374 17.49 98,373 29.99 52,069 15.87 27,911 8.51 26,140 7.97
## 3 Barranco 11,604 36.92 5,766 18.34 6,401 20.37 2,772 8.82 2,193 6.98
## 4 Breña 22,721 31.15 18,676 25.61 14,022 19.23 8,240 11.30 2,473 3.39
## 5 Carabayllo 30,418 19.24 43,980 27.82 33,394 21.12 15,708 9.94 16,434 10.40
## 6 Chaclacayo 8,111 28.42 6,491 22.74 5,360 18.78 3,014 10.56 1,301 4.56
## JP.x JP.x AvP.x AvP.x PL.x PL.x RP.y RP.y PP.y PP.y SP.y SP.y
## 1 1,679 5.99 1,228 4.38 465 1.67 3,725 13.28 9,332 33.27 5,987 21.35
## 2 25,113 7.66 32,851 10.01 8,191 2.50 57,374 17.49 98,373 29.99 52,069 15.87
## 3 1,628 5.18 845 2.69 223 0.71 11,604 36.92 5,766 18.34 6,401 20.37
## 4 3,813 5.23 2,435 3.34 552 0.76 22,721 31.15 18,676 25.61 14,022 19.23
## 5 11,655 7.37 2,868 1.81 3,636 2.30 30,418 19.24 43,980 27.82 33,394 21.12
## 6 1,733 6.07 1,675 5.87 855 3.00 8,111 28.42 6,491 22.74 5,360 18.78
## FE.y FE.y APP.y APP.y JP.y JP.y AvP.y AvP.y PL.y PL.y
## 1 2,043 7.28 3,587 12.79 1,679 5.99 1,228 4.38 465 1.67
## 2 27,911 8.51 26,140 7.97 25,113 7.66 32,851 10.01 8,191 2.50
## 3 2,772 8.82 2,193 6.98 1,628 5.18 845 2.69 223 0.71
## 4 8,240 11.30 2,473 3.39 3,813 5.23 2,435 3.34 552 0.76
## 5 15,708 9.94 16,434 10.40 11,655 7.37 2,868 1.81 3,636 2.30
## 6 3,014 10.56 1,301 4.56 1,733 6.07 1,675 5.87 855 3.00
# Join the election results to the 2021 waste records by district.
# The original call left `by` empty, and `merg` and `venti` share no column
# names, so merge() had no key to join on (per its documentation, a
# zero-length `by` yields the Cartesian product of both tables).

# Reduce a district name to trimmed, unaccented upper case so that
# "Ancón" (election table) and an upper-case spelling in the waste table
# produce the same key.
normalizar_distrito <- function(x) {
  toupper(trimws(chartr("áéíóúüñÁÉÍÓÚÜÑ", "aeiouunAEIOUUN", x)))
}

# NOTE(review): assumes department (V5) and province (V6) of Lima are both
# coded "LIMA" - confirm. Without this filter, same-named districts from
# other provinces would also match.
venti_lima <- venti[venti$V5 %in% "LIMA" & venti$V6 %in% "LIMA", ]
venti_lima$clave_distrito <- normalizar_distrito(venti_lima$V7)

# Work on a copy so `merg` itself is not modified.
merg_clave <- merg
merg_clave$clave_distrito <- normalizar_distrito(merg_clave$Distrito)

# Inner join: districts whose names still differ after normalization are
# dropped, so compare nrow(final) with nrow(merg) after running.
final <- merge(merg_clave, venti_lima, by = "clave_distrito")