# Intentionally no rm(list = ls()) here: that call only removes objects from
# the current environment (attached packages and options stay as they were),
# and it wipes the caller's workspace whenever this file is source()d.
# Run the script in a fresh R session instead.
library(rio)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# dplyr is already attached by library(tidyverse); the explicit call is kept
# so the dependency stays visible.
library(dplyr)

# Read the spreadsheet with import() and print the structure of the result
# to check the column names and types that were detected.
data1 <- import("dataPeru.xlsx")
str(data1)
## 'data.frame': 25 obs. of 8 variables:
## $ DEPARTAMENTO : chr "AMAZONAS" "ÁNCASH" "APURÍMAC" "AREQUIPA" ...
## $ UBIGEO : chr "010000" "020000" "030000" "040000" ...
## $ buenEstado : num 18.6 13.9 8.7 27.4 17 18 33.8 11.9 10.1 15.6 ...
## $ contribuyentesSunat: num 75035 302906 103981 585628 151191 ...
## $ peaOcupada : num 130019 387976 140341 645001 235857 ...
## $ pobUrbana : num 205976 806065 243354 1383694 444473 ...
## $ PobRural : num 211389 333050 180905 76739 206467 ...
## $ pobTotal : num 417365 1139115 424259 1460433 650940 ...
# Keep only the rows of data1 that have no missing value in any column.
data1 <- data1[complete.cases(data1), ]

# Model 1: buenEstado explained by contribuyentesSunat and peaOcupada,
# fitted by ordinary least squares.
modelo1 <- buenEstado ~ contribuyentesSunat + peaOcupada
reg1 <- lm(formula = modelo1, data = data1)

# Coefficient table, residual summary and fit statistics.
summary(reg1)
##
## Call:
## lm(formula = modelo1, data = data1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.589 -3.966 -1.347 1.907 21.518
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.865e+01 2.694e+00 6.922 5.98e-07 ***
## contribuyentesSunat 1.786e-05 2.060e-05 0.867 0.395
## peaOcupada -1.596e-05 2.241e-05 -0.712 0.484
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.925 on 22 degrees of freedom
## Multiple R-squared: 0.1561, Adjusted R-squared: 0.07939
## F-statistic: 2.035 on 2 and 22 DF, p-value: 0.1546
# Model 2: peaOcupada explained by contribuyentesSunat and buenEstado,
# fitted by ordinary least squares on the same data frame.
modelo2 <- peaOcupada ~ contribuyentesSunat + buenEstado
reg2 <- lm(formula = modelo2, data = data1)

# Coefficient table, residual summary and fit statistics.
summary(reg2)
##
## Call:
## lm(formula = modelo2, data = data1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -91867 -58573 -11166 46174 155851
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.155e+05 3.787e+04 3.049 0.00588 **
## contribuyentesSunat 9.206e-01 1.741e-02 52.872 < 2e-16 ***
## buenEstado -1.412e+03 1.983e+03 -0.712 0.48395
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 74540 on 22 degrees of freedom
## Multiple R-squared: 0.9932, Adjusted R-squared: 0.9926
## F-statistic: 1603 on 2 and 22 DF, p-value: < 2.2e-16
# Rescale buenEstado by 1/100 (presumably percentage -> proportion; confirm
# against the data dictionary) and store its log-odds in a new column.
# NOTE(review): buenEstado is overwritten in place, so running these lines a
# second time divides by 100 again, and modelo1/modelo2 re-fitted afterwards
# would see the rescaled column.
data1[["buenEstado"]] <- data1[["buenEstado"]] / 100
data1[["logit_buenEstado"]] <- with(data1, log(buenEstado / (1 - buenEstado)))

# Model x: same predictors as model 1, with the logit of buenEstado as the
# response.
modelox <- logit_buenEstado ~ contribuyentesSunat + peaOcupada
regx <- lm(formula = modelox, data = data1)

# Coefficient table, residual summary and fit statistics.
summary(regx)
##
## Call:
## lm(formula = modelox, data = data1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.87418 -0.22370 0.01662 0.18746 1.18092
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.591e+00 1.788e-01 -8.897 9.67e-09 ***
## contribuyentesSunat 7.815e-07 1.367e-06 0.572 0.573
## peaOcupada -6.339e-07 1.487e-06 -0.426 0.674
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.526 on 22 degrees of freedom
## Multiple R-squared: 0.1296, Adjusted R-squared: 0.05045
## F-statistic: 1.638 on 2 and 22 DF, p-value: 0.2173