library(readxl)
# Read the "VARIABLES" sheet of the workbook, then count and display the
# number of missing values found in each column.
variables <- read_excel(
  path = "Base de datos RETO.xlsx",
  sheet = "VARIABLES"
)
na_count <- colSums(is.na(variables))
print(na_count)
## edad_meses sexo escolaridad estrato violencia deprimido
## 0 0 8 0 0 440
## crecimiento castigo soledad soledadmenor
## 10 154 0 0
# Keep only the rows that have no missing value in any column, then show them.
variables <- na.omit(object = variables)
print(x = variables)
## # A tibble: 393 × 10
## edad_meses sexo escolaridad estrato violencia deprimido crecimiento castigo
## <dbl> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <chr>
## 1 33 Hombre 0 Rural … 0 0 4 No
## 2 25 Hombre 12 Urbano… 21 0 0 No
## 3 35 Hombre 15 Urbano… 21 0 1 No
## 4 26 Hombre 15 Urbano… 4 0 5 No
## 5 30 Hombre 15 Urbano… 0 0 12 Sí
## 6 35 Hombre 15 Urbano… 4 0 10 No
## 7 31 Hombre 15 Metrop… 10 0 0 No
## 8 31 Hombre 12 Metrop… 19 0 4 No
## 9 25 Hombre 12 Metrop… 0 52 4 No
## 10 32 Hombre 0 Metrop… 8 0 0 No
## # ℹ 383 more rows
## # ℹ 2 more variables: soledad <dbl>, soledadmenor <dbl>
# Correlation matrix of the five numeric columns listed below
cor_matrix <- cor(
  x = variables[, c("edad_meses", "escolaridad", "violencia", "deprimido", "crecimiento")]
)
print(cor_matrix)
## edad_meses escolaridad violencia deprimido crecimiento
## edad_meses 1.00000000 -0.01809124 -0.02381185 0.03037982 0.09759053
## escolaridad -0.01809124 1.00000000 0.02459147 -0.07958463 0.16591401
## violencia -0.02381185 0.02459147 1.00000000 0.04739506 0.02985501
## deprimido 0.03037982 -0.07958463 0.04739506 1.00000000 -0.08988854
## crecimiento 0.09759053 0.16591401 0.02985501 -0.08988854 1.00000000
# Translate a single correlation coefficient into a verbal strength label.
#
# The label depends on the magnitude of the coefficient only, so a strong
# negative correlation (e.g. -0.9) is reported as strong rather than
# "Muy debil". A missing coefficient yields NA_character_ instead of an error.
#
# cor_value: one numeric correlation coefficient (scalar).
# Returns:   one of "Muy Fuerte", "Fuerte", "Moderado", "Debil", "Muy debil",
#            or NA_character_ when cor_value is NA.
interpret_correlation <- function(cor_value) {
  strength <- abs(cor_value)
  if (is.na(strength)) {
    return(NA_character_)
  }
  if (strength > 0.8) {
    "Muy Fuerte"
  } else if (strength > 0.6) {
    "Fuerte"
  } else if (strength > 0.4) {
    "Moderado"
  } else if (strength > 0.2) {
    "Debil"
  } else {
    "Muy debil"
  }
}
# Label every cell of the correlation matrix with its verbal strength.
interpreted_matrix <- apply(
  X = cor_matrix,
  MARGIN = c(1, 2),
  FUN = interpret_correlation
)
print(interpreted_matrix)
## edad_meses escolaridad violencia deprimido crecimiento
## edad_meses "Muy Fuerte" "Muy debil" "Muy debil" "Muy debil" "Muy debil"
## escolaridad "Muy debil" "Muy Fuerte" "Muy debil" "Muy debil" "Muy debil"
## violencia "Muy debil" "Muy debil" "Muy Fuerte" "Muy debil" "Muy debil"
## deprimido "Muy debil" "Muy debil" "Muy debil" "Muy Fuerte" "Muy debil"
## crecimiento "Muy debil" "Muy debil" "Muy debil" "Muy debil" "Muy Fuerte"
# Heat map of the correlation matrix.
# stats::heatmap() has no `annot` or `cmap` arguments (those belong to Python's
# seaborn); they were forwarded to the low-level plotting calls and only
# produced "is not a graphical parameter" warnings. The colour scale is set
# through `col` instead (forwarded to image()), and row scaling is switched
# off so the colours represent the correlation values themselves.
heatmap(cor_matrix,
  symm = TRUE,
  scale = "none",
  col = colorRampPalette(c("blue", "white", "red"))(50),
  main = "Correlation Matrix with Interpretations"
)
# Age distribution by gender (violin plot)
ggplot(data = variables, mapping = aes(x = sexo, y = edad_meses, fill = sexo)) +
  geom_violin() +
  ggtitle("Distribución de Edades Por Género")
# Number of observations in each category of `estrato`
bar_social_status <- ggplot(data = variables, mapping = aes(x = factor(estrato))) +
  geom_bar() +
  ggtitle("Bar Chart: Distribución de Estrato") +
  xlab("Estrato") +
  ylab("Conteo")
print(bar_social_status)
# Violence by age (bars drawn from the raw `violencia` values)
bar_age_violence <- ggplot(data = variables, mapping = aes(x = edad_meses, y = violencia)) +
  geom_bar(position = "dodge", stat = "identity") +
  ggtitle("Grouped Bar Chart: Violencia por Edad") +
  xlab("Edad en meses") +
  ylab("Violencia")
print(bar_age_violence)
# Histogram of `deprimido` with bins of width 5
hist_depression <- ggplot(data = variables, mapping = aes(x = deprimido)) +
  geom_histogram(
    binwidth = 5,
    fill = "skyblue",
    color = "white",
    alpha = 0.7
  ) +
  ggtitle("Histogram: Depression Hours") +
  xlab("Depression Hours") +
  ylab("Count")
print(hist_depression)
# Box plot of `deprimido` for each gender
box_gender_depression <- ggplot(
  data = variables,
  mapping = aes(x = sexo, y = deprimido, fill = sexo)
) +
  geom_boxplot() +
  ggtitle("Box Plot: Depression Hours by Gender") +
  xlab("Gender") +
  ylab("Depression Hours")
print(box_gender_depression)
# Share of each `deprimido` value within the punishment groups (yes/no)
bar_punishment_depression <- ggplot(
  data = variables,
  mapping = aes(x = castigo, fill = factor(deprimido))
) +
  geom_bar(position = "fill") +
  ggtitle("Stacked Bar Chart: Depression por castigo (si/no)") +
  xlab("Punishment") +
  ylab("Proportion")
print(bar_punishment_depression)
# Linear regression of deprimido on the listed predictors, followed by its
# coefficient table.
lm_model <- lm(
  formula = deprimido ~ edad_meses + escolaridad + estrato + violencia +
    crecimiento + soledad + soledadmenor,
  data = variables
)
summary(lm_model)
##
## Call:
## lm(formula = deprimido ~ edad_meses + escolaridad + estrato +
## violencia + crecimiento + soledad + soledadmenor, data = variables)
##
## Residuals:
## Min 1Q Median 3Q Max
## -44.60 -20.46 -14.78 -5.98 357.26
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 25.0060 19.4750 1.284 0.200
## edad_meses 0.2585 0.3362 0.769 0.443
## escolaridad -1.3529 1.0021 -1.350 0.178
## estratoRural ( <25 -20.3519 27.6223 -0.737 0.462
## estratoUrbano ( 250 4.5690 7.6478 0.597 0.551
## violencia 0.3931 0.4359 0.902 0.368
## crecimiento -1.1766 0.7598 -1.549 0.122
## soledad -0.2351 1.9669 -0.120 0.905
## soledadmenor -0.0347 1.6315 -0.021 0.983
##
## Residual standard error: 66.03 on 384 degrees of freedom
## Multiple R-squared: 0.01904, Adjusted R-squared: -0.001396
## F-statistic: 0.9317 on 8 and 384 DF, p-value: 0.49
# Report, coefficient by coefficient, whether the p-value of lm_model falls
# below the 5% significance level.
summary_result <- summary(lm_model)
p_values <- coef(summary_result)[, "Pr(>|t|)"]
alpha <- 0.05
for (i in seq_along(p_values)) {
  cat(
    names(p_values)[i],
    if (p_values[i] < alpha) {
      "is statistically significant (p-value <"
    } else {
      "is not statistically significant (p-value >="
    },
    alpha, ")\n"
  )
}
## (Intercept) is not statistically significant (p-value >= 0.05 )
## edad_meses is not statistically significant (p-value >= 0.05 )
## escolaridad is not statistically significant (p-value >= 0.05 )
## estratoRural ( <25 is not statistically significant (p-value >= 0.05 )
## estratoUrbano ( 250 is not statistically significant (p-value >= 0.05 )
## violencia is not statistically significant (p-value >= 0.05 )
## crecimiento is not statistically significant (p-value >= 0.05 )
## soledad is not statistically significant (p-value >= 0.05 )
## soledadmenor is not statistically significant (p-value >= 0.05 )
# Linear regression of violencia on the listed predictors, followed by its
# coefficient table.
lm_model2 <- lm(
  formula = violencia ~ edad_meses + escolaridad + estrato + deprimido +
    crecimiento + soledad + soledadmenor,
  data = variables
)
summary(lm_model2)
##
## Call:
## lm(formula = violencia ~ edad_meses + escolaridad + estrato +
## deprimido + crecimiento + soledad + soledadmenor, data = variables)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.401 -5.907 -3.057 3.208 17.371
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.373743 2.259065 2.821 0.00503 **
## edad_meses -0.021207 0.039336 -0.539 0.59011
## escolaridad 0.098444 0.117364 0.839 0.40211
## estratoRural ( <25 -1.082388 3.232057 -0.335 0.73789
## estratoUrbano ( 250 2.241118 0.887433 2.525 0.01196 *
## deprimido 0.005377 0.005961 0.902 0.36768
## crecimiento 0.078810 0.089036 0.885 0.37663
## soledad -0.081812 0.229989 -0.356 0.72224
## soledadmenor -0.570494 0.188559 -3.026 0.00265 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.722 on 384 degrees of freedom
## Multiple R-squared: 0.0477, Adjusted R-squared: 0.02787
## F-statistic: 2.405 on 8 and 384 DF, p-value: 0.01532
# Report, coefficient by coefficient, whether the p-value of lm_model2 falls
# below the 5% significance level.
summary_result2 <- summary(lm_model2)
p_values2 <- coef(summary_result2)[, "Pr(>|t|)"]
alpha <- 0.05
for (i in seq_along(p_values2)) {
  cat(
    names(p_values2)[i],
    if (p_values2[i] < alpha) {
      "is statistically significant (p-value <"
    } else {
      "is not statistically significant (p-value >="
    },
    alpha, ")\n"
  )
}
## (Intercept) is statistically significant (p-value < 0.05 )
## edad_meses is not statistically significant (p-value >= 0.05 )
## escolaridad is not statistically significant (p-value >= 0.05 )
## estratoRural ( <25 is not statistically significant (p-value >= 0.05 )
## estratoUrbano ( 250 is statistically significant (p-value < 0.05 )
## deprimido is not statistically significant (p-value >= 0.05 )
## crecimiento is not statistically significant (p-value >= 0.05 )
## soledad is not statistically significant (p-value >= 0.05 )
## soledadmenor is statistically significant (p-value < 0.05 )
# Linear regression of crecimiento on the listed predictors, followed by its
# coefficient table.
lm_model3 <- lm(
  formula = crecimiento ~ edad_meses + escolaridad + estrato + deprimido +
    violencia + soledad + soledadmenor,
  data = variables
)
summary(lm_model3)
##
## Call:
## lm(formula = crecimiento ~ edad_meses + escolaridad + estrato +
## deprimido + violencia + soledad + soledadmenor, data = variables)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.216 -3.655 -1.153 3.684 19.579
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.670655 1.306359 0.513 0.60798
## edad_meses 0.045145 0.022413 2.014 0.04468 *
## escolaridad 0.207143 0.066425 3.118 0.00196 **
## estratoRural ( <25 0.511338 1.850660 0.276 0.78247
## estratoUrbano ( 250 -0.993561 0.509803 -1.949 0.05203 .
## deprimido -0.005275 0.003406 -1.549 0.12228
## violencia 0.025837 0.029189 0.885 0.37663
## soledad -0.181846 0.131379 -1.384 0.16712
## soledadmenor 0.060336 0.109199 0.553 0.58090
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.421 on 384 degrees of freedom
## Multiple R-squared: 0.05919, Adjusted R-squared: 0.03959
## F-statistic: 3.02 on 8 and 384 DF, p-value: 0.002662
# Report, coefficient by coefficient, whether the p-value of lm_model3 falls
# below the 5% significance level.
summary_result3 <- summary(lm_model3)
p_values3 <- coef(summary_result3)[, "Pr(>|t|)"]
alpha <- 0.05
for (i in seq_along(p_values3)) {
  cat(
    names(p_values3)[i],
    if (p_values3[i] < alpha) {
      "is statistically significant (p-value <"
    } else {
      "is not statistically significant (p-value >="
    },
    alpha, ")\n"
  )
}
## (Intercept) is not statistically significant (p-value >= 0.05 )
## edad_meses is statistically significant (p-value < 0.05 )
## escolaridad is statistically significant (p-value < 0.05 )
## estratoRural ( <25 is not statistically significant (p-value >= 0.05 )
## estratoUrbano ( 250 is not statistically significant (p-value >= 0.05 )
## deprimido is not statistically significant (p-value >= 0.05 )
## violencia is not statistically significant (p-value >= 0.05 )
## soledad is not statistically significant (p-value >= 0.05 )
## soledadmenor is not statistically significant (p-value >= 0.05 )
# Multiple regression of deprimido on the listed predictors (same formula and
# data as lm_model), followed by its coefficient table.
multi_model <- lm(
  formula = deprimido ~ edad_meses + escolaridad + estrato + violencia +
    crecimiento + soledad + soledadmenor,
  data = variables
)
summary(multi_model)
##
## Call:
## lm(formula = deprimido ~ edad_meses + escolaridad + estrato +
## violencia + crecimiento + soledad + soledadmenor, data = variables)
##
## Residuals:
## Min 1Q Median 3Q Max
## -44.60 -20.46 -14.78 -5.98 357.26
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 25.0060 19.4750 1.284 0.200
## edad_meses 0.2585 0.3362 0.769 0.443
## escolaridad -1.3529 1.0021 -1.350 0.178
## estratoRural ( <25 -20.3519 27.6223 -0.737 0.462
## estratoUrbano ( 250 4.5690 7.6478 0.597 0.551
## violencia 0.3931 0.4359 0.902 0.368
## crecimiento -1.1766 0.7598 -1.549 0.122
## soledad -0.2351 1.9669 -0.120 0.905
## soledadmenor -0.0347 1.6315 -0.021 0.983
##
## Residual standard error: 66.03 on 384 degrees of freedom
## Multiple R-squared: 0.01904, Adjusted R-squared: -0.001396
## F-statistic: 0.9317 on 8 and 384 DF, p-value: 0.49
# Report, coefficient by coefficient, whether the p-value of multi_model falls
# below the 5% significance level.
summary_result_multi <- summary(multi_model)
p_values_multi <- coef(summary_result_multi)[, "Pr(>|t|)"]
alpha_multi <- 0.05
for (i in seq_along(p_values_multi)) {
  cat(
    names(p_values_multi)[i],
    if (p_values_multi[i] < alpha_multi) {
      "is statistically significant (p-value <"
    } else {
      "is not statistically significant (p-value >="
    },
    alpha_multi, ")\n"
  )
}
## (Intercept) is not statistically significant (p-value >= 0.05 )
## edad_meses is not statistically significant (p-value >= 0.05 )
## escolaridad is not statistically significant (p-value >= 0.05 )
## estratoRural ( <25 is not statistically significant (p-value >= 0.05 )
## estratoUrbano ( 250 is not statistically significant (p-value >= 0.05 )
## violencia is not statistically significant (p-value >= 0.05 )
## crecimiento is not statistically significant (p-value >= 0.05 )
## soledad is not statistically significant (p-value >= 0.05 )
## soledadmenor is not statistically significant (p-value >= 0.05 )
# Multiple regression of violencia on the listed predictors (same formula and
# data as lm_model2), followed by its coefficient table.
multi_model2 <- lm(
  formula = violencia ~ edad_meses + escolaridad + estrato + deprimido +
    crecimiento + soledad + soledadmenor,
  data = variables
)
summary(multi_model2)
##
## Call:
## lm(formula = violencia ~ edad_meses + escolaridad + estrato +
## deprimido + crecimiento + soledad + soledadmenor, data = variables)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.401 -5.907 -3.057 3.208 17.371
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.373743 2.259065 2.821 0.00503 **
## edad_meses -0.021207 0.039336 -0.539 0.59011
## escolaridad 0.098444 0.117364 0.839 0.40211
## estratoRural ( <25 -1.082388 3.232057 -0.335 0.73789
## estratoUrbano ( 250 2.241118 0.887433 2.525 0.01196 *
## deprimido 0.005377 0.005961 0.902 0.36768
## crecimiento 0.078810 0.089036 0.885 0.37663
## soledad -0.081812 0.229989 -0.356 0.72224
## soledadmenor -0.570494 0.188559 -3.026 0.00265 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.722 on 384 degrees of freedom
## Multiple R-squared: 0.0477, Adjusted R-squared: 0.02787
## F-statistic: 2.405 on 8 and 384 DF, p-value: 0.01532
# Report, coefficient by coefficient, whether the p-value of multi_model2
# falls below the 5% significance level.
summary_result_multi2 <- summary(multi_model2)
p_values_multi2 <- coef(summary_result_multi2)[, "Pr(>|t|)"]
alpha_multi <- 0.05
for (i in seq_along(p_values_multi2)) {
  cat(
    names(p_values_multi2)[i],
    if (p_values_multi2[i] < alpha_multi) {
      "is statistically significant (p-value <"
    } else {
      "is not statistically significant (p-value >="
    },
    alpha_multi, ")\n"
  )
}
## (Intercept) is statistically significant (p-value < 0.05 )
## edad_meses is not statistically significant (p-value >= 0.05 )
## escolaridad is not statistically significant (p-value >= 0.05 )
## estratoRural ( <25 is not statistically significant (p-value >= 0.05 )
## estratoUrbano ( 250 is statistically significant (p-value < 0.05 )
## deprimido is not statistically significant (p-value >= 0.05 )
## crecimiento is not statistically significant (p-value >= 0.05 )
## soledad is not statistically significant (p-value >= 0.05 )
## soledadmenor is statistically significant (p-value < 0.05 )
# Multiple regression of crecimiento on the listed predictors (same formula
# and data as lm_model3), followed by its coefficient table.
multi_model3 <- lm(
  formula = crecimiento ~ edad_meses + escolaridad + estrato + deprimido +
    violencia + soledad + soledadmenor,
  data = variables
)
summary(multi_model3)
##
## Call:
## lm(formula = crecimiento ~ edad_meses + escolaridad + estrato +
## deprimido + violencia + soledad + soledadmenor, data = variables)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.216 -3.655 -1.153 3.684 19.579
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.670655 1.306359 0.513 0.60798
## edad_meses 0.045145 0.022413 2.014 0.04468 *
## escolaridad 0.207143 0.066425 3.118 0.00196 **
## estratoRural ( <25 0.511338 1.850660 0.276 0.78247
## estratoUrbano ( 250 -0.993561 0.509803 -1.949 0.05203 .
## deprimido -0.005275 0.003406 -1.549 0.12228
## violencia 0.025837 0.029189 0.885 0.37663
## soledad -0.181846 0.131379 -1.384 0.16712
## soledadmenor 0.060336 0.109199 0.553 0.58090
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.421 on 384 degrees of freedom
## Multiple R-squared: 0.05919, Adjusted R-squared: 0.03959
## F-statistic: 3.02 on 8 and 384 DF, p-value: 0.002662
# Report, coefficient by coefficient, whether the p-value of multi_model3
# falls below the 5% significance level.
summary_result_multi3 <- summary(multi_model3)
p_values_multi3 <- coef(summary_result_multi3)[, "Pr(>|t|)"]
alpha_multi <- 0.05
for (i in seq_along(p_values_multi3)) {
  cat(
    names(p_values_multi3)[i],
    if (p_values_multi3[i] < alpha_multi) {
      "is statistically significant (p-value <"
    } else {
      "is not statistically significant (p-value >="
    },
    alpha_multi, ")\n"
  )
}
## (Intercept) is not statistically significant (p-value >= 0.05 )
## edad_meses is statistically significant (p-value < 0.05 )
## escolaridad is statistically significant (p-value < 0.05 )
## estratoRural ( <25 is not statistically significant (p-value >= 0.05 )
## estratoUrbano ( 250 is not statistically significant (p-value >= 0.05 )
## deprimido is not statistically significant (p-value >= 0.05 )
## violencia is not statistically significant (p-value >= 0.05 )
## soledad is not statistically significant (p-value >= 0.05 )
## soledadmenor is not statistically significant (p-value >= 0.05 )
# ANOVA table for deprimido, with the terms entered in the order written.
anova_model <- aov(
  formula = deprimido ~ edad_meses + escolaridad + estrato + violencia +
    crecimiento + soledad + soledadmenor,
  data = variables
)
summary(anova_model)
## Df Sum Sq Mean Sq F value Pr(>F)
## edad_meses 1 1575 1575 0.361 0.548
## escolaridad 1 10664 10664 2.446 0.119
## estrato 2 6543 3272 0.750 0.473
## violencia 1 3234 3234 0.742 0.390
## crecimiento 1 10404 10404 2.386 0.123
## soledad 1 72 72 0.017 0.897
## soledadmenor 1 2 2 0.000 0.983
## Residuals 384 1674185 4360
# Overall p-value of the model, taken from its F-test.
# The first `Pr(>F)` entry of the ANOVA table is only the test for the first
# term (edad_meses), not for the model as a whole, so the F-statistic of the
# underlying linear fit is used instead.
f_stat <- summary.lm(anova_model)$fstatistic
p_value_overall <- pf(
  f_stat[["value"]], f_stat[["numdf"]], f_stat[["dendf"]],
  lower.tail = FALSE
)
cat("Overall p-value:", p_value_overall, "\n")
## Overall p-value: approx. 0.49 (F-test of this model as shown by summary(lm_model); re-run for full precision)
# ANOVA table for crecimiento, with the terms entered in the order written.
anova_model2 <- aov(
  formula = crecimiento ~ edad_meses + escolaridad + estrato + violencia +
    deprimido + soledad + soledadmenor,
  data = variables
)
summary(anova_model2)
## Df Sum Sq Mean Sq F value Pr(>F)
## edad_meses 1 76 75.98 3.887 0.049371 *
## escolaridad 1 224 224.39 11.480 0.000776 ***
## estrato 2 73 36.66 1.875 0.154688
## violencia 1 13 13.30 0.680 0.409979
## deprimido 1 47 46.88 2.398 0.122276
## soledad 1 32 32.39 1.657 0.198776
## soledadmenor 1 6 5.97 0.305 0.580903
## Residuals 384 7506 19.55
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Overall p-value of the model, taken from its F-test.
# The first `Pr(>F)` entry of the ANOVA table is only the test for the first
# term (edad_meses), not for the model as a whole, so the F-statistic of the
# underlying linear fit is used instead.
f_stat2 <- summary.lm(anova_model2)$fstatistic
p_value_overall2 <- pf(
  f_stat2[["value"]], f_stat2[["numdf"]], f_stat2[["dendf"]],
  lower.tail = FALSE
)
cat("Overall p-value:", p_value_overall2, "\n")
## Overall p-value: approx. 0.002662 (F-test of this model as shown by summary(lm_model3); re-run for full precision)
# ANOVA table for violencia, with the terms entered in the order written.
anova_model3 <- aov(
  formula = violencia ~ edad_meses + escolaridad + estrato + crecimiento +
    deprimido + soledad + soledadmenor,
  data = variables
)
summary(anova_model3)
## Df Sum Sq Mean Sq F value Pr(>F)
## edad_meses 1 14 13.6 0.229 0.63281
## escolaridad 1 14 14.0 0.235 0.62778
## estrato 2 399 199.3 3.342 0.03638 *
## crecimiento 1 41 41.3 0.693 0.40578
## deprimido 1 52 52.4 0.879 0.34899
## soledad 1 81 81.2 1.362 0.24400
## soledadmenor 1 546 545.8 9.154 0.00265 **
## Residuals 384 22896 59.6
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Overall p-value of the model, taken from its F-test.
# The first `Pr(>F)` entry of the ANOVA table is only the test for the first
# term (edad_meses), not for the model as a whole, so the F-statistic of the
# underlying linear fit is used instead.
f_stat3 <- summary.lm(anova_model3)$fstatistic
p_value_overall3 <- pf(
  f_stat3[["value"]], f_stat3[["numdf"]], f_stat3[["dendf"]],
  lower.tail = FALSE
)
cat("Overall p-value:", p_value_overall3, "\n")
## Overall p-value: approx. 0.01532 (F-test of this model as shown by summary(lm_model2); re-run for full precision)
# ANOVA table for violencia with a reduced predictor set (crecimiento and
# deprimido left out). This replaces the earlier contents of anova_model.
anova_model <- aov(
  formula = violencia ~ edad_meses + escolaridad + estrato + soledad +
    soledadmenor,
  data = variables
)
summary(anova_model)
## Df Sum Sq Mean Sq F value Pr(>F)
## edad_meses 1 14 13.6 0.229 0.63258
## escolaridad 1 14 14.0 0.236 0.62755
## estrato 2 399 199.3 3.347 0.03622 *
## soledad 1 90 90.1 1.513 0.21946
## soledadmenor 1 542 542.2 9.106 0.00272 **
## Residuals 386 22984 59.5
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Histogram of deprimido with a dashed red line at the median, followed by
# the frequency of each distinct value.
hist(
  x = variables$deprimido,
  main = "Histograma de deprimido",
  col = "skyblue",
  breaks = 20
)
abline(v = median(variables$deprimido), lty = 2, col = "red")
table(variables$deprimido)
##
## 0 12 52 365
## 339 13 28 13
# Binary indicator: 1 when deprimido is above zero, 0 otherwise.
# The recorded values of deprimido are 0, 12, 52 and 365, so the previous
# cut-off (> 365) could never be met: every row was coded 0, which left the
# logistic regression fitted on this column without a single positive case.
variables$binary_deprimido <- ifelse(variables$deprimido > 0, 1, 0)
table(variables$binary_deprimido)
##
## 0 1
## 339 54
# Logistic regression of the binary indicator on the listed predictors,
# followed by its summary.
logistic_model_deprimido <- glm(
  formula = binary_deprimido ~ edad_meses + escolaridad + estrato + violencia +
    crecimiento + soledad + soledadmenor,
  family = binomial,
  data = variables
)
## Warning: glm.fit: algorithm did not converge
summary(logistic_model_deprimido)
##
## Call:
## glm(formula = binary_deprimido ~ edad_meses + escolaridad + estrato +
## violencia + crecimiento + soledad + soledadmenor, family = binomial,
## data = variables)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.409e-06 -2.409e-06 -2.409e-06 -2.409e-06 -2.409e-06
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.657e+01 1.050e+05 0 1
## edad_meses -1.547e-15 1.813e+03 0 1
## escolaridad -1.180e-14 5.405e+03 0 1
## estratoRural ( <25 1.019e-12 1.490e+05 0 1
## estratoUrbano ( 250 4.711e-16 4.125e+04 0 1
## violencia -1.174e-15 2.351e+03 0 1
## crecimiento 1.461e-15 4.098e+03 0 1
## soledad 1.221e-15 1.061e+04 0 1
## soledadmenor -2.130e-15 8.799e+03 0 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 0.00e+00 on 392 degrees of freedom
## Residual deviance: 2.28e-09 on 384 degrees of freedom
## AIC: 18
##
## Number of Fisher Scoring iterations: 25
# Interpretation of the logistic regression
# Prints a short explanation of each headline statistic of the fitted model
# together with its value. The null-deviance sentences were corrected: the
# null deviance belongs to the intercept-only model, so on its own it says
# nothing about how good the fitted model is.
summary_result <- summary(logistic_model_deprimido)
# Interpretation of Null Deviance
cat("Null Deviance:\n")
## Null Deviance:
cat("The null deviance measures how well an intercept-only model (no predictors) fits the response variable.\n")
## The null deviance measures how well an intercept-only model (no predictors) fits the response variable.
cat("The drop from the null deviance to the residual deviance shows how much the predictors improve the fit.\n")
## The drop from the null deviance to the residual deviance shows how much the predictors improve the fit.
cat("Null Deviance:", summary_result$null.deviance, "on", summary_result$df.null, "degrees of freedom\n\n")
## Null Deviance: 0 on 392 degrees of freedom
# Interpretation of Residual Deviance
cat("Residual Deviance:\n")
## Residual Deviance:
cat("The residual deviance measures how well the model explains the response variable after including predictors.\n")
## The residual deviance measures how well the model explains the response variable after including predictors.
cat("A smaller residual deviance indicates a better fit of the model.\n")
## A smaller residual deviance indicates a better fit of the model.
cat("Residual Deviance:", summary_result$deviance, "on", summary_result$df.residual, "degrees of freedom\n\n")
## Residual Deviance: 2.280022e-09 on 384 degrees of freedom
# Interpretation of AIC
cat("AIC (Akaike Information Criterion):\n")
## AIC (Akaike Information Criterion):
cat("AIC is a measure of the model's goodness of fit, balancing the fit of the model with its complexity (number of parameters).\n")
## AIC is a measure of the model's goodness of fit, balancing the fit of the model with its complexity (number of parameters).
cat("A lower AIC indicates a better-fitting model.\n")
## A lower AIC indicates a better-fitting model.
cat("AIC:", summary_result$aic, "\n\n")
## AIC: 18
# Number of Fisher Scoring Iterations
cat("Number of Fisher Scoring Iterations:\n")
## Number of Fisher Scoring Iterations:
cat("The number of Fisher scoring iterations indicates how many iterations were performed by the optimization algorithm to find the maximum likelihood estimates of the model parameters.\n")
## The number of Fisher scoring iterations indicates how many iterations were performed by the optimization algorithm to find the maximum likelihood estimates of the model parameters.
cat("Number of Fisher Scoring Iterations:", summary_result$iter, "\n")
## Number of Fisher Scoring Iterations: 25
# Predictive model - DEPRIMIDO
# Fits a linear model on 80% of the rows, predicts the remaining 20% and
# prints the evaluation metrics with a short verdict for each.
print("Modelo Predictivo - DEPRIMIDO")
## [1] "Modelo Predictivo - DEPRIMIDO"
# Keep the modelling columns and drop any row with a missing value.
df_cleaned <- variables[, c("deprimido", "edad_meses", "escolaridad", "estrato", "soledad", "soledadmenor", "violencia", "crecimiento")]
df_cleaned <- df_cleaned[complete.cases(df_cleaned), ]
# Reproducible 80/20 split. seq_len() stays correct for an empty data frame
# (1:0 would yield c(1, 0)) and floor() makes the truncation of the fractional
# training-set size explicit.
set.seed(123)
split_index <- sample(seq_len(nrow(df_cleaned)), floor(0.8 * nrow(df_cleaned)))
train_data <- df_cleaned[split_index, ]
test_data <- df_cleaned[-split_index, ]
# Fit on the training rows and predict the held-out rows.
lm_model <- lm(deprimido ~ edad_meses + escolaridad + estrato + violencia + crecimiento, data = train_data)
predictions <- predict(lm_model, newdata = test_data)
# Error metrics are computed on the test rows; both R-squared values come
# from the fit on the training rows.
mse <- mean((test_data$deprimido - predictions)^2)
r_squared <- summary(lm_model)$r.squared
adjusted_r_squared <- summary(lm_model)$adj.r.squared
mae <- mean(abs(test_data$deprimido - predictions))
rmse <- sqrt(mse)
print("Model Evaluation Metrics:")
## [1] "Model Evaluation Metrics:"
print(paste("Mean Squared Error (MSE): ", mse))
## [1] "Mean Squared Error (MSE): 3515.18060247854"
print(paste("Mean Absolute Error (MAE): ", mae))
## [1] "Mean Absolute Error (MAE): 26.7148500999942"
print(paste("Root Mean Squared Error (RMSE): ", rmse))
## [1] "Root Mean Squared Error (RMSE): 59.288958520778"
print(paste("R-squared: ", r_squared))
## [1] "R-squared: 0.0230286612368626"
print(paste("Adjusted R-squared: ", adjusted_r_squared))
## [1] "Adjusted R-squared: 0.00393475885061223"
if (mse == 0) {
  print("The model perfectly predicts the 'deprimido' variable.")
} else {
  print(paste("En promedio, las predicciones del modelo están equivocadas por aproximadamente", round(sqrt(mse), 2), "unidades en la escala de 'deprimido.'"))
}
## [1] "En promedio, las predicciones del modelo están equivocadas por aproximadamente 59.29 unidades en la escala de 'deprimido.'"
# Verdicts: RMSE below 10 counts as low; R-squared above 0.5 counts as high.
if (rmse < 10) {
  cat("El RMSE es bajo, sugiriendo que el modelo tiene un buen ajuste.\n")
} else {
  cat("El RMSE es alto, indicando que el modelo puede tener limitaciones.\n")
}
## El RMSE es alto, indicando que el modelo puede tener limitaciones.
if (r_squared > 0.5) {
  cat("El R-squared es alto, indicando una buena proporción de varianza explicada por el modelo.\n")
} else {
  cat("El R-squared is bajo, lo que sugiere que el modelo puede no capturar gran parte de la varianza en los datos.\n")
}
## El R-squared is bajo, lo que sugiere que el modelo puede no capturar gran parte de la varianza en los datos.
if (adjusted_r_squared > 0.5) {
  cat("El Adjusted R-squared es alto, lo cual es favorable al considerar el número de predictores.\n")
} else {
  cat("El Adjusted R-squared es bajo, sugiriendo que predictores adicionales pueden mejorar el modelo.\n")
}
## El Adjusted R-squared es bajo, sugiriendo que predictores adicionales pueden mejorar el modelo.
# Predictive model - VIOLENCIA
# Fits a linear model on 80% of the rows, predicts the remaining 20% and
# prints the evaluation metrics with a short verdict for each.
# Fixes: the RMSE is now the square root of the MSE (it was the square root of
# the MAE), and the report prints and tests this model's own metrics (mae2,
# rmse2, r_squared2, adjusted_r_squared2) instead of the values left over from
# the DEPRIMIDO model.
print("Modelo Predictivo - VIOLENCIA")
## [1] "Modelo Predictivo - VIOLENCIA"
# Keep the modelling columns and drop any row with a missing value.
df_cleaned <- variables[, c("deprimido", "edad_meses", "escolaridad", "estrato", "soledad", "soledadmenor", "violencia", "crecimiento")]
df_cleaned <- df_cleaned[complete.cases(df_cleaned), ]
# Reproducible 80/20 split (seq_len() is safe for an empty data frame and
# floor() makes the truncation of the training-set size explicit).
set.seed(123)
split_index <- sample(seq_len(nrow(df_cleaned)), floor(0.8 * nrow(df_cleaned)))
train_data <- df_cleaned[split_index, ]
test_data <- df_cleaned[-split_index, ]
# Fit on the training rows and predict the held-out rows.
lm_model2 <- lm(violencia ~ edad_meses + escolaridad + estrato + deprimido + crecimiento + soledad + soledadmenor, data = train_data)
predictions2 <- predict(lm_model2, newdata = test_data)
# Error metrics are computed on the test rows; both R-squared values come
# from the fit on the training rows.
mse <- mean((test_data$violencia - predictions2)^2)
r_squared2 <- summary(lm_model2)$r.squared
adjusted_r_squared2 <- summary(lm_model2)$adj.r.squared
mae2 <- mean(abs(test_data$violencia - predictions2))
rmse2 <- sqrt(mse)
print("Model Evaluation Metrics:")
## [1] "Model Evaluation Metrics:"
print(paste("Mean Squared Error (MSE): ", mse))
## [1] "Mean Squared Error (MSE): 55.7045582737328"
print(paste("Mean Absolute Error (MAE): ", mae2))
## (recorded value omitted: the earlier run printed the DEPRIMIDO model's MAE here; re-run to regenerate)
print(paste("Root Mean Squared Error (RMSE): ", rmse2))
## (recorded value omitted: the earlier run printed the DEPRIMIDO model's RMSE here; the value is about 7.46, re-run to regenerate)
print(paste("R-squared: ", r_squared2))
## [1] "R-squared: 0.0472922481856356"
print(paste("Adjusted R-squared: ", adjusted_r_squared2))
## [1] "Adjusted R-squared: 0.0223031924003408"
if (mse == 0) {
  print("The model perfectly predicts the 'violencia' variable.")
} else {
  print(paste("En promedio, las predicciones del modelo están equivocadas por aproximadamente", round(sqrt(mse), 2), "unidades en la escala de 'violencia'"))
}
## [1] "En promedio, las predicciones del modelo están equivocadas por aproximadamente 7.46 unidades en la escala de 'violencia'"
# Verdicts: RMSE below 10 counts as low; R-squared above 0.5 counts as high.
if (rmse2 < 10) {
  cat("El RMSE es bajo, sugiriendo que el modelo tiene un buen ajuste.\n")
} else {
  cat("El RMSE es alto, indicando que el modelo puede tener limitaciones.\n")
}
## El RMSE es bajo, sugiriendo que el modelo tiene un buen ajuste.
if (r_squared2 > 0.5) {
  cat("El R-squared es alto, indicando una buena proporción de varianza explicada por el modelo.\n")
} else {
  cat("El R-squared is bajo, lo que sugiere que el modelo puede no capturar gran parte de la varianza en los datos.\n")
}
## El R-squared is bajo, lo que sugiere que el modelo puede no capturar gran parte de la varianza en los datos.
if (adjusted_r_squared2 > 0.5) {
  cat("El Adjusted R-squared es alto, lo cual es favorable al considerar el número de predictores.\n")
} else {
  cat("El Adjusted R-squared es bajo, sugiriendo que predictores adicionales pueden mejorar el modelo.\n")
}
## El Adjusted R-squared es bajo, sugiriendo que predictores adicionales pueden mejorar el modelo.
# Predictive model - CRECIMIENTO
# Fits a linear model on 80% of the rows, predicts the remaining 20% and
# prints the evaluation metrics with a short verdict for each.
# Fixes: the RMSE is now the square root of the MSE (it was the square root of
# the MAE), the report prints and tests this model's own metrics (mae3, rmse3,
# r_squared3, adjusted_r_squared3) instead of the values left over from the
# DEPRIMIDO model, and the messages name 'crecimiento' rather than 'violencia'.
print("Modelo Predictivo - CRECIMIENTO")
## [1] "Modelo Predictivo - CRECIMIENTO"
# Keep the modelling columns and drop any row with a missing value.
df_cleaned <- variables[, c("deprimido", "edad_meses", "escolaridad", "estrato", "soledad", "soledadmenor", "violencia", "crecimiento")]
df_cleaned <- df_cleaned[complete.cases(df_cleaned), ]
# Reproducible 80/20 split (seq_len() is safe for an empty data frame and
# floor() makes the truncation of the training-set size explicit).
set.seed(123)
split_index <- sample(seq_len(nrow(df_cleaned)), floor(0.8 * nrow(df_cleaned)))
train_data <- df_cleaned[split_index, ]
test_data <- df_cleaned[-split_index, ]
# Fit on the training rows and predict the held-out rows.
lm_model3 <- lm(crecimiento ~ edad_meses + escolaridad + estrato + deprimido + violencia + soledad + soledadmenor, data = train_data)
predictions3 <- predict(lm_model3, newdata = test_data)
# Error metrics are computed on the test rows; both R-squared values come
# from the fit on the training rows.
mse <- mean((test_data$crecimiento - predictions3)^2)
r_squared3 <- summary(lm_model3)$r.squared
adjusted_r_squared3 <- summary(lm_model3)$adj.r.squared
mae3 <- mean(abs(test_data$crecimiento - predictions3))
rmse3 <- sqrt(mse)
print("Model Evaluation Metrics:")
## [1] "Model Evaluation Metrics:"
print(paste("Mean Squared Error (MSE): ", mse))
## [1] "Mean Squared Error (MSE): 16.5057489199769"
print(paste("Mean Absolute Error (MAE): ", mae3))
## (recorded value omitted: the earlier run printed the DEPRIMIDO model's MAE here; re-run to regenerate)
print(paste("Root Mean Squared Error (RMSE): ", rmse3))
## (recorded value omitted: the earlier run printed the DEPRIMIDO model's RMSE here; the value is about 4.06, re-run to regenerate)
print(paste("R-squared: ", r_squared3))
## [1] "R-squared: 0.0620773525638316"
print(paste("Adjusted R-squared: ", adjusted_r_squared3))
## [1] "Adjusted R-squared: 0.0374761027950142"
if (mse == 0) {
  print("The model perfectly predicts the 'crecimiento' variable.")
} else {
  print(paste("En promedio, las predicciones del modelo están equivocadas por aproximadamente", round(sqrt(mse), 2), "unidades en la escala de 'crecimiento'"))
}
## [1] "En promedio, las predicciones del modelo están equivocadas por aproximadamente 4.06 unidades en la escala de 'crecimiento'"
# Verdicts: RMSE below 10 counts as low; R-squared above 0.5 counts as high.
if (rmse3 < 10) {
  cat("El RMSE es bajo, sugiriendo que el modelo tiene un buen ajuste.\n")
} else {
  cat("El RMSE es alto, indicando que el modelo puede tener limitaciones.\n")
}
## El RMSE es bajo, sugiriendo que el modelo tiene un buen ajuste.
if (r_squared3 > 0.5) {
  cat("El R-squared es alto, indicando una buena proporción de varianza explicada por el modelo.\n")
} else {
  cat("El R-squared is bajo, lo que sugiere que el modelo puede no capturar gran parte de la varianza en los datos.\n")
}
## El R-squared is bajo, lo que sugiere que el modelo puede no capturar gran parte de la varianza en los datos.
if (adjusted_r_squared3 > 0.5) {
  cat("El Adjusted R-squared es alto, lo cual es favorable al considerar el número de predictores.\n")
} else {
  cat("El Adjusted R-squared es bajo, sugiriendo que predictores adicionales pueden mejorar el modelo.\n")
}
## El Adjusted R-squared es bajo, sugiriendo que predictores adicionales pueden mejorar el modelo.