title: “Suelos supresivos” author: “Juan Guillermo Narvaez Diaz” date: ‘2022-06-04’ output: html_document —
#Ejercicio Practico Suelos supresivos ##librerias Utilizadas
library(grid)
library(readr)
library(ggplot2)
library(vcd)
library(rpart)
library(rpart.plot)
datos<- read.csv("D:\\Escritorio\\enfermedades R\\DatosSS.csv")
elementos<- data.frame(pH=datos$pH, Densidad=datos$densidad, Fosforo=datos$fosforo, Nitrogeno=datos$nitrogeno, Carbono=datos$carbono, DHR=datos$DHR,Enfermedad=as.factor(datos$enfermedad))
head(datos)
## pH densidad fosforo nitrogeno carbono DHR enfermedad
## 1 7.243010 2.509729 4.046360 295.2965 9.101403 88.00022 0
## 2 7.097866 2.528358 3.967870 155.9847 10.137367 87.98722 0
## 3 7.117176 2.607729 3.982230 203.8881 8.531144 87.93810 0
## 4 7.078885 2.602025 3.908889 268.2733 8.636144 87.81867 0
## 5 7.156829 2.593926 3.848988 245.4495 8.253107 89.60307 0
## 6 6.976956 2.659834 3.923082 187.4674 7.104908 87.85850 0
ggplot(data=elementos,aes(x=Enfermedad,y=Nitrogeno,color=Enfermedad)) +
geom_boxplot(outlier.shape=NA)+
geom_jitter(width=0.1)+
theme_bw()+
theme(legend.position = "null")
modelo<-glm(Enfermedad ~ Nitrogeno, data = elementos, family = "binomial")
summary(modelo)
##
## Call:
## glm(formula = Enfermedad ~ Nitrogeno, family = "binomial", data = elementos)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.81976 -1.10809 0.06811 1.07785 1.64567
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.220584 0.698887 -4.608 4.06e-06 ***
## Nitrogeno 0.013950 0.002975 4.688 2.75e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 415.89 on 299 degrees of freedom
## Residual deviance: 391.34 on 298 degrees of freedom
## AIC: 395.34
##
## Number of Fisher Scoring iterations: 4
confint(object = modelo, level = 0.95)
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) -4.633869342 -1.88668995
## Nitrogeno 0.008267479 0.01996195
elementos$Enfermedad<-as.character(elementos$Enfermedad)
elementos$Enfermedad<- as.numeric(elementos$Enfermedad)
plot(Enfermedad ~ Nitrogeno , elementos, col = "darkblue",
main = "Modelo regresión logística",
ylab = "P(Enfermedad=1|Nitrogeno)",
xlab = "Nitrogeno", pch = "I")
curve(predict(modelo, data.frame(Nitrogeno= x), type = "response"),
col = "firebrick", lwd = 2.5, add = TRUE)
anova(modelo,test = "Chisq")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: Enfermedad
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 299 415.89
## Nitrogeno 1 24.549 298 391.34 7.246e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
predicciones <- ifelse(test = modelo$fitted.values > 0.5, yes = 1, no = 0)
matriz_confusion <- table(modelo$model$Enfermedad, predicciones,
dnn = c("observaciones", "predicciones"))
matriz_confusion
## predicciones
## observaciones 0 1
## 0 85 65
## 1 59 91
mosaic(matriz_confusion, shade = T, colorize = T,
gp = gpar(fill = matrix(c("green3", "red2", "red2", "green3"), 2, 2)))
###Porcentaje de prediccion /frac{85+91}{85+65+59+91}=0.58(58%) ##Modelo
Logistico de Carbono
ggplot(data=elementos,aes(x=as.factor(Enfermedad),y=Carbono,color=as.factor(Enfermedad))) +
geom_boxplot(outlier.shape=NA)+
geom_jitter(width=0.1)+
theme_bw()+
theme(legend.position = "null")
modelo2<-glm(Enfermedad ~ Carbono, data=elementos, family="binomial")
summary(modelo2)
##
## Call:
## glm(formula = Enfermedad ~ Carbono, family = "binomial", data = elementos)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.63477 -1.09419 -0.01015 1.09476 1.91133
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.7312 0.9834 -4.811 1.50e-06 ***
## Carbono 0.5162 0.1066 4.844 1.27e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 415.89 on 299 degrees of freedom
## Residual deviance: 389.04 on 298 degrees of freedom
## AIC: 393.04
##
## Number of Fisher Scoring iterations: 4
confint(object = modelo2,level = 0.95)
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) -6.7238643 -2.8593441
## Carbono 0.3134614 0.7322024
elementos$Enfermedad <- as.character(elementos$Enfermedad)
elementos$Enfermedad <- as.numeric(elementos$Enfermedad)
plot(Enfermedad ~ Carbono, elementos, col = "darkblue",
main = "Modelo regresión logística",
ylab = "P(enfermedad=1|Carbono)",
xlab = "Carbono", pch = "I")
curve(predict(modelo2, data.frame(Carbono = x), type = "response"),
col = "firebrick", lwd = 2.5, add = TRUE)
anova(modelo2, test = "Chisq")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: Enfermedad
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 299 415.89
## Carbono 1 26.852 298 389.04 2.197e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
predicciones <- ifelse(test = modelo2$fitted.values > 0.5, yes = 1, no = 0)
matriz_confusion <- table(modelo2$model$Enfermedad, predicciones,
dnn = c("observaciones", "predicciones"))
matriz_confusion
## predicciones
## observaciones 0 1
## 0 97 53
## 1 55 95
mosaic(matriz_confusion, shade = T, colorize = T,
gp = gpar(fill = matrix(c("green3", "red2", "red2", "green3"), 2, 2)))
### Procentaje de prediccion /fac{97+95}{55+53+95+97}=0.64(64%) ##Modelo
logistico para la variable DHR
ggplot(data=elementos,aes(x=as.factor(Enfermedad),y=DHR,color=as.factor(Enfermedad))) +
geom_boxplot(outlier.shape=NA)+
geom_jitter(width=0.1)+
theme_bw()+
theme(legend.position = "null")
modelo3<-glm(Enfermedad ~ DHR, data=elementos, family="binomial")
summary(modelo3)
##
## Call:
## glm(formula = Enfermedad ~ DHR, family = "binomial", data = elementos)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.67639 -0.71663 0.07877 0.75924 1.77660
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 195.5844 23.1383 8.453 <2e-16 ***
## DHR -2.2369 0.2648 -8.448 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 415.89 on 299 degrees of freedom
## Residual deviance: 278.37 on 298 degrees of freedom
## AIC: 282.37
##
## Number of Fisher Scoring iterations: 5
confint(object = modelo3, level = 0.95 )
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) 153.015271 243.987286
## DHR -2.790738 -1.749762
elementos$Enfermedad <- as.character(elementos$Enfermedad)
elementos$Enfermedad <- as.numeric(elementos$Enfermedad)
plot(Enfermedad ~ DHR, elementos, col = "darkblue",
main = "Modelo regresión logística",
ylab = "P(enfermedad=1|DHR)",
xlab = "DHR", pch = "I")
curve(predict(modelo3, data.frame(DHR = x), type = "response"),
col = "firebrick", lwd = 2.5, add = TRUE)
anova(modelo3, test = "Chisq")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: Enfermedad
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 299 415.89
## DHR 1 137.51 298 278.37 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
predicciones <- ifelse(test = modelo3$fitted.values > 0.5, yes = 1, no = 0)
matriz_confusion <- table(modelo3$model$Enfermedad, predicciones,
dnn = c("observaciones", "predicciones"))
matriz_confusion
## predicciones
## observaciones 0 1
## 0 116 34
## 1 30 120
mosaic(matriz_confusion, shade = T, colorize = T,
gp = gpar(fill = matrix(c("green3", "red2", "red2", "green3"), 2, 2)))
###Procentaje de prediccion /frac{116+120}{116+120+34+30}=0.79(79%)