title: “Suelos supresivos” author: “Juan Guillermo Narvaez Diaz” date: ‘2022-06-04’ output: html_document —

#Ejercicio Practico Suelos supresivos ##librerias Utilizadas

library(grid)
library(readr)
library(ggplot2)
library(vcd)
library(rpart)
library(rpart.plot)

Importacion de datos

 datos<- read.csv("D:\\Escritorio\\enfermedades R\\DatosSS.csv")
elementos<- data.frame(pH=datos$pH, Densidad=datos$densidad, Fosforo=datos$fosforo, Nitrogeno=datos$nitrogeno, Carbono=datos$carbono, DHR=datos$DHR,Enfermedad=as.factor(datos$enfermedad))
head(datos)
##         pH densidad  fosforo nitrogeno   carbono      DHR enfermedad
## 1 7.243010 2.509729 4.046360  295.2965  9.101403 88.00022          0
## 2 7.097866 2.528358 3.967870  155.9847 10.137367 87.98722          0
## 3 7.117176 2.607729 3.982230  203.8881  8.531144 87.93810          0
## 4 7.078885 2.602025 3.908889  268.2733  8.636144 87.81867          0
## 5 7.156829 2.593926 3.848988  245.4495  8.253107 89.60307          0
## 6 6.976956 2.659834 3.923082  187.4674  7.104908 87.85850          0

Modelo Logistico para variable Nitrogeno

ggplot(data=elementos,aes(x=Enfermedad,y=Nitrogeno,color=Enfermedad)) +
  geom_boxplot(outlier.shape=NA)+
  geom_jitter(width=0.1)+
  theme_bw()+
  theme(legend.position = "null")

modelo<-glm(Enfermedad ~ Nitrogeno, data = elementos, family = "binomial")
summary(modelo)
## 
## Call:
## glm(formula = Enfermedad ~ Nitrogeno, family = "binomial", data = elementos)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.81976  -1.10809   0.06811   1.07785   1.64567  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -3.220584   0.698887  -4.608 4.06e-06 ***
## Nitrogeno    0.013950   0.002975   4.688 2.75e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 415.89  on 299  degrees of freedom
## Residual deviance: 391.34  on 298  degrees of freedom
## AIC: 395.34
## 
## Number of Fisher Scoring iterations: 4
confint(object = modelo, level = 0.95)
## Waiting for profiling to be done...
##                    2.5 %      97.5 %
## (Intercept) -4.633869342 -1.88668995
## Nitrogeno    0.008267479  0.01996195
elementos$Enfermedad<-as.character(elementos$Enfermedad)
elementos$Enfermedad<- as.numeric(elementos$Enfermedad)
plot(Enfermedad ~ Nitrogeno , elementos, col = "darkblue",
     main = "Modelo regresión logística",
     ylab = "P(Enfermedad=1|Nitrogeno)",
     xlab = "Nitrogeno", pch = "I")
curve(predict(modelo, data.frame(Nitrogeno= x), type = "response"),
      col = "firebrick", lwd = 2.5, add = TRUE)

anova(modelo,test = "Chisq")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Enfermedad
## 
## Terms added sequentially (first to last)
## 
## 
##           Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                        299     415.89              
## Nitrogeno  1   24.549       298     391.34 7.246e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
predicciones <- ifelse(test = modelo$fitted.values > 0.5, yes = 1, no = 0)
matriz_confusion <- table(modelo$model$Enfermedad, predicciones,
                          dnn = c("observaciones", "predicciones"))
matriz_confusion
##              predicciones
## observaciones  0  1
##             0 85 65
##             1 59 91
mosaic(matriz_confusion, shade = T, colorize = T,
       gp = gpar(fill = matrix(c("green3", "red2", "red2", "green3"), 2, 2)))

###Porcentaje de prediccion /frac{85+91}{85+65+59+91}=0.58(58%) ##Modelo Logistico de Carbono

ggplot(data=elementos,aes(x=as.factor(Enfermedad),y=Carbono,color=as.factor(Enfermedad))) +
  geom_boxplot(outlier.shape=NA)+
  geom_jitter(width=0.1)+
  theme_bw()+
  theme(legend.position = "null")

modelo2<-glm(Enfermedad ~ Carbono, data=elementos, family="binomial")
summary(modelo2)
## 
## Call:
## glm(formula = Enfermedad ~ Carbono, family = "binomial", data = elementos)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.63477  -1.09419  -0.01015   1.09476   1.91133  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -4.7312     0.9834  -4.811 1.50e-06 ***
## Carbono       0.5162     0.1066   4.844 1.27e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 415.89  on 299  degrees of freedom
## Residual deviance: 389.04  on 298  degrees of freedom
## AIC: 393.04
## 
## Number of Fisher Scoring iterations: 4
confint(object = modelo2,level = 0.95)
## Waiting for profiling to be done...
##                  2.5 %     97.5 %
## (Intercept) -6.7238643 -2.8593441
## Carbono      0.3134614  0.7322024
elementos$Enfermedad <- as.character(elementos$Enfermedad)
elementos$Enfermedad <- as.numeric(elementos$Enfermedad)

plot(Enfermedad ~ Carbono, elementos, col = "darkblue",
     main = "Modelo regresión logística",
     ylab = "P(enfermedad=1|Carbono)",
     xlab = "Carbono", pch = "I")

curve(predict(modelo2, data.frame(Carbono = x), type = "response"),
      col = "firebrick", lwd = 2.5, add = TRUE)

anova(modelo2, test = "Chisq")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Enfermedad
## 
## Terms added sequentially (first to last)
## 
## 
##         Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                      299     415.89              
## Carbono  1   26.852       298     389.04 2.197e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
predicciones <- ifelse(test = modelo2$fitted.values > 0.5, yes = 1, no = 0)
matriz_confusion <- table(modelo2$model$Enfermedad, predicciones,
                          dnn = c("observaciones", "predicciones"))
matriz_confusion
##              predicciones
## observaciones  0  1
##             0 97 53
##             1 55 95
mosaic(matriz_confusion, shade = T, colorize = T,
       gp = gpar(fill = matrix(c("green3", "red2", "red2", "green3"), 2, 2)))

### Procentaje de prediccion /fac{97+95}{55+53+95+97}=0.64(64%) ##Modelo logistico para la variable DHR

ggplot(data=elementos,aes(x=as.factor(Enfermedad),y=DHR,color=as.factor(Enfermedad))) +
  geom_boxplot(outlier.shape=NA)+
  geom_jitter(width=0.1)+
  theme_bw()+
  theme(legend.position = "null")

modelo3<-glm(Enfermedad ~ DHR, data=elementos, family="binomial")
summary(modelo3)
## 
## Call:
## glm(formula = Enfermedad ~ DHR, family = "binomial", data = elementos)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -2.67639  -0.71663   0.07877   0.75924   1.77660  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) 195.5844    23.1383   8.453   <2e-16 ***
## DHR          -2.2369     0.2648  -8.448   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 415.89  on 299  degrees of freedom
## Residual deviance: 278.37  on 298  degrees of freedom
## AIC: 282.37
## 
## Number of Fisher Scoring iterations: 5
confint(object = modelo3, level = 0.95 )
## Waiting for profiling to be done...
##                  2.5 %     97.5 %
## (Intercept) 153.015271 243.987286
## DHR          -2.790738  -1.749762
elementos$Enfermedad <- as.character(elementos$Enfermedad)
elementos$Enfermedad <- as.numeric(elementos$Enfermedad)

plot(Enfermedad ~ DHR, elementos, col = "darkblue",
     main = "Modelo regresión logística",
     ylab = "P(enfermedad=1|DHR)",
     xlab = "DHR", pch = "I")

curve(predict(modelo3, data.frame(DHR = x), type = "response"),
      col = "firebrick", lwd = 2.5, add = TRUE)

anova(modelo3, test = "Chisq")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Enfermedad
## 
## Terms added sequentially (first to last)
## 
## 
##      Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                   299     415.89              
## DHR   1   137.51       298     278.37 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
predicciones <- ifelse(test = modelo3$fitted.values > 0.5, yes = 1, no = 0)
matriz_confusion <- table(modelo3$model$Enfermedad, predicciones,
                          dnn = c("observaciones", "predicciones"))
matriz_confusion
##              predicciones
## observaciones   0   1
##             0 116  34
##             1  30 120
mosaic(matriz_confusion, shade = T, colorize = T,
       gp = gpar(fill = matrix(c("green3", "red2", "red2", "green3"), 2, 2)))

###Procentaje de prediccion /frac{116+120}{116+120+34+30}=0.79(79%)