Maximizar el rendimiento agrícola recomendando cultivos apropiados

La agricultura de precisión está de moda hoy en día. Ayuda a los agricultores a tomar decisiones informadas sobre su estrategia agrícola. Aquí les presento un conjunto de datos que permitirá a los usuarios crear un modelo predictivo para recomendar los cultivos más adecuados para una explotación agrícola específica, basándose en diversos parámetros.

Estructura de la informacion

Campos de datos:

N- Relación del contenido de nitrógeno en el suelo P- Relación del contenido de fósforo en el suelo K- Relación del contenido de potasio en el suelo Temperature- temperatura en grados Celsius Humidity- humedad relativa en % pH- valor de ph del suelo Rainfall- precipitación en mm

# Cargar librerías necesarias
library(knitr)
library(dplyr)
library(readr)

# Leer datos
datac <- read_csv("Crop_recommendation.csv")

# Ver estructura (opcional)
str(datac)
## spc_tbl_ [2,200 × 8] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ N          : num [1:2200] 90 85 60 74 78 69 69 94 89 68 ...
##  $ P          : num [1:2200] 42 58 55 35 42 37 55 53 54 58 ...
##  $ K          : num [1:2200] 43 41 44 40 42 42 38 40 38 38 ...
##  $ temperature: num [1:2200] 20.9 21.8 23 26.5 20.1 ...
##  $ humidity   : num [1:2200] 82 80.3 82.3 80.2 81.6 ...
##  $ ph         : num [1:2200] 6.5 7.04 7.84 6.98 7.63 ...
##  $ rainfall   : num [1:2200] 203 227 264 243 263 ...
##  $ label      : chr [1:2200] "rice" "rice" "rice" "rice" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   N = col_double(),
##   ..   P = col_double(),
##   ..   K = col_double(),
##   ..   temperature = col_double(),
##   ..   humidity = col_double(),
##   ..   ph = col_double(),
##   ..   rainfall = col_double(),
##   ..   label = col_character()
##   .. )
##  - attr(*, "problems")=<externalptr>
# Calcular resumen estadístico solo para variables numéricas
summary_df <- datac %>%
  select(where(is.numeric)) %>%
  summary() %>%
  as.data.frame.matrix() %>%
  tibble::rownames_to_column("Estadística")

# Mostrar la tabla
kable(summary_df, caption = "Resumen estadístico de las variables numéricas")
Resumen estadístico de las variables numéricas
Estadística N P K temperature humidity ph rainfall
X Min. : 0.00 Min. : 5.00 Min. : 5.00 Min. : 8.826 Min. :14.26 Min. :3.505 Min. : 20.21
X.1 1st Qu.: 21.00 1st Qu.: 28.00 1st Qu.: 20.00 1st Qu.:22.769 1st Qu.:60.26 1st Qu.:5.972 1st Qu.: 64.55
X.2 Median : 37.00 Median : 51.00 Median : 32.00 Median :25.599 Median :80.47 Median :6.425 Median : 94.87
X.3 Mean : 50.55 Mean : 53.36 Mean : 48.15 Mean :25.616 Mean :71.48 Mean :6.469 Mean :103.46
X.4 3rd Qu.: 84.25 3rd Qu.: 68.00 3rd Qu.: 49.00 3rd Qu.:28.562 3rd Qu.:89.95 3rd Qu.:6.924 3rd Qu.:124.27
X.5 Max. :140.00 Max. :145.00 Max. :205.00 Max. :43.675 Max. :99.98 Max. :9.935 Max. :298.56
freq_label <- datac %>%
  count(label) %>%
  arrange(desc(n))

kable(freq_label, col.names = c("Cultivo", "Frecuencia"), caption = "Frecuencia de cultivos en el conjunto de datos")
Frecuencia de cultivos en el conjunto de datos
Cultivo Frecuencia
apple 100
banana 100
blackgram 100
chickpea 100
coconut 100
coffee 100
cotton 100
grapes 100
jute 100
kidneybeans 100
lentil 100
maize 100
mango 100
mothbeans 100
mungbean 100
muskmelon 100
orange 100
papaya 100
pigeonpeas 100
pomegranate 100
rice 100
watermelon 100
ggplot(freq_label, aes(x = reorder(label, -n), y = n)) +
  geom_bar(stat = "identity", fill = "#2E86C1") +
  labs(title = "Frecuencia de cultivos", x = "Cultivo", y = "Frecuencia") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

library(ggplot2)
library(gridExtra)

library(dplyr)

# Obtener nombres de variables numéricas
num_vars <- datac %>% select(where(is.numeric)) %>% names()

# Crear lista de boxplots
boxplots <- lapply(num_vars, function(var) {
  ggplot(datac, aes_string(x = "label", y = var, fill = "label")) +
    geom_boxplot(show.legend = FALSE, alpha = 0.7) +
    labs(title = paste("Boxplot de", var, "por cultivo"), x = "Cultivo", y = var) +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
})

# Mostrar los boxplots en una cuadrícula de 2 columnas
do.call(grid.arrange, c(boxplots, ncol = 4))

##PCA (Análisis de Componentes Principales)

En la matriz de correlaciones la componente principal que explica mejor las varaibles de NPK es la CP1,las variables de temperatura, precipitacion y humedad no estan representadas.

library(stats)

data_scaled <- scale(datac[, 1:7])

#PCA

pca <- prcomp(data_scaled, center = TRUE, scale. = TRUE)
names(pca)
## [1] "sdev"     "rotation" "center"   "scale"    "x"
head(pca$rotation)
##                     PC1         PC2       PC3         PC4         PC5
## N           -0.30219096  0.33410693 0.1120450 -0.54165059  0.50778466
## P            0.64378667  0.03435809 0.1099391 -0.04629318 -0.08233115
## K            0.62260719  0.28382920 0.1631733 -0.15486709 -0.03342452
## temperature -0.21242839  0.35948683 0.2482280  0.69082649 -0.15486542
## humidity    -0.06848339  0.73791663 0.2135991 -0.06717140 -0.12887133
## ph          -0.22694272 -0.22065738 0.5485203 -0.39570047 -0.65188053
##                     PC6          PC7
## N            0.48290443  0.008472888
## P            0.37684700  0.649104376
## K            0.02896707 -0.692268474
## temperature  0.50041798 -0.111281619
## humidity    -0.54787098  0.289624027
## ph           0.12571195 -0.040027859
dim(pca$rotation)
## [1] 7 7
pca$sdev^2
## [1] 1.9312182 1.2939102 1.0765093 1.0228912 0.8059284 0.6765616 0.1929812
summary(pca)
## Importance of components:
##                           PC1    PC2    PC3    PC4    PC5     PC6     PC7
## Standard deviation     1.3897 1.1375 1.0375 1.0114 0.8977 0.82253 0.43930
## Proportion of Variance 0.2759 0.1848 0.1538 0.1461 0.1151 0.09665 0.02757
## Cumulative Proportion  0.2759 0.4607 0.6145 0.7607 0.8758 0.97243 1.00000
#comprobar la importancia del componente 1


datac1 <- datac


compx1 <- pca$x
compx1 <- as.data.frame(compx1)
datac1$PC1 <- compx1$PC1
datac1$PC2 <- compx1$PC2
datac_cor <- subset(datac1, select = -c(label))
head(datac_cor)
## # A tibble: 6 × 9
##       N     P     K temperature humidity    ph rainfall    PC1   PC2
##   <dbl> <dbl> <dbl>       <dbl>    <dbl> <dbl>    <dbl>  <dbl> <dbl>
## 1    90    42    43        20.9     82.0  6.50     203. -0.583 0.844
## 2    85    58    41        21.8     80.3  7.04     227. -0.475 0.785
## 3    60    55    44        23.0     82.3  7.84     264. -0.634 0.694
## 4    74    35    40        26.5     80.2  6.98     243. -1.05  1.09 
## 5    78    42    42        20.1     81.6  7.63     263. -0.873 0.659
## 6    69    37    42        23.1     83.4  7.07     251. -0.847 0.935
cor(datac_cor)
##                       N           P           K temperature     humidity
## N            1.00000000 -0.23145958 -0.14051184  0.02650380  0.190688379
## P           -0.23145958  1.00000000  0.73623222 -0.12754113 -0.118734116
## K           -0.14051184  0.73623222  1.00000000 -0.16038713  0.190858861
## temperature  0.02650380 -0.12754113 -0.16038713  1.00000000  0.205319677
## humidity     0.19068838 -0.11873412  0.19085886  0.20531968  1.000000000
## ph           0.09668285 -0.13801889 -0.16950310 -0.01779502 -0.008482539
## rainfall     0.05902022 -0.06383905 -0.05346135 -0.03008378  0.094423053
## PC1         -0.41994957  0.89465925  0.86522649 -0.29520807 -0.095170191
## PC2          0.38004721  0.03908239  0.32285620  0.40891688  0.839381437
##                       ph    rainfall           PC1           PC2
## N            0.096682846  0.05902022 -4.199496e-01  3.800472e-01
## P           -0.138018893 -0.06383905  8.946592e-01  3.908239e-02
## K           -0.169503098 -0.05346135  8.652265e-01  3.228562e-01
## temperature -0.017795017 -0.03008378 -2.952081e-01  4.089169e-01
## humidity    -0.008482539  0.09442305 -9.517019e-02  8.393814e-01
## ph           1.000000000 -0.10906948 -3.153784e-01 -2.509981e-01
## rainfall    -0.109069484  1.00000000 -1.007959e-01  3.300552e-01
## PC1         -0.315378382 -0.10079595  1.000000e+00  5.627413e-16
## PC2         -0.250998148  0.33005522  5.627413e-16  1.000000e+00
str(datac1)
## spc_tbl_ [2,200 × 10] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ N          : num [1:2200] 90 85 60 74 78 69 69 94 89 68 ...
##  $ P          : num [1:2200] 42 58 55 35 42 37 55 53 54 58 ...
##  $ K          : num [1:2200] 43 41 44 40 42 42 38 40 38 38 ...
##  $ temperature: num [1:2200] 20.9 21.8 23 26.5 20.1 ...
##  $ humidity   : num [1:2200] 82 80.3 82.3 80.2 81.6 ...
##  $ ph         : num [1:2200] 6.5 7.04 7.84 6.98 7.63 ...
##  $ rainfall   : num [1:2200] 203 227 264 243 263 ...
##  $ label      : chr [1:2200] "rice" "rice" "rice" "rice" ...
##  $ PC1        : num [1:2200] -0.583 -0.475 -0.634 -1.048 -0.873 ...
##  $ PC2        : num [1:2200] 0.844 0.785 0.694 1.087 0.659 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   N = col_double(),
##   ..   P = col_double(),
##   ..   K = col_double(),
##   ..   temperature = col_double(),
##   ..   humidity = col_double(),
##   ..   ph = col_double(),
##   ..   rainfall = col_double(),
##   ..   label = col_character()
##   .. )
##  - attr(*, "problems")=<externalptr>

#2 PCA en grafica y extraccion de la informacion

library(FactoMineR)
library(factoextra)

pcagr <- PCA(X = data_scaled, scale.unit = FALSE, ncp = 7, graph = FALSE)

##Distribución de individuos por coseno cuadrado y de las variables

fviz_pca_ind(pcagr, col.ind = "cos2", gradient.cols= c("#00AFBB", "#E7B800" , "#FC4E07"  ), repel = FALSE, graph = FALSE)

fviz_pca_var(pcagr, col.var = "cos2", geom.var = "arrow" , labelsize = 2 , repel = TRUE, graph = FALSE)

##Biplot

biplot(x = pca, scale = 0, cex = 0.7, col = c ("blue4", "brown3"))

##Modelo de entrenamiento y de test.

##Grafico de correlacion

index <- sample(1:nrow(datac), size = 0.7 * nrow(datac))
training_set <- datac[index, ]
testing_set <- datac[-index, ]

library(psych)
pairs.panels(training_set[ , -which(names(training_set) == "label")],
             gap = 0,
             bg = c("red", "yellow", "blue", "green", "orange")[as.factor(training_set$label)],
             pch=21)

##PCA aplicado a las variables

pc <- prcomp(training_set[ , -which(names(training_set) == "label")],
             center = TRUE,
             scale. = TRUE)
summary(pc)
## Importance of components:
##                           PC1    PC2    PC3    PC4    PC5     PC6    PC7
## Standard deviation     1.3996 1.1418 1.0369 1.0006 0.9003 0.81627 0.4291
## Proportion of Variance 0.2798 0.1862 0.1536 0.1430 0.1158 0.09518 0.0263
## Cumulative Proportion  0.2798 0.4661 0.6197 0.7627 0.8785 0.97370 1.0000

##Biplot del PCA (componente 1 y 2)

library(ggbiplot)
bipdata <- ggbiplot(pc,
              obs.scale = 1,
              var.scale = 1,
              groups = training_set$cultivo,
              ellipse = TRUE,
              circle = TRUE,
              ellipse.prob = 0.68) +
     scale_color_discrete(name = '') +
     theme(legend.direction = 'horizontal',
           legend.position = 'top')
print(bipdata)

###Anova para cada variable numerica

# Realizar ANOVA para cada variable numérica
variables_numericas <- c("N", "P", "K", "temperature", "humidity", "ph", "rainfall")

# Aplicar ANOVA y mostrar resultados
for (var in variables_numericas) {
  formula <- as.formula(paste(var, "~ label"))
  modelo <- aov(formula, data = datac)
  cat("\n\n--- ANOVA para", var, "---\n")
  print(summary(modelo))
}
## 
## 
## --- ANOVA para N ---
##               Df  Sum Sq Mean Sq F value Pr(>F)    
## label         21 2686561  127931   897.6 <2e-16 ***
## Residuals   2178  310433     143                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## --- ANOVA para P ---
##               Df  Sum Sq Mean Sq F value Pr(>F)    
## label         21 2267923  107996    1886 <2e-16 ***
## Residuals   2178  124740      57                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## --- ANOVA para K ---
##               Df  Sum Sq Mean Sq F value Pr(>F)    
## label         21 5619506  267596   27238 <2e-16 ***
## Residuals   2178   21397      10                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## --- ANOVA para temperature ---
##               Df Sum Sq Mean Sq F value Pr(>F)    
## label         21  27984    1333   102.2 <2e-16 ***
## Residuals   2178  28402      13                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## --- ANOVA para humidity ---
##               Df  Sum Sq Mean Sq F value Pr(>F)    
## label         21 1054749   50226    3104 <2e-16 ***
## Residuals   2178   35246      16                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## --- ANOVA para ph ---
##               Df Sum Sq Mean Sq F value Pr(>F)    
## label         21  484.5  23.070   60.34 <2e-16 ***
## Residuals   2178  832.7   0.382                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## --- ANOVA para rainfall ---
##               Df  Sum Sq Mean Sq F value Pr(>F)    
## label         21 5670650  270031   605.5 <2e-16 ***
## Residuals   2178  971264     446                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
modeloanova <- aov(N ~ label, data = datac)
tukey <- TukeyHSD(modeloanova)
plot(tukey, las = 1)

Modelo predictivo

library(devtools)
library(magrittr)
trg <- predict(pc, training_set) %>% as.data.frame()
trg$label <- training_set$label

tst <- predict(pc, testing_set) %>% as.data.frame()
tst$label <- testing_set$label

library(nnet)
trg$label <- as.factor(trg$label)
trg$label <- relevel(trg$label, ref = levels(trg$label)[1])


# Modelo multinomial con PC1 y PC2
modelo <- multinom(datac1$label ~ datac1$PC1 + datac1$PC2, datac1 = trg)
## # weights:  88 (63 variable)
## initial  value 6800.293397 
## iter  10 value 4135.434734
## iter  20 value 3890.702543
## iter  30 value 3660.993176
## iter  40 value 3338.047015
## iter  50 value 2991.402434
## iter  60 value 2941.785238
## iter  70 value 2912.490009
## iter  80 value 2878.914009
## iter  90 value 2870.823220
## iter 100 value 2859.041959
## final  value 2859.041959 
## stopped after 100 iterations
summary(modelo)
## Call:
## multinom(formula = datac1$label ~ datac1$PC1 + datac1$PC2, datac1 = trg)
## 
## Coefficients:
##             (Intercept) datac1$PC1 datac1$PC2
## banana        40.727238 -12.058426  -2.815998
## blackgram     49.713056 -25.190985 -25.523564
## chickpea       7.201289 -13.209071 -46.452630
## coconut       42.160176 -27.104980  -7.418631
## coffee        41.854712 -36.512966 -22.273522
## cotton        44.065073 -34.086528 -19.595812
## grapes        35.154403  -7.359129  -5.925505
## jute          47.004752 -25.678755 -11.833694
## kidneybeans   23.109845 -16.402953 -41.029263
## lentil        46.673791 -21.716308 -29.407807
## maize         49.601029 -26.613083 -24.621287
## mango         48.979361 -26.766745 -25.803705
## mothbeans     46.447966 -24.626284 -29.519902
## mungbean      50.545643 -25.733333 -22.322170
## muskmelon     37.526910 -33.388167  -8.938524
## orange        44.235649 -34.407060 -23.586795
## papaya        41.031339 -20.698092  -3.440360
## pigeonpeas    50.116822 -20.179157 -25.106984
## pomegranate   50.689055 -25.592215 -20.866910
## rice          42.659030 -24.018044  -5.937387
## watermelon    43.640741 -33.032157 -16.320874
## 
## Std. Errors:
##             (Intercept) datac1$PC1 datac1$PC2
## banana         6.291892   2.383462  3.4438080
## blackgram      6.344314   2.750023  3.6615933
## chickpea       8.599361   3.311182  4.5338686
## coconut        6.356990   2.730750  3.5551995
## coffee         6.377912   2.783081  3.6561107
## cotton         6.367177   2.769606  3.6462471
## grapes         5.190923   1.129208  0.8104685
## jute           6.347830   2.725398  3.5773158
## kidneybeans    8.256244   3.236289  4.4449524
## lentil         6.354444   2.760485  3.6782532
## maize          6.345069   2.750456  3.6594806
## mango          6.345681   2.751834  3.6601627
## mothbeans      6.352529   2.757397  3.6713668
## mungbean       6.343255   2.744109  3.6559485
## muskmelon      6.400201   2.787345  3.5994781
## orange         6.364231   2.766736  3.6557637
## papaya         6.348270   2.657828  3.5253223
## pigeonpeas     6.341493   2.747993  3.6763123
## pomegranate    6.342893   2.739012  3.6497814
## rice           6.348779   2.703728  3.5393187
## watermelon     6.368762   2.769795  3.6301405
## 
## Residual Deviance: 5718.084 
## AIC: 5844.084

Matriz de confusion.

library(nnet)

pred_train <- predict(modelo, newdata = datac1)

# Matriz de confusión
tab_train <- table(pred_train, datac1$label)
print(tab_train)
##              
## pred_train    apple banana blackgram chickpea coconut coffee cotton grapes jute
##   apple          86      0         0        0       0      0      0     19    0
##   banana          0     92         0        0       0      0      0      0    0
##   blackgram       0      0        29        0       0      0      0      0    0
##   chickpea        0      0         0       80       0      0      0      0    0
##   coconut         0      0         0        0      30      0      1      0   14
##   coffee          0      0         0        0       0     46     30      0    0
##   cotton          0      0         0        0       0     17     27      0    1
##   grapes         14      0         0        0       0      0      0     81    0
##   jute            0      0         0        0      20      0      7      0   43
##   kidneybeans     0      0         0       20       0      0      0      0    0
##   lentil          0      0         3        0       0      0      0      0    0
##   maize           0      0        14        0       0      0      0      0    0
##   mango           0      0        21        0       0      1      0      0    0
##   mothbeans       0      0         5        0       0      0      0      0    0
##   mungbean        0      0         6        0       0      0      0      0    0
##   muskmelon       0      0         0        0      20      2      5      0    3
##   orange          0      0         3        0       0     25      4      0    0
##   papaya          0      7         0        0       0      0      0      0    1
##   pigeonpeas      0      0        15        0       0      0      0      0    0
##   pomegranate     0      0         4        0       0      1      4      0   13
##   rice            0      1         0        0      25      0      0      0   17
##   watermelon      0      0         0        0       5      8     22      0    8
##              
## pred_train    kidneybeans lentil maize mango mothbeans mungbean muskmelon
##   apple                 0      0     0     0         0        0         0
##   banana                0      0     0     0         0        0         0
##   blackgram             0      2    21    25         5        8         0
##   chickpea             25      0     0     0         0        0         0
##   coconut               0      0     0     0         0        0        17
##   coffee                0      0     0     0         0        0         0
##   cotton                0      0     3     1         0        0         0
##   grapes                0      0     0     0         0        0         0
##   jute                  0      0     0     0         0        0         2
##   kidneybeans          73      0     0     0         1        0         0
##   lentil                2     65     2     0        10        0         0
##   maize                 0      0    11     9         0       13         0
##   mango                 0      1    13    19         3        2         0
##   mothbeans             0     18     8    15        50        0         0
##   mungbean              0      0    17     7         1       30         0
##   muskmelon             0      0     0     0         0        0        65
##   orange                0      0    10    13         7        6         0
##   papaya                0      0     0     0         0        0         0
##   pigeonpeas            0     14     3     6        23        2         0
##   pomegranate           0      0    12     5         0       39         0
##   rice                  0      0     0     0         0        0         0
##   watermelon            0      0     0     0         0        0        16
##              
## pred_train    orange papaya pigeonpeas pomegranate rice watermelon
##   apple            0      0          0           0    0          0
##   banana           0     12          3           0    5          0
##   blackgram        1      0          7           4    0          0
##   chickpea         0      0          0           0    0          0
##   coconut          1      1          0           0   14          0
##   coffee          24      0          0           0    0          4
##   cotton           7      0          0           3    0         21
##   grapes           0      0          0           0    0          0
##   jute             0     11          2           7   15         14
##   kidneybeans      0      0          3           0    0          0
##   lentil           0      0         21           0    0          0
##   maize            4      0          2           8    0          0
##   mango           12      0          1           1    0          0
##   mothbeans        7      0          4           0    0          0
##   mungbean         2      0          5          23    0          0
##   muskmelon        4      0          0           0    8         14
##   orange          30      0          0           7    0          0
##   papaya           0     61          0           0   31          0
##   pigeonpeas       0      0         46           2    0          0
##   pomegranate      1      0          5          45    1          1
##   rice             0     15          1           0   22          0
##   watermelon       7      0          0           0    4         46
# Error en entrenamiento
cat("Error entrenamiento: ", round((1 - sum(diag(tab_train)) / sum(tab_train)) * 100, 2), "%\n")
## Error entrenamiento:  51.05 %
pred_test <- predict(modelo, newdata = datac1)
tab_test <- table(Predicho = pred_test, Real = datac1$label)
print(tab_test)
##              Real
## Predicho      apple banana blackgram chickpea coconut coffee cotton grapes jute
##   apple          86      0         0        0       0      0      0     19    0
##   banana          0     92         0        0       0      0      0      0    0
##   blackgram       0      0        29        0       0      0      0      0    0
##   chickpea        0      0         0       80       0      0      0      0    0
##   coconut         0      0         0        0      30      0      1      0   14
##   coffee          0      0         0        0       0     46     30      0    0
##   cotton          0      0         0        0       0     17     27      0    1
##   grapes         14      0         0        0       0      0      0     81    0
##   jute            0      0         0        0      20      0      7      0   43
##   kidneybeans     0      0         0       20       0      0      0      0    0
##   lentil          0      0         3        0       0      0      0      0    0
##   maize           0      0        14        0       0      0      0      0    0
##   mango           0      0        21        0       0      1      0      0    0
##   mothbeans       0      0         5        0       0      0      0      0    0
##   mungbean        0      0         6        0       0      0      0      0    0
##   muskmelon       0      0         0        0      20      2      5      0    3
##   orange          0      0         3        0       0     25      4      0    0
##   papaya          0      7         0        0       0      0      0      0    1
##   pigeonpeas      0      0        15        0       0      0      0      0    0
##   pomegranate     0      0         4        0       0      1      4      0   13
##   rice            0      1         0        0      25      0      0      0   17
##   watermelon      0      0         0        0       5      8     22      0    8
##              Real
## Predicho      kidneybeans lentil maize mango mothbeans mungbean muskmelon
##   apple                 0      0     0     0         0        0         0
##   banana                0      0     0     0         0        0         0
##   blackgram             0      2    21    25         5        8         0
##   chickpea             25      0     0     0         0        0         0
##   coconut               0      0     0     0         0        0        17
##   coffee                0      0     0     0         0        0         0
##   cotton                0      0     3     1         0        0         0
##   grapes                0      0     0     0         0        0         0
##   jute                  0      0     0     0         0        0         2
##   kidneybeans          73      0     0     0         1        0         0
##   lentil                2     65     2     0        10        0         0
##   maize                 0      0    11     9         0       13         0
##   mango                 0      1    13    19         3        2         0
##   mothbeans             0     18     8    15        50        0         0
##   mungbean              0      0    17     7         1       30         0
##   muskmelon             0      0     0     0         0        0        65
##   orange                0      0    10    13         7        6         0
##   papaya                0      0     0     0         0        0         0
##   pigeonpeas            0     14     3     6        23        2         0
##   pomegranate           0      0    12     5         0       39         0
##   rice                  0      0     0     0         0        0         0
##   watermelon            0      0     0     0         0        0        16
##              Real
## Predicho      orange papaya pigeonpeas pomegranate rice watermelon
##   apple            0      0          0           0    0          0
##   banana           0     12          3           0    5          0
##   blackgram        1      0          7           4    0          0
##   chickpea         0      0          0           0    0          0
##   coconut          1      1          0           0   14          0
##   coffee          24      0          0           0    0          4
##   cotton           7      0          0           3    0         21
##   grapes           0      0          0           0    0          0
##   jute             0     11          2           7   15         14
##   kidneybeans      0      0          3           0    0          0
##   lentil           0      0         21           0    0          0
##   maize            4      0          2           8    0          0
##   mango           12      0          1           1    0          0
##   mothbeans        7      0          4           0    0          0
##   mungbean         2      0          5          23    0          0
##   muskmelon        4      0          0           0    8         14
##   orange          30      0          0           7    0          0
##   papaya           0     61          0           0   31          0
##   pigeonpeas       0      0         46           2    0          0
##   pomegranate      1      0          5          45    1          1
##   rice             0     15          1           0   22          0
##   watermelon       7      0          0           0    4         46
cat("Error test: ", round((1 - sum(diag(tab_test)) / sum(tab_test)) * 100, 2), "%\n")
## Error test:  51.05 %

Randon Forest

library(readr)
library(dplyr)
library(caret)
library(randomForest)
library(nnet)



# Cargar y dividir los datos
set.seed(123)
datac <- read_csv("Crop_recommendation.csv")

# Separar en training (70%) y test (30%)
index <- createDataPartition(datac$label, p = 0.7, list = FALSE)
train1 <- datac[index, ]
test1 <- datac[-index, ]

train1$label <- as.factor(training_set$label)
test1$label <- as.factor(testing_set$label)
# Entrenar
modelo_rf <- randomForest(label ~ ., data = train1, ntree = 300)

# Predecir
pred_rf <- predict(modelo_rf, newdata = test1)

# Matriz de confusión
conf_rf <- confusionMatrix(pred_rf, test1$label)
print(conf_rf)
## Confusion Matrix and Statistics
## 
##              Reference
## Prediction    apple banana blackgram chickpea coconut coffee cotton grapes jute
##   apple           0      0         6        2       0      1      0      1    0
##   banana          0      2         1        1       6      1      4      1    0
##   blackgram       1      1         1        0       1      0      6      1    8
##   chickpea        0      2         1        0       1      3      0      3    2
##   coconut         4      1         2        1       1      0      3      2    5
##   coffee          3      0         2        5       1      0      0      0    1
##   cotton          1      2         2        2       0      1      0      0    0
##   grapes          2      0         2        1       0      3      0      6    5
##   jute            2      1         1        5       0      2      1      0    0
##   kidneybeans     1      5         2        0       0      1      4      0    1
##   lentil          0      4         1        1       0      0      0      1    1
##   maize           4      2         0        1       0      0      0      1    0
##   mango           1      0         3        1       2      0      1      1    0
##   mothbeans       0      0         5        0       2      0      1      2    0
##   mungbean        1      3         0        1       3      0      0      1    0
##   muskmelon       0      0         0        0       0      1      0      2    0
##   orange          1      2         0        0       1      1      4      0    0
##   papaya          1      0         0        2       0      5      1      3    0
##   pigeonpeas      2      3         1        6       4      0      0      0    0
##   pomegranate     0      0         1        4       0      8      0      0    0
##   rice            0      0         3        0       1      0      1      1    1
##   watermelon      1      0         5        0       0      2      1      0    2
##              Reference
## Prediction    kidneybeans lentil maize mango mothbeans mungbean muskmelon
##   apple                 2      1     1     3         1        5         2
##   banana                1      0     0     2         0        1         0
##   blackgram             0      0     1     0         0        2         0
##   chickpea              0      2     1     1         1        1         0
##   coconut               3      0     2     0         0        3         4
##   coffee                2      3     0     0         2        0         7
##   cotton                1      2     0     2         2        0         1
##   grapes                2      0     3     0         0        1         1
##   jute                  4      1     2     3         0        3         1
##   kidneybeans           0      1     3     2         1        0         1
##   lentil                1      0     2     3         1        2         0
##   maize                 1      1     4     1         3        1         8
##   mango                 1      0     3     0         0        6         2
##   mothbeans             3      9     2     5         5        3         1
##   mungbean              4      4     0     6         0        2         0
##   muskmelon             0      1     0     0         0        1         0
##   orange                0      2     0     2         0        0         2
##   papaya                1      3     1     0         6        1         3
##   pigeonpeas            4      0     0     0         0        0         0
##   pomegranate           0      0     3     1         1        0         2
##   rice                  0      4     1     1         5        3         2
##   watermelon            2      1     1     0         1        1         2
##              Reference
## Prediction    orange papaya pigeonpeas pomegranate rice watermelon
##   apple            1      0          2           1    0          0
##   banana           2      3          0           0    0          1
##   blackgram        1      0          0           1    5          0
##   chickpea         1      0          0           1    0          4
##   coconut          2      3          0           0    5          0
##   coffee           2      0          0           1    2          6
##   cotton           1      5          1           1    0          1
##   grapes           0      0          2           0    0          2
##   jute             6      2          0           0    0          0
##   kidneybeans      1      0          2           1    2          0
##   lentil           1      1          4           1    3          1
##   maize            0      1          2           8    0          2
##   mango            0      1          1           2    3          2
##   mothbeans        0      0          0           5    3          1
##   mungbean         1      3          3           1    0          0
##   muskmelon        0      4          2           0    1          0
##   orange           1      0          1           3    2          1
##   papaya           1      0          1           1    2          1
##   pigeonpeas       3      1          3           0    0          1
##   pomegranate      3      0          2           1    0          0
##   rice             4      1          2           3    1          0
##   watermelon       1      2          0           0    0          1
## 
## Overall Statistics
##                                           
##                Accuracy : 0.0424          
##                  95% CI : (0.0284, 0.0607)
##     No Information Rate : 0.0591          
##     P-Value [Acc > NIR] : 0.9758          
##                                           
##                   Kappa : -0.0026         
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: apple Class: banana Class: blackgram
## Sensitivity               0.00000       0.07143         0.025641
## Specificity               0.95433       0.96203         0.954911
## Pos Pred Value            0.00000       0.07692         0.034483
## Neg Pred Value            0.96038       0.95899         0.939778
## Prevalence                0.03788       0.04242         0.059091
## Detection Rate            0.00000       0.00303         0.001515
## Detection Prevalence      0.04394       0.03939         0.043939
## Balanced Accuracy         0.47717       0.51673         0.490276
##                      Class: chickpea Class: coconut Class: coffee Class: cotton
## Sensitivity                  0.00000       0.043478       0.00000       0.00000
## Specificity                  0.96172       0.937206       0.94136       0.96051
## Pos Pred Value               0.00000       0.024390       0.00000       0.00000
## Neg Pred Value               0.94811       0.964459       0.95345       0.95748
## Prevalence                   0.05000       0.034848       0.04394       0.04091
## Detection Rate               0.00000       0.001515       0.00000       0.00000
## Detection Prevalence         0.03636       0.062121       0.05606       0.03788
## Balanced Accuracy            0.48086       0.490342       0.47068       0.48025
##                      Class: grapes Class: jute Class: kidneybeans Class: lentil
## Sensitivity               0.230769     0.00000            0.00000       0.00000
## Specificity               0.962145     0.94637            0.95541       0.95520
## Pos Pred Value            0.200000     0.00000            0.00000       0.00000
## Neg Pred Value            0.968254     0.95847            0.94937       0.94462
## Prevalence                0.039394     0.03939            0.04848       0.05303
## Detection Rate            0.009091     0.00000            0.00000       0.00000
## Detection Prevalence      0.045455     0.05152            0.04242       0.04242
## Balanced Accuracy         0.596457     0.47319            0.47771       0.47760
##                      Class: maize Class: mango Class: mothbeans Class: mungbean
## Sensitivity              0.133333      0.00000         0.172414         0.05556
## Specificity              0.942857      0.95223         0.933439         0.95032
## Pos Pred Value           0.100000      0.00000         0.106383         0.06061
## Neg Pred Value           0.958065      0.94921         0.960848         0.94577
## Prevalence               0.045455      0.04848         0.043939         0.05455
## Detection Rate           0.006061      0.00000         0.007576         0.00303
## Detection Prevalence     0.060606      0.04545         0.071212         0.05000
## Balanced Accuracy        0.538095      0.47611         0.552926         0.50294
##                      Class: muskmelon Class: orange Class: papaya
## Sensitivity                   0.00000      0.031250       0.00000
## Specificity                   0.98068      0.964968       0.94787
## Pos Pred Value                0.00000      0.043478       0.00000
## Neg Pred Value                0.93981      0.951334       0.95694
## Prevalence                    0.05909      0.048485       0.04091
## Detection Rate                0.00000      0.001515       0.00000
## Detection Prevalence          0.01818      0.034848       0.05000
## Balanced Accuracy             0.49034      0.498109       0.47393
##                      Class: pigeonpeas Class: pomegranate Class: rice
## Sensitivity                   0.107143           0.032258    0.034483
## Specificity                   0.960443           0.960254    0.947702
## Pos Pred Value                0.107143           0.038462    0.029412
## Neg Pred Value                0.960443           0.952681    0.955272
## Prevalence                    0.042424           0.046970    0.043939
## Detection Rate                0.004545           0.001515    0.001515
## Detection Prevalence          0.042424           0.039394    0.051515
## Balanced Accuracy             0.533793           0.496256    0.491092
##                      Class: watermelon
## Sensitivity                   0.041667
## Specificity                   0.965409
## Pos Pred Value                0.043478
## Neg Pred Value                0.963893
## Prevalence                    0.036364
## Detection Rate                0.001515
## Detection Prevalence          0.034848
## Balanced Accuracy             0.503538
varImpPlot(modelo_rf)

library(rpart)
library(rpart.plot)



# Crear modelo de árbol
modelo_arbol <- rpart(label ~ ., data = training_set, method = "class")

# Graficar
rpart.plot(modelo_arbol, type = 4, extra = 101, fallen.leaves = TRUE)

##Red Neuronal

library(nnet)
library(caret)

# Función de normalización
normalizar <- function(datac) {
  df_norm <- datac
  df_norm[, 1:7] <- scale(datac[, 1:7])
  return(df_norm)
}

# Normalizar datos
train_n <- normalizar(training_set)
test_n <- normalizar(testing_set)

# Eliminar filas con NA
train_n <- na.omit(train_n)
test_n <- na.omit(test_n)

# Asegurar que label es factor
train_n$label <- as.factor(train_n$label)
test_n$label <- as.factor(test_n$label)

# Entrenar red neuronal
modelo_nnet <- nnet(label ~ ., data = train_n, size = 5, maxit = 500, trace = FALSE)

# Predecir sobre test
pred_nnet <- predict(modelo_nnet, newdata = test_n, type = "class")

# Asegurar que ambos vectores sean factores con mismos niveles
pred_nnet <- factor(pred_nnet, levels = levels(test_n$label))
test_n$label <- factor(test_n$label)

# Matriz de confusión
conf_nnet <- confusionMatrix(pred_nnet, test_n$label)
print(conf_nnet)
## Confusion Matrix and Statistics
## 
##              Reference
## Prediction    apple banana blackgram chickpea coconut coffee cotton grapes jute
##   apple          25      0         0        0       0      0      0      0    0
##   banana          0     25         0        0       0      0      0      0    0
##   blackgram       0      0        39        0       0      0      0      0    0
##   chickpea        0      0         0       33       0      0      0      0    0
##   coconut         0      0         0        0      20      0      0      0    1
##   coffee          0      0         0        0       0     29      0      0    0
##   cotton          0      3         0        0       0      0     27      0    0
##   grapes          0      0         0        0       0      0      0     26    0
##   jute            0      0         0        0       0      0      0      0   15
##   kidneybeans     0      0         0        0       0      0      0      0    0
##   lentil          0      0         0        0       0      0      0      0    0
##   maize           0      0         0        0       0      0      0      0    0
##   mango           0      0         0        0       0      0      0      0    0
##   mothbeans       0      0         0        0       0      0      0      0    0
##   mungbean        0      0         0        0       0      0      0      0    0
##   muskmelon       0      0         0        0       0      0      0      0    0
##   orange          0      0         0        0       0      0      0      0    0
##   papaya          0      0         0        0       3      0      0      0    0
##   pigeonpeas      0      0         0        0       0      0      0      0    0
##   pomegranate     0      0         0        0       0      0      0      0    0
##   rice            0      0         0        0       0      0      0      0   10
##   watermelon      0      0         0        0       0      0      0      0    0
##              Reference
## Prediction    kidneybeans lentil maize mango mothbeans mungbean muskmelon
##   apple                 0      0     0     0         0        0         1
##   banana                0      0     0     0         0        0         0
##   blackgram             0      4     0     0         1        1         0
##   chickpea              0      0     0     0         0        0         0
##   coconut               0      0     0     0         0        0         0
##   coffee                0      0     3     0         0        0         0
##   cotton                0      0     1     0         0        0         0
##   grapes                0      0     0     0         0        0         0
##   jute                  0      0     0     0         0        0         0
##   kidneybeans          26      0     0     0         0        0         0
##   lentil                0     29     0     0         4        0         0
##   maize                 0      0    26     0         0        0         0
##   mango                 0      0     0    31         1        0         0
##   mothbeans             0      2     0     0        23        0         0
##   mungbean              0      0     0     0         0       35         0
##   muskmelon             0      0     0     0         0        0        33
##   orange                0      0     0     0         0        0         0
##   papaya                0      0     0     0         0        0         1
##   pigeonpeas            6      0     0     0         0        0         0
##   pomegranate           0      0     0     0         0        0         0
##   rice                  0      0     0     0         0        0         0
##   watermelon            0      0     0     1         0        0         4
##              Reference
## Prediction    orange papaya pigeonpeas pomegranate rice watermelon
##   apple            0      0          0           0    0          0
##   banana           0      0          0           0    0          0
##   blackgram        0      0          0           0    0          0
##   chickpea         0      0          0           0    0          0
##   coconut          0      0          0           1    0          0
##   coffee           0      0          1           0    0          0
##   cotton           0      0          0           0    0          0
##   grapes           0      0          0           0    0          0
##   jute             0      0          0           0    0          0
##   kidneybeans      0      0          0           0    0          0
##   lentil           0      0          0           0    0          0
##   maize            0      0          0           0    0          0
##   mango            0      0          0           0    0          0
##   mothbeans        0      0          0           0    0          0
##   mungbean         0      0          0           0    0          0
##   muskmelon        0      1          0           0    0          0
##   orange          32      0          0           0    0          0
##   papaya           0     26          0           1    0          0
##   pigeonpeas       0      0         27           0    0          0
##   pomegranate      0      0          0          28    0          0
##   rice             0      0          0           0   29          0
##   watermelon       0      0          0           1    0         24
## 
## Overall Statistics
##                                          
##                Accuracy : 0.9212         
##                  95% CI : (0.898, 0.9406)
##     No Information Rate : 0.0591         
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.9174         
##                                          
##  Mcnemar's Test P-Value : NA             
## 
## Statistics by Class:
## 
##                      Class: apple Class: banana Class: blackgram
## Sensitivity               1.00000       0.89286          1.00000
## Specificity               0.99843       1.00000          0.99034
## Pos Pred Value            0.96154       1.00000          0.86667
## Neg Pred Value            1.00000       0.99528          1.00000
## Prevalence                0.03788       0.04242          0.05909
## Detection Rate            0.03788       0.03788          0.05909
## Detection Prevalence      0.03939       0.03788          0.06818
## Balanced Accuracy         0.99921       0.94643          0.99517
##                      Class: chickpea Class: coconut Class: coffee Class: cotton
## Sensitivity                     1.00        0.86957       1.00000       1.00000
## Specificity                     1.00        0.99686       0.99366       0.99368
## Pos Pred Value                  1.00        0.90909       0.87879       0.87097
## Neg Pred Value                  1.00        0.99530       1.00000       1.00000
## Prevalence                      0.05        0.03485       0.04394       0.04091
## Detection Rate                  0.05        0.03030       0.04394       0.04091
## Detection Prevalence            0.05        0.03333       0.05000       0.04697
## Balanced Accuracy               1.00        0.93321       0.99683       0.99684
##                      Class: grapes Class: jute Class: kidneybeans Class: lentil
## Sensitivity                1.00000     0.57692            0.81250       0.82857
## Specificity                1.00000     1.00000            1.00000       0.99360
## Pos Pred Value             1.00000     1.00000            1.00000       0.87879
## Neg Pred Value             1.00000     0.98295            0.99054       0.99043
## Prevalence                 0.03939     0.03939            0.04848       0.05303
## Detection Rate             0.03939     0.02273            0.03939       0.04394
## Detection Prevalence       0.03939     0.02273            0.03939       0.05000
## Balanced Accuracy          1.00000     0.78846            0.90625       0.91109
##                      Class: maize Class: mango Class: mothbeans Class: mungbean
## Sensitivity               0.86667      0.96875          0.79310         0.97222
## Specificity               1.00000      0.99841          0.99683         1.00000
## Pos Pred Value            1.00000      0.96875          0.92000         1.00000
## Neg Pred Value            0.99369      0.99841          0.99055         0.99840
## Prevalence                0.04545      0.04848          0.04394         0.05455
## Detection Rate            0.03939      0.04697          0.03485         0.05303
## Detection Prevalence      0.03939      0.04848          0.03788         0.05303
## Balanced Accuracy         0.93333      0.98358          0.89497         0.98611
##                      Class: muskmelon Class: orange Class: papaya
## Sensitivity                   0.84615       1.00000       0.96296
## Specificity                   0.99839       1.00000       0.99210
## Pos Pred Value                0.97059       1.00000       0.83871
## Neg Pred Value                0.99042       1.00000       0.99841
## Prevalence                    0.05909       0.04848       0.04091
## Detection Rate                0.05000       0.04848       0.03939
## Detection Prevalence          0.05152       0.04848       0.04697
## Balanced Accuracy             0.92227       1.00000       0.97753
##                      Class: pigeonpeas Class: pomegranate Class: rice
## Sensitivity                    0.96429            0.90323     1.00000
## Specificity                    0.99051            1.00000     0.98415
## Pos Pred Value                 0.81818            1.00000     0.74359
## Neg Pred Value                 0.99841            0.99525     1.00000
## Prevalence                     0.04242            0.04697     0.04394
## Detection Rate                 0.04091            0.04242     0.04394
## Detection Prevalence           0.05000            0.04242     0.05909
## Balanced Accuracy              0.97740            0.95161     0.99208
##                      Class: watermelon
## Sensitivity                    1.00000
## Specificity                    0.99057
## Pos Pred Value                 0.80000
## Neg Pred Value                 1.00000
## Prevalence                     0.03636
## Detection Rate                 0.03636
## Detection Prevalence           0.04545
## Balanced Accuracy              0.99528

###Comparacion de resultados

cat("Precision RF:", round(conf_rf$overall["Accuracy"]*100, 2), "%\n")
## Precision RF: 4.24 %
cat("Precision NNet:", round(conf_nnet$overall["Accuracy"]*100, 2), "%\n")
## Precision NNet: 92.12 %

###Resultado entre modelos: NNet (Red Neuronal) aprendio muy bien las relaciones entre las variables (N, P, K, temperatura, humedad, pH y lluvia) y el cultivo objetivo.

# Para red neuronal
accuracy_nnet  <- conf_nnet$overall["Accuracy"]
kappa_nnet     <- conf_nnet$overall["Kappa"]
precision_nnet <- conf_nnet$byClass[, "Precision"]


# Para RF
accuracy_rf  <- conf_rf$overall["Accuracy"]
kappa_rf     <- conf_rf$overall["Kappa"]
precision_rf <- conf_rf$byClass[, "Precision"]
resultados <- data.frame(
  Modelo     = c("Red Neuronal", "Random Forest"),
  Accuracy   = c(accuracy_nnet, accuracy_rf),
  Kappa      = c(kappa_nnet, kappa_rf)
)

knitr::kable(resultados, caption = "Comparacion de modelos de clasificacion")
Comparacion de modelos de clasificacion
Modelo Accuracy Kappa
Red Neuronal 0.9212121 0.9173876
Random Forest 0.0424242 -0.0025501