#install.packages("readxl")
#install.packages("dplyr")
#install.packages("ranger")
library(readxl)
## Warning: package 'readxl' was built under R version 4.2.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.2.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ranger)
## Warning: package 'ranger' was built under R version 4.2.3
# Data: read the Excel workbook and give the four columns used below short,
# lower-case names.
df <- rename(
  read_excel("1_4cepas.xlsx"),
  wavelength = Wavelength,
  reflectance = `Reflectance (%)`,
  archivo = Archivo,
  cepa = CEPA
)
# Store CEPA as a factor and collapse its categories into two groups:
# ROS006, ROS010 and ROS011 become "Grupo1"; every other value becomes
# "Grupo2". The grouped column is stored as a factor as well.
df <- df %>%
  mutate(
    cepa = as.factor(cepa),
    cepa_grupo = as.factor(
      if_else(
        cepa %in% c("ROS006", "ROS010", "ROS011"),
        "Grupo1",
        "Grupo2"
      )
    )
  )
# Variables used by the model: the response (reflectance) and the two
# predictors (wavelength, cepa_grupo).
df_model <- df %>%
  select(reflectance, wavelength, cepa_grupo)

# Subsample to reduce the data size. The requested size is capped at the
# number of available rows, because sample_n() without replacement raises an
# error when asked for more rows than the data contains. With 40000 rows or
# more the call is the same as before, and the seed is still set immediately
# before the draw.
set.seed(182)
df_model <- sample_n(df_model, size = min(40000, nrow(df_model)))
# Train a random forest with ranger. The formula `reflectance ~ .` makes
# reflectance the response and every other column of df_model (wavelength and
# cepa_grupo) a predictor; the printed summary below reports a regression
# forest with 2 independent variables.
rf_model <- ranger(
reflectance ~ .,
data = df_model,
# Number of trees grown in the forest.
num.trees = 1000,
# Candidate variables per split; with only two predictors, both are always
# considered.
mtry = 2,
# Record impurity-based variable importance so it can be read back later.
importance = "impurity",
# NOTE(review): per the ranger documentation, "order" orders the levels of
# unordered factors before splitting instead of treating them as nominal --
# confirm against the installed ranger version.
respect.unordered.factors = "order"
)
# Print the fitted-model summary; the `##` lines are console output captured
# from a previous run.
print(rf_model)
## Ranger result
##
## Call:
## ranger(reflectance ~ ., data = df_model, num.trees = 1000, mtry = 2, importance = "impurity", respect.unordered.factors = "order")
##
## Type: Regression
## Number of trees: 1000
## Sample size: 40000
## Number of independent variables: 2
## Mtry: 2
## Target node size: 5
## Variable importance mode: impurity
## Splitrule: variance
## OOB prediction error (MSE): 45.57202
## R squared (OOB): 0.6662542
# Variable importance stored in the fitted forest (impurity mode, as requested
# when the model was trained). `[[` is used for an exact-name lookup.
importance_values <- rf_model[["variable.importance"]]
print(importance_values)
## wavelength cepa_grupo
## 3986445.13 23205.01
# In-sample predictions: the forest is evaluated on the same rows it was
# trained on.
predicciones <- predict(rf_model, data = df_model)$predictions
# Training (resubstitution) RMSE. Because it is computed on the training rows
# it is optimistic: its square (about 39.0) is below the out-of-bag MSE that
# the model summary reports (45.57).
rmse <- sqrt(mean((predicciones - df_model$reflectance)^2))
cat("RMSE del modelo:", rmse, "\n")
## RMSE del modelo: 6.248295
# Out-of-bag RMSE. For a regression forest, ranger stores the out-of-bag mean
# squared error in `prediction.error`; its square root is on the same scale as
# the RMSE above, but each row is scored only by trees that did not see it,
# which makes it the better estimate of error on new data.
rmse_oob <- sqrt(rf_model$prediction.error)
cat("RMSE OOB del modelo:", rmse_oob, "\n")
# Print the model summary a second time. rf_model has not been modified since
# it was printed right after training, so the captured output below repeats
# that earlier summary.
print(rf_model)
## Ranger result
##
## Call:
## ranger(reflectance ~ ., data = df_model, num.trees = 1000, mtry = 2, importance = "impurity", respect.unordered.factors = "order")
##
## Type: Regression
## Number of trees: 1000
## Sample size: 40000
## Number of independent variables: 2
## Mtry: 2
## Target node size: 5
## Variable importance mode: impurity
## Splitrule: variance
## OOB prediction error (MSE): 45.57202
## R squared (OOB): 0.6662542