Se realizó un análisis horario de los valores de Pol, Brix y Pureza
reportados por el Core Sampler.
El objetivo es verificar la hipótesis de que, durante cierto rango de
horas del día (específicamente, las primeras horas), los valores de Pol,
Brix y Pureza tienden a ser más altos que en el resto de las horas.
library(ggplot2)
library(lubridate)
library(dplyr)
Conjunto de Datos Muestras Core Sampler
Este conjunto de datos se obtuvo de la base de datos Legacy de
Laboratorio
# Read the Core Sampler export and turn the Fecha column (day/month/year text)
# into Date values so it can be compared against date cut-offs.
# NOTE(review): the path is absolute and user-specific; the script only runs
# on a machine where this exact file exists.
dataset <- read.csv(
  file = "C:/Users/100346/OneDrive - Pantaleon. S.A/DataScience Pantaleon/R/Core Sampler/Datos_Core_Z2025.csv"
) %>%
  mutate(Fecha = as.Date(Fecha, format = "%d/%m/%Y"))
dataset
Verificación de Rangos
Se examinan los promedios de Pol, Brix y Pureza para los siguientes
rangos horarios:
Rango 1: 0 a 7 horas
Rango 2: 8 a 15 horas
Rango 3: 16 a 23 horas

Y, para cada rango, se analizan distintos horizontes temporales:
Horizonte 1: 23 NOV al 30 NOV
Horizonte 2: 23 NOV al 15 DIC
Horizonte 3: 23 NOV al 16 ENE
# Summarise one slice of the data by sampling hour: mean Pol, Brix and Pureza
# (NA values ignored) plus the number of rows recorded at that hour.
# `datos` must contain the columns hora, Pol, Brix and Pureza.
resumen_por_hora <- function(datos) {
  datos %>%
    group_by(hora) %>%
    summarise(
      Pol = mean(Pol, na.rm = TRUE),
      Brix = mean(Brix, na.rm = TRUE),
      Pureza = mean(Pureza, na.rm = TRUE),
      ensayos = n()
    )
}

# Cumulative time horizons. The report describes them as running *through*
# 30 NOV, 15 DIC and 16 ENE, so the end date is included (`<=`); a strict `<`
# would silently drop the last day of each horizon.
# The cut-offs are built with as.Date() so the comparison does not depend on
# implicit character-to-Date coercion.
# NOTE(review): no lower bound is applied; this assumes the dataset itself
# starts on 23 NOV -- confirm against the export.
df1 <- dataset %>% filter(Fecha <= as.Date("2024-11-30"))
df1_dist <- resumen_por_hora(df1)

df2 <- dataset %>% filter(Fecha <= as.Date("2024-12-15"))
df2_dist <- resumen_por_hora(df2)

df3 <- dataset %>% filter(Fecha <= as.Date("2025-01-16"))
df3_dist <- resumen_por_hora(df3)
Promedios Horizonte 1: 23 NOV al 30 NOV
# Horizon 1 averages, computed from df1_dist for each 8-hour range.
# Each value is the unweighted mean of the hourly means, so every hour counts
# equally regardless of how many samples were taken in it.
NOV30_Primero <- df1_dist %>%
  filter(hora %in% 0:7) %>%
  summarise(
    Pol_Promedio = mean(Pol, na.rm = TRUE),
    Brix_Promedio = mean(Brix, na.rm = TRUE),
    Pureza_Promedio = mean(Pureza, na.rm = TRUE)
  )
NOV30_Segundo <- df1_dist %>%
  filter(hora %in% 8:15) %>%
  summarise(
    Pol_Promedio = mean(Pol, na.rm = TRUE),
    Brix_Promedio = mean(Brix, na.rm = TRUE),
    Pureza_Promedio = mean(Pureza, na.rm = TRUE)
  )
NOV30_Tercero <- df1_dist %>%
  filter(hora %in% 16:23) %>%
  summarise(
    Pol_Promedio = mean(Pol, na.rm = TRUE),
    Brix_Promedio = mean(Brix, na.rm = TRUE),
    Pureza_Promedio = mean(Pureza, na.rm = TRUE)
  )

# Comparison table: one row per variable, one column per hour range.
# check.names = FALSE keeps the column headers exactly as written; by default
# data.frame() rewrites them into syntactic names such as "de.0.a.7.horas".
df1_Pol <- data.frame(
  "Variable" = c("Pol", "Brix", "Pureza"),
  "de 0 a 7 horas" = c(
    NOV30_Primero$Pol_Promedio,
    NOV30_Primero$Brix_Promedio,
    NOV30_Primero$Pureza_Promedio
  ),
  "de 8 a 15 horas" = c(
    NOV30_Segundo$Pol_Promedio,
    NOV30_Segundo$Brix_Promedio,
    NOV30_Segundo$Pureza_Promedio
  ),
  "de 16 a 23 horas" = c(
    NOV30_Tercero$Pol_Promedio,
    NOV30_Tercero$Brix_Promedio,
    NOV30_Tercero$Pureza_Promedio
  ),
  check.names = FALSE
)
df1_Pol
Promedios Horizonte 2: 23 NOV al 15 DIC
# Horizon 2 averages, computed from df2_dist for each 8-hour range.
# Each value is the unweighted mean of the hourly means, so every hour counts
# equally regardless of how many samples were taken in it.
DIC15_Primero <- df2_dist %>%
  filter(hora %in% 0:7) %>%
  summarise(
    Pol_Promedio = mean(Pol, na.rm = TRUE),
    Brix_Promedio = mean(Brix, na.rm = TRUE),
    Pureza_Promedio = mean(Pureza, na.rm = TRUE)
  )
DIC15_Segundo <- df2_dist %>%
  filter(hora %in% 8:15) %>%
  summarise(
    Pol_Promedio = mean(Pol, na.rm = TRUE),
    Brix_Promedio = mean(Brix, na.rm = TRUE),
    Pureza_Promedio = mean(Pureza, na.rm = TRUE)
  )
DIC15_Tercero <- df2_dist %>%
  filter(hora %in% 16:23) %>%
  summarise(
    Pol_Promedio = mean(Pol, na.rm = TRUE),
    Brix_Promedio = mean(Brix, na.rm = TRUE),
    Pureza_Promedio = mean(Pureza, na.rm = TRUE)
  )

# Comparison table: one row per variable, one column per hour range.
# check.names = FALSE keeps the column headers exactly as written; by default
# data.frame() rewrites them into syntactic names such as "de.0.a.7.horas".
df2_Pol <- data.frame(
  "Variable" = c("Pol", "Brix", "Pureza"),
  "de 0 a 7 horas" = c(
    DIC15_Primero$Pol_Promedio,
    DIC15_Primero$Brix_Promedio,
    DIC15_Primero$Pureza_Promedio
  ),
  "de 8 a 15 horas" = c(
    DIC15_Segundo$Pol_Promedio,
    DIC15_Segundo$Brix_Promedio,
    DIC15_Segundo$Pureza_Promedio
  ),
  "de 16 a 23 horas" = c(
    DIC15_Tercero$Pol_Promedio,
    DIC15_Tercero$Brix_Promedio,
    DIC15_Tercero$Pureza_Promedio
  ),
  check.names = FALSE
)
df2_Pol
Promedios Horizonte 3: 23 NOV al 16 ENE
# Horizon 3 averages, computed from df3_dist for each 8-hour range.
# Each value is the unweighted mean of the hourly means, so every hour counts
# equally regardless of how many samples were taken in it.
ENE16_Primero <- df3_dist %>%
  filter(hora %in% 0:7) %>%
  summarise(
    Pol_Promedio = mean(Pol, na.rm = TRUE),
    Brix_Promedio = mean(Brix, na.rm = TRUE),
    Pureza_Promedio = mean(Pureza, na.rm = TRUE)
  )
ENE16_Segundo <- df3_dist %>%
  filter(hora %in% 8:15) %>%
  summarise(
    Pol_Promedio = mean(Pol, na.rm = TRUE),
    Brix_Promedio = mean(Brix, na.rm = TRUE),
    Pureza_Promedio = mean(Pureza, na.rm = TRUE)
  )
ENE16_Tercero <- df3_dist %>%
  filter(hora %in% 16:23) %>%
  summarise(
    Pol_Promedio = mean(Pol, na.rm = TRUE),
    Brix_Promedio = mean(Brix, na.rm = TRUE),
    Pureza_Promedio = mean(Pureza, na.rm = TRUE)
  )

# Comparison table: one row per variable, one column per hour range.
# check.names = FALSE keeps the column headers exactly as written; by default
# data.frame() rewrites them into syntactic names such as "de.0.a.7.horas".
df3_Pol <- data.frame(
  "Variable" = c("Pol", "Brix", "Pureza"),
  "de 0 a 7 horas" = c(
    ENE16_Primero$Pol_Promedio,
    ENE16_Primero$Brix_Promedio,
    ENE16_Primero$Pureza_Promedio
  ),
  "de 8 a 15 horas" = c(
    ENE16_Segundo$Pol_Promedio,
    ENE16_Segundo$Brix_Promedio,
    ENE16_Segundo$Pureza_Promedio
  ),
  "de 16 a 23 horas" = c(
    ENE16_Tercero$Pol_Promedio,
    ENE16_Tercero$Brix_Promedio,
    ENE16_Tercero$Pureza_Promedio
  ),
  check.names = FALSE
)
df3_Pol
Número de muestreos por rango de horas
# Sample counts per sampling hour, one layer per horizon. The widest horizon
# (df3_dist) is drawn first and the narrower ones are painted on top of it
# with add = TRUE, so each bar shows how the count accumulated over time.
# NOTE(review): bars are placed by row position, not by hour value; the layers
# only line up if the three summaries contain the same set of hours -- confirm.
barplot(
  height = df3_dist$ensayos,
  names.arg = df3_dist$hora,
  col = "light green",
  xlab = "Hora Muestreo",
  ylab = "Acumulado Muestras",
  cex.axis = 0.5,
  cex.names = 0.5
)
barplot(
  height = df2_dist$ensayos,
  names.arg = df2_dist$hora,
  col = "green",
  cex.axis = 0.5,
  cex.names = 0.5,
  add = TRUE
)
barplot(
  height = df1_dist$ensayos,
  names.arg = df1_dist$hora,
  col = "dark green",
  cex.axis = 0.5,
  cex.names = 0.5,
  add = TRUE
)
legend(
  "top",
  title = "Período",
  legend = c("al 30-Nov", "al 15-Dic", "al 16-Ene"),
  fill = c("dark green", "green", "light green"),
  box.lty = 0
)

# Hourly summary over the full dataset (no date cut-off): mean Pol, Brix,
# Pureza and Rendimiento with NA values ignored, plus the row count per hour.
df_dist <- dataset %>%
  group_by(hora) %>%
  summarise(
    Pol = mean(Pol, na.rm = TRUE),
    Brix = mean(Brix, na.rm = TRUE),
    Pureza = mean(Pureza, na.rm = TRUE),
    Rendimiento = mean(Rendimiento, na.rm = TRUE),
    ensayos = n()
  )

# Mean Pol by sampling hour, one bar per hour.
barplot(
  height = df_dist$Pol,
  names.arg = df_dist$hora,
  col = "darkorange",
  xlab = "Hora Muestreo",
  ylab = "Pol",
  cex.axis = 0.5,
  cex.names = 0.5
)
title(main = "Distribución Horaria de Pol")

# Mean Brix by sampling hour (from df_dist), one bar per hour.
barplot(
  height = df_dist$Brix,
  names.arg = df_dist$hora,
  col = "darkgreen",
  xlab = "Hora Muestreo",
  ylab = "Brix",
  cex.axis = 0.5,
  cex.names = 0.5
)
title(main = "Distribución Horaria de Brix")

# Mean Pureza by sampling hour (from df_dist), one bar per hour.
barplot(
  height = df_dist$Pureza,
  names.arg = df_dist$hora,
  col = "blue",
  xlab = "Hora Muestreo",
  ylab = "Pureza",
  cex.axis = 0.5,
  cex.names = 0.5
)
title(main = "Distribución Horaria de Pureza")

# Mean Rendimiento by sampling hour (from df_dist), one bar per hour.
barplot(
  height = df_dist$Rendimiento,
  names.arg = df_dist$hora,
  col = "darkred",
  xlab = "Hora Muestreo",
  ylab = "Rendimiento",
  cex.axis = 0.5,
  cex.names = 0.5
)
title(main = "Distribución Horaria de Rendimiento Core")

- No existe evidencia de que la Pol, el Brix, la Pureza o el
Rendimiento del Core Sampler presenten diferencias significativas a lo
largo del día.
- No existe evidencia de que la distribución del registro de
muestras a lo largo del día esté sesgada.
