Data Filtering Criteria for Tandem A in 2023-2024 Harvest
Season.
Libraries
library(dplyr)
library(pastecs)
library(ggplot2)
library(lares)
Scatter Plot Function
#' Scatter plot of two columns with a linear fit and their correlation
#'
#' @param label_x Name (character scalar) of the column drawn on the x axis.
#' @param label_y Name (character scalar) of the column drawn on the y axis.
#' @param dataset Data frame that contains both columns.
#' @return A ggplot object titled "<label_y> vrs. <label_x>" whose subtitle
#'   shows the correlation between the two columns rounded to 2 decimals.
mapa_dispersion <- function(label_x, label_y, dataset) {
  # Fail early with a readable message instead of an obscure cor() error.
  missing_cols <- setdiff(c(label_x, label_y), names(dataset))
  if (length(missing_cols) > 0) {
    stop(
      "Column(s) not found in dataset: ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Skip incomplete pairs so that a single NA does not turn the subtitle
  # into "NA"; ggplot likewise drops incomplete rows when drawing.
  correlacion <- round(
    cor(
      dataset[[label_x]],
      dataset[[label_y]],
      use = "pairwise.complete.obs"
    ),
    2
  )

  ggplot(dataset, aes(.data[[label_x]], .data[[label_y]])) +
    geom_point(
      color = "orange",
      fill = "#69b3a2",
      shape = 21,
      alpha = 0.5,
      size = 6,
      stroke = 2
    ) +
    # Linear regression line with its confidence band
    geom_smooth(method = "lm", color = "#990000", fill = "#FFCF00", se = TRUE) +
    ggtitle(
      paste(label_y, "vrs.", label_x),
      subtitle = paste("Correlation: ", correlacion)
    ) +
    xlab(label_x) +
    ylab(label_y)
}
Mill 1 TA Dataset
# Load the Mill 1 TA measurements from the working directory and show them
df_TA <- read.csv("Mill_1_TA.csv")
df_TA
Density Function: Mill 1 TA Speed (rpm)
# Distribution plot of the speed column
distr(df_TA, "ST55M101")

# Notched boxplot of the speed column
ggplot(df_TA, aes(y = ST55M101)) +
  geom_boxplot(
    # box: translucent blue with a notch
    notch = TRUE,
    notchwidth = 0.8,
    fill = "blue",
    color = "blue",
    alpha = 0.2,
    # outliers: large red points
    outlier.size = 3,
    outlier.fill = "red",
    outlier.colour = "red"
  ) +
  # no x variable is mapped; the discrete x scale presumably hides numeric ticks
  scale_x_discrete() +
  labs(title = "Boxplot", x = "", y = "ST55M101 (rpm)")

# Descriptive statistics of the speed column, one statistic per row
data.frame(Estadistica = stat.desc(df_TA[["ST55M101"]]))
NA
Density Function: Mill 1 TA Torque (N*m)
# Distribution plot of the torque column
distr(df_TA, "TQ55M101")

# Notched boxplot of the torque column
ggplot(df_TA, aes(y = TQ55M101)) +
  geom_boxplot(
    # box: translucent blue with a notch
    notch = TRUE,
    notchwidth = 0.8,
    fill = "blue",
    color = "blue",
    alpha = 0.2,
    # outliers: large red points
    outlier.size = 3,
    outlier.fill = "red",
    outlier.colour = "red"
  ) +
  # no x variable is mapped; the discrete x scale presumably hides numeric ticks
  scale_x_discrete() +
  labs(title = "Boxplot", x = "", y = "TQ55M101 (N*m)")

# Descriptive statistics of the torque column, one statistic per row
data.frame(Estadistica = stat.desc(df_TA[["TQ55M101"]]))
NA
Density Function: Mill 1 TA Power (kW)
# Distribution plot of the power column
distr(df_TA, "JT55M101")

# Notched boxplot of the power column
ggplot(df_TA, aes(y = JT55M101)) +
  geom_boxplot(
    # box: translucent blue with a notch
    notch = TRUE,
    notchwidth = 0.8,
    fill = "blue",
    color = "blue",
    alpha = 0.2,
    # outliers: large red points
    outlier.size = 3,
    outlier.fill = "red",
    outlier.colour = "red"
  ) +
  # no x variable is mapped; the discrete x scale presumably hides numeric ticks
  scale_x_discrete() +
  labs(title = "Boxplot", x = "", y = "JT55M101 (kW)")

# Descriptive statistics of the power column, one statistic per row
data.frame(Estadistica = stat.desc(df_TA[["JT55M101"]]))
NA
Density Function: Mill 1 TA Level (%)
# Distribution plot of the level column
distr(df_TA, "LT55M101")

# Notched boxplot of the level column
ggplot(df_TA, aes(y = LT55M101)) +
  geom_boxplot(
    # box: translucent blue with a notch
    notch = TRUE,
    notchwidth = 0.8,
    fill = "blue",
    color = "blue",
    alpha = 0.2,
    # outliers: large red points
    outlier.size = 3,
    outlier.fill = "red",
    outlier.colour = "red"
  ) +
  # no x variable is mapped; the discrete x scale presumably hides numeric ticks
  scale_x_discrete() +
  labs(title = "Boxplot", x = "", y = "LT55M101 (%)")

# Descriptive statistics of the level column, one statistic per row
data.frame(Estadistica = stat.desc(df_TA[["LT55M101"]]))
NA
Density Function: TA Bagasse Mass Flow (t/h)
# Distribution plot of the bagasse mass flow column
distr(df_TA, "WT555801")

# Notched boxplot of the bagasse mass flow column
ggplot(df_TA, aes(y = WT555801)) +
  geom_boxplot(
    # box: translucent blue with a notch
    notch = TRUE,
    notchwidth = 0.8,
    fill = "blue",
    color = "blue",
    alpha = 0.2,
    # outliers: large red points
    outlier.size = 3,
    outlier.fill = "red",
    outlier.colour = "red"
  ) +
  # no x variable is mapped; the discrete x scale presumably hides numeric ticks
  scale_x_discrete() +
  labs(title = "Boxplot", x = "", y = "WT555801 (t/h)")

# Descriptive statistics of the bagasse mass flow column, one statistic per row
data.frame(Estadistica = stat.desc(df_TA[["WT555801"]]))
NA
Filtering Criteria:
We first need to exclude any intentional or planned downtime of TA.
This can be done by filtering the left tail of the Torque (TQ55M101) and
Speed (ST55M101) distributions of Mill 1 TA, since the mill stops
completely.
We also need to filter out any unintentional or unplanned downtime of
TA. This can be done by filtering the left tail of the bagasse mass flow
of TA. This works because, during unplanned downtime, the first mill
usually does not stop (since the duration of the stop is uncertain).
We can also estimate the sugar cane crush rate from the bagasse mass flow
coming out of the tandem (WT555801).
Scatter Plots and Correlations
Torque and Bagasse Relation Mill 1 TA
# Torque (y axis) against bagasse mass flow (x axis) on the unfiltered data
label_x <- "WT555801"
label_y <- "TQ55M101"
mapa_dispersion(label_x = label_x, label_y = label_y, dataset = df_TA)

Torque and Bagasse Mass Flow are correlated, but not perfectly. So we
need to filter out observations based on these variables
individually.
Torque and Power Relation Mill 1 TA
# Torque (y axis) against mill power (x axis) on the unfiltered data
label_x <- "JT55M101"
label_y <- "TQ55M101"
mapa_dispersion(label_x = label_x, label_y = label_y, dataset = df_TA)

Torque and Mill Power are strongly correlated, almost perfectly. So
there is no need to filter out observations based on these variables
individually. We choose Torque since it contains information about Power
and Speed simultaneously.
WT555801 Quantiles
# Minimum, quartiles and maximum of the bagasse mass flow column
res <- quantile(df_TA[["WT555801"]], probs = seq(0, 1, by = 0.25))
res
0% 25% 50% 75% 100%
21.56174 132.91532 147.77292 158.52945 183.46936
TQ55M101 Quantiles
# Minimum, quartiles and maximum of the torque column
res <- quantile(df_TA[["TQ55M101"]], probs = seq(0, 1, by = 0.25))
res
0% 25% 50% 75% 100%
226.5511 1869.4912 2092.0420 2283.7543 2579.5171
ST55M101 Quantiles
# Minimum, quartiles and maximum of the speed column
res <- quantile(df_TA[["ST55M101"]], probs = seq(0, 1, by = 0.25))
res
0% 25% 50% 75% 100%
172.5349 702.7378 746.5287 774.9827 999.9883
# Filter step 1: keep rows whose bagasse mass flow is at least 130,
# a round cutoff just below the unfiltered 1st quartile (132.92).
df_TA_filtered <- df_TA[df_TA[["WT555801"]] >= 130, ]
Error in exists(cacheKey, where = .rs.WorkingDataEnv, inherits = FALSE) :
invalid first argument
Error in assign(cacheKey, frame, .rs.CachedDataEnv) :
attempt to use zero-length variable name
# Filter step 2: keep rows whose torque is at least 1800,
# a round cutoff just below the unfiltered 1st quartile (1869.49).
df_TA_filtered <- df_TA_filtered[df_TA_filtered[["TQ55M101"]] >= 1800, ]
Error in exists(cacheKey, where = .rs.WorkingDataEnv, inherits = FALSE) :
invalid first argument
Error in assign(cacheKey, frame, .rs.CachedDataEnv) :
attempt to use zero-length variable name
# Filter step 3: keep rows whose speed is at least 700,
# a round cutoff just below the unfiltered 1st quartile (702.74).
df_TA_filtered <- df_TA_filtered[df_TA_filtered[["ST55M101"]] >= 700, ]
Error in exists(cacheKey, where = .rs.WorkingDataEnv, inherits = FALSE) :
invalid first argument
Error in assign(cacheKey, frame, .rs.CachedDataEnv) :
attempt to use zero-length variable name
# Display the filtered dataset
df_TA_filtered
Torque and Bagasse Relation Mill 1 TA (Filtered
Dataset)
# Torque (y axis) against bagasse mass flow (x axis) on the filtered data
label_x <- "WT555801"
label_y <- "TQ55M101"
mapa_dispersion(label_x = label_x, label_y = label_y, dataset = df_TA_filtered)

Density Function: TA Bagasse Mass Flow (t/h) Filtered
Distribution
# Distribution plot of the bagasse mass flow column after filtering
distr(df_TA_filtered, "WT555801")

# Notched boxplot of the filtered bagasse mass flow column
ggplot(df_TA_filtered, aes(y = WT555801)) +
  geom_boxplot(
    # box: translucent blue with a notch
    notch = TRUE,
    notchwidth = 0.8,
    fill = "blue",
    color = "blue",
    alpha = 0.2,
    # outliers: large red points
    outlier.size = 3,
    outlier.fill = "red",
    outlier.colour = "red"
  ) +
  # no x variable is mapped; the discrete x scale presumably hides numeric ticks
  scale_x_discrete() +
  labs(title = "Boxplot", x = "", y = "WT555801 (t/h)")

# Descriptive statistics of the filtered bagasse mass flow column
data.frame(Estadistica = stat.desc(df_TA_filtered[["WT555801"]]))
NA
Density Function: Mill 1 TA Torque (N*m) Filtered Distribution
# Distribution plot of the torque column after filtering
distr(df_TA_filtered, "TQ55M101")

# Notched boxplot of the filtered torque column.
# Fix: the plot previously mapped WT555801 (bagasse mass flow) while being
# labelled as torque; it now maps TQ55M101. The axis unit also matches the
# unfiltered torque boxplot (N*m).
ggplot(df_TA_filtered, aes(y = TQ55M101)) +
  geom_boxplot(
    # custom boxes
    color = "blue",
    fill = "blue",
    alpha = 0.2,
    # Notch
    notch = TRUE,
    notchwidth = 0.8,
    # custom outliers
    outlier.colour = "red",
    outlier.fill = "red",
    outlier.size = 3
  ) +
  # no x variable is mapped; the discrete x scale presumably hides numeric ticks
  scale_x_discrete() +
  labs(title = "Boxplot", x = "", y = "TQ55M101 (N*m)")

# Descriptive statistics of the filtered torque column, one statistic per row
data.frame(Estadistica = stat.desc(df_TA_filtered$TQ55M101))
NA
Density Function: Mill 1 TA Speed (rpm) Filtered Distribution
# Distribution plot of the speed column after filtering
distr(df_TA_filtered, "ST55M101")

# Notched boxplot of the filtered speed column
ggplot(df_TA_filtered, aes(y = ST55M101)) +
  geom_boxplot(
    # box: translucent blue with a notch
    notch = TRUE,
    notchwidth = 0.8,
    fill = "blue",
    color = "blue",
    alpha = 0.2,
    # outliers: large red points
    outlier.size = 3,
    outlier.fill = "red",
    outlier.colour = "red"
  ) +
  # no x variable is mapped; the discrete x scale presumably hides numeric ticks
  scale_x_discrete() +
  labs(title = "Boxplot", x = "", y = "ST55M101 (rpm)")

# Descriptive statistics of the filtered speed column, one statistic per row
data.frame(Estadistica = stat.desc(df_TA_filtered[["ST55M101"]]))
NA
Filter Criteria
Filter out any points for which:
- WT555801 < 130
- TQ55M101 < 1800
- ST55M101 < 700
