Data Filtering Criteria for Tandem A in 2023-2024 Harvest Season.

Libraries

library(dplyr)
library(pastecs)
library(ggplot2)
library(lares)

Scatter Plot Function

#' Scatter plot of two columns with a linear fit and their correlation.
#'
#' @param label_x Name (string) of the column drawn on the x axis.
#' @param label_y Name (string) of the column drawn on the y axis.
#' @param dataset Data frame containing both columns.
#' @return A ggplot object whose subtitle shows the correlation of the two
#'   columns rounded to 2 decimals.
mapa_dispersion <- function(label_x, label_y, dataset) {
  # Fail early with a readable message; otherwise dataset[[...]] returns NULL
  # for a misspelled column and cor() stops with an unrelated-looking error.
  missing_cols <- setdiff(c(label_x, label_y), names(dataset))
  if (length(missing_cols) > 0) {
    stop(
      "Column(s) not found in dataset: ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # The plot layers drop rows with missing values, so compute the correlation
  # on the complete pairs only; with the default `use`, a single NA would turn
  # the subtitle into NA while the plot still looks valid.
  correlacion <- round(
    cor(dataset[[label_x]], dataset[[label_y]], use = "pairwise.complete.obs"),
    2
  )

  ggplot(dataset, aes(.data[[label_x]], .data[[label_y]])) +
    geom_point(
      color = "orange",
      fill = "#69b3a2",
      shape = 21,
      alpha = 0.5,
      size = 6,
      stroke = 2
    ) +
    # Explicit formula keeps the same linear fit but silences the
    # "using formula" message printed on every call.
    geom_smooth(
      method = "lm", formula = y ~ x,
      color = "#990000", fill = "#FFCF00", se = TRUE
    ) +
    ggtitle(
      paste(label_y, "vrs.", label_x),
      subtitle = paste("Correlation: ", correlacion)
    ) +
    xlab(label_x) +
    ylab(label_y)
}

Mill 1 TA Dataset

df_TA <- read.csv(file = 'Mill_1_TA.csv')

df_TA

Density Function: Mill 1 TA Speed (rpm)


# PDF
distr(df_TA,'ST55M101')


# Boxplot
ggplot(df_TA, aes(y=ST55M101)) + 
    geom_boxplot( 
        # custom boxes
        color="blue",
        fill="blue",
        alpha=0.2,
        
        # Notch
        notch=TRUE,
        notchwidth = 0.8,
        
        # custom outliers
        outlier.colour="red",
        outlier.fill="red",
        outlier.size=3) +
  scale_x_discrete() +
  labs(title="Boxplot",x="", y = "ST55M101 (rpm)")


# Descriptive Statistics
data.frame(Estadistica=stat.desc(df_TA$ST55M101))
NA

Density Function: Mill 1 TA Torque (N*m)


# PDF
distr(df_TA,'TQ55M101')


# Boxplot
ggplot(df_TA, aes(y=TQ55M101)) + 
    geom_boxplot( 
        # custom boxes
        color="blue",
        fill="blue",
        alpha=0.2,
        
        # Notch
        notch=TRUE,
        notchwidth = 0.8,
        
        # custom outliers
        outlier.colour="red",
        outlier.fill="red",
        outlier.size=3) +
  scale_x_discrete() +
  labs(title="Boxplot",x="", y = "TQ55M101 (N*m)")


# Descriptive Statistics
data.frame(Estadistica=stat.desc(df_TA$TQ55M101))
NA

Density Function: Mill 1 TA Power (kW)


# PDF
distr(df_TA,'JT55M101')


# Boxplot
ggplot(df_TA, aes(y=JT55M101)) + 
    geom_boxplot( 
        # custom boxes
        color="blue",
        fill="blue",
        alpha=0.2,
        
        # Notch
        notch=TRUE,
        notchwidth = 0.8,
        
        # custom outliers
        outlier.colour="red",
        outlier.fill="red",
        outlier.size=3) +
  scale_x_discrete() +
  labs(title="Boxplot",x="", y = "JT55M101 (kW)")


# Descriptive Statistics
data.frame(Estadistica=stat.desc(df_TA$JT55M101))
NA

Density Function: Mill 1 TA Level (%)


# PDF
distr(df_TA,'LT55M101')


# Boxplot
ggplot(df_TA, aes(y=LT55M101)) + 
    geom_boxplot( 
        # custom boxes
        color="blue",
        fill="blue",
        alpha=0.2,
        
        # Notch
        notch=TRUE,
        notchwidth = 0.8,
        
        # custom outliers
        outlier.colour="red",
        outlier.fill="red",
        outlier.size=3) +
  scale_x_discrete() +
  labs(title="Boxplot",x="", y = "LT55M101 (%)")


# Descriptive Statistics
data.frame(Estadistica=stat.desc(df_TA$LT55M101))
NA

Density Function: TA Bagasse Mass Flow (t/h)


# PDF
distr(df_TA,'WT555801')


# Boxplot
ggplot(df_TA, aes(y=WT555801)) + 
    geom_boxplot( 
        # custom boxes
        color="blue",
        fill="blue",
        alpha=0.2,
        
        # Notch
        notch=TRUE,
        notchwidth = 0.8,
        
        # custom outliers
        outlier.colour="red",
        outlier.fill="red",
        outlier.size=3) +
  scale_x_discrete() +
  labs(title="Boxplot",x="", y = "WT555801 (t/h)")


# Descriptive Statistics
data.frame(Estadistica=stat.desc(df_TA$WT555801))
NA

Filtering Criteria:

We first need to exclude any intentional or planned downtime of TA. This can be done by filtering the left tail of the Torque (TQ55M101) and Speed (ST55M101) distributions of Mill 1 TA, since the mill stops completely during planned downtime.

We also need to filter out any unintentional or unplanned downtime of TA. This can be done by filtering the left tail of the bagasse mass flow of TA. This works because for unplanned downtime usually the first mill does not stop (because of the uncertainty of the duration of the stop). Also we can estimate the sugar cane crush rate by the bagasse mass flow coming out of the tandem (WT555801).

Scatter Plots and Correlations

Torque and Bagasse Relation Mill 1 TA

label_x <- "WT555801"
label_y <- "TQ55M101"

mapa_dispersion(label_x,label_y,df_TA)

Torque and Bagasse Mass Flow are correlated, but not perfectly. So we need to filter out observations based on these variables individually.

Torque and Power Relation Mill 1 TA

label_x <- "JT55M101"
label_y <- "TQ55M101"

mapa_dispersion(label_x,label_y,df_TA)

Torque and Mill Power are strongly correlated, almost perfectly. So there is no need to filter out observations based on these variables individually. We choose Torque since it contains information about Power and Speed simultaneously.

WT555801 Quantiles

# Filter Dataset by Column Values:
res<-quantile(df_TA$WT555801, probs = c(0,0.25,0.5,0.75,1)) 
res
       0%       25%       50%       75%      100% 
 21.56174 132.91532 147.77292 158.52945 183.46936 

TQ55M101 Quantiles

# Filter Dataset by Column Values:
res<-quantile(df_TA$TQ55M101, probs = c(0,0.25,0.5,0.75,1)) 
res
       0%       25%       50%       75%      100% 
 226.5511 1869.4912 2092.0420 2283.7543 2579.5171 

ST55M101 Quantiles

# Filter Dataset by Column Values:
res<-quantile(df_TA$ST55M101, probs = c(0,0.25,0.5,0.75,1)) 
res
      0%      25%      50%      75%     100% 
172.5349 702.7378 746.5287 774.9827 999.9883 
# Filter Dataset by Column Values:
df_TA_filtered <- df_TA[df_TA$WT555801>=130,] # Filter by Minimum Threshold of 1st Quartile
Error in exists(cacheKey, where = .rs.WorkingDataEnv, inherits = FALSE) : 
  invalid first argument
Error in assign(cacheKey, frame, .rs.CachedDataEnv) : 
  attempt to use zero-length variable name
df_TA_filtered <- df_TA_filtered[df_TA_filtered$TQ55M101>=1800,] # Filter by Minimum Threshold of 1st Quartile
Error in exists(cacheKey, where = .rs.WorkingDataEnv, inherits = FALSE) : 
  invalid first argument
Error in assign(cacheKey, frame, .rs.CachedDataEnv) : 
  attempt to use zero-length variable name
df_TA_filtered <- df_TA_filtered[df_TA_filtered$ST55M101>=700,] # Filter by Minimum Threshold of 1st Quartile
Error in exists(cacheKey, where = .rs.WorkingDataEnv, inherits = FALSE) : 
  invalid first argument
Error in assign(cacheKey, frame, .rs.CachedDataEnv) : 
  attempt to use zero-length variable name
df_TA_filtered

Torque and Bagasse Relation Mill 1 TA (Filtered Dataset)

label_x <- "WT555801"
label_y <- "TQ55M101"

mapa_dispersion(label_x,label_y,df_TA_filtered)

Density Function: TA Bagasse Mass Flow (t/h) Filtered Distribution


# PDF
distr(df_TA_filtered,'WT555801')


# Boxplot
ggplot(df_TA_filtered, aes(y=WT555801)) + 
    geom_boxplot( 
        # custom boxes
        color="blue",
        fill="blue",
        alpha=0.2,
        
        # Notch
        notch=TRUE,
        notchwidth = 0.8,
        
        # custom outliers
        outlier.colour="red",
        outlier.fill="red",
        outlier.size=3) +
  scale_x_discrete() +
  labs(title="Boxplot",x="", y = "WT555801 (t/h)")


# Descriptive Statistics
data.frame(Estadistica=stat.desc(df_TA_filtered$WT555801))
NA

Density Function: Mill 1 TA Torque (N*m) Filtered Distribution


# PDF
distr(df_TA_filtered,'TQ55M101')


# Boxplot
# y must be the torque column (TQ55M101) so the boxplot matches the torque
# distr() and stat.desc() calls of this section; torque is reported in N*m.
ggplot(df_TA_filtered, aes(y = TQ55M101)) +
  geom_boxplot(
    # custom boxes
    color = "blue",
    fill = "blue",
    alpha = 0.2,

    # Notch
    notch = TRUE,
    notchwidth = 0.8,

    # custom outliers
    outlier.colour = "red",
    outlier.fill = "red",
    outlier.size = 3
  ) +
  scale_x_discrete() +
  labs(title = "Boxplot", x = "", y = "TQ55M101 (N*m)")


# Descriptive Statistics
data.frame(Estadistica=stat.desc(df_TA_filtered$TQ55M101))
NA

Density Function: Mill 1 TA Speed (rpm) Filtered Distribution


# PDF
distr(df_TA_filtered,'ST55M101')


# Boxplot
ggplot(df_TA_filtered, aes(y=ST55M101)) + 
    geom_boxplot( 
        # custom boxes
        color="blue",
        fill="blue",
        alpha=0.2,
        
        # Notch
        notch=TRUE,
        notchwidth = 0.8,
        
        # custom outliers
        outlier.colour="red",
        outlier.fill="red",
        outlier.size=3) +
  scale_x_discrete() +
  labs(title="Boxplot",x="", y = "ST55M101 (rpm)")


# Descriptive Statistics
data.frame(Estadistica=stat.desc(df_TA_filtered$ST55M101))
NA

Filter Criteria

Filter out any points for which:

* WT555801 < 130
* TQ55M101 < 1800
* ST55M101 < 700

---
title: "Tandem A Data Filtering Criteria"
output: html_notebook
---

### Data Filtering Criteria for Tandem A in **2023-2024 Harvest Season**.

Libraries
```{r}
library(dplyr)
library(pastecs)
library(ggplot2)
library(lares)
```


### Scatter Plot Function
```{r}
#' Scatter plot of two columns with a linear fit and their correlation.
#'
#' @param label_x Name (string) of the column drawn on the x axis.
#' @param label_y Name (string) of the column drawn on the y axis.
#' @param dataset Data frame containing both columns.
#' @return A ggplot object whose subtitle shows the correlation of the two
#'   columns rounded to 2 decimals.
mapa_dispersion <- function(label_x, label_y, dataset) {
  # Fail early with a readable message; otherwise dataset[[...]] returns NULL
  # for a misspelled column and cor() stops with an unrelated-looking error.
  missing_cols <- setdiff(c(label_x, label_y), names(dataset))
  if (length(missing_cols) > 0) {
    stop(
      "Column(s) not found in dataset: ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # The plot layers drop rows with missing values, so compute the correlation
  # on the complete pairs only; with the default `use`, a single NA would turn
  # the subtitle into NA while the plot still looks valid.
  correlacion <- round(
    cor(dataset[[label_x]], dataset[[label_y]], use = "pairwise.complete.obs"),
    2
  )

  ggplot(dataset, aes(.data[[label_x]], .data[[label_y]])) +
    geom_point(
      color = "orange",
      fill = "#69b3a2",
      shape = 21,
      alpha = 0.5,
      size = 6,
      stroke = 2
    ) +
    # Explicit formula keeps the same linear fit but silences the
    # "using formula" message printed on every call.
    geom_smooth(
      method = "lm", formula = y ~ x,
      color = "#990000", fill = "#FFCF00", se = TRUE
    ) +
    ggtitle(
      paste(label_y, "vrs.", label_x),
      subtitle = paste("Correlation: ", correlacion)
    ) +
    xlab(label_x) +
    ylab(label_y)
}
```

### Mill 1 TA Dataset
```{r}
# Import the Mill 1 / Tandem A measurements (path is relative to the
# notebook's working directory).
df_TA <- read.csv("Mill_1_TA.csv")

# Show the imported table in the notebook output.
df_TA
```

### Density Function: Mill 1 TA Speed (rpm)
```{r}

# Distribution overview of the speed signal (distr() from lares)
distr(df_TA, "ST55M101")

# Notched boxplot of the same column: blue translucent box, red outliers
ggplot(data = df_TA, mapping = aes(y = ST55M101)) +
  geom_boxplot(
    notch = TRUE, notchwidth = 0.8,
    colour = "blue", fill = "blue", alpha = 0.2,
    outlier.colour = "red", outlier.fill = "red", outlier.size = 3
  ) +
  # NOTE(review): presumably added so the unused x axis shows no numeric
  # ticks for this y-only boxplot -- confirm.
  scale_x_discrete() +
  labs(y = "ST55M101 (rpm)", x = "", title = "Boxplot")

# Summary statistics (stat.desc() from pastecs) shown as a one-column table
data.frame(Estadistica = stat.desc(df_TA[["ST55M101"]]))

```

### Density Function: Mill 1 TA Torque (N*m)
```{r}

# Distribution overview of the torque signal (distr() from lares)
distr(df_TA, "TQ55M101")

# Notched boxplot of the same column: blue translucent box, red outliers
ggplot(data = df_TA, mapping = aes(y = TQ55M101)) +
  geom_boxplot(
    notch = TRUE, notchwidth = 0.8,
    colour = "blue", fill = "blue", alpha = 0.2,
    outlier.colour = "red", outlier.fill = "red", outlier.size = 3
  ) +
  # NOTE(review): presumably added so the unused x axis shows no numeric
  # ticks for this y-only boxplot -- confirm.
  scale_x_discrete() +
  labs(y = "TQ55M101 (N*m)", x = "", title = "Boxplot")

# Summary statistics (stat.desc() from pastecs) shown as a one-column table
data.frame(Estadistica = stat.desc(df_TA[["TQ55M101"]]))

```

### Density Function: Mill 1 TA Power (kW)
```{r}

# Distribution overview of the power signal (distr() from lares)
distr(df_TA, "JT55M101")

# Notched boxplot of the same column: blue translucent box, red outliers
ggplot(data = df_TA, mapping = aes(y = JT55M101)) +
  geom_boxplot(
    notch = TRUE, notchwidth = 0.8,
    colour = "blue", fill = "blue", alpha = 0.2,
    outlier.colour = "red", outlier.fill = "red", outlier.size = 3
  ) +
  # NOTE(review): presumably added so the unused x axis shows no numeric
  # ticks for this y-only boxplot -- confirm.
  scale_x_discrete() +
  labs(y = "JT55M101 (kW)", x = "", title = "Boxplot")

# Summary statistics (stat.desc() from pastecs) shown as a one-column table
data.frame(Estadistica = stat.desc(df_TA[["JT55M101"]]))

```

### Density Function: Mill 1 TA Level (%)
```{r}

# Distribution overview of the level signal (distr() from lares)
distr(df_TA, "LT55M101")

# Notched boxplot of the same column: blue translucent box, red outliers
ggplot(data = df_TA, mapping = aes(y = LT55M101)) +
  geom_boxplot(
    notch = TRUE, notchwidth = 0.8,
    colour = "blue", fill = "blue", alpha = 0.2,
    outlier.colour = "red", outlier.fill = "red", outlier.size = 3
  ) +
  # NOTE(review): presumably added so the unused x axis shows no numeric
  # ticks for this y-only boxplot -- confirm.
  scale_x_discrete() +
  labs(y = "LT55M101 (%)", x = "", title = "Boxplot")

# Summary statistics (stat.desc() from pastecs) shown as a one-column table
data.frame(Estadistica = stat.desc(df_TA[["LT55M101"]]))

```


### Density Function: TA Bagasse Mass Flow (t/h)
```{r}

# Distribution overview of the bagasse mass flow signal (distr() from lares)
distr(df_TA, "WT555801")

# Notched boxplot of the same column: blue translucent box, red outliers
ggplot(data = df_TA, mapping = aes(y = WT555801)) +
  geom_boxplot(
    notch = TRUE, notchwidth = 0.8,
    colour = "blue", fill = "blue", alpha = 0.2,
    outlier.colour = "red", outlier.fill = "red", outlier.size = 3
  ) +
  # NOTE(review): presumably added so the unused x axis shows no numeric
  # ticks for this y-only boxplot -- confirm.
  scale_x_discrete() +
  labs(y = "WT555801 (t/h)", x = "", title = "Boxplot")

# Summary statistics (stat.desc() from pastecs) shown as a one-column table
data.frame(Estadistica = stat.desc(df_TA[["WT555801"]]))

```

## Filtering Criteria:
We first need to exclude any intentional or planned downtime of TA. This can be done by filtering the left tail of the Torque (TQ55M101) and Speed (ST55M101) distributions of Mill 1 TA, since the mill stops completely during planned downtime.

We also need to filter out any unintentional or unplanned downtime of TA. This can be done by filtering the left tail of the bagasse mass flow of TA. This works because for unplanned downtime usually the first mill does not stop (because of the uncertainty of the duration of the stop). Also we can estimate the sugar cane crush rate by the bagasse mass flow coming out of the tandem (WT555801).


### Scatter Plots and Correlations

**Torque and Bagasse Relation Mill 1 TA**
```{r}
# Torque (y axis) against bagasse mass flow (x axis) on the raw dataset
label_y <- "TQ55M101"
label_x <- "WT555801"

mapa_dispersion(label_x = label_x, label_y = label_y, dataset = df_TA)
```
Torque and Bagasse Mass Flow are correlated, but not perfectly. So we need to filter out observations based on these variables individually.

**Torque and Power Relation Mill 1 TA**
```{r}
# Torque (y axis) against mill power (x axis) on the raw dataset
label_y <- "TQ55M101"
label_x <- "JT55M101"

mapa_dispersion(label_x = label_x, label_y = label_y, dataset = df_TA)
```
Torque and Mill Power are strongly correlated, almost perfectly. So there is no need to filter out observations based on these variables individually. We choose Torque since it contains information about Power and Speed simultaneously.

### WT555801 Quantiles
```{r}
# Minimum, quartiles and maximum of the bagasse mass flow signal
res <- quantile(df_TA[["WT555801"]], probs = seq(0, 1, by = 0.25))
res
```
### TQ55M101 Quantiles
```{r}
# Minimum, quartiles and maximum of the torque signal
res <- quantile(df_TA[["TQ55M101"]], probs = seq(0, 1, by = 0.25))
res
```
### ST55M101 Quantiles
```{r}
# Minimum, quartiles and maximum of the speed signal
res <- quantile(df_TA[["ST55M101"]], probs = seq(0, 1, by = 0.25))
res
```


```{r}
# Keep only the rows where the tandem is running: every signal must be at or
# above its minimum threshold. The thresholds are round values just below the
# 1st quartile of each signal (see the quantile chunks above).
#
# which() converts the logical mask to row positions and drops NA results.
# Indexing with the raw logical vector would insert an all-NA row for every
# observation whose comparison is NA.
df_TA_filtered <- df_TA[
  which(
    df_TA$WT555801 >= 130 &    # bagasse mass flow (t/h)
      df_TA$TQ55M101 >= 1800 & # mill 1 torque (N*m)
      df_TA$ST55M101 >= 700    # mill 1 speed (rpm)
  ),
]
df_TA_filtered
```

**Torque and Bagasse Relation Mill 1 TA (Filtered Dataset)**
```{r}
# Torque (y axis) against bagasse mass flow (x axis) on the filtered dataset
label_y <- "TQ55M101"
label_x <- "WT555801"

mapa_dispersion(label_x = label_x, label_y = label_y, dataset = df_TA_filtered)
```

### Density Function: TA Bagasse Mass Flow (t/h) Filtered Distribution
```{r}

# Distribution overview of bagasse mass flow after filtering (distr() from lares)
distr(df_TA_filtered, "WT555801")

# Notched boxplot of the same column: blue translucent box, red outliers
ggplot(data = df_TA_filtered, mapping = aes(y = WT555801)) +
  geom_boxplot(
    notch = TRUE, notchwidth = 0.8,
    colour = "blue", fill = "blue", alpha = 0.2,
    outlier.colour = "red", outlier.fill = "red", outlier.size = 3
  ) +
  # NOTE(review): presumably added so the unused x axis shows no numeric
  # ticks for this y-only boxplot -- confirm.
  scale_x_discrete() +
  labs(y = "WT555801 (t/h)", x = "", title = "Boxplot")

# Summary statistics (stat.desc() from pastecs) shown as a one-column table
data.frame(Estadistica = stat.desc(df_TA_filtered[["WT555801"]]))

```

### Density Function: Mill 1 TA Torque (N*m) Filtered Distribution
```{r}

# PDF
distr(df_TA_filtered, "TQ55M101")

# Boxplot
# y must be the torque column (TQ55M101) so the boxplot describes the same
# signal as the distr() and stat.desc() calls in this chunk; torque is
# reported in N*m, matching the unfiltered torque section.
ggplot(df_TA_filtered, aes(y = TQ55M101)) +
  geom_boxplot(
    # custom boxes
    color = "blue",
    fill = "blue",
    alpha = 0.2,

    # Notch
    notch = TRUE,
    notchwidth = 0.8,

    # custom outliers
    outlier.colour = "red",
    outlier.fill = "red",
    outlier.size = 3
  ) +
  scale_x_discrete() +
  labs(title = "Boxplot", x = "", y = "TQ55M101 (N*m)")

# Descriptive Statistics
data.frame(Estadistica = stat.desc(df_TA_filtered$TQ55M101))

```


### Density Function: Mill 1 TA Speed (rpm) Filtered Distribution
```{r}

# Distribution overview of mill speed after filtering (distr() from lares)
distr(df_TA_filtered, "ST55M101")

# Notched boxplot of the same column: blue translucent box, red outliers
ggplot(data = df_TA_filtered, mapping = aes(y = ST55M101)) +
  geom_boxplot(
    notch = TRUE, notchwidth = 0.8,
    colour = "blue", fill = "blue", alpha = 0.2,
    outlier.colour = "red", outlier.fill = "red", outlier.size = 3
  ) +
  # NOTE(review): presumably added so the unused x axis shows no numeric
  # ticks for this y-only boxplot -- confirm.
  scale_x_discrete() +
  labs(y = "ST55M101 (rpm)", x = "", title = "Boxplot")

# Summary statistics (stat.desc() from pastecs) shown as a one-column table
data.frame(Estadistica = stat.desc(df_TA_filtered[["ST55M101"]]))

```

## Filter Criteria
**Filter out** any points for which:

* WT555801 < 130
* TQ55M101 < 1800
* ST55M101 < 700

