Import packages
library(tidyverse)
library(lubridate)
library(tsibble)
Import dataset
You can find them on my github
cheese_data <- read_csv("data/cheese_data.csv")
cheese_data_f1 <- cheese_data %>%
filter(factory == "f1")
cheese_data_f2 <- cheese_data %>%
filter(factory == "f2")
Clean the data (manage missing values)
sum_na <- cheese_data %>%
na_if(0) %>%
is.na() %>%
sum()
- There are 7 values to be replaced
to_discard <- cheese_data %>%
na_if(0) %>%
is.na() %>%
as_tibble() %>%
mutate(sumrow = rowSums(.)) %>%
filter(sumrow == 2) %>%
nrow()
- There are 2 time points to be discarded
index_replace <- cheese_data %>%
na_if(0) %>%
is.na() %>%
as_tibble() %>%
mutate(sumrow = rowSums(.),
rownum = row_number()) %>%
filter(sumrow == 1) %>%
pull(rownum)
index_remove <- cheese_data %>%
na_if(0) %>%
is.na() %>%
as_tibble() %>%
mutate(sumrow = rowSums(.),
rownum = row_number()) %>%
filter(sumrow == 2) %>%
pull(rownum)
- Remove/replace missing values according to manager
col_interested <- 4:8
cheese_data_nona <- cheese_data %>%
mutate(rownum = row_number()) %>%
filter(!rownum %in% index_remove) %>% #remove rows with 2 values
na_if(0) %>%
mutate(meanrow = rowMeans(.[col_interested], na.rm = T) %>% round(1)) %>%
rowwise() %>%
mutate_at(vars(col_interested),# choose columns of interest
funs(if_else(is.na(.),meanrow,.)))#replace by meanrow if value is na
Clean the data (manage unexpected values)
number_unexpected <- cheese_data_nona %>%
filter_at(vars(col_interested),
any_vars(. >= 65 |. <= 15)) %>%
select(col_interested) %>%
rowwise() %>%
summarise(sum(. >= 65|. <= 15)) %>%
pull()
- There are 1 unexpected values in the data.
Like before I would just replace the unexpected value if there are only 1 by row.
If there are more by row I would just discard them.
cheese_data_nona_nounexp <- cheese_data_nona %>%
mutate_at(vars(col_interested),
#replace unexpected data with mean of m1,m2,m3,m4,m5
funs(if_else(.>=65,mean(m1,m2,m3,m4,m5),.)))
Compute monitoring statistics for factory f1
- Storing xbar and range
spc_data_f1 <- cheese_data_nona_nounexp %>%
filter(factory == "f1") %>%
ungroup() %>%
mutate(xbar = rowMeans(.[col_interested])) %>% #compute new mean column
rowwise() %>%
mutate(range = max(m1,m2,m3,m4,m5)-min(m1,m2,m3,m4,m5)) %>%
select(-c(rownum,meanrow))
f1_rows <- spc_data_f1 %>%
nrow()
- spc_data_f1 has 123 rows
avg_xbar <- spc_data_f1 %>%
ungroup() %>%
summarise(mean(xbar)) %>%
pull()
avg_range <- spc_data_f1 %>%
ungroup() %>%
summarise(mean(range)) %>%
pull()
- Average of xbar : 51, and average of range : 1.7
Build Shewhart control charts for factory f1
constants <- read_csv("data/XbarR_constants.csv")
d3 <- constants %>%
filter(n == 5) %>%
pull(D3)
d4 <- constants %>%
filter(n == 5) %>%
pull(D4)
lcl <- constants %>%
mutate(lcl = d3*avg_range) %>%
summarise(lcl = mean(lcl)) %>%
pull()
ucl <- constants %>%
mutate(ucl = d4*avg_range) %>%
summarise(ucl = mean(ucl)) %>%
pull()
r_chart <- spc_data_f1 %>%
ggplot(aes(timepoint, range))+
geom_line()+
labs(title = "Factory 1 R chart",
x = "Timepoint",
y = "Range")+
theme_minimal()
r_chart
r_chart <- r_chart+
geom_hline(yintercept = c(lcl,ucl),
color = "red")+
labs(subtitle = "1. With control limits")
r_chart
r_chart <- r_chart +
geom_hline(yintercept = avg_range,
linetype = "dashed")+
labs(subtitle = "2. With control limits and average range")
r_chart
lcl_xbar <- constants %>%
mutate(lclxbar = avg_xbar - A2 * avg_range) %>%
summarise(lclxbar = mean(lclxbar)) %>%
pull()
ucl_xbar <- constants %>%
mutate(uclxbar = avg_xbar + A2 * avg_range) %>%
summarise(uclxbar = mean(uclxbar)) %>%
pull()
x_bar_chart <- spc_data_f1 %>%
ggplot(aes(timepoint,xbar))+
geom_line()+
geom_hline(yintercept = c(ucl_xbar,lcl_xbar),
color = "red")+
geom_hline(yintercept = avg_xbar,
linetype = "dashed")+
labs(title = "Factory 1 X bar chart",
subtitle = "With control limits and average Xbar",
x = "Timepoint",
y = "Xbar")+
theme_minimal()
x_bar_chart
Highlight special causes of variations
out_of_bounds <- spc_data_f1 %>%
filter(xbar > ucl_xbar | xbar < lcl_xbar) # filter out of control limits
all_lower_ref_value <- function(x) all(x<avg_xbar) # function used with slider
all_above_ref_value <- function(x) all(x>avg_xbar)
num_lower <- slider::slide_lgl(.x = spc_data_f1$xbar,
.f = all_lower_ref_value,
.after = 9) %>%
sum()
num_above <- slider::slide_lgl(.x = spc_data_f1$xbar,
.f = all_above_ref_value,
.after = 9,
# complete because there is only 1 and it is last
.complete = T) %>%
sum()
There are 7 values that have 10 consecutive values below xbar There are NA values that have 10 consecutive values above xbar
points_with_10_more <- spc_data_f1 %>%
ungroup() %>%
# check all values to see if they have 9 more values on threshold (avg_bar)
mutate(consec = slider::slide_lgl(.x = xbar,
# we use lower only because there are no values above
.f = all_lower_ref_value,
.after = 9)) %>%
filter(consec) %>% #filter these values in
pull(timepoint) # pull the values
consecutive_times <- (head(points_with_10_more,1)):
(tail(points_with_10_more,1)+9)
# get the last timepoint then substract by length
consecutive_points <- spc_data_f1 %>%
filter(timepoint %in% consecutive_times)
x_bar_chart+
geom_point(data = out_of_bounds,
mapping = aes(x = timepoint, y = xbar),
color = "red",
shape = 8)+
geom_point(data = consecutive_points,
mapping = aes(x = timepoint, y = xbar),
color = "red",
shape = 21,
size = 4)
Build Shewhart control charts for factory f2
spc_data_f2 <- cheese_data_nona_nounexp %>%
filter(factory == "f2") %>%
ungroup() %>%
mutate(xbar = rowMeans(.[col_interested])) %>% #compute new mean column
rowwise() %>%
mutate(range = max(m1,m2,m3,m4,m5)-min(m1,m2,m3,m4,m5)) %>%
select(-c(rownum,meanrow))
avg_xbar2 <- spc_data_f2 %>%
ungroup() %>%
summarise(mean(xbar)) %>%
pull()
avg_range2 <- spc_data_f2 %>%
ungroup() %>%
summarise(mean(range)) %>%
pull()
lcl2 <- constants %>%
mutate(lcl = d3*avg_range2) %>%
summarise(lcl = mean(lcl)) %>%
pull()
ucl2 <- constants %>%
mutate(ucl = d4*avg_range2) %>%
summarise(ucl = mean(ucl)) %>%
pull()
r_chart2 <- spc_data_f2 %>%
ggplot(aes(timepoint, range))+
geom_line()+
labs(title = "Factory 2 R chart",
x = "Timepoint",
y = "Range")+
theme_minimal()
r_chart2
r_chart2 <- r_chart2+
geom_hline(yintercept = c(lcl2,ucl2),
color = "red")+
geom_hline(yintercept = avg_range2,
linetype = "dashed")+
labs(subtitle = "With control limits and mean range")
r_chart2
lcl_xbar2 <- constants %>%
mutate(lclxbar = avg_xbar2 - A2 * avg_range2) %>%
summarise(lclxbar = mean(lclxbar)) %>%
pull()
ucl_xbar2 <- constants %>%
mutate(uclxbar = avg_xbar2 + A2 * avg_range2) %>%
summarise(uclxbar = mean(uclxbar)) %>%
pull()
x_bar_chart2 <- spc_data_f2 %>%
ggplot(aes(timepoint,xbar))+
geom_line()+
geom_hline(yintercept = c(ucl_xbar2,lcl_xbar2),
color = "red")+
geom_hline(yintercept = avg_xbar2,
linetype = "dashed")+
labs(title = "Factory 2 X bar chart",
subtitle = "With control limits and average Xbar",
x = "Timepoint",
y = "Xbar")+
theme_minimal()
x_bar_chart2
out_of_bounds2 <- spc_data_f2 %>%
filter(xbar > ucl_xbar2 | xbar < lcl_xbar2)
all_lower_ref_value2 <- function(x) all(x<avg_xbar2)
all_above_ref_value2 <- function(x) all(x>avg_xbar2)
num_lower2 <- slider::slide_lgl(.x = spc_data_f2$xbar,
.f = all_lower_ref_value2,
.after = 9) %>%
sum()
num_above2 <- slider::slide_lgl(.x = spc_data_f2$xbar,
.f = all_above_ref_value2,
.after = 9,
.complete = T) %>%
sum()
points_with_10_more2 <- spc_data_f2 %>%
ungroup() %>%
# check all values to see if they have 9 more values on threshold (avg_bar)
mutate(consec = slider::slide_lgl(.x = xbar,
# we use lower only because there are no values above
.f = all_lower_ref_value2,
.after = 9)) %>%
filter(consec) %>% #filter these values in
pull(timepoint) # pull the values
first_points <- points_with_10_more2[1:6]
last_points <- points_with_10_more2[7:length(points_with_10_more2)]
consecutive_times2 <- (head(first_points,1)):
(tail(first_points,1)+9)
consecutive_times3 <- (head(last_points,1)):
(tail(last_points,1)+9)
consecutive_points2 <- spc_data_f2 %>%
filter(timepoint %in% consecutive_times2)
consecutive_points3 <- spc_data_f2 %>%
filter(timepoint %in% consecutive_times3)
x_bar_chart2+
geom_point(data = out_of_bounds2,
mapping = aes(x = timepoint, y = xbar),
color = "red",
shape = 8)+
geom_point(data = consecutive_points2,
mapping = aes(x = timepoint, y = xbar),
color = "red",
shape = 21,
size = 4)+
geom_point(data = consecutive_points3,
mapping = aes(x = timepoint, y = xbar),
color = "red",
shape = 21,
size = 4)
3. The 2 factory do not have the same pattern at all even though they
have almost the same mean. Factory one is more stable whereas factory 2
is spread further away from the mean.
Redefine monitoring data for factory f2
spc_data_f2_week <- spc_data_f2 %>%
ungroup() %>%
group_by(week) %>%
summarise(avg_week = mean(xbar),
range_in_week = mean(range)) %>%
mutate(moving_range = abs(range_in_week-lag(range_in_week)))
M <- spc_data_f2_week %>%
summarise(mean(avg_week)) %>%
pull()
MR_bar <- spc_data_f2_week %>%
summarise(mean(moving_range, na.rm = T)) %>%
pull()
lcl_week <- M - 2.66 * MR_bar
ucl_week <- M + 2.66 * MR_bar
spc_data_f2_week %>%
ggplot(aes(week, avg_week))+
geom_line()+
geom_hline(yintercept = c(lcl_week,ucl_week),
color = "red")+
geom_hline(yintercept = M,
linetype = "dashed")+
labs(title = "Control chart for factory 2",
subtitle = "By week",
x = "Week",
y = "Average value")+
theme_minimal()