Washing Machine Project

Tracking progress

1. Machine lifetime & intensity

Key variables:

machine_commissioning
datapoint_time
customer_cycles_amount

What to analyse:

Machine age at service

→ datapoint_time – commissioning date

Usage intensity:

→ cycles per year / per month

Calculation of machine age

Here I created a new data frame just to test it out if the code works. At the end I made a new column in the original joined data frame.

machine.age <- join %>% 
  select(datapoint_time,machine_commissioning, machine_id) %>% 
  group_by(machine_id)

machine.age$dt <- as.vector(machine.age$datapoint_time)
machine.age$mt <- as.vector(machine.age$machine_commissioning)

machine.age <- machine.age %>% 
  group_by(machine_id) %>% 
  mutate(age_year = round((dt - mt)/365.2425), na.rm = TRUE)

join$age_year <- machine.age$age_year

library(knitr)
kable(machine.age[1:5,], caption = "New data frame to calculate age in years")

New data frame to calculate age in years
datapoint_time	machine_commissioning	machine_id	dt	mt	age_year	na.rm
2020-01-02	2014-06-20	201406209290	18263	16241	6	TRUE
2020-01-02	2007-09-14	200709147860	18263	13770	12	TRUE
2020-01-03	2007-06-17	200706173882	18264	13681	13	TRUE
2020-01-06	2012-01-28	201201287469	18267	15367	8	TRUE
2020-01-07	2009-03-20	200903202799	18268	14323	11	TRUE

Interesting questions:

Do failures depend on age or usage?
Are machines failing early or only after heavy use?

Grouped bar chart with an overlaid line

For the next part I made age intervals to more easily see the differences between machine of different use time.

I also converted the service type in order to visualize it later.

join <- join %>% 
  mutate(age_range = cut(age_year, c(0, 5, 10, 15, 20, 25, 100), labels = c("0-5", "5-10", "10-15", "15-20", "20-25", ">25")))

join$service_event_type <- as.factor(join$service_event_type)

I made separate data frames for both so it’s easier and error-free on the original data

bars_age_error <- join %>%
  select(service_event_type, age_range) %>% 
  count(age_range, service_event_type) %>% 
  na.omit()

line_age_cycle <- join %>% 
  group_by(age_range) %>%
  summarise(mean(customer_cycles_amount), .groups = "drop") %>% 
  na.omit() %>% 
  rename(
    "mean_cycle" = 'mean(customer_cycles_amount)'
  )
scale_factor <- max(bars_age_error$n) / max(line_age_cycle$mean_cycle) #This is to illustrate the second y-axis later on in the bar chart

ggplot() +
  # bars
  geom_bar(
    data = bars_age_error,
    aes(x = age_range, y = n, fill = service_event_type),
    stat = "identity", position = "dodge", 
    color = "black", linewidth = 0.3
  ) +
  scale_fill_paletteer_d("nationalparkcolors::Acadia")+
  # line
  geom_line(
    data = line_age_cycle,
    aes(x = as.numeric(age_range), y = mean_cycle  * scale_factor),
    color = "red", linewidth = 1, group = 1
  ) +
  geom_point(
    data = line_age_cycle,
    aes(x = as.numeric(age_range), y = mean_cycle * scale_factor),
    color = "red", size = 3
  ) +
  # dual y-axis
  scale_y_continuous(
    name = "Number of service events",
    sec.axis = sec_axis(
      transform = ~ . / scale_factor,
      name = "Average number of cycles per month"
    )
  ) +
  scale_x_discrete(name = "Machine age (years)") +
  labs(
    title = "Number of service events and average cycles per month by age range",
    fill = NULL) +
  theme_classic() +
  theme(
    legend.position        = "bottom",
    legend.direction       = "horizontal",
    axis.text              = element_text(size = 10),
    axis.title             = element_text(size = 10),
    axis.title.y.right     = element_text(color = "red"),
    axis.text.y.right      = element_text(color = "red")
  )

2. Failure type, warranty status, and repair type

This relationship can be showcased through a heatmap

# Making Heatmap to show relationship between error case, warranty status, and repair outcome

pcd <- join %>% 
  select(customer_warranty_status,repair_type, service_event_type, age_range, machine_id) %>% 
  na.omit()

pcd$customer_warranty_status <- as.factor(pcd$customer_warranty_status)
pcd$repair_type <- as.factor(pcd$repair_type)
min(pcd$cycle_per_month)

[1] Inf

# Making a separate data frame for the heatmap. 

heatmap_df <- pcd %>%
  group_by(service_event_type, repair_type, customer_warranty_status) %>%
  summarise(count = n(), .groups = "drop") %>%
  complete(service_event_type, repair_type, customer_warranty_status,
           fill = list(count = 0))


# Plot
ggplot(heatmap_df, aes(x = repair_type, y = service_event_type, fill = count)) +
  geom_tile(color = "white", linewidth = 0.5) +
  geom_text(aes(label = ifelse(count == 0, "0", as.character(count))),
            size = 3.5, color = "white") +
  scale_fill_gradient(low = "#A4BED5FF", high = "#023743FF") +
  facet_wrap(~ customer_warranty_status) +        # <-- this is to have 2 maps for 2 warranty statuses
  labs(
    x    = "Repair Type",
    y    = "Failure Type",
    fill = "Service events"
  ) +
  theme_minimal() +
  theme(
    axis.text.x     = element_text(angle = 45, hjust = 1, size = 10),
    axis.text.y     = element_text(size = 10),
    axis.title      = element_text(size = 10),
    panel.grid      = element_blank(),
    legend.position = "right",
    strip.text      = element_text(size = 10, face = "bold")  # styles the facet titles
  )

Some interesting questions we could investigate from this map:

Which failure types consistently lead to disposal rather than repair?
Preventative repair? Where? Which part?