1. Machine lifetime & intensity

Key variables:

What to analyse:

→ datapoint_time – commissioning date

→ cycles per year / per month

Calculation of machine age

Here I created a new data frame just to test it out if the code works. At the end I made a new column in the original joined data frame.

machine.age <- join %>% 
  select(datapoint_time,machine_commissioning, machine_id) %>% 
  group_by(machine_id)

machine.age$dt <- as.vector(machine.age$datapoint_time)
machine.age$mt <- as.vector(machine.age$machine_commissioning)

machine.age <- machine.age %>% 
  group_by(machine_id) %>% 
  mutate(age_year = round((dt - mt)/365.2425), na.rm = TRUE)

join$age_year <- machine.age$age_year

library(knitr)
kable(machine.age[1:5,], caption = "New data frame to calculate age in years")
New data frame to calculate age in years
datapoint_time machine_commissioning machine_id dt mt age_year na.rm
2020-01-02 2014-06-20 201406209290 18263 16241 6 TRUE
2020-01-02 2007-09-14 200709147860 18263 13770 12 TRUE
2020-01-03 2007-06-17 200706173882 18264 13681 13 TRUE
2020-01-06 2012-01-28 201201287469 18267 15367 8 TRUE
2020-01-07 2009-03-20 200903202799 18268 14323 11 TRUE

Interesting questions:

  1. Do failures depend on age or usage?

  2. Are machines failing early or only after heavy use?

Grouped bar chart with an overlaid line

For the next part I made age intervals to more easily see the differences between machine of different use time.

I also converted the service type in order to visualize it later.

join <- join %>% 
  mutate(age_range = cut(age_year, c(0, 5, 10, 15, 20, 25, 100), labels = c("0-5", "5-10", "10-15", "15-20", "20-25", ">25")))

join$service_event_type <- as.factor(join$service_event_type)

I made separate data frames for both so it’s easier and error-free on the original data

bars_age_error <- join %>%
  select(service_event_type, age_range) %>% 
  count(age_range, service_event_type) %>% 
  na.omit()

line_age_cycle <- join %>% 
  group_by(age_range) %>%
  summarise(mean(customer_cycles_amount), .groups = "drop") %>% 
  na.omit() %>% 
  rename(
    "mean_cycle" = 'mean(customer_cycles_amount)'
  )
scale_factor <- max(bars_age_error$n) / max(line_age_cycle$mean_cycle) #This is to illustrate the second y-axis later on in the bar chart
ggplot() +
  # bars
  geom_bar(
    data = bars_age_error,
    aes(x = age_range, y = n, fill = service_event_type),
    stat = "identity", position = "dodge", 
    color = "black", linewidth = 0.3
  ) +
  scale_fill_paletteer_d("nationalparkcolors::Acadia")+
  # line
  geom_line(
    data = line_age_cycle,
    aes(x = as.numeric(age_range), y = mean_cycle  * scale_factor),
    color = "red", linewidth = 1, group = 1
  ) +
  geom_point(
    data = line_age_cycle,
    aes(x = as.numeric(age_range), y = mean_cycle * scale_factor),
    color = "red", size = 3
  ) +
  # dual y-axis
  scale_y_continuous(
    name = "Number of service events",
    sec.axis = sec_axis(
      transform = ~ . / scale_factor,
      name = "Average total number of cycles"
    )
  ) +
  scale_x_discrete(name = "Machine age (years)") +
  labs(
    title = "Number of service events and average cycles per month by age range",
    fill = NULL) +
  theme_classic() +
  theme(
    legend.position        = "bottom",
    legend.direction       = "horizontal",
    axis.text              = element_text(size = 10),
    axis.title             = element_text(size = 10),
    axis.title.y.right     = element_text(color = "red"),
    axis.text.y.right      = element_text(color = "red")
  )

2. Failure type, warranty status, and repair type

This relationship can be showcased through a heatmap

# Making Heatmap to show relationship between error case, warranty status, and repair outcome

pcd <- join %>% 
  select(customer_warranty_status,repair_type, service_event_type, age_range, machine_id) %>% 
  na.omit()

pcd$customer_warranty_status <- as.factor(pcd$customer_warranty_status)
pcd$repair_type <- as.factor(pcd$repair_type)
min(pcd$cycle_per_month)
## [1] Inf
# Making a separate data frame for the heatmap. 

heatmap_df <- pcd %>%
  group_by(service_event_type, repair_type, customer_warranty_status) %>%
  summarise(count = n(), .groups = "drop") %>%
  complete(service_event_type, repair_type, customer_warranty_status,
           fill = list(count = 0))


# Plot
ggplot(heatmap_df, aes(x = repair_type, y = service_event_type, fill = count)) +
  geom_tile(color = "white", linewidth = 0.5) +
  geom_text(aes(label = ifelse(count == 0, "0", as.character(count))),
            size = 3.5, color = "white") +
  scale_fill_gradient(low = "#A4BED5FF", high = "#023743FF") +
  facet_wrap(~ customer_warranty_status) +        # <-- this is to have 2 maps for 2 warranty statuses
  labs(
    x    = "Repair Type",
    y    = "Failure Type",
    fill = "Service events"
  ) +
  theme_minimal() +
  theme(
    axis.text.x     = element_text(angle = 45, hjust = 1, size = 10),
    axis.text.y     = element_text(size = 10),
    axis.title      = element_text(size = 10),
    panel.grid      = element_blank(),
    legend.position = "right",
    strip.text      = element_text(size = 10, face = "bold")  # styles the facet titles
  )

Some interesting questions we could investigate from this map:

  1. Which failure types consistently lead to disposal rather than repair?

  2. Preventative repair? Where? Which part?

3. Machine use and Business Model

Do Rental and Pay-Per-Use machines get better care than Purchased ones?

Box plot: total cycles at time of service by BM — which model drives more machine use

bm_cycle <- join %>% 
  select(customer_business_model, repair_type, customer_cycles_amount, machine_program_60degrees,machine_program_40degrees,machine_program_eco)
  
ggplot(bm_cycle, aes(x = customer_business_model, y = customer_cycles_amount, fill = customer_business_model))+
  geom_boxplot(colour = "black", median.color = "red")+
  theme_classic()+
  scale_fill_paletteer_d("nationalparkcolors::Acadia")+
  theme(legend.position = "none",
        axis.text.x     = element_text(angle = 45, hjust = 1, size = 10))+
  labs(
    fill = NULL,
    x = "Business Model",
    y = "Total cycle amount"
  )

ggplot(bm_cycle, aes(x = repair_type, y = customer_cycles_amount, fill = repair_type))+
  geom_boxplot(colour = "black", median.color = "red")+
  theme_classic()+
  scale_fill_paletteer_d("nationalparkcolors::Acadia")+
  theme(legend.position = "none",
        axis.text.x     = element_text(angle = 45, hjust = 1, size = 10))+
  facet_grid(.~customer_business_model)+
  labs(
    fill = NULL,
    x = "Repair Type",
    y = "Total cycle amount"
  )

4. Service events by canton

Choropleth map

#Switzerland cantons from GISCO (Eurostat). This method requires the giscoR and sf packages!
#So do install.packages and then library first

swiss_cantons <- gisco_get_nuts(
  country    = "CH",
  nuts_level = 3, #This is canton level
  year       = "2021",
  resolution = "03" 
)

After doing this, it’s good to check the names in the swiss_cantons data, and then our data. Cause there can be mismatches such as umlauts or different spellings.

# Check names
unique(swiss_cantons$NAME_LATN)
unique(join$customer_canton)
# Fix canton names & aggregate data
canton_df <- join %>%
  mutate(customer_canton = recode(customer_canton,
                                  "Zurich"      = "Zürich",
                                  "Geneva"      = "Genève",
                                  "Lucerne"     = "Luzern",
                                  "Fribourg"    = "Freiburg",
                                  "Wallis"      = "Valais",
                                  "Basel-City"  = "Basel-Stadt"
  )) %>%
  group_by(customer_canton) %>%
  summarise(service_events = n(), .groups = "drop")
            
# Make abbreviations and join data
swiss_map <- swiss_cantons %>%
  left_join(canton_df, by = c("NAME_LATN" = "customer_canton"))

abbrev_lookup <- data.frame(
  NUTS_ID = c("CH011","CH012","CH013","CH021","CH022","CH023","CH024",
              "CH025","CH031","CH032","CH033","CH040","CH051","CH052",
              "CH053","CH054","CH055","CH056","CH057","CH061","CH062",
              "CH063","CH064","CH065","CH066","CH070"),
  abbr    = c("VD","VS","GE","BE","FR","SO","NE",
              "JU","BS","BL","AG","ZH","GL","SH",
              "AR","AI","SG","GR","TG","LU","UR",
              "SZ","OW","NW","ZG","TI")
)

swiss_map <- swiss_map %>%
  left_join(abbrev_lookup, by = "NUTS_ID")

# --- Step 5: plot
ggplot(swiss_map) +
  geom_sf(aes(fill = service_events), color = "white", linewidth = 0.4) +
  geom_sf_text(aes(label = abbr), size = 2.5, color = "gray20") +
  scale_fill_gradient(low = "#ffffcc", high = "#1a7a3c",
                      na.value = "grey90",
                      name = "Service events") +
  labs(title = "Service Events by Canton", subtitle = "2020–2023") +
  theme_void() +
  theme(
    legend.position = "right",
    plot.title      = element_text(size = 13, face = "bold"),
    plot.subtitle   = element_text(size = 10, color = "gray40")
  )

At this point it is good to cross reference and see if the high number of service events mean the machines here tend to fail more of if it’s purely due to customer volume

# customers per canton per business model
customers_per_canton <- join %>%
  mutate(customer_canton = recode(customer_canton,
                                  "Zurich"      = "Zürich",
                                  "Geneva"      = "Genève",
                                  "Lucerne"     = "Luzern",
                                  "Fribourg"    = "Freiburg",
                                  "Wallis"      = "Valais",
                                  "Basel-City"  = "Basel-Stadt"
  )) %>%
  group_by(customer_canton, customer_business_model) %>%
  summarise(
    service_events   = n(),
    unique_customers = n_distinct(customer_id),
    .groups = "drop"
  ) %>%
  mutate(events_per_customer = service_events / unique_customers)

#rank cantons by total service events
canton_order <- customers_per_canton %>%
  group_by(customer_canton) %>%
  summarise(total = sum(service_events)) %>%
  arrange(desc(total)) %>%
  pull(customer_canton)

customers_per_canton <- customers_per_canton %>%
  mutate(customer_canton = factor(customer_canton, levels = canton_order))

###Plot this data

ggplot(customers_per_canton,
       aes(x = customer_business_model,
           y = events_per_customer,
           fill = customer_business_model)) +
  geom_bar(stat = "identity", color = "black", linewidth = 0.3, width = 0.6) +
  geom_text(aes(label = round(events_per_customer, 1)),
            vjust = -0.5, size = 3) +
  scale_fill_manual(values = c("#ffffcc", "#78c679", "#1a7a3c")) +
  facet_wrap(~ customer_canton) +
  labs(x = NULL, y = "Service events per customer") +
  theme_classic() +
  theme(
    legend.position = "none",
    axis.text.x     = element_text(angle = 45, hjust = 1, size = 8),
    axis.text.y     = element_text(size = 8),
    strip.text      = element_text(size = 10, face = "bold"),
    panel.spacing   = unit(1, "lines")
  )

What we should pay attention to in this graph:

  • Jura showing 3 events in just Purchase model: could be one large customer skewing the number, or a data quality issue

  • Pay-Per-Use and Rental contracts are still concentrated in larger cantons, they haven’t penetrated rural or smaller cantons yet -> A business opportunity.

If failure rates are consistent everywhere, there’s no operational risk in expanding PPU nationally — the service burden per customer won’t increase.

  • Failure rate is consistent across cantons and business models, the problem isn’t geographic. This points toward product-level interventions — better components, predictive maintenance, or design improvements

Dot Density Graph

As discussed last call, I have completed the dot density graph with the following code

# There are some lat and lon values swapped, so we have to make sure they are correct
join2 <- join %>% 
  filter_out(customer_address_lat < 40)

weird <- join %>% 
  filter(customer_address_lat < 40)
weird <- rename(weird,
                "customer_address_lat" = "customer_address_lon",
                "customer_address_lon" = "customer_address_lat"
)

weird <- weird %>% 
  relocate(customer_address_lat, .before = customer_address_lon)

join2 <- bind_rows(join2, weird)

That was a prepocessing step, now we can move on to making the graph

#After this, we can make the density map
density <- st_as_sf(join2, coords = c("customer_address_lon", "customer_address_lat"), crs = 4269)

#This map is interactive
mapview(density)
#This one is an image
ggplot(density) +
  geom_point(
    data = join2, aes(x = customer_address_lon, y = customer_address_lat),
    fill = "#ffffcc", size = 2, pch=21, color = "black"
  ) +
  geom_sf(data = swiss_cantons, fill = "transparent", color = "black", linewidth = 0.6)+
  geom_sf(
    data = density, fill = "transparent",
    color = "transparent", linewidth = .6
  ) +
  labs(
    title = "Dot Density Map of Service Events",
    subtitle = "2020-2023"
  )+
  theme_void()+
  theme(
    plot.title      = element_text(size = 13, face = "bold"),
    plot.subtitle   = element_text(size = 10, color = "gray40")
  )