I built a separate data frame for the bars and another for the line, so the plotting code is simpler and the original data is left untouched.
# Bars: number of service events for every combination of age range and
# service event type. Rows with a missing key are dropped.
bars_age_error <- join %>%
  select(service_event_type, age_range) %>%
  count(age_range, service_event_type) %>%
  na.omit()

# Line: mean of customer_cycles_amount per age range.
# na.rm = TRUE keeps an age range in the result even when some of its rows
# have a missing cycle amount; without it a single NA would turn the whole
# group's mean into NA and na.omit() would silently remove that age range.
# na.omit() still drops the group whose age_range is itself NA, and any
# group with no usable values (mean is NaN).
line_age_cycle <- join %>%
  group_by(age_range) %>%
  summarise(
    mean_cycle = mean(customer_cycles_amount, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  na.omit()

# Ratio between the largest bar and the largest mean; used to draw the line
# on the bars' y scale and to label the secondary y-axis in the chart.
scale_factor <- max(bars_age_error$n) / max(line_age_cycle$mean_cycle)
# Dual-axis chart: dodged bars show the number of service events per age
# range and service event type; the red line and points show the mean cycles
# per age range, rescaled by scale_factor so they share the bars' y scale.
ggplot() +
  # Bars
  geom_col(
    data = bars_age_error,
    aes(x = age_range, y = n, fill = service_event_type),
    position = "dodge", color = "black", linewidth = 0.3
  ) +
  scale_fill_paletteer_d("nationalparkcolors::Acadia") +
  # Line: map age_range itself (not as.numeric(age_range)) so the points sit
  # on the same discrete positions as the bars even when age_range is a
  # character column or a factor with unused levels. group = 1 joins all
  # categories into a single line.
  geom_line(
    data = line_age_cycle,
    aes(x = age_range, y = mean_cycle * scale_factor, group = 1),
    color = "red", linewidth = 1
  ) +
  geom_point(
    data = line_age_cycle,
    aes(x = age_range, y = mean_cycle * scale_factor),
    color = "red", size = 3
  ) +
  # Dual y-axis: the secondary axis undoes the rescaling applied to the line.
  scale_y_continuous(
    name = "Number of service events",
    sec.axis = sec_axis(
      transform = ~ . / scale_factor,
      name = "Average number of cycles per month"
    )
  ) +
  scale_x_discrete(name = "Machine age (years)") +
  labs(
    title = "Number of service events and average cycles per month by age range",
    fill = NULL
  ) +
  theme_classic() +
  theme(
    legend.position = "bottom",
    legend.direction = "horizontal",
    axis.text = element_text(size = 10),
    axis.title = element_text(size = 10),
    axis.title.y.right = element_text(color = "red"),
    axis.text.y.right = element_text(color = "red")
  )
2. Failure type, warranty status, and repair type
This relationship can be showcased through a heatmap
# Input data for the heatmap relating error case, warranty status, and
# repair outcome: keep only the columns of interest, drop incomplete rows,
# and store the two faceting/axis variables as factors.
pcd <- join %>%
  select(
    customer_warranty_status, repair_type, service_event_type,
    age_range, machine_id
  ) %>%
  na.omit() %>%
  mutate(
    customer_warranty_status = as.factor(customer_warranty_status),
    repair_type = as.factor(repair_type)
  )

# NOTE: the former `min(pcd$cycle_per_month)` check was removed. That column
# is not among the ones selected into `pcd`, so the call could only ever
# return Inf (with a warning) and told us nothing about the data.
# Tally service events for each failure type / repair type / warranty status
# combination, then add the combinations that never occur with a count of 0
# so every tile of the heatmap is drawn.
heatmap_df <- pcd %>%
  count(
    service_event_type, repair_type, customer_warranty_status,
    name = "count"
  ) %>%
  complete(
    service_event_type, repair_type, customer_warranty_status,
    fill = list(count = 0)
  )

# Heatmap: repair type on x, failure type on y, tile shade = number of
# service events, with the count printed on each tile.
ggplot(
  heatmap_df,
  aes(x = repair_type, y = service_event_type, fill = count)
) +
  geom_tile(color = "white", linewidth = 0.5) +
  geom_text(aes(label = as.character(count)), size = 3.5, color = "white") +
  scale_fill_gradient(low = "#A4BED5FF", high = "#023743FF") +
  # One panel per warranty status.
  facet_wrap(~ customer_warranty_status) +
  labs(
    x = "Repair Type",
    y = "Failure Type",
    fill = "Service events"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 10),
    axis.text.y = element_text(size = 10),
    axis.title = element_text(size = 10),
    panel.grid = element_blank(),
    legend.position = "right",
    # Styles the facet titles.
    strip.text = element_text(size = 10, face = "bold")
  )
Some interesting questions we could investigate from this map:
Which failure types consistently lead to disposal rather than repair?