Load packages.
library(factoextra)
library(cluster)
library(tidyverse)
Load data.
# Read the toolmark signal table from CSV into a data frame.
# NOTE(review): the path is absolute and machine-specific; the script will
# only run where this exact file location exists.
toolmarks_dat <- read.csv(
  file = "/Users/mariacuellar/Desktop/NYU interview/Teaching materials/large_df.csv"
)
We will draw a single toolmark, the marks made by the two sides of the same screwdriver, eight replicate marks from one tool-side, and marks from different tools (with and without their replicates).
# Single toolmark: aligned signal of tool 1, side A, angle 80, mark 1.
# Colour is mapped to `mark`, but its legend is hidden via guides().
ggplot(
  data = filter(
    toolmarks_dat,
    tool == "1",
    side == "A",
    angle == "80",
    mark == "1"
  ),
  mapping = aes(x = x, y = aligned, color = mark)
) +
  geom_line() +
  xlab("Signal length (mm)") +
  ylab("Signal depth (mm)") +
  ggtitle("Single toolmark") +
  guides(color = "none") +
  theme_minimal() +
  ylim(-0.002, 0.002)
# Marks from sides A and B of tool 1 (angle 80, mark 1), drawn in separate
# vertically stacked facets, one per side. The colour legend is suppressed
# because the facet strips already identify the side.
ggplot(
  data = filter(
    toolmarks_dat,
    tool == "1",
    side %in% c("A", "B"),
    angle == "80",
    mark == "1"
  ),
  mapping = aes(x = x, y = aligned, color = side)
) +
  geom_line() +
  facet_wrap(~ side, ncol = 1) +
  scale_color_brewer(palette = "Set1") +
  xlab("Signal length (mm)") +
  ylab("Signal depth (mm)") +
  ggtitle("Marks from different sides of screwdriver") +
  labs(color = "Side") +
  guides(color = "none") +
  theme_minimal() +
  ylim(-0.004, 0.004)
# Replicate marks from one tool-side: every mark recorded for tool 1,
# side A, angle 80, overlaid in one panel and coloured by mark (as a factor).
ggplot(
  data = filter(toolmarks_dat, tool == "1", side == "A", angle == "80"),
  mapping = aes(x = x, y = aligned, colour = as.factor(mark))
) +
  geom_line() +
  scale_color_brewer(palette = "Blues") +
  xlab("Signal length (mm)") +
  ylab("Signal depth (mm)") +
  ggtitle("Replicate marks made with same tool-side") +
  labs(color = "Mark") +
  theme_minimal() +
  ylim(-0.002, 0.002)
# One mark (mark 1, side A, angle 80) from each of tools 1 and 2, shown in
# separate vertically stacked facets, one per tool. The colour legend is
# suppressed because the facet strips already identify the tool.
ggplot(
  data = filter(
    toolmarks_dat,
    tool %in% c(1, 2),
    side == "A",
    angle == "80",
    mark == "1"
  ),
  mapping = aes(x = x, y = aligned, color = as.factor(tool))
) +
  geom_line() +
  facet_wrap(~ tool, ncol = 1) +
  scale_color_brewer(palette = "Set1") +
  xlab("Signal length (mm)") +
  ylab("Signal depth (mm)") +
  ggtitle("Two marks made by different tools") +
  labs(color = "Tool") +
  guides(color = "none") +
  theme_minimal() +
  ylim(-0.004, 0.004)
# All replicate marks for tools 1 and 2 (side A, angle 80), one facet per
# tool, with each replicate coloured by its mark number.
#
# Membership is tested with %in% rather than `tool == c(1, 2)`: `==` would
# recycle c(1, 2) down the column and compare row 1 to 1, row 2 to 2, row 3
# to 1, and so on, silently discarding rows whose tool does not happen to
# line up with that alternating pattern.
toolmarks_dat %>%
  filter(tool %in% c(1, 2), side == "A", angle == "80") %>%
  ggplot(aes(x = x, y = aligned, colour = as.factor(mark))) +
  geom_line() +
  labs(
    x = "Signal length (mm)",
    y = "Signal depth (mm)",
    title = "Marks from different tools, with their corresponding replicates",
    color = "Mark"
  ) +
  facet_wrap(vars(tool), ncol = 1) +
  scale_color_brewer(palette = "Blues") +
  theme_minimal() +
  ylim(-0.004, 0.004)
Load the similarity scores, which will be used for clustering.
# Restore the serialized similarity-score object used as clustering input.
# NOTE(review): the path is absolute and machine-specific.
sim_score <- readRDS(
  file = "/Users/mariacuellar/Desktop/NYU interview/Teaching materials/sim_score.rds"
)
Find the optimal number of clusters.
# Plot the cluster-count diagnostic for sim_score, using PAM as the
# clustering function (fviz_nbclust's default method is used).
# Author's note: the kmeans variant, fviz_nbclust(sim_score, kmeans),
# gave 8 clusters when 6 were expected, so it is left disabled.
fviz_nbclust(x = sim_score, FUNcluster = pam)
Draw a cluster plot using kmeans.
# Fit k-means with 6 centers on sim_score and draw the cluster plot.
# NOTE(review): kmeans() chooses its starting centers at random when given
# a count, so this plot may differ between runs unless a seed is set.
# Optional, currently disabled: ellipse.type = "norm".
fviz_cluster(
  object = kmeans(x = sim_score, centers = 6),
  data = sim_score,
  ggtheme = theme_minimal()
)
Draw a cluster plot using pam.
# Partition sim_score into 6 clusters with PAM (Euclidean metric, no
# standardization), keep the fit in `pam.res`, then draw its cluster plot.
pam.res <- pam(
  x = sim_score,
  k = 6,
  metric = "euclidean",
  stand = FALSE
)
fviz_cluster(
  object = pam.res,
  data = sim_score,
  ggtheme = theme_minimal()
)