# Sample two Gaussian curves on a common grid: a standard normal (y1) and
# a normal with mean 2 and sd 2 whose height is multiplied by 0.1 (y2).
x <- seq(-5, 10, by = 0.1)
y1 <- dnorm(x, mean = 0, sd = 1)
y2 <- dnorm(x, mean = 2, sd = 2) * 0.1
# Plot data
plot(x, y1, type = "p", col = "steelblue")
points(x, y2, col = "tomato")
# AUC integration helper.
#
# Fits a smoothing spline to the sampled curve (x, y) and numerically
# integrates the fitted curve between `lower` and `upper`.
#
# Args:
#   x, y:  numeric vectors with the sampled abscissae and ordinates.
#   lower: lower integration limit.
#   upper: upper integration limit.
# Returns:
#   The estimated area under the smoothed curve (a single number).
#   A warning is raised when integrate() reports anything other than "OK".
auc_integrator <- function(x, y, lower, upper) {
  smoothed.fit <- smooth.spline(x, y)
  # The integrand gets its own argument name so it does not shadow the
  # sample points `x` of the enclosing function.
  smoothed.func <- function(x.new) {
    predict(smoothed.fit, x.new)$y
  }
  result <- integrate(smoothed.func, lower, upper,
                      subdivisions = 2000, stop.on.error = FALSE)
  # With stop.on.error = FALSE integrate() still returns an estimate when
  # the quadrature failed; surface that instead of silently using it.
  if (!identical(result$message, "OK")) {
    warning("auc_integrator: integrate() reported: ", result$message,
            call. = FALSE)
  }
  return(result$value)
}
# Rescale each curve by its smoothed area over [-5, 10], as measured by
# auc_integrator(), so the two curves are compared at equal area.
y1_norm <- y1 / auc_integrator(x, y1, -5, 10)
y2_norm <- y2 / auc_integrator(x, y2, -5, 10)
# Plot normalized data
plot(x, y1_norm, type = "p", col = "steelblue")
points(x, y2_norm, col = "tomato")
#======================================================================#
# Jaccard distance between two distributions
#======================================================================#
# Both sampled curves are smoothed with smooth.spline(); the distance is
#   1 - AUC(pointwise minimum) / AUC(pointwise maximum),
# where the minimum is integrated over the overlap of the two x-ranges
# and the maximum over their combined range.
#
# Args:
#   x1, y1: abscissae and ordinates of the first sampled curve.
#   x2, y2: abscissae and ordinates of the second sampled curve.
# Returns:
#   A single number, capped at 1. Returns 1 when the x-ranges do not
#   overlap, and NaN (with a warning) when the union area is not a
#   positive finite number.
# NOTE(review): when the x-ranges differ, the union integrand evaluates
# each spline outside the data it was fitted to -- confirm this is intended.
jaccard_dist <- function(x1, y1, x2, y2) {
  # Negative ordinates are clamped to zero before smoothing.
  y1[y1 < 0] <- 0
  y2[y2 < 0] <- 0

  # Combined x-range (for the union) and overlapping x-range (for the
  # intersection).
  x.lower <- min(x1, x2)
  x.upper <- max(x1, x2)
  intersec.x.lower <- max(min(x1), min(x2))
  intersec.x.upper <- min(max(x1), max(x2))

  # Without a common x-range the intersection is empty. Return the maximal
  # distance directly rather than integrating over a reversed interval.
  if (intersec.x.lower >= intersec.x.upper) {
    return(1)
  }

  smoothed.fit.1 <- smooth.spline(x1, y1)
  smoothed.fit.2 <- smooth.spline(x2, y2)

  # predict() is vectorized over its evaluation points, so the pointwise
  # min/max can be taken with pmin()/pmax() in a single call.
  intersec_func <- function(x.new) {
    pmin(predict(smoothed.fit.1, x.new)$y, predict(smoothed.fit.2, x.new)$y)
  }
  union_func <- function(x.new) {
    pmax(predict(smoothed.fit.1, x.new)$y, predict(smoothed.fit.2, x.new)$y)
  }

  # Integrate and report a quadrature that did not finish cleanly instead
  # of silently using its estimate.
  integrate_value <- function(f, lower, upper) {
    res <- integrate(f, lower, upper,
                     subdivisions = 2000, stop.on.error = FALSE)
    if (!identical(res$message, "OK")) {
      warning("jaccard_dist: integrate() reported: ", res$message,
              call. = FALSE)
    }
    res$value
  }

  intersec.AUC <- integrate_value(intersec_func,
                                  intersec.x.lower, intersec.x.upper)
  union.AUC <- integrate_value(union_func, x.lower, x.upper)

  # The ratio is undefined without a positive union area.
  if (!is.finite(union.AUC) || union.AUC <= 0) {
    warning("jaccard_dist: union area is not positive; returning NaN",
            call. = FALSE)
    return(NaN)
  }

  similarity.jaccard <- intersec.AUC / union.AUC
  dist.jaccard <- min(1, 1 - similarity.jaccard) # no more than 1
  return(dist.jaccard)
}
# Jaccard distance between the raw curves; the line below is the console
# output recorded with the original listing.
jaccard_dist(x, y1, x, y2)
## [1] 0.9362433
# Jaccard distance between the area-normalized curves; again followed by
# the recorded console output.
jaccard_dist(x, y1_norm, x, y2_norm)
## [1] 0.7067791