# A quick R example that plots the ECDF curves compared by the Kolmogorov-Smirnov test, together with the maximum distance (D) between the two distributions. Both a ggplot2 version and a base R version are shown.
require(ggplot2)
## Loading required package: ggplot2
# Simulate two distributions - your data goes here!
# NOTE: no seed is set, so the samples (and the resulting D) differ from run
# to run; call set.seed() beforehand if reproducible output is needed.
sample1 <- rnorm(10000, 10, 5)
sample2 <- rnorm(10000, 1, 5)
group <- c(rep("sample1", length(sample1)), rep("sample2", length(sample2)))
dat <- data.frame(KSD = c(sample1, sample2), group = group)

# Create the ECDF of each sample
cdf1 <- ecdf(sample1)
cdf2 <- ecdf(sample2)

# Find the point of greatest vertical distance between the two ECDFs.
# Both ECDFs are step functions that only jump at observed values, so the
# maximum of |cdf1 - cdf2| is always attained at one of the pooled
# observations. Evaluating there gives the exact K-S statistic D, whereas an
# evenly spaced grid between min and max can step over the maximising point.
minMax <- sort(unique(c(sample1, sample2)))
ecdf_gap <- abs(cdf1(minMax) - cdf2(minMax))
# which.max() returns the first maximiser, so x0 is always a single value
# (no float `==` comparison, no ties producing a vector).
x0 <- minMax[which.max(ecdf_gap)]
y0 <- cdf1(x0)
y1 <- cdf2(x0)
# The ECDF curves and the point of greatest distance can then be plotted, for example with ggplot2:
# png(file = "c:/temp/ks.png", width = 1024, height = 768, type="cairo-png")
# ECDF of both samples with the largest vertical gap (D) marked in red.
# The markers are added with annotate() instead of geom_*(aes(...)): a geom
# layer with constant aesthetics inherits `dat` and would draw the very same
# segment/point once per data row, whereas annotate() draws each exactly once.
ggplot(dat, aes(x = KSD, group = group, color = group)) +
  stat_ecdf(size = 1) +
  theme_bw(base_size = 28) +
  theme(legend.position = "top") +
  xlab("Sample") +
  ylab("ECDF") +
  # geom_line(size = 1) +
  # dashed vertical line joining the two ECDFs at the point of maximum distance
  annotate("segment", x = x0[1], y = y0[1], xend = x0[1], yend = y1[1],
           linetype = "dashed", color = "red") +
  # end points of the gap: one on each ECDF curve
  annotate("point", x = x0[1], y = y0[1], color = "red", size = 8) +
  annotate("point", x = x0[1], y = y1[1], color = "red", size = 8) +
  ggtitle("K-S Test: Sample 1 / Sample 2") +
  theme(legend.title = element_blank())
# dev.off()
######################### non ggplot example
# Draw both ECDFs as step curves on one set of axes.
# The x axis is set from the knots of BOTH ECDFs: the first plot() call fixes
# the axis limits, so without xlim the part of cdf2 that lies outside the
# range of cdf1 would be clipped.
plot(cdf1, verticals = TRUE, do.points = FALSE, col = "blue",
     xlim = range(knots(cdf1), knots(cdf2)))
plot(cdf2, verticals = TRUE, do.points = FALSE, col = "green", add = TRUE)
## alternative, use standard R plot of ecdf
# plot(cdf1, col = "blue")
# lines(cdf2, col = "green")
# Mark the point(s) of greatest distance on each curve and join them.
points(c(x0, x0), c(y0, y1), pch = 16, col = "red")
segments(x0, y0, x0, y1, col = "red", lty = "dotted")
## alternative, down to x axis
# segments(x0, 0, x0, y1, col = "red", lty = "dotted")