Co-occurrence calculation

binary-value

# Two presence/absence (0/1) vectors over the same six positions.
a <- c(1, 0, 1, 0, 1, 1)
b <- c(1, 0, 1, 0, 0, 0)
# Stack them as named rows so each row is one item to compare.
bi_mat <- matrix(
  c(a, b),
  nrow = 2,
  byrow = TRUE,
  dimnames = list(c("a", "b"), NULL)
)
print(bi_mat)
##   [,1] [,2] [,3] [,4] [,5] [,6]
## a    1    0    1    0    1    1
## b    1    0    1    0    0    0

library("proxy")  # package for computing distance matrices

# Jaccard similarity for 0/1 vectors: shared positions divided by positions
# set in either vector (|a| + |b| - shared, by inclusion-exclusion).
n_both <- sum(a * b)
n_either <- sum(a) + sum(b) - n_both
n_both / n_either
## [1] 0.5
# Same measure computed by proxy, for comparison with the manual value.
simil(x = bi_mat, method = "jaccard")
##     a
## b 0.5

# Cosine similarity: dot product divided by the product of the vector norms.
dot_ab <- sum(a * b)
sq_norm_a <- sum(a^2)
sq_norm_b <- sum(b^2)
dot_ab / sqrt(sq_norm_a * sq_norm_b)
## [1] 0.7071
# Same measure computed by proxy, for comparison with the manual value.
simil(x = bi_mat, method = "cosine")
##        a
## b 0.7071

# Simpson (overlap) coefficient for 0/1 vectors: shared positions divided by
# the size of the smaller of the two sets.
n_both <- sum(a * b)
n_smaller <- min(sum(a), sum(b))
n_both / n_smaller
## [1] 1
# Same measure computed by proxy, for comparison with the manual value.
simil(x = bi_mat, method = "simpson")
##    a
## b  1

integer-value

# Two non-negative count vectors over the same six positions.
aa <- c(4, 0, 3, 0, 2, 1)
bb <- c(4, 0, 1, 0, 0, 0)
# Stack them as named rows so each row is one item to compare.
r_mat <- matrix(
  c(aa, bb),
  nrow = 2,
  byrow = TRUE,
  dimnames = list(c("aa", "bb"), NULL)
)
print(r_mat)
##    [,1] [,2] [,3] [,4] [,5] [,6]
## aa    4    0    3    0    2    1
## bb    4    0    1    0    0    0

# Extended Jaccard (Tanimoto) similarity for real-valued vectors:
# dot product divided by (|aa|^2 + |bb|^2 - dot product).
# http://en.wikipedia.org/wiki/Jaccard_index#Tanimoto_Similarity_and_Distance
dot_ab <- sum(aa * bb)
sq_norm_total <- sum(aa^2) + sum(bb^2)
dot_ab / (sq_norm_total - dot_ab)
## [1] 0.6786
# proxy calls this measure "eJaccard".
simil(x = r_mat, method = "eJaccard")
##        aa
## bb 0.6786

# Cosine similarity for count vectors: dot product divided by the product of
# the vector norms.
# http://en.wikipedia.org/wiki/Cosine_similarity
dot_ab <- sum(aa * bb)
sq_norm_a <- sum(aa^2)
sq_norm_b <- sum(bb^2)
dot_ab / sqrt(sq_norm_a * sq_norm_b)
## [1] 0.8413
# Same measure computed by proxy, for comparison with the manual value.
simil(x = r_mat, method = "cosine")
##        aa
## bb 0.8413

# Simpson (overlap) coefficient generalized to count vectors by treating each
# vector as a multiset: the intersection size is the sum of element-wise
# minima and each set's size is the sum of its counts. For 0/1 input this
# reduces to the binary formula |A n B| / min(|A|, |B|), and for non-negative
# counts the result stays within [0, 1]. (A ratio of the dot product to
# min(sum(aa)^2, sum(bb)^2) does not reduce to the binary formula.)
sum(pmin(aa, bb))/min(sum(aa), sum(bb))  # simpson (multiset overlap)
## [1] 1
# proxy also returns 1 here, but its "simpson" entry (printed below) is
# registered as type "binary", so it presumably compares presence/absence
# only -- TODO confirm. The two values agree for this data because bb never
# exceeds aa element-wise; they need not agree in general.
simil(r_mat, method = "simpson")
##    aa
## bb  1
pr_DB$get_entry("simpson")  # registry entry: type is "binary"
##       names Simpson
##         FUN pr_Simpson
##    distance FALSE
##      PREFUN NA
##     POSTFUN NA
##     convert pr_simil2dist
##        type binary
##        loop TRUE
##       C_FUN FALSE
##     PACKAGE proxy
##        abcd TRUE
##     formula a / min{(a + b), (a + c)}
##   reference Simpson, G.G. (1960). Notes on the measurement of
##             faunal resemblance. American Journal of Science 258-A:
##             300-311.
## description The Simpson Similarity (used in Zoology).