# Co-occurrence calculation
# Binary values
# Two binary (0/1) vectors of length six, stacked row-wise into a 2 x 6
# matrix whose row names are "a" and "b" (no column names).
a <- c(1, 0, 1, 0, 1, 1)
b <- c(1, 0, 1, 0, 0, 0)
bi_mat <- matrix(
  c(a, b),
  nrow = 2, byrow = TRUE,
  dimnames = list(c("a", "b"), NULL)
)
print(bi_mat)
## [,1] [,2] [,3] [,4] [,5] [,6]
## a 1 0 1 0 1 1
## b 1 0 1 0 0 0
library("proxy") # package for computing distance / similarity matrices

# Each measure is computed by hand from a and b, then via proxy::simil() on
# bi_mat so the two results can be compared.

# Jaccard
local({
  both <- sum(a * b)
  both / (sum(a + b) - both)
})
## [1] 0.5
print(simil(bi_mat, method = "jaccard"))
## a
## b 0.5

# Cosine
local({
  dot <- sum(a * b)
  dot / sqrt(sum(a^2) * sum(b^2))
})
## [1] 0.7071
print(simil(bi_mat, method = "cosine"))
## a
## b 0.7071

# Simpson
local({
  totals <- c(sum(a), sum(b))
  sum(a * b) / min(totals)
})
## [1] 1
print(simil(bi_mat, method = "simpson"))
## a
## b 1
# Integer values
# Two integer-valued vectors of length six, stacked row-wise into a 2 x 6
# matrix whose row names are "aa" and "bb" (no column names).
aa <- c(4, 0, 3, 0, 2, 1)
bb <- c(4, 0, 1, 0, 0, 0)
r_mat <- matrix(
  c(aa, bb),
  nrow = 2, byrow = TRUE,
  dimnames = list(c("aa", "bb"), NULL)
)
print(r_mat)
## [,1] [,2] [,3] [,4] [,5] [,6]
## aa 4 0 3 0 2 1
## bb 4 0 1 0 0 0
# Extended Jaccard (= Tanimoto) similarity, computed by hand and via simil().
# See http://en.wikipedia.org/wiki/Jaccard_index#Tanimoto_Similarity_and_Distance
local({
  dot <- sum(aa * bb)
  dot / (sum(aa^2) + sum(bb^2) - dot)
})
## [1] 0.6786
print(simil(r_mat, method = "eJaccard"))
## aa
## bb 0.6786

# Cosine similarity, computed by hand and via simil().
# See http://en.wikipedia.org/wiki/Cosine_similarity
local({
  sq_norms <- sum(aa^2) * sum(bb^2)
  sum(aa * bb) / sqrt(sq_norms)
})
## [1] 0.8413
print(simil(r_mat, method = "cosine"))
## aa
## bb 0.8413
# Simpson (overlap) coefficient extended to integer-valued vectors: the dot
# product divided by the smaller sum of squares. This uses the same sum(x^2)
# terms as the extended Jaccard and cosine formulas, and for 0/1 vectors it
# reduces to sum(a * b) / min(sum(a), sum(b)).
# The denominator must be min(sum(aa^2), sum(bb^2)); squaring the sums instead
# (min(sum(aa)^2, sum(bb)^2)) yields 0.5 rather than 1 on the 0/1 vectors
# c(1, 0, 1, 0, 1, 1) and c(1, 0, 1, 0, 0, 0).
# NOTE(review): unlike the binary coefficient this ratio is not capped at 1.
# For non-negative counts, sum(pmin(aa, bb)) / min(sum(aa), sum(bb)) is a
# bounded alternative -- confirm which definition is wanted.
sum(aa * bb) / min(sum(aa^2), sum(bb^2)) # simpson
## [1] 1.118
# proxy registers "simpson" with type "binary" (see the registry entry below),
# so the value printed here is not expected to match the formula above.
# NOTE(review): 1 equals the binary formula applied to the nonzero pattern of
# aa / bb (2 shared positions / min(4, 2)) -- confirm how simil() coerces
# non-binary input.
simil(r_mat, method = "simpson")
## aa
## bb 1
pr_DB$get_entry("simpson") # registry entry: the measure is binary-only
## names Simpson
## FUN pr_Simpson
## distance FALSE
## PREFUN NA
## POSTFUN NA
## convert pr_simil2dist
## type binary
## loop TRUE
## C_FUN FALSE
## PACKAGE proxy
## abcd TRUE
## formula a / min{(a + b), (a + c)}
## reference Simpson, G.G. (1960). Notes on the measurement of
## faunal resemblance. American Journal of Science 258-A:
## 300-311.
## description The Simpson Similarity (used in Zoology).