Load libraries

library(knitr)
library(rmarkdown)
library(tidyverse)
library(feather)
library(data.table)
library(sna) # gcor package

Get data for test case.

# read in two sets of pairiwise dists (ENG AND FRE)
pairwise_dists1 <-  read_feather("ets/data/ENG_common_word_dists.feather") %>%
                      data.table(key = c("w1", "w2"))

pairwise_dists2 <-  read_feather("ets/data/FRE_common_word_dists.feather") %>%
                      data.table(key = c("w1", "w2"))

# merge and get toy subset
deciles_df <- read_csv("ets/data/concreteness_deciles.csv") %>%
  mutate(conc_tile1 = conc_tile,
         conc_tile2 = conc_tile,
         w1 = word,
         w2 = word) %>%
  select(-n, -word, -conc_tile)

merged_dists <- merge(pairwise_dists1, 
                          pairwise_dists2, by = c("w1", "w2"))  %>%
      left_join(deciles_df %>% select(w1, conc_tile1), by = "w1") %>%
      left_join(deciles_df %>% select(w2, conc_tile2), by = "w2") %>%
      filter(w1 != w2) %>%
      filter(conc_tile1 == 2, conc_tile2 == 2)

Symmetrical case: n = m

test2words_sym <-  deciles_df %>% filter(conc_tile1 == 2) %>% slice(1:4) %>% pull(w1) 
merged_dists_sym <- merged_dists %>%
  filter(w1 %in% test2words_sym, # 4 words
         w2 %in% test2words_sym) # 4 words

merged_dists_sym
##            w1         w2  cos_dist.x  cos_dist.y conc_tile1 conc_tile2
## 1     ability absolutely  0.16279096  0.31328938          2          2
## 2     ability   accurate -0.02158322  0.14075872          2          2
## 3     ability   advanced  0.06797462 -0.09600907          2          2
## 4  absolutely    ability  0.16279096  0.31328938          2          2
## 5  absolutely   accurate  0.06819016  0.34012988          2          2
## 6  absolutely   advanced  0.03503687  0.01033553          2          2
## 7    accurate    ability -0.02158322  0.14075872          2          2
## 8    accurate absolutely  0.06819016  0.34012988          2          2
## 9    accurate   advanced  0.10846877  0.41404534          2          2
## 10   advanced    ability  0.06797462 -0.09600907          2          2
## 11   advanced absolutely  0.03503687  0.01033553          2          2
## 12   advanced   accurate  0.10846877  0.41404534          2          2

cor

Estimate =

cor(merged_dists_sym$cos_dist.x, merged_dists_sym$cos_dist.y)
## [1] 0.4909756

gcor

gcor takes data in “wide” format - convert data to matrix.

get_regression_mat <- function(this_df, target_value){
  this_df %>%
    select(w1, w2, target_value) %>%
    spread(w2, target_value) %>%
    select(-w1) %>%
    as.matrix()
}

Mat1

mat1 <- get_regression_mat(merged_dists_sym, "cos_dist.x")
mat1
##          ability absolutely    accurate   advanced
## [1,]          NA 0.16279096 -0.02158322 0.06797462
## [2,]  0.16279096         NA  0.06819016 0.03503687
## [3,] -0.02158322 0.06819016          NA 0.10846877
## [4,]  0.06797462 0.03503687  0.10846877         NA

Mat2

mat2 <- get_regression_mat(merged_dists_sym, "cos_dist.y")
mat2
##          ability absolutely  accurate    advanced
## [1,]          NA 0.31328938 0.1407587 -0.09600907
## [2,]  0.31328938         NA 0.3401299  0.01033553
## [3,]  0.14075872 0.34012988        NA  0.41404534
## [4,] -0.09600907 0.01033553 0.4140453          NA

Estimate =

out_array <- array(0, dim = c(2, dim(mat1)))
out_array[1,,] <- mat1
out_array[2,,] <- mat2

gcor(out_array, diag = TRUE)
##           1         2
## 1 1.0000000 0.4909756
## 2 0.4909756 1.0000000

The first line of code in the gcor function is this: “dat <- as.sociomatrix.sna(dat)”. Let’s run that here. Everything looks fine.

as.sociomatrix.sna(out_array)
## , , 1
## 
##      [,1]      [,2]        [,3]        [,4]
## [1,]   NA 0.1627910 -0.02158322  0.06797462
## [2,]   NA 0.3132894  0.14075872 -0.09600907
## 
## , , 2
## 
##           [,1] [,2]       [,3]       [,4]
## [1,] 0.1627910   NA 0.06819016 0.03503687
## [2,] 0.3132894   NA 0.34012988 0.01033553
## 
## , , 3
## 
##             [,1]       [,2] [,3]      [,4]
## [1,] -0.02158322 0.06819016   NA 0.1084688
## [2,]  0.14075872 0.34012988   NA 0.4140453
## 
## , , 4
## 
##             [,1]       [,2]      [,3] [,4]
## [1,]  0.06797462 0.03503687 0.1084688   NA
## [2,] -0.09600907 0.01033553 0.4140453   NA

Asymmetrical case: n != m

test2words_asym <-  deciles_df %>% filter(conc_tile1 == 2)  %>% slice(1:5)  %>% pull(w1) 

test2words_asym <- merged_dists %>%
  filter(w1 %in% test2words_sym, # 4 words
         w2 %in% test2words_asym) # 5 words

test2words_asym
##            w1         w2   cos_dist.x  cos_dist.y conc_tile1 conc_tile2
## 1     ability absolutely  0.162790959  0.31328938          2          2
## 2     ability   accurate -0.021583222  0.14075872          2          2
## 3     ability   advanced  0.067974621 -0.09600907          2          2
## 4     ability  advantage  0.188014398  0.17588350          2          2
## 5  absolutely    ability  0.162790959  0.31328938          2          2
## 6  absolutely   accurate  0.068190159  0.34012988          2          2
## 7  absolutely   advanced  0.035036873  0.01033553          2          2
## 8  absolutely  advantage  0.002784793  0.13582860          2          2
## 9    accurate    ability -0.021583222  0.14075872          2          2
## 10   accurate absolutely  0.068190159  0.34012988          2          2
## 11   accurate   advanced  0.108468768  0.41404534          2          2
## 12   accurate  advantage  0.067214028  0.16318840          2          2
## 13   advanced    ability  0.067974621 -0.09600907          2          2
## 14   advanced absolutely  0.035036873  0.01033553          2          2
## 15   advanced   accurate  0.108468768  0.41404534          2          2
## 16   advanced  advantage -0.012458116  0.04876743          2          2

cor

Estimate =

cor(test2words_asym$cos_dist.x, test2words_asym$cos_dist.y)
## [1] 0.4537524

gcor

Mat1

mat1 <- get_regression_mat(test2words_asym, "cos_dist.x")
mat1
##          ability absolutely    accurate   advanced    advantage
## [1,]          NA 0.16279096 -0.02158322 0.06797462  0.188014398
## [2,]  0.16279096         NA  0.06819016 0.03503687  0.002784793
## [3,] -0.02158322 0.06819016          NA 0.10846877  0.067214028
## [4,]  0.06797462 0.03503687  0.10846877         NA -0.012458116

Mat2

mat2 <- get_regression_mat(test2words_asym, "cos_dist.y")
mat2
##          ability absolutely  accurate    advanced  advantage
## [1,]          NA 0.31328938 0.1407587 -0.09600907 0.17588350
## [2,]  0.31328938         NA 0.3401299  0.01033553 0.13582860
## [3,]  0.14075872 0.34012988        NA  0.41404534 0.16318840
## [4,] -0.09600907 0.01033553 0.4140453          NA 0.04876743

Estimate =

out_array <- array(0, dim = c(2, dim(mat1)))
out_array[1,,] <- mat1
out_array[2,,] <- mat2

gcor(out_array, diag = TRUE)
##          1        2
## 1 1.000000 0.665992
## 2 0.665992 1.000000

The first line of code in the gcor function is this: “dat <- as.sociomatrix.sna(dat)”. Let’s run that here. Adds a bunch of zeros.

as.sociomatrix.sna(out_array)
## , , 1
## 
##      [,1] [,2] [,3] [,4] [,5]      [,6]        [,7]        [,8]      [,9]
## [1,]    0    0    0    0   NA 0.1627910 -0.02158322  0.06797462 0.1880144
## [2,]    0    0    0    0   NA 0.3132894  0.14075872 -0.09600907 0.1758835
## 
## , , 2
## 
##      [,1] [,2] [,3] [,4]      [,5] [,6]       [,7]       [,8]        [,9]
## [1,]    0    0    0    0 0.1627910   NA 0.06819016 0.03503687 0.002784793
## [2,]    0    0    0    0 0.3132894   NA 0.34012988 0.01033553 0.135828599
## 
## , , 3
## 
##      [,1] [,2] [,3] [,4]        [,5]       [,6] [,7]      [,8]       [,9]
## [1,]    0    0    0    0 -0.02158322 0.06819016   NA 0.1084688 0.06721403
## [2,]    0    0    0    0  0.14075872 0.34012988   NA 0.4140453 0.16318840
## 
## , , 4
## 
##      [,1] [,2] [,3] [,4]        [,5]       [,6]      [,7] [,8]        [,9]
## [1,]    0    0    0    0  0.06797462 0.03503687 0.1084688   NA -0.01245812
## [2,]    0    0    0    0 -0.09600907 0.01033553 0.4140453   NA  0.04876743
## 
## , , 5
## 
##      [,1]      [,2]        [,3]        [,4] [,5] [,6] [,7] [,8] [,9]
## [1,]   NA 0.1627910 -0.02158322  0.06797462    0    0    0    0    0
## [2,]   NA 0.3132894  0.14075872 -0.09600907    0    0    0    0    0
## 
## , , 6
## 
##           [,1] [,2]       [,3]       [,4] [,5] [,6] [,7] [,8] [,9]
## [1,] 0.1627910   NA 0.06819016 0.03503687    0    0    0    0    0
## [2,] 0.3132894   NA 0.34012988 0.01033553    0    0    0    0    0
## 
## , , 7
## 
##             [,1]       [,2] [,3]      [,4] [,5] [,6] [,7] [,8] [,9]
## [1,] -0.02158322 0.06819016   NA 0.1084688    0    0    0    0    0
## [2,]  0.14075872 0.34012988   NA 0.4140453    0    0    0    0    0
## 
## , , 8
## 
##             [,1]       [,2]      [,3] [,4] [,5] [,6] [,7] [,8] [,9]
## [1,]  0.06797462 0.03503687 0.1084688   NA    0    0    0    0    0
## [2,] -0.09600907 0.01033553 0.4140453   NA    0    0    0    0    0
## 
## , , 9
## 
##           [,1]        [,2]       [,3]        [,4] [,5] [,6] [,7] [,8] [,9]
## [1,] 0.1880144 0.002784793 0.06721403 -0.01245812    0    0    0    0    0
## [2,] 0.1758835 0.135828599 0.16318840  0.04876743    0    0    0    0    0