Load libraries
library(knitr)
library(rmarkdown)
library(tidyverse)
library(feather)
library(data.table)
library(sna) # gcor package
Get data for test case.
# read in two sets of pairiwise dists (ENG AND FRE)
pairwise_dists1 <- read_feather("ets/data/ENG_common_word_dists.feather") %>%
data.table(key = c("w1", "w2"))
pairwise_dists2 <- read_feather("ets/data/FRE_common_word_dists.feather") %>%
data.table(key = c("w1", "w2"))
# merge and get toy subset
deciles_df <- read_csv("ets/data/concreteness_deciles.csv") %>%
mutate(conc_tile1 = conc_tile,
conc_tile2 = conc_tile,
w1 = word,
w2 = word) %>%
select(-n, -word, -conc_tile)
merged_dists <- merge(pairwise_dists1,
pairwise_dists2, by = c("w1", "w2")) %>%
left_join(deciles_df %>% select(w1, conc_tile1), by = "w1") %>%
left_join(deciles_df %>% select(w2, conc_tile2), by = "w2") %>%
filter(w1 != w2) %>%
filter(conc_tile1 == 2, conc_tile2 == 2)
test2words_sym <- deciles_df %>% filter(conc_tile1 == 2) %>% slice(1:4) %>% pull(w1)
merged_dists_sym <- merged_dists %>%
filter(w1 %in% test2words_sym, # 4 words
w2 %in% test2words_sym) # 4 words
merged_dists_sym
## w1 w2 cos_dist.x cos_dist.y conc_tile1 conc_tile2
## 1 ability absolutely 0.16279096 0.31328938 2 2
## 2 ability accurate -0.02158322 0.14075872 2 2
## 3 ability advanced 0.06797462 -0.09600907 2 2
## 4 absolutely ability 0.16279096 0.31328938 2 2
## 5 absolutely accurate 0.06819016 0.34012988 2 2
## 6 absolutely advanced 0.03503687 0.01033553 2 2
## 7 accurate ability -0.02158322 0.14075872 2 2
## 8 accurate absolutely 0.06819016 0.34012988 2 2
## 9 accurate advanced 0.10846877 0.41404534 2 2
## 10 advanced ability 0.06797462 -0.09600907 2 2
## 11 advanced absolutely 0.03503687 0.01033553 2 2
## 12 advanced accurate 0.10846877 0.41404534 2 2
Estimate =
cor(merged_dists_sym$cos_dist.x, merged_dists_sym$cos_dist.y)
## [1] 0.4909756
gcor takes data in “wide” format - convert data to matrix.
get_regression_mat <- function(this_df, target_value){
this_df %>%
select(w1, w2, target_value) %>%
spread(w2, target_value) %>%
select(-w1) %>%
as.matrix()
}
mat1 <- get_regression_mat(merged_dists_sym, "cos_dist.x")
mat1
## ability absolutely accurate advanced
## [1,] NA 0.16279096 -0.02158322 0.06797462
## [2,] 0.16279096 NA 0.06819016 0.03503687
## [3,] -0.02158322 0.06819016 NA 0.10846877
## [4,] 0.06797462 0.03503687 0.10846877 NA
mat2 <- get_regression_mat(merged_dists_sym, "cos_dist.y")
mat2
## ability absolutely accurate advanced
## [1,] NA 0.31328938 0.1407587 -0.09600907
## [2,] 0.31328938 NA 0.3401299 0.01033553
## [3,] 0.14075872 0.34012988 NA 0.41404534
## [4,] -0.09600907 0.01033553 0.4140453 NA
Estimate =
out_array <- array(0, dim = c(2, dim(mat1)))
out_array[1,,] <- mat1
out_array[2,,] <- mat2
gcor(out_array, diag = TRUE)
## 1 2
## 1 1.0000000 0.4909756
## 2 0.4909756 1.0000000
The first line of code in the gcor function is this: “dat <- as.sociomatrix.sna(dat)”. Let’s run that here. Everything looks fine.
as.sociomatrix.sna(out_array)
## , , 1
##
## [,1] [,2] [,3] [,4]
## [1,] NA 0.1627910 -0.02158322 0.06797462
## [2,] NA 0.3132894 0.14075872 -0.09600907
##
## , , 2
##
## [,1] [,2] [,3] [,4]
## [1,] 0.1627910 NA 0.06819016 0.03503687
## [2,] 0.3132894 NA 0.34012988 0.01033553
##
## , , 3
##
## [,1] [,2] [,3] [,4]
## [1,] -0.02158322 0.06819016 NA 0.1084688
## [2,] 0.14075872 0.34012988 NA 0.4140453
##
## , , 4
##
## [,1] [,2] [,3] [,4]
## [1,] 0.06797462 0.03503687 0.1084688 NA
## [2,] -0.09600907 0.01033553 0.4140453 NA
test2words_asym <- deciles_df %>% filter(conc_tile1 == 2) %>% slice(1:5) %>% pull(w1)
test2words_asym <- merged_dists %>%
filter(w1 %in% test2words_sym, # 4 words
w2 %in% test2words_asym) # 5 words
test2words_asym
## w1 w2 cos_dist.x cos_dist.y conc_tile1 conc_tile2
## 1 ability absolutely 0.162790959 0.31328938 2 2
## 2 ability accurate -0.021583222 0.14075872 2 2
## 3 ability advanced 0.067974621 -0.09600907 2 2
## 4 ability advantage 0.188014398 0.17588350 2 2
## 5 absolutely ability 0.162790959 0.31328938 2 2
## 6 absolutely accurate 0.068190159 0.34012988 2 2
## 7 absolutely advanced 0.035036873 0.01033553 2 2
## 8 absolutely advantage 0.002784793 0.13582860 2 2
## 9 accurate ability -0.021583222 0.14075872 2 2
## 10 accurate absolutely 0.068190159 0.34012988 2 2
## 11 accurate advanced 0.108468768 0.41404534 2 2
## 12 accurate advantage 0.067214028 0.16318840 2 2
## 13 advanced ability 0.067974621 -0.09600907 2 2
## 14 advanced absolutely 0.035036873 0.01033553 2 2
## 15 advanced accurate 0.108468768 0.41404534 2 2
## 16 advanced advantage -0.012458116 0.04876743 2 2
Estimate =
cor(test2words_asym$cos_dist.x, test2words_asym$cos_dist.y)
## [1] 0.4537524
mat1 <- get_regression_mat(test2words_asym, "cos_dist.x")
mat1
## ability absolutely accurate advanced advantage
## [1,] NA 0.16279096 -0.02158322 0.06797462 0.188014398
## [2,] 0.16279096 NA 0.06819016 0.03503687 0.002784793
## [3,] -0.02158322 0.06819016 NA 0.10846877 0.067214028
## [4,] 0.06797462 0.03503687 0.10846877 NA -0.012458116
mat2 <- get_regression_mat(test2words_asym, "cos_dist.y")
mat2
## ability absolutely accurate advanced advantage
## [1,] NA 0.31328938 0.1407587 -0.09600907 0.17588350
## [2,] 0.31328938 NA 0.3401299 0.01033553 0.13582860
## [3,] 0.14075872 0.34012988 NA 0.41404534 0.16318840
## [4,] -0.09600907 0.01033553 0.4140453 NA 0.04876743
Estimate =
out_array <- array(0, dim = c(2, dim(mat1)))
out_array[1,,] <- mat1
out_array[2,,] <- mat2
gcor(out_array, diag = TRUE)
## 1 2
## 1 1.000000 0.665992
## 2 0.665992 1.000000
The first line of code in the gcor function is this: “dat <- as.sociomatrix.sna(dat)”. Let’s run that here. Adds a bunch of zeros.
as.sociomatrix.sna(out_array)
## , , 1
##
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
## [1,] 0 0 0 0 NA 0.1627910 -0.02158322 0.06797462 0.1880144
## [2,] 0 0 0 0 NA 0.3132894 0.14075872 -0.09600907 0.1758835
##
## , , 2
##
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
## [1,] 0 0 0 0 0.1627910 NA 0.06819016 0.03503687 0.002784793
## [2,] 0 0 0 0 0.3132894 NA 0.34012988 0.01033553 0.135828599
##
## , , 3
##
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
## [1,] 0 0 0 0 -0.02158322 0.06819016 NA 0.1084688 0.06721403
## [2,] 0 0 0 0 0.14075872 0.34012988 NA 0.4140453 0.16318840
##
## , , 4
##
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
## [1,] 0 0 0 0 0.06797462 0.03503687 0.1084688 NA -0.01245812
## [2,] 0 0 0 0 -0.09600907 0.01033553 0.4140453 NA 0.04876743
##
## , , 5
##
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
## [1,] NA 0.1627910 -0.02158322 0.06797462 0 0 0 0 0
## [2,] NA 0.3132894 0.14075872 -0.09600907 0 0 0 0 0
##
## , , 6
##
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
## [1,] 0.1627910 NA 0.06819016 0.03503687 0 0 0 0 0
## [2,] 0.3132894 NA 0.34012988 0.01033553 0 0 0 0 0
##
## , , 7
##
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
## [1,] -0.02158322 0.06819016 NA 0.1084688 0 0 0 0 0
## [2,] 0.14075872 0.34012988 NA 0.4140453 0 0 0 0 0
##
## , , 8
##
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
## [1,] 0.06797462 0.03503687 0.1084688 NA 0 0 0 0 0
## [2,] -0.09600907 0.01033553 0.4140453 NA 0 0 0 0 0
##
## , , 9
##
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
## [1,] 0.1880144 0.002784793 0.06721403 -0.01245812 0 0 0 0 0
## [2,] 0.1758835 0.135828599 0.16318840 0.04876743 0 0 0 0 0