04_correlation_analysis.R

source(here::here('R/00_read_data.R'))

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Create a correlation matrix as in described in
# Dewey & Knoblich (2014):
# 
# "All reported correlations are Pearson’s R,
# except for the values in columns seven and eight, which are
# Spearman’s Rho due to the non-normality of the Likert scale
# ratings." (p. 5)
#
# @param df 
# @param .test return (uncorrected) p values instead of correlations?
#
create_cor_mat = function(df, .test = FALSE){
  df %>%
    select(-(1:3)) -> df
  
  df %>%
    as.matrix() %>%
    Hmisc::rcorr(type = 'p') -> tab
  
  df %>%
    as.matrix() %>%
    Hmisc::rcorr(type = 's') -> spearman_tab
  
  if(.test){
    tab = tab[[3]]
    spearman_tab = spearman_tab[[3]]
  }else{
    tab = tab[[1]]
    spearman_tab = spearman_tab[[1]]
  }
  
  tab[,7:8] = spearman_tab[,7:8]
  tab[lower.tri(tab)] = NA 
  diag(tab) = NA
  return(tab)
}


library(tidyr)

tb_data %>%
  pivot_wider(
    id_cols = c(subid, gender, order, delay),
    values_from = estimated_interval,
    names_from = condition) %>%
  mutate(tb_agency = Observational - Operant) %>%
  pivot_wider(
    id_cols = c(subid, gender, order),
    names_from = delay,
    values_from = tb_agency, names_prefix = 'tb_'
  ) -> tb_agency

sa_data %>%
  pivot_wider(
    id_cols = c(subid, gender, order),
    values_from = pse,
    names_from = condition) %>%
  mutate(
    sa_agency = Observational - Operant
    ) %>%
  select(-Operant, -Observational) -> sa_agency

agency_data %>%
  pivot_wider(
    id_cols = c(subid, gender, order),
    values_from = agency,
    names_from = task,
    names_prefix = 'likert_') -> likert_agency
  
sa_agency %>%
  full_join(tb_agency) %>%
  full_join(likert_agency) -> all_agency_measures

## Joining, by = c("subid", "gender", "order")

## Joining, by = c("subid", "gender", "order")

# Grab magical ideation and Locus of Control from
# correlation data
correlation_data %>%
  select(subid, MI, LoC) %>%
  right_join(all_agency_measures) %>%
  select(
    subid, gender, order, 
    sa_agency, matches('tb_'), 
    MI, LoC, likert_SensAtt, likert_TemBin
    ) %>%
  mutate(
    likert_SAminusTB = likert_SensAtt - likert_TemBin
    ) -> all_agency_measures

## Joining, by = "subid"

library(gt)

## Warning: package 'gt' was built under R version 4.1.1

# Table 1
all_agency_measures %>%
  create_cor_mat() -> tab1 

all_agency_measures %>%
  create_cor_mat(.test = TRUE) %>%
  stats::p.adjust(method = 'BH', n = sum(!is.na(.))) %>%
  matrix(nrow = nrow(tab1)) %>%
  `dimnames<-`(dimnames(tab1))-> tab1_corrected_p

tab1 %>%
  as.data.frame() %>%
  mutate(row = rownames(.)) %>%
  gt(rowname_col = 'row') %>%
  fmt_number(1:9,decimals = 3) %>%
  fmt_missing(1:9, missing_text = '') %>%
  tab_header(
    title = 'Table 1',
    subtitle = 'Correlations between implicit and explicit measure of the SoA, all data.'
  ) %>%
  tab_options(
    table.font.size = '80%'
  ) %>%
  tab_footnote(
    'c.f. Dewey & Knoblich, 2014, p. 6',
    locations = cells_title('title')
  )

Table 1¹
Correlations between implicit and explicit measure of the SoA, all data.
	sa_agency	tb_200ms	tb_400ms	tb_1200ms	MI	LoC	likert_SensAtt	likert_TemBin	likert_SAminusTB
sa_agency		0.131	0.221	0.255	−0.072	−0.104	0.166	−0.022	0.126
tb_200ms			0.724	0.414	0.039	−0.084	0.214	0.173	0.032
tb_400ms				0.567	−0.033	−0.163	0.114	0.079	0.003
tb_1200ms					−0.160	−0.112	0.134	0.130	−0.011
MI						0.348	−0.113	−0.039	−0.033
LoC							−0.005	0.022	−0.038
likert_SensAtt								0.334	0.514
likert_TemBin									−0.697
likert_SAminusTB
¹ c.f. Dewey & Knoblich, 2014, p. 6

tab1_corrected_p %>%
  as.data.frame() %>%
  mutate(row = rownames(.)) %>%
  gt(rowname_col = 'row') %>%
  fmt_number(1:9,decimals = 3) %>%
  fmt_missing(1:9, missing_text = '') %>%
  tab_header(
    title = 'Table 1, BH-corrected p values'
  ) %>%
  data_color(
    1:9,
    scales::col_bin(
      bins = c(0,.05,1),
      palette = c('darkred', 'white'),
    )
  ) %>%
  tab_options(
    table.font.size = '80%'
  )

Table 1, BH-corrected p values
	sa_agency	tb_200ms	tb_400ms	tb_1200ms	MI	LoC	likert_SensAtt	likert_TemBin	likert_SAminusTB
sa_agency		0.543	0.208	0.110	0.764	0.596	0.414	0.928	0.543
tb_200ms			0.000	0.001	0.910	0.725	0.217	0.414	0.910
tb_400ms				0.000	0.910	0.414	0.563	0.735	0.977
tb_1200ms					0.414	0.563	0.543	0.543	0.977
MI						0.011	0.563	0.910	0.910
LoC							0.977	0.928	0.910
likert_SensAtt								0.015	0.000
likert_TemBin									0.000
likert_SAminusTB

# Table 2
all_agency_measures %>%
  filter(order == 'SenAtt-TemBin') %>%
  create_cor_mat() -> tab2

all_agency_measures %>%
  filter(order == 'SenAtt-TemBin') %>%
  create_cor_mat(.test = TRUE) %>%
  stats::p.adjust(method = 'BH', n = sum(!is.na(.))) %>%
  matrix(nrow = nrow(tab1)) %>%
  `dimnames<-`(dimnames(tab2))-> tab2_corrected_p


tab2 %>%
  as.data.frame() %>%
  mutate(row = rownames(.)) %>%
  gt(rowname_col = 'row') %>%
  fmt_number(1:9,decimals = 2) %>%
  fmt_missing(1:9, missing_text = '') %>%
  tab_header(
    title = 'Table 2',
    subtitle = 'Correlations among implicit and explicit measure of the SoA, SA-TB task order.'
    ) %>%
  tab_options(
    table.font.size = '80%'
  ) %>%
  tab_footnote(
    'c.f. Dewey & Knoblich, 2014, p. 6',
    locations = cells_title('title')
  )

Table 2¹
Correlations among implicit and explicit measure of the SoA, SA-TB task order.
	sa_agency	tb_200ms	tb_400ms	tb_1200ms	MI	LoC	likert_SensAtt	likert_TemBin	likert_SAminusTB
sa_agency		0.30	0.37	0.40	−0.06	−0.06	0.33	0.07	0.14
tb_200ms			0.74	0.35	0.10	0.04	0.31	0.23	0.03
tb_400ms				0.56	0.02	0.00	0.23	0.00	0.12
tb_1200ms					−0.03	−0.01	0.10	0.07	0.03
MI						0.40	−0.09	−0.13	0.12
LoC							−0.02	0.15	−0.21
likert_SensAtt								0.50	0.28
likert_TemBin									−0.71
likert_SAminusTB
¹ c.f. Dewey & Knoblich, 2014, p. 6

tab2_corrected_p %>%
  as.data.frame() %>%
  mutate(row = rownames(.)) %>%
  gt(rowname_col = 'row') %>%
  fmt_number(1:9,decimals = 3) %>%
  fmt_missing(1:9, missing_text = '') %>%
  tab_header(
    title = 'Table 2, BH-corrected p values'
  ) %>%
  data_color(
    1:9,
    scales::col_bin(
      bins = c(0,.05,1),
      palette = c('darkred', 'white'),
    )
  ) %>%
  tab_options(
    table.font.size = '80%'
  )

Table 2, BH-corrected p values
	sa_agency	tb_200ms	tb_400ms	tb_1200ms	MI	LoC	likert_SensAtt	likert_TemBin	likert_SAminusTB
sa_agency		0.205	0.100	0.075	0.939	0.939	0.167	0.939	0.819
tb_200ms			0.000	0.124	0.886	0.983	0.205	0.427	0.983
tb_400ms				0.002	0.983	0.992	0.427	0.992	0.827
tb_1200ms					0.983	0.983	0.886	0.939	0.983
MI						0.075	0.886	0.827	0.827
LoC							0.983	0.805	0.474
likert_SensAtt								0.012	0.266
likert_TemBin									0.000
likert_SAminusTB

# Table 3
all_agency_measures %>%
  filter(order == 'TemBin-SenAtt') %>%
  create_cor_mat() -> tab3

all_agency_measures %>%
  filter(order == 'TemBin-SenAtt') %>%
  create_cor_mat(.test = TRUE) %>%
  stats::p.adjust(method = 'BH', n = sum(!is.na(.))) %>%
  matrix(nrow = nrow(tab1)) %>%
  `dimnames<-`(dimnames(tab3))-> tab3_corrected_p


tab3 %>%
  as.data.frame() %>%
  mutate(row = rownames(.)) %>%
  gt(rowname_col = 'row') %>%
  fmt_number(1:9,decimals = 2) %>%
  fmt_missing(1:9, missing_text = '') %>%
  tab_header(
    title = 'Table 3',
    subtitle = 'Correlations among implicit and explicit measure of the SoA, TB-SA task order.'
  ) %>%
  tab_options(
    table.font.size = '80%'
  ) %>%
  tab_footnote(
    'c.f. Dewey & Knoblich, 2014, p. 7',
    locations = cells_title('title')
  )

Table 3¹
Correlations among implicit and explicit measure of the SoA, TB-SA task order.
	sa_agency	tb_200ms	tb_400ms	tb_1200ms	MI	LoC	likert_SensAtt	likert_TemBin	likert_SAminusTB
sa_agency		−0.15	0.02	0.18	−0.12	−0.16	0.06	−0.17	0.18
tb_200ms			0.69	0.52	−0.07	−0.30	0.08	0.09	0.04
tb_400ms				0.59	−0.12	−0.43	−0.04	0.17	−0.12
tb_1200ms					−0.33	−0.24	0.13	0.22	−0.05
MI						0.27	−0.11	0.13	−0.20
LoC							0.06	−0.12	0.14
likert_SensAtt								0.10	0.71
likert_TemBin									−0.72
likert_SAminusTB
¹ c.f. Dewey & Knoblich, 2014, p. 7

tab3_corrected_p %>%
  as.data.frame() %>%
  mutate(row = rownames(.)) %>%
  gt(rowname_col = 'row') %>%
  fmt_number(1:9,decimals = 3) %>%
  fmt_missing(1:9, missing_text = '') %>%
  tab_header(
    title = 'Table 3, BH-corrected p values'
  ) %>%
  data_color(
    1:9,
    scales::col_bin(
      bins = c(0,.05,1),
      palette = c('darkred', 'white'),
    )
  ) %>%
  tab_options(
    table.font.size = '80%'
  )

Table 3, BH-corrected p values
	sa_agency	tb_200ms	tb_400ms	tb_1200ms	MI	LoC	likert_SensAtt	likert_TemBin	likert_SAminusTB
sa_agency		0.697	0.925	0.693	0.697	0.693	0.810	0.693	0.693
tb_200ms			0.000	0.005	0.803	0.289	0.791	0.735	0.848
tb_400ms				0.001	0.697	0.042	0.848	0.693	0.697
tb_1200ms					0.202	0.479	0.697	0.586	0.810
MI						0.385	0.700	0.697	0.680
LoC							0.810	0.697	0.697
likert_SensAtt								0.706	0.000
likert_TemBin									0.000
likert_SAminusTB

article_pdf_path = here::here('documents/Dewey_Knoblich_2014.pdf') 

library(tabulizer)

## Warning: package 'tabulizer' was built under R version 4.1.1

article_pdf_path %>%
  extract_tables() -> pdf_tabs

pdf_tab1 = pdf_tabs[[1]][-1,4:12]
diag(pdf_tab1) = ''
pdf_tab1 %>%
  sub(pattern = '*', replacement = '', fixed = TRUE) %>%
  sub(pattern = '^2', replacement = '-') %>%
  as.numeric() %>%
  matrix(nrow = 9) -> pdf_tab1


(round(tab1,2) - pdf_tab1) %>%
  data.frame() %>%
  mutate(row = rownames(.)) %>%
  gt(rowname_col = 'row') %>%
  fmt_number(1:9) %>%
  tab_header(
    title = 'Deviations between calculated table and PDF table 1'
  ) %>%
  data_color(
    1:9,
    scales::col_numeric(
      palette = 'RdBu',
      domain = c(-.3,.3),
      reverse = TRUE
    )
  ) %>%
  fmt_missing(1:9, missing_text = '') %>%
  tab_options(
    table.font.size = '80%'
    )

Deviations between calculated table and PDF table 1
	sa_agency	tb_200ms	tb_400ms	tb_1200ms	MI	LoC	likert_SensAtt	likert_TemBin	likert_SAminusTB
sa_agency		0.00	0.00	0.00	0.00	0.00	0.00	0.00	0.00
tb_200ms			0.00	0.00	0.00	0.00	0.00	0.00	0.00
tb_400ms				0.00	0.00	0.00	0.00	0.00	0.00
tb_1200ms					0.00	0.00	0.00	0.00	0.00
MI						0.00	0.00	0.00	0.00
LoC							0.00	0.00	0.00
likert_SensAtt								0.00	0.00
likert_TemBin									0.00
likert_SAminusTB

pdf_tab2 = pdf_tabs[[2]][,5:13]
diag(pdf_tab2) = ''
pdf_tab2 %>%
  sub(pattern = '*', replacement = '', fixed = TRUE) %>%
  sub(pattern = '^2', replacement = '-') %>%
  as.numeric() %>%
  matrix(nrow = 9) -> pdf_tab2

(round(tab2,2) - pdf_tab2) %>%
  data.frame() %>%
  mutate(row = rownames(.)) %>%
  gt(rowname_col = 'row') %>%
  fmt_number(1:9) %>%
    tab_header(
    title = 'Deviations between calculated table and PDF table 2'
  ) %>%
  data_color(
    1:9,
    scales::col_numeric(
      palette = 'RdBu',
      domain = c(-.25,.25),
      reverse = TRUE
    )
  ) %>%
  fmt_missing(1:9, missing_text = '') %>%
  tab_options(
    table.font.size = '80%'
  )

Deviations between calculated table and PDF table 2
	sa_agency	tb_200ms	tb_400ms	tb_1200ms	MI	LoC	likert_SensAtt	likert_TemBin	likert_SAminusTB
sa_agency		0.06	0.06	−0.03	0.00	0.00	0.00	0.00	0.00
tb_200ms			−0.05	−0.09	−0.01	0.01	0.21	0.06	0.00
tb_400ms				−0.11	−0.08	−0.04	0.05	−0.04	0.00
tb_1200ms					−0.11	−0.06	−0.05	−0.01	0.00
MI						0.00	0.00	0.00	0.00
LoC							0.00	0.00	0.00
likert_SensAtt								0.00	0.00
likert_TemBin									0.00
likert_SAminusTB

# Frustratingly, table 3 does not come out with tabulizer. 
# We need to use OCR.

temppng = tempfile(fileext = '.png')

library(magick)

## Warning: package 'magick' was built under R version 4.1.1

## Linking to ImageMagick 6.9.12.3
## Enabled features: cairo, freetype, fftw, ghostscript, heic, lcms, pango, raw, rsvg, webp
## Disabled features: fontconfig, x11

area_inches = c(4269, 957, 1550, 885) / 600
dpi = 600
area_pixels = area_inches * dpi

article_pdf_path %>%
  pdftools::pdf_convert(
    dpi = dpi,
    pages = 7,
    filenames = temppng
  ) %>%
  image_read() %>%
  image_rotate(90) %>%
  image_crop(
    geometry_area(
      width = area_pixels[1], 
      height = area_pixels[2], 
      x_off = area_pixels[3],
      y_off = area_pixels[4]
      )
    ) %>%
  image_write(path = temppng)

## Warning in sprintf(filenames, pages, format): 2 arguments not used by format 'C:
## \Users\richa\AppData\Local\Temp\RtmpOeYjxB\filedf0159263.png'

## Converting page 7 to C:\Users\richa\AppData\Local\Temp\RtmpOeYjxB\filedf0159263.png... done!

library(tesseract)

## Warning: package 'tesseract' was built under R version 4.1.1

temppng %>%
  tesseract::ocr() %>%
  stringr::str_replace_all(pattern = '—', '-') %>%
  stringr::str_remove_all('\\*|^- ') %>%
  stringr::str_replace_all(pattern = '([- \n])([0-9])', '\\1.\\2') %>%
  stringr::str_replace_all(pattern = '\n- ', '\n') %>%
  strsplit('[\n ]') %>%
  unlist() %>%
  as.numeric() -> lt

# There is one element that doesn't seem to like to OCR properly
lt[9] = -lt[9]

pdf_tab3 = pdf_tab2 + NA
pdf_tab3[lower.tri(pdf_tab3)] = lt
pdf_tab3 = t(pdf_tab3)

(round(tab3,2) - pdf_tab3) %>%
  data.frame() %>%
  mutate(row = rownames(.)) %>%
  gt(rowname_col = 'row') %>%
  fmt_number(1:9) %>%
  tab_header(
    title = 'Deviations between calculated table and PDF table 3'
  ) %>%
  data_color(
    1:9,
    scales::col_numeric(
      palette = 'RdBu',
      domain = c(-.25,.25),
      reverse = TRUE
    )
  ) %>%
  fmt_missing(1:9, missing_text = '') %>%
  tab_options(
    table.font.size = '80%'
  )

Deviations between calculated table and PDF table 3
	sa_agency	tb_200ms	tb_400ms	tb_1200ms	MI	LoC	likert_SensAtt	likert_TemBin	likert_SAminusTB
sa_agency		−0.10	0.00	0.06	0.00	0.00	0.00	0.00	0.00
tb_200ms			−0.11	−0.02	0.07	−0.04	−0.07	0.20	0.00
tb_400ms				−0.06	0.05	−0.11	−0.07	−0.02	0.00
tb_1200ms					−0.08	0.01	0.06	−0.08	0.00
MI						0.00	0.00	0.00	0.00
LoC							0.00	0.00	0.00
likert_SensAtt								0.00	0.00
likert_TemBin									0.00
likert_SAminusTB

04_correlation_analysis.R

richa

2021-10-21