source(here::here('R/00_read_data.R'))
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Create a correlation matrix as in described in
# Dewey & Knoblich (2014):
# 
# "All reported correlations are Pearson’s R,
# except for the values in columns seven and eight, which are
# Spearman’s Rho due to the non-normality of the Likert scale
# ratings." (p. 5)
#
# @param df 
# @param .test return (uncorrected) p values instead of correlations?
#
create_cor_mat = function(df, .test = FALSE){
  df %>%
    select(-(1:3)) -> df
  
  df %>%
    as.matrix() %>%
    Hmisc::rcorr(type = 'p') -> tab
  
  df %>%
    as.matrix() %>%
    Hmisc::rcorr(type = 's') -> spearman_tab
  
  if(.test){
    tab = tab[[3]]
    spearman_tab = spearman_tab[[3]]
  }else{
    tab = tab[[1]]
    spearman_tab = spearman_tab[[1]]
  }
  
  tab[,7:8] = spearman_tab[,7:8]
  tab[lower.tri(tab)] = NA 
  diag(tab) = NA
  return(tab)
}


library(tidyr)

tb_data %>%
  pivot_wider(
    id_cols = c(subid, gender, order, delay),
    values_from = estimated_interval,
    names_from = condition) %>%
  mutate(tb_agency = Observational - Operant) %>%
  pivot_wider(
    id_cols = c(subid, gender, order),
    names_from = delay,
    values_from = tb_agency, names_prefix = 'tb_'
  ) -> tb_agency

sa_data %>%
  pivot_wider(
    id_cols = c(subid, gender, order),
    values_from = pse,
    names_from = condition) %>%
  mutate(
    sa_agency = Observational - Operant
    ) %>%
  select(-Operant, -Observational) -> sa_agency

agency_data %>%
  pivot_wider(
    id_cols = c(subid, gender, order),
    values_from = agency,
    names_from = task,
    names_prefix = 'likert_') -> likert_agency
  
sa_agency %>%
  full_join(tb_agency) %>%
  full_join(likert_agency) -> all_agency_measures
## Joining, by = c("subid", "gender", "order")
## Joining, by = c("subid", "gender", "order")
# Grab magical ideation and Locus of Control from
# correlation data
correlation_data %>%
  select(subid, MI, LoC) %>%
  right_join(all_agency_measures) %>%
  select(
    subid, gender, order, 
    sa_agency, matches('tb_'), 
    MI, LoC, likert_SensAtt, likert_TemBin
    ) %>%
  mutate(
    likert_SAminusTB = likert_SensAtt - likert_TemBin
    ) -> all_agency_measures
## Joining, by = "subid"
library(gt)
## Warning: package 'gt' was built under R version 4.1.1
# Table 1
all_agency_measures %>%
  create_cor_mat() -> tab1 

all_agency_measures %>%
  create_cor_mat(.test = TRUE) %>%
  stats::p.adjust(method = 'BH', n = sum(!is.na(.))) %>%
  matrix(nrow = nrow(tab1)) %>%
  `dimnames<-`(dimnames(tab1))-> tab1_corrected_p

tab1 %>%
  as.data.frame() %>%
  mutate(row = rownames(.)) %>%
  gt(rowname_col = 'row') %>%
  fmt_number(1:9,decimals = 3) %>%
  fmt_missing(1:9, missing_text = '') %>%
  tab_header(
    title = 'Table 1',
    subtitle = 'Correlations between implicit and explicit measure of the SoA, all data.'
  ) %>%
  tab_options(
    table.font.size = '80%'
  ) %>%
  tab_footnote(
    'c.f. Dewey & Knoblich, 2014, p. 6',
    locations = cells_title('title')
  )
Table 11
Correlations between implicit and explicit measure of the SoA, all data.
sa_agency tb_200ms tb_400ms tb_1200ms MI LoC likert_SensAtt likert_TemBin likert_SAminusTB
sa_agency 0.131 0.221 0.255 −0.072 −0.104 0.166 −0.022 0.126
tb_200ms 0.724 0.414 0.039 −0.084 0.214 0.173 0.032
tb_400ms 0.567 −0.033 −0.163 0.114 0.079 0.003
tb_1200ms −0.160 −0.112 0.134 0.130 −0.011
MI 0.348 −0.113 −0.039 −0.033
LoC −0.005 0.022 −0.038
likert_SensAtt 0.334 0.514
likert_TemBin −0.697
likert_SAminusTB

1 c.f. Dewey & Knoblich, 2014, p. 6

tab1_corrected_p %>%
  as.data.frame() %>%
  mutate(row = rownames(.)) %>%
  gt(rowname_col = 'row') %>%
  fmt_number(1:9,decimals = 3) %>%
  fmt_missing(1:9, missing_text = '') %>%
  tab_header(
    title = 'Table 1, BH-corrected p values'
  ) %>%
  data_color(
    1:9,
    scales::col_bin(
      bins = c(0,.05,1),
      palette = c('darkred', 'white'),
    )
  ) %>%
  tab_options(
    table.font.size = '80%'
  )
Table 1, BH-corrected p values
sa_agency tb_200ms tb_400ms tb_1200ms MI LoC likert_SensAtt likert_TemBin likert_SAminusTB
sa_agency 0.543 0.208 0.110 0.764 0.596 0.414 0.928 0.543
tb_200ms 0.000 0.001 0.910 0.725 0.217 0.414 0.910
tb_400ms 0.000 0.910 0.414 0.563 0.735 0.977
tb_1200ms 0.414 0.563 0.543 0.543 0.977
MI 0.011 0.563 0.910 0.910
LoC 0.977 0.928 0.910
likert_SensAtt 0.015 0.000
likert_TemBin 0.000
likert_SAminusTB
# Table 2
all_agency_measures %>%
  filter(order == 'SenAtt-TemBin') %>%
  create_cor_mat() -> tab2

all_agency_measures %>%
  filter(order == 'SenAtt-TemBin') %>%
  create_cor_mat(.test = TRUE) %>%
  stats::p.adjust(method = 'BH', n = sum(!is.na(.))) %>%
  matrix(nrow = nrow(tab1)) %>%
  `dimnames<-`(dimnames(tab2))-> tab2_corrected_p


tab2 %>%
  as.data.frame() %>%
  mutate(row = rownames(.)) %>%
  gt(rowname_col = 'row') %>%
  fmt_number(1:9,decimals = 2) %>%
  fmt_missing(1:9, missing_text = '') %>%
  tab_header(
    title = 'Table 2',
    subtitle = 'Correlations among implicit and explicit measure of the SoA, SA-TB task order.'
    ) %>%
  tab_options(
    table.font.size = '80%'
  ) %>%
  tab_footnote(
    'c.f. Dewey & Knoblich, 2014, p. 6',
    locations = cells_title('title')
  )
Table 21
Correlations among implicit and explicit measure of the SoA, SA-TB task order.
sa_agency tb_200ms tb_400ms tb_1200ms MI LoC likert_SensAtt likert_TemBin likert_SAminusTB
sa_agency 0.30 0.37 0.40 −0.06 −0.06 0.33 0.07 0.14
tb_200ms 0.74 0.35 0.10 0.04 0.31 0.23 0.03
tb_400ms 0.56 0.02 0.00 0.23 0.00 0.12
tb_1200ms −0.03 −0.01 0.10 0.07 0.03
MI 0.40 −0.09 −0.13 0.12
LoC −0.02 0.15 −0.21
likert_SensAtt 0.50 0.28
likert_TemBin −0.71
likert_SAminusTB

1 c.f. Dewey & Knoblich, 2014, p. 6

tab2_corrected_p %>%
  as.data.frame() %>%
  mutate(row = rownames(.)) %>%
  gt(rowname_col = 'row') %>%
  fmt_number(1:9,decimals = 3) %>%
  fmt_missing(1:9, missing_text = '') %>%
  tab_header(
    title = 'Table 2, BH-corrected p values'
  ) %>%
  data_color(
    1:9,
    scales::col_bin(
      bins = c(0,.05,1),
      palette = c('darkred', 'white'),
    )
  ) %>%
  tab_options(
    table.font.size = '80%'
  )
Table 2, BH-corrected p values
sa_agency tb_200ms tb_400ms tb_1200ms MI LoC likert_SensAtt likert_TemBin likert_SAminusTB
sa_agency 0.205 0.100 0.075 0.939 0.939 0.167 0.939 0.819
tb_200ms 0.000 0.124 0.886 0.983 0.205 0.427 0.983
tb_400ms 0.002 0.983 0.992 0.427 0.992 0.827
tb_1200ms 0.983 0.983 0.886 0.939 0.983
MI 0.075 0.886 0.827 0.827
LoC 0.983 0.805 0.474
likert_SensAtt 0.012 0.266
likert_TemBin 0.000
likert_SAminusTB
# Table 3
all_agency_measures %>%
  filter(order == 'TemBin-SenAtt') %>%
  create_cor_mat() -> tab3

all_agency_measures %>%
  filter(order == 'TemBin-SenAtt') %>%
  create_cor_mat(.test = TRUE) %>%
  stats::p.adjust(method = 'BH', n = sum(!is.na(.))) %>%
  matrix(nrow = nrow(tab1)) %>%
  `dimnames<-`(dimnames(tab3))-> tab3_corrected_p


tab3 %>%
  as.data.frame() %>%
  mutate(row = rownames(.)) %>%
  gt(rowname_col = 'row') %>%
  fmt_number(1:9,decimals = 2) %>%
  fmt_missing(1:9, missing_text = '') %>%
  tab_header(
    title = 'Table 3',
    subtitle = 'Correlations among implicit and explicit measure of the SoA, TB-SA task order.'
  ) %>%
  tab_options(
    table.font.size = '80%'
  ) %>%
  tab_footnote(
    'c.f. Dewey & Knoblich, 2014, p. 7',
    locations = cells_title('title')
  )
Table 31
Correlations among implicit and explicit measure of the SoA, TB-SA task order.
sa_agency tb_200ms tb_400ms tb_1200ms MI LoC likert_SensAtt likert_TemBin likert_SAminusTB
sa_agency −0.15 0.02 0.18 −0.12 −0.16 0.06 −0.17 0.18
tb_200ms 0.69 0.52 −0.07 −0.30 0.08 0.09 0.04
tb_400ms 0.59 −0.12 −0.43 −0.04 0.17 −0.12
tb_1200ms −0.33 −0.24 0.13 0.22 −0.05
MI 0.27 −0.11 0.13 −0.20
LoC 0.06 −0.12 0.14
likert_SensAtt 0.10 0.71
likert_TemBin −0.72
likert_SAminusTB

1 c.f. Dewey & Knoblich, 2014, p. 7

tab3_corrected_p %>%
  as.data.frame() %>%
  mutate(row = rownames(.)) %>%
  gt(rowname_col = 'row') %>%
  fmt_number(1:9,decimals = 3) %>%
  fmt_missing(1:9, missing_text = '') %>%
  tab_header(
    title = 'Table 3, BH-corrected p values'
  ) %>%
  data_color(
    1:9,
    scales::col_bin(
      bins = c(0,.05,1),
      palette = c('darkred', 'white'),
    )
  ) %>%
  tab_options(
    table.font.size = '80%'
  )
Table 3, BH-corrected p values
sa_agency tb_200ms tb_400ms tb_1200ms MI LoC likert_SensAtt likert_TemBin likert_SAminusTB
sa_agency 0.697 0.925 0.693 0.697 0.693 0.810 0.693 0.693
tb_200ms 0.000 0.005 0.803 0.289 0.791 0.735 0.848
tb_400ms 0.001 0.697 0.042 0.848 0.693 0.697
tb_1200ms 0.202 0.479 0.697 0.586 0.810
MI 0.385 0.700 0.697 0.680
LoC 0.810 0.697 0.697
likert_SensAtt 0.706 0.000
likert_TemBin 0.000
likert_SAminusTB
article_pdf_path = here::here('documents/Dewey_Knoblich_2014.pdf') 

library(tabulizer)
## Warning: package 'tabulizer' was built under R version 4.1.1
article_pdf_path %>%
  extract_tables() -> pdf_tabs

pdf_tab1 = pdf_tabs[[1]][-1,4:12]
diag(pdf_tab1) = ''
pdf_tab1 %>%
  sub(pattern = '*', replacement = '', fixed = TRUE) %>%
  sub(pattern = '^2', replacement = '-') %>%
  as.numeric() %>%
  matrix(nrow = 9) -> pdf_tab1


(round(tab1,2) - pdf_tab1) %>%
  data.frame() %>%
  mutate(row = rownames(.)) %>%
  gt(rowname_col = 'row') %>%
  fmt_number(1:9) %>%
  tab_header(
    title = 'Deviations between calculated table and PDF table 1'
  ) %>%
  data_color(
    1:9,
    scales::col_numeric(
      palette = 'RdBu',
      domain = c(-.3,.3),
      reverse = TRUE
    )
  ) %>%
  fmt_missing(1:9, missing_text = '') %>%
  tab_options(
    table.font.size = '80%'
    )
Deviations between calculated table and PDF table 1
sa_agency tb_200ms tb_400ms tb_1200ms MI LoC likert_SensAtt likert_TemBin likert_SAminusTB
sa_agency 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
tb_200ms 0.00 0.00 0.00 0.00 0.00 0.00 0.00
tb_400ms 0.00 0.00 0.00 0.00 0.00 0.00
tb_1200ms 0.00 0.00 0.00 0.00 0.00
MI 0.00 0.00 0.00 0.00
LoC 0.00 0.00 0.00
likert_SensAtt 0.00 0.00
likert_TemBin 0.00
likert_SAminusTB
pdf_tab2 = pdf_tabs[[2]][,5:13]
diag(pdf_tab2) = ''
pdf_tab2 %>%
  sub(pattern = '*', replacement = '', fixed = TRUE) %>%
  sub(pattern = '^2', replacement = '-') %>%
  as.numeric() %>%
  matrix(nrow = 9) -> pdf_tab2

(round(tab2,2) - pdf_tab2) %>%
  data.frame() %>%
  mutate(row = rownames(.)) %>%
  gt(rowname_col = 'row') %>%
  fmt_number(1:9) %>%
    tab_header(
    title = 'Deviations between calculated table and PDF table 2'
  ) %>%
  data_color(
    1:9,
    scales::col_numeric(
      palette = 'RdBu',
      domain = c(-.25,.25),
      reverse = TRUE
    )
  ) %>%
  fmt_missing(1:9, missing_text = '') %>%
  tab_options(
    table.font.size = '80%'
  )
Deviations between calculated table and PDF table 2
sa_agency tb_200ms tb_400ms tb_1200ms MI LoC likert_SensAtt likert_TemBin likert_SAminusTB
sa_agency 0.06 0.06 −0.03 0.00 0.00 0.00 0.00 0.00
tb_200ms −0.05 −0.09 −0.01 0.01 0.21 0.06 0.00
tb_400ms −0.11 −0.08 −0.04 0.05 −0.04 0.00
tb_1200ms −0.11 −0.06 −0.05 −0.01 0.00
MI 0.00 0.00 0.00 0.00
LoC 0.00 0.00 0.00
likert_SensAtt 0.00 0.00
likert_TemBin 0.00
likert_SAminusTB
# Frustratingly, table 3 does not come out with tabulizer. 
# We need to use OCR.

temppng = tempfile(fileext = '.png')

library(magick)
## Warning: package 'magick' was built under R version 4.1.1
## Linking to ImageMagick 6.9.12.3
## Enabled features: cairo, freetype, fftw, ghostscript, heic, lcms, pango, raw, rsvg, webp
## Disabled features: fontconfig, x11
area_inches = c(4269, 957, 1550, 885) / 600
dpi = 600
area_pixels = area_inches * dpi

article_pdf_path %>%
  pdftools::pdf_convert(
    dpi = dpi,
    pages = 7,
    filenames = temppng
  ) %>%
  image_read() %>%
  image_rotate(90) %>%
  image_crop(
    geometry_area(
      width = area_pixels[1], 
      height = area_pixels[2], 
      x_off = area_pixels[3],
      y_off = area_pixels[4]
      )
    ) %>%
  image_write(path = temppng)
## Warning in sprintf(filenames, pages, format): 2 arguments not used by format 'C:
## \Users\richa\AppData\Local\Temp\RtmpOeYjxB\filedf0159263.png'
## Converting page 7 to C:\Users\richa\AppData\Local\Temp\RtmpOeYjxB\filedf0159263.png... done!
library(tesseract)
## Warning: package 'tesseract' was built under R version 4.1.1
temppng %>%
  tesseract::ocr() %>%
  stringr::str_replace_all(pattern = '—', '-') %>%
  stringr::str_remove_all('\\*|^- ') %>%
  stringr::str_replace_all(pattern = '([- \n])([0-9])', '\\1.\\2') %>%
  stringr::str_replace_all(pattern = '\n- ', '\n') %>%
  strsplit('[\n ]') %>%
  unlist() %>%
  as.numeric() -> lt

# There is one element that doesn't seem to like to OCR properly
lt[9] = -lt[9]

pdf_tab3 = pdf_tab2 + NA
pdf_tab3[lower.tri(pdf_tab3)] = lt
pdf_tab3 = t(pdf_tab3)

(round(tab3,2) - pdf_tab3) %>%
  data.frame() %>%
  mutate(row = rownames(.)) %>%
  gt(rowname_col = 'row') %>%
  fmt_number(1:9) %>%
  tab_header(
    title = 'Deviations between calculated table and PDF table 3'
  ) %>%
  data_color(
    1:9,
    scales::col_numeric(
      palette = 'RdBu',
      domain = c(-.25,.25),
      reverse = TRUE
    )
  ) %>%
  fmt_missing(1:9, missing_text = '') %>%
  tab_options(
    table.font.size = '80%'
  )
Deviations between calculated table and PDF table 3
sa_agency tb_200ms tb_400ms tb_1200ms MI LoC likert_SensAtt likert_TemBin likert_SAminusTB
sa_agency −0.10 0.00 0.06 0.00 0.00 0.00 0.00 0.00
tb_200ms −0.11 −0.02 0.07 −0.04 −0.07 0.20 0.00
tb_400ms −0.06 0.05 −0.11 −0.07 −0.02 0.00
tb_1200ms −0.08 0.01 0.06 −0.08 0.00
MI 0.00 0.00 0.00 0.00
LoC 0.00 0.00 0.00
likert_SensAtt 0.00 0.00
likert_TemBin 0.00
likert_SAminusTB