polimp_analysis

rm(list = ls())
library(jsonlite)
library(ggplot2)
library(tidyr)
source("../helper/useful.R")

## Directory holding the raw JSON result files (one file per submission).
raw.data.path <- "../../production-results/"

## READ IN FILES
## `pattern` is a regular expression, not a glob, so match the literal
## ".json" suffix explicitly.
files <- dir(raw.data.path, pattern = "\\.json$")

## Read one result file and return its contents as a data frame with an
## added `subid` column identifying the file it came from.
read_result_file <- function(file.name) {
  print(file.name)

  ## these are the two functions that are most meaningful
  json_lines <- readLines(paste0(raw.data.path, file.name))
  json_str <- paste(json_lines, collapse = "")
  ## Remove the comma directly before a closing brace (presumably a trailing
  ## comma that would otherwise make the JSON invalid).
  json_str <- gsub(",}", "}", json_str, fixed = TRUE)
  trials <- data.frame(jsonlite::fromJSON(json_str))

  ## Use the whole file stem as the id. A 6-character prefix is shared by
  ## distinct files (two start with "354P56", two with "35GMH2"), which
  ## would merge those participants in any per-subject summary.
  trials$subid <- sub("\\.json$", "", file.name)
  trials
}

## now here's where data get bound together
## (one rbind over all files instead of growing the data frame per file)
all.data <- if (length(files) > 0) {
  do.call(rbind, lapply(files, read_result_file))
} else {
  data.frame()
}

## [1] "30BXRYBRP4XKPAH4KG7IDEG9Y8GHWV.json"
## [1] "31QTRG6Q2TDGRWASIUXB7K3GV8YYPQ.json"
## [1] "323Q6SJS8IGIEOV9URXI9QU935VHF2.json"
## [1] "33FBRBDW6OZII1A60LDBO7G7399C83.json"
## [1] "33L7PJKHCGYZ4ICHM781DHGBRVC8TL.json"
## [1] "340UGXU9DY1CF7NJU59LARMLL7IVU2.json"
## [1] "354P56DE9K3UP4UJN2B5PCOUESOS7P.json"
## [1] "354P56DE9K3UP4UJN2B5PCOUESSS7T.json"
## [1] "35GCEFQ6I5O9PXFAGI2N8O9HW543Z2.json"
## [1] "35GMH2SV3EH00RH03T82BH6A3QPOEA.json"
## [1] "35GMH2SV3EH00RH03T82BH6A3RXOEK.json"
## [1] "36AHBNMV1RC72Z72DL6G7QMRHONYD2.json"
## [1] "36H9ULYP62UE5UQZIR03L3AOGXWFJ2.json"
## [1] "36WLNQG78ZAGHXSRZO3G5X6BWSEEB6.json"
## [1] "39LNWE0K4UW7T13JVYA2V2WRD6SIU0.json"
## [1] "3BV8HQ2ZZW17L8UKNSKMYYUSXB0A6F.json"
## [1] "3DQQ64TANGLC2RFS3TFNMNI0KZQPW8.json"
## [1] "3EJPLAJKEMG8MG2SZK0LFRL16W56ZA.json"
## [1] "3EO896NRAWVOOZ80DB5S27PP764JTE.json"
## [1] "3F1567XTNW5MQ73ZAVKK880ZRUT9QV.json"
## [1] "3G5W44VEU7IFUESCBW6X43ADW1YGK3.json"
## [1] "3GM6G9ZBKNXEP7ELG8IKIZH6B15TM0.json"
## [1] "3IUZPWIU1O7BR0ICQ1AFQ6W1QQSKWC.json"
## [1] "3IXQG4FA2TYZM11ARDNUDJ505UWB9P.json"
## [1] "3J2UYBXQQLCBKVW2VQNT9SIFOQT06L.json"
## [1] "3JJVG1YBEBXGLEZYONAD130YJZFB52.json"
## [1] "3JV9LGBJWTEYK5DR9OYQBKRMG66GOL.json"
## [1] "3MD9PLUKKIEGT18KY1M2H27N1N1NZA.json"
## [1] "3NLZY2D53PPHR948ZSNADHRUROCQLB.json"
## [1] "3NVC2EB65QZ9K755KW04UH0FQBZY35.json"
## [1] "3OE22WJIGIOKAZR286FB44ND4G8QUA.json"
## [1] "3OLF68YTN91341NVOLNQS475QB8FAU.json"
## [1] "3OONKJ5DKCJ2TO5GT26CA90P0TCOBA.json"
## [1] "3OSWBBLG1EXI2UHS8PKEOCFIFJ0XD4.json"
## [1] "3PEIJLRY6TTHB0HJPKP4G043MZ6WXC.json"
## [1] "3R2UR8A0IAG96R8HYYYUZ6RHJW1XOT.json"
## [1] "3RANCT1ZVFHX6TPDPOIL9FSEIG7UB2.json"
## [1] "3RUIQRXJBBO60COV7NFEWVS0HILLLD.json"
## [1] "3RWE2M8QWHAJRG2UU79AB341JGHN0H.json"
## [1] "3RXCAC0YIRPVZDQB279UEY1M128G8Q.json"
## [1] "3S0TNUHWKTISNTGK0H8M8DYPO9P8D3.json"
## [1] "3SB5N7Y3O34DHOL873WFLJV4I3PG0U.json"
## [1] "3SEPORI8WNZ99IEVPXHXVWNN2G2AZD.json"
## [1] "3TPWUS5F891T826OYOTH2NJTLGCCWI.json"
## [1] "3TVRFO09GKF1065KL6I27PNNOS2XLK.json"
## [1] "3WEV0KO0OMSA6DVTEFERP4YQ04ZDSD.json"
## [1] "3WYP994K17R8HQSN3YY21KCZ0236YL.json"
## [1] "3XM0HYN6NKZILRT1ITLH0HB0DWSPE1.json"
## [1] "3XXU1SWE8MVC7X85L345PXK1K9IA0K.json"
## [1] "3Y54SXRO1LLX2FJUPA9699Y0OM0TUI.json"

Filter out the training trials and clean up the data.

# Keep the participant id, the item label and the response (renamed to
# `judgment`); drop rows labelled "training1" or "training2"; split the item
# label at "_" into `utterance` and `inference`; store `inference` as a factor.
d <- all.data %>%
  select(subid, answer.scale, judgment = answer.judgment) %>%
  filter(!(answer.scale == "training1" | answer.scale == "training2")) %>%
  separate(answer.scale, into = c("utterance", "inference"), sep = "_") %>%
  mutate(inference = as.factor(inference))

# Derive the analysis factors from the item labels:
#   bound       - first three characters of `inference`
#                 ("and" -> "lower", "but" -> "upper")
#   scale       - "scalar" if `utterance` contains "some", else "ad-hoc"
#   utt_valence - "love" if `utterance` contains "love" (any case),
#                 else "hate"
#   condition   - `utt_valence` and `inference` joined by "-"
d1 <- d %>%
  mutate(bound = factor(substr(inference, 1, 3),
                        levels = c("and", "but"),
                        labels = c("lower", "upper")),
         scale = factor(ifelse(grepl("some", utterance),
                               "scalar", "ad-hoc"),
                        levels = c("ad-hoc", "scalar")),
         utt_valence = factor(ifelse(grepl("love", utterance,
                                           ignore.case = TRUE),
                                     "love", "hate"),
                              levels = c("hate", "love")),
         condition = str_c(utt_valence, "-", inference))

# Response counts for the scalar items: one panel per bound, with the bars
# for each condition placed side by side.
# Built with ggplot() because qplot() is deprecated and no longer honours
# its `position` argument, so the "dodge" request would be lost.
ggplot(filter(d1, scale == "scalar"),
       aes(x = judgment, fill = condition)) +
  geom_bar(position = "dodge") +
  facet_grid(~bound)

plot of chunk unnamed-chunk-2

# proportions
# Code each response as 1 for "yes" and 0 otherwise. Comparing the values
# directly works whether `judgment` is a factor or a character column and
# does not depend on the order (or number) of factor levels.
d1$possible <- as.numeric(d1$judgment == "yes")

# Scalar-item trials only.
scalar.trials <- subset(d1, scale == "scalar")

# Per-participant means, used for the error bars; overall means per condition.
mss <- aggregate(possible ~ condition + subid, scalar.trials, mean)
ms <- aggregate(possible ~ condition, scalar.trials, mean)
# ci.high / ci.low are not defined in this file (presumably they come from
# the sourced helper). They are added to / subtracted from the mean below,
# so they are assumed to return distances from the mean -- TODO confirm.
ms$cih <- aggregate(possible ~ condition, mss, ci.high)$possible
ms$cil <- aggregate(possible ~ condition, mss, ci.low)$possible

# Bar per condition with error bars. Built with ggplot() because qplot() is
# deprecated and no longer honours its `stat` argument; `width` is a fixed
# setting, so it belongs outside aes().
ggplot(ms, aes(x = condition, y = possible, fill = condition)) +
  geom_bar(stat = "identity") +
  geom_errorbar(aes(ymin = possible - cil, ymax = possible + cih),
                width = .2) +
  ylim(0, 1) +
  ylab("Proportion \"Possible\" responses")

plot of chunk unnamed-chunk-3

# chi-square - hate vs. love
# Count the "yes" responses ('YES, possible that all') per condition and
# test the counts against equal probabilities.
d_yes <- table(d1$condition[d1$judgment == "yes"])
chisq.test(d_yes)

## 
##  Chi-squared test for given probabilities
## 
## data:  d_yes
## X-squared = 1.778, df = 1, p-value = 0.1824

# Same test for the "no" responses ('NOT possible that all'): counts per
# condition against equal probabilities.
d_no <- table(d1$condition[d1$judgment == "no"])
chisq.test(d_no)

## 
##  Chi-squared test for given probabilities
## 
## data:  d_no
## X-squared = 1, df = 1, p-value = 0.3173

# Wilcoxon rank-sum test (Bonnefon used this?): compares the distribution of
# the 0/1 `possible` responses between the two conditions.
with(d1, wilcox.test(possible ~ condition))

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  possible by condition
## W = 1450, p-value = 0.09809
## alternative hypothesis: true location shift is not equal to 0

polimp_analysis

EJY, MCF

April 1, 2015