####WORKSPACE SETTING####

# Working directory: every relative path used below (input CSV files, the
# Logs/ folder and the CSV files written at the end) is resolved against it.
# NOTE(review): this absolute path only exists on the author's machine.
setwd(dir = "C:/Users/vitto/Desktop/Ethicub/Data analysis")

#libraries
library(dplyr)
## 
## Caricamento pacchetto: 'dplyr'
## I seguenti oggetti sono mascherati da 'package:stats':
## 
##     filter, lag
## I seguenti oggetti sono mascherati da 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(stringr)
library(magrittr)
## 
## Caricamento pacchetto: 'magrittr'
## Il seguente oggetto è mascherato da 'package:tidyr':
## 
##     extract
# Start from an empty workspace: remove every object currently visible in the
# calling environment. Attached packages are not affected, so the library()
# calls above remain in force.
rm(list = ls())

####SCORING LAB SOSCI DATA####

# Read the lab questionnaire export (semicolon-separated).
# The first six rows are test entries and are dropped. A negative slice is
# used instead of 7:nrow(.), which would count downwards (and return reversed
# test rows) if the file ever held fewer than seven rows.
LabData <- read.csv("data_ethicub.csv", sep = ";") %>%
  slice(-(1:6))

###Scoring###

# Agency (lab) + creation of the scoring data frame.
# Score = row mean of every column whose name contains "AE01", rounded to two
# decimals. Columns read as text are converted to numeric first. A participant
# with any missing item gets NA (no na.rm).
# `df` starts here with the single column `agency_POST`; later sections add
# one column per scale.
df <- LabData %>%
  select(contains("AE01")) %>%
  mutate(across(where(is.character), as.numeric)) %>%
  rowMeans() %>%
  round(2) %>%
  data.frame(agency_POST = .)

# Experience (lab): row mean of the "AE02" items, rounded to two decimals.
# Text columns are converted to numeric first; any missing item yields NA.
df$exp <- LabData %>%
  select(contains("AE02")) %>%
  mutate(across(where(is.character), as.numeric)) %>%
  rowMeans() %>%
  round(2)

# IOS: taken as-is from the column(s) whose name contains "IO01" (no
# averaging), converted to numeric if read as text, flattened to a plain
# unnamed vector and rounded to two decimals.
# NOTE(review): presumably exactly one column matches "IO01"; with more than
# one the flattened vector would not fit df - confirm.
df$IOS <- LabData %>%
  select(contains("IO01")) %>%
  mutate(across(where(is.character), as.numeric)) %>%
  unlist(use.names = FALSE) %>%
  round(2)

# Likeability (lab): row mean of the "LI01" items, rounded to two decimals.
# Item LI01_04 is reverse-keyed (6 - x) before averaging; it is recoded in
# place, which gives the same mean as adding a *_R copy and dropping the
# original.
df$like_POST <- LabData %>%
  select(contains("LI01")) %>%
  mutate(across(where(is.character), as.numeric)) %>%
  mutate(LI01_04 = 6 - LI01_04) %>%
  rowMeans() %>%
  round(2)

#MAS ROBOT
# Three MAS subscale scores for the MA01 items. Each score is the row mean of
# its items (text columns converted to numeric first), rounded to two
# decimals; any missing item yields NA.
# Reverse-keyed items are recoded as 5 - x.
# NOTE(review): 5 - x presumably reflects a 1-4 response scale - confirm.

# Res: no reverse-keyed items.
df$MAS_Res_iCub <- LabData %>%
  select(MA01_02, MA01_03, MA01_05, MA01_07, MA01_09, MA01_12, MA01_14) %>%
  mutate(across(where(is.character), as.numeric)) %>%
  rowMeans() %>%
  round(2)

# Ext: every item is reverse-keyed.
df$MAS_Ext_iCub <- LabData %>%
  select(MA01_04, MA01_06, MA01_10, MA01_11) %>%
  mutate(across(where(is.character), as.numeric)) %>%
  mutate(across(everything(), function(x) 5 - x)) %>%
  rowMeans() %>%
  round(2)

# Group: items 01 and 13 are reverse-keyed, item 08 is not.
df$MAS_Group_iCub <- LabData %>%
  select(MA01_01, MA01_08, MA01_13) %>%
  mutate(across(where(is.character), as.numeric)) %>%
  mutate(across(c(MA01_01, MA01_13), function(x) 5 - x)) %>%
  rowMeans() %>%
  round(2)

#MAS HUMAN
# Same three MAS subscale scores, computed on the MA02 items. Each score is
# the row mean of its items (text columns converted to numeric first),
# rounded to two decimals; any missing item yields NA.
# Reverse-keyed items are recoded as 5 - x.

# Res: no reverse-keyed items.
df$MAS_Res_Human <- LabData %>%
  select(MA02_02, MA02_03, MA02_05, MA02_07, MA02_09, MA02_12, MA02_14) %>%
  mutate(across(where(is.character), as.numeric)) %>%
  rowMeans() %>%
  round(2)

# Ext: every item is reverse-keyed.
df$MAS_Ext_Human <- LabData %>%
  select(MA02_04, MA02_06, MA02_10, MA02_11) %>%
  mutate(across(where(is.character), as.numeric)) %>%
  mutate(across(everything(), function(x) 5 - x)) %>%
  rowMeans() %>%
  round(2)

# Group: items 01 and 13 are reverse-keyed, item 08 is not.
df$MAS_Group_Human <- LabData %>%
  select(MA02_01, MA02_08, MA02_13) %>%
  mutate(across(where(is.character), as.numeric)) %>%
  mutate(across(c(MA02_01, MA02_13), function(x) 5 - x)) %>%
  rowMeans() %>%
  round(2)

# Need for cognition: row mean of all "NC01" items, rounded to two decimals.
# Items 03, 04, 05, 07, 08, 09, 12, 16 and 17 are reverse-keyed (5 - x)
# before averaging. Any missing item yields NA.
# NOTE(review): 5 - x presumably reflects a 1-4 response scale - confirm.
df$nc <- LabData %>%
  select(contains("NC01")) %>%
  mutate(across(where(is.character), as.numeric)) %>%
  mutate(across(
    c(
      NC01_03, NC01_04, NC01_05, NC01_07, NC01_08,
      NC01_09, NC01_12, NC01_16, NC01_17
    ),
    function(x) 5 - x
  )) %>%
  rowMeans() %>%
  round(2)

# Sense of agency: two subscale scores from the PE01 items, each the row mean
# of its items (text columns converted to numeric first), rounded to two
# decimals. No item is reverse-keyed. Any missing item yields NA.
df$SoNA <- LabData %>%
  select(PE01_01, PE01_04, PE01_08, PE01_09, PE01_12, PE01_13) %>%
  mutate(across(where(is.character), as.numeric)) %>%
  rowMeans() %>%
  round(2)

df$SoPA <- LabData %>%
  select(PE01_02, PE01_03, PE01_05, PE01_06, PE01_07, PE01_10, PE01_11) %>%
  mutate(across(where(is.character), as.numeric)) %>%
  rowMeans() %>%
  round(2)

#Big Five
# Five trait scores from the PE02 items. Each score is the row mean of its
# items (text columns converted to numeric first), rounded to two decimals;
# any missing item yields NA. Reverse-keyed items are recoded as 6 - x.

# BFI_ext: items 01, 21 and 26 are reverse-keyed.
df$BFI_ext <- LabData %>%
  select(PE02_01, PE02_06, PE02_11, PE02_16, PE02_21, PE02_26) %>%
  mutate(across(where(is.character), as.numeric)) %>%
  mutate(across(c(PE02_01, PE02_21, PE02_26), function(x) 6 - x)) %>%
  rowMeans() %>%
  round(2)

# BFI_agr: items 07, 17 and 27 are reverse-keyed.
df$BFI_agr <- LabData %>%
  select(PE02_02, PE02_07, PE02_12, PE02_17, PE02_22, PE02_27) %>%
  mutate(across(where(is.character), as.numeric)) %>%
  mutate(across(c(PE02_07, PE02_17, PE02_27), function(x) 6 - x)) %>%
  rowMeans() %>%
  round(2)

# BFI_con: items 03, 08 and 28 are reverse-keyed.
df$BFI_con <- LabData %>%
  select(PE02_03, PE02_08, PE02_13, PE02_18, PE02_23, PE02_28) %>%
  mutate(across(where(is.character), as.numeric)) %>%
  mutate(across(c(PE02_03, PE02_08, PE02_28), function(x) 6 - x)) %>%
  rowMeans() %>%
  round(2)

# BFI_neg: items 14, 19 and 24 are reverse-keyed.
df$BFI_neg <- LabData %>%
  select(PE02_04, PE02_09, PE02_14, PE02_19, PE02_24, PE02_29) %>%
  mutate(across(where(is.character), as.numeric)) %>%
  mutate(across(c(PE02_14, PE02_19, PE02_24), function(x) 6 - x)) %>%
  rowMeans() %>%
  round(2)

# BFI_ope: items 10 and 20 are reverse-keyed.
# Only five items are scored here. The original code carried a commented-out
# reverse-coding of PE02_30 (6 - PE02_30), and that item is not selected.
# NOTE(review): presumably PE02_30 is missing from the export - confirm.
df$BFI_ope <- LabData %>%
  select(PE02_05, PE02_10, PE02_15, PE02_20, PE02_25) %>%
  mutate(across(where(is.character), as.numeric)) %>%
  mutate(across(c(PE02_10, PE02_20), function(x) 6 - x)) %>%
  rowMeans() %>%
  round(2)

#Warmth and Competence
# Four scores, each the row mean of the matching item block (text columns
# converted to numeric first), rounded to two decimals. Any missing item
# yields NA.
# Item blocks as coded here: WC01 = warmth POST, WC03 = warmth PRE,
# WC02 = competence POST, WC04 = competence PRE.
df$warmth_POST <- LabData %>%
  select(contains("WC01")) %>%
  mutate(across(where(is.character), as.numeric)) %>%
  rowMeans() %>%
  round(2)

df$warmth_PRE <- LabData %>%
  select(contains("WC03")) %>%
  mutate(across(where(is.character), as.numeric)) %>%
  rowMeans() %>%
  round(2)

df$comp_POST <- LabData %>%
  select(contains("WC02")) %>%
  mutate(across(where(is.character), as.numeric)) %>%
  rowMeans() %>%
  round(2)

df$comp_PRE <- LabData %>%
  select(contains("WC04")) %>%
  mutate(across(where(is.character), as.numeric)) %>%
  rowMeans() %>%
  round(2)


# Participant ID (lab): the "WE05" column flattened to a plain unnamed
# character vector, with every apostrophe stripped from the codes.
df$ID1 <- LabData %>%
  select(contains("WE05")) %>%
  unlist(use.names = FALSE) %>%
  gsub("'", "", .)

####SCORING HOME SOSCI DATA####
# Read the at-home questionnaire export and drop its first row.
# A negative slice is used instead of 2:nrow(.), which would count downwards
# (2, 1) and keep the dropped row if the file held a single row.
HomeData <- read.csv("data_ethicub-pre.csv") %>%
  slice(-1)

#####Matching with LabData#####

# Participant ID (home): column WE01_01 as a plain unnamed character vector
# with apostrophes stripped, so it is comparable with df$ID1.
ID2 <- HomeData %>%
  select(WE01_01) %>%
  unlist(use.names = FALSE) %>%
  gsub("'", "", .)

# Keep only the people who also answered the lab questionnaire.
answered_in_lab <- ID2 %in% df$ID1
ID2_paired <- ID2[answered_in_lab]

# Some participants answered more than once. Keep the last answer, assuming
# they got it wrong the first time at home and then corrected it (it is
# unlikely that they answered correctly and then did it again).
is_last_answer <- !duplicated(ID2_paired, fromLast = TRUE)
ID2_paired_undupl <- ID2_paired[is_last_answer]

# Put the home rows in the same order as the lab rows. A lab participant
# without a home answer gets an all-NA row, so HomeData_Clean always has one
# row per row of df.
row_for_lab_id <- match(df$ID1, ID2_paired_undupl)
HomeData_Clean <- HomeData[answered_in_lab, , drop = FALSE] %>%
  .[is_last_answer, , drop = FALSE] %>%
  .[row_for_lab_id, , drop = FALSE]

# Remove the helper objects that are no longer needed.
rm(answered_in_lab, is_last_answer, row_for_lab_id)

#####Cleaning and inclusion in dataframe####
#Naming house data as OUT to separate from PRE administration in lab

# Home-questionnaire scale scores (suffix _OUT). Each score is the row mean
# of the matching item block (text columns converted to numeric first),
# rounded to two decimals. Any missing item, or a participant without home
# data, yields NA.

# Likeability: item LK01_04 is reverse-keyed (6 - x), recoded in place.
df$like_OUT <- HomeData_Clean %>%
  select(contains("LK01")) %>%
  mutate(across(where(is.character), as.numeric)) %>%
  mutate(LK01_04 = 6 - LK01_04) %>%
  rowMeans() %>%
  round(2)

# Experience
df$exp_OUT <- HomeData_Clean %>%
  select(contains("LK02")) %>%
  mutate(across(where(is.character), as.numeric)) %>%
  rowMeans() %>%
  round(2)

# Agency
df$agency_OUT <- HomeData_Clean %>%
  select(contains("LK03")) %>%
  mutate(across(where(is.character), as.numeric)) %>%
  rowMeans() %>%
  round(2)

# NOTE(review): in the lab data WC01 is scored as warmth and WC02 as
# competence (warmth_POST / comp_POST). Here the mapping is the opposite.
# Confirm that the home questionnaire really uses swapped item codes.

# Competence
df$competence_OUT <- HomeData_Clean %>%
  select(contains("WC01")) %>%
  mutate(across(where(is.character), as.numeric)) %>%
  rowMeans() %>%
  round(2)

# Warmth
df$warm_OUT <- HomeData_Clean %>%
  select(contains("WC02")) %>%
  mutate(across(where(is.character), as.numeric)) %>%
  rowMeans() %>%
  round(2)

# Demographics taken from the home questionnaire.

# Applies gsub() once per entry of `labels`, in order: names(labels) are the
# patterns, the values are the replacements. Matching is by substring, not by
# whole value.
replace_codes <- function(values, labels) {
  for (code in names(labels)) {
    values <- gsub(code, labels[[code]], values)
  }
  values
}

# Age: the "WE04" column, converted to numeric if it was read as text.
df$age <- HomeData_Clean %>%
  select(contains("WE04")) %>%
  mutate(across(where(is.character), as.numeric)) %>%
  unlist

# Gender: "WE03" codes 1/2/3/4 relabelled as f/m/nb/o and stored as a factor.
df$gen <- HomeData_Clean %>%
  select(contains("WE03")) %>%
  unlist(use.names = FALSE) %>%
  replace_codes(c("1" = "f", "2" = "m", "3" = "nb", "4" = "o")) %>%
  as.factor

# Met iCub before: "WE02" codes 1/2 relabelled as si/no and stored as a factor.
df$met <- HomeData_Clean %>%
  select(contains("WE02")) %>%
  unlist(use.names = FALSE) %>%
  replace_codes(c("1" = "si", "2" = "no")) %>%
  as.factor

####ORDERING DF####


# Put the columns of df in their final order: identifier and demographics
# first, then the scale scores.
df <- df %>%
  select(
    # identifier and demographics
    ID1, age, gen, met,
    # agency, experience, IOS, likeability
    agency_OUT, agency_POST, exp, IOS, like_OUT, like_POST,
    # MAS subscales (iCub, then human)
    MAS_Res_iCub, MAS_Ext_iCub, MAS_Group_iCub,
    MAS_Res_Human, MAS_Ext_Human, MAS_Group_Human,
    # need for cognition and sense of agency
    nc, SoNA, SoPA,
    # Big Five
    BFI_ext, BFI_agr, BFI_con, BFI_neg, BFI_ope,
    # warmth and competence measured in the lab
    warmth_PRE, warmth_POST, comp_PRE, comp_POST,
    # remaining home-questionnaire scores
    exp_OUT, competence_OUT, warm_OUT
  )


####CLEANING LOG DATA####
# Builds `log`: one row per dilemma per participant, parsed from the raw
# experiment logs in "Logs/raw data/logs_000NN/data.log".
# Columns: DIL_val, INI_time, INI_val, AGR_time, ifAGR, AGR_val, UPD_time,
# UPD_val, SUB.
#
# Fix compared with the previous loop: rows were written at positions
# (i*8-7):(i*8) for i = 2..13, which left rows 1-8 of `log` filled with NA.
# Those rows also ended up in log_dataframe.csv. The per-participant tables
# are now simply stacked, so `log` starts at the first real record.
#
# NOTE(review): the object name `log` is kept because later code refers to
# it, although it shadows the name of base::log().

# Regex matching the opening words of each dilemma text -> short label.
dilemma_labels <- c(
  "Un assistente medico robot .*" = "D1",
  "Un robot di ultima gener.*" = "D2",
  "I detenuti maschi di un car.*" = "D3",
  "Un robot autonomo dell'Aerona.*" = "D4",
  "Un banco alimentare ha un ass.*" = "D5",
  "Un robot assistente medico lav.*" = "D6",
  "In una zona lontana della .*" = "D7",
  "Nel pronto soccorso di un grande .*" = "D8"
)

# Reads and cleans the log of one participant.
#   folder_index: number NN of the "logs_000NN" folder to read.
#   subject_id:   participant ID stored in the SUB column.
# Returns a table with one row per dilemma and the nine columns listed above.
read_subject_log <- function(folder_index, subject_id) {
  log_path <- paste0(
    "Logs/raw data/logs_000",
    str_pad(folder_index, 2, pad = "0"),
    "/data.log"
  )

  # Removes the event tag and replaces a full dilemma text with its label.
  shorten <- function(x) {
    x <- gsub(" DILEMMA | AGREED | INIT | UPDATE ", "", x)
    for (pattern in names(dilemma_labels)) {
      x <- gsub(pattern, dilemma_labels[[pattern]], x)
    }
    x
  }

  read.table(log_path, sep = "\r") %>%
    unlist(use.names = FALSE) %>%
    # drop the first three characters of every record
    gsub("^.{3}", "", .) %>%
    # four consecutive records form one dilemma
    matrix(ncol = 4, byrow = TRUE) %>%
    data.frame %>%
    rename("DIL" = 1, "INI" = 2, "AGR" = 3, "UPD" = 4) %>%
    # the first 17 characters of a record are its timestamp, the rest its value
    separate_wider_regex(
      1:4,
      c("time" = ".{17}", "val" = ".*"),
      names_sep = "_"
    ) %>%
    mutate(across(everything(), shorten)) %>%
    # the AGREED value holds two space-separated fields
    separate_wider_delim(AGR_val, " ", names = c("ifAGR", "AGR_val")) %>%
    mutate(
      SUB = subject_id,
      across(contains("time"), function(x) round(as.numeric(x), 2)),
      # timestamps become intervals since the previous event; each line reads
      # the still-untouched timestamp of the event before it
      UPD_time = UPD_time - AGR_time,
      AGR_time = AGR_time - INI_time,
      INI_time = INI_time - DIL_time,
      DIL_time = NULL,
      # ifAGR as logical
      ifAGR = as.logical(as.numeric(ifAGR)),
      # opinions as numbers
      INI_val = as.numeric(INI_val),
      AGR_val = as.numeric(AGR_val),
      UPD_val = as.numeric(UPD_val)
    )
}

# Log folders 2..13 belong to rows 1..12 of df (folder i -> df$ID1[i - 1]).
log <- lapply(2:13, function(i) read_subject_log(i, df$ID1[i - 1])) %>%
  bind_rows() %>%
  as.data.frame()

# Kept for backward compatibility: the column names used to be exposed as
# the global `coln`.
coln <- colnames(log)





# Write the cleaned data to CSV.

# Questionnaire scores: missing values are written as empty cells so the file
# opens cleanly in Excel. This is done with the `na` argument rather than by
# assigning "" into a copy of df. That assignment turned numeric columns into
# text and could not blank missing values in the factor columns (gen, met),
# because "" is not one of their levels.
write.csv(df, "quest_dataframe.csv", na = "")

# Log data: written as-is (missing values appear as NA).
write.csv(log, "log_dataframe.csv")

####DATA ANALYSIS####

#Change opinion if iCub disagrees?

#T-test. Ahia!
# Keep only the trials where ifAGR is FALSE (rows with a missing ifAGR are
# dropped by filter()).
log_noAGR <- log %>%
  filter(ifAGR == FALSE)

# Where the initial value is above AGR_val, mirror the updated value on the
# scale (UPD_val -> 101 - UPD_val). INI_val itself is left unchanged.
# Vectorised: which() skips rows where the comparison is NA, and nothing
# happens when log_noAGR has no rows. The previous row-by-row loop stopped
# with an error in both cases.
# NOTE(review): presumably AGR_val is iCub's position and the scale runs
# 1-100 - confirm.
to_mirror <- which(log_noAGR$INI_val - log_noAGR$AGR_val > 0)
log_noAGR$UPD_val[to_mirror] <- 101 - log_noAGR$UPD_val[to_mirror]
rm(to_mirror)


# T-test per dilemma
# For every dilemma present in log_noAGR (in order of first appearance),
# print a header line followed by a two-sample t-test, with t.test() defaults,
# of the initial values against the updated values.
# The loop variable has to stay `i`: t.test() prints the argument expressions
# on its "data:" line, so renaming it would change the printed output.
# NOTE(review): both vectors come from the same trials, so a paired test may
# be more appropriate - confirm with the analysis plan.
for (i in unique(log_noAGR$DIL_val)) {
  print(paste("----Dilemma", i, "------------------------"))
  print(
    t.test(
      log_noAGR$INI_val[log_noAGR$DIL_val == i],
      log_noAGR$UPD_val[log_noAGR$DIL_val == i]
    )
  )
}
## [1] "----Dilemma D2 ------------------------"
## 
##  Welch Two Sample t-test
## 
## data:  log_noAGR$INI_val[log_noAGR$DIL_val == i] and log_noAGR$UPD_val[log_noAGR$DIL_val == i]
## t = 0.6865, df = 9.4251, p-value = 0.5089
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -28.40838  53.40838
## sample estimates:
## mean of x mean of y 
##  37.66667  25.16667 
## 
## [1] "----Dilemma D3 ------------------------"
## 
##  Welch Two Sample t-test
## 
## data:  log_noAGR$INI_val[log_noAGR$DIL_val == i] and log_noAGR$UPD_val[log_noAGR$DIL_val == i]
## t = 0.65797, df = 6.7239, p-value = 0.5324
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -23.61561  41.61561
## sample estimates:
## mean of x mean of y 
##      26.5      17.5 
## 
## [1] "----Dilemma D4 ------------------------"
## 
##  Welch Two Sample t-test
## 
## data:  log_noAGR$INI_val[log_noAGR$DIL_val == i] and log_noAGR$UPD_val[log_noAGR$DIL_val == i]
## t = 0.94241, df = 6.328, p-value = 0.3806
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -26.90345  61.30345
## sample estimates:
## mean of x mean of y 
##      40.4      23.2 
## 
## [1] "----Dilemma D8 ------------------------"
## 
##  Welch Two Sample t-test
## 
## data:  log_noAGR$INI_val[log_noAGR$DIL_val == i] and log_noAGR$UPD_val[log_noAGR$DIL_val == i]
## t = 2.6342, df = 6.2334, p-value = 0.03748
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   4.000523 96.666143
## sample estimates:
## mean of x mean of y 
##  64.66667  14.33333 
## 
## [1] "----Dilemma D1 ------------------------"
## 
##  Welch Two Sample t-test
## 
## data:  log_noAGR$INI_val[log_noAGR$DIL_val == i] and log_noAGR$UPD_val[log_noAGR$DIL_val == i]
## t = 0.90998, df = 7.3123, p-value = 0.3918
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -20.4907  46.4907
## sample estimates:
## mean of x mean of y 
##  43.33333  30.33333 
## 
## [1] "----Dilemma D5 ------------------------"
## 
##  Welch Two Sample t-test
## 
## data:  log_noAGR$INI_val[log_noAGR$DIL_val == i] and log_noAGR$UPD_val[log_noAGR$DIL_val == i]
## t = 1.0005, df = 7.7976, p-value = 0.3471
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -18.19436  45.86103
## sample estimates:
## mean of x mean of y 
##  41.33333  27.50000 
## 
## [1] "----Dilemma D6 ------------------------"
## 
##  Welch Two Sample t-test
## 
## data:  log_noAGR$INI_val[log_noAGR$DIL_val == i] and log_noAGR$UPD_val[log_noAGR$DIL_val == i]
## t = 1.3404, df = 8.556, p-value = 0.2146
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -8.513607 32.799322
## sample estimates:
## mean of x mean of y 
##  47.14286  35.00000 
## 
## [1] "----Dilemma D7 ------------------------"
## 
##  Welch Two Sample t-test
## 
## data:  log_noAGR$INI_val[log_noAGR$DIL_val == i] and log_noAGR$UPD_val[log_noAGR$DIL_val == i]
## t = 2.3651, df = 7.7919, p-value = 0.04641
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   0.6505375 63.0161291
## sample estimates:
## mean of x mean of y 
##  59.00000  27.16667