# We need to take our data and calculate Euclidean distances between words for each participant, so that we can include contrastiveness as a feature in our analysis.
# First, we need to read in the requisite files: one that contains all of the phonological feature vectors, and one that contains the data from our participants.
# Attach plyr before the tidyverse: plyr and dplyr export several functions
# with the same names, and plyr itself warns that loading it after dplyr is
# likely to cause problems. plyr is attached for mapvalues().
# (Run this script in a fresh R session rather than clearing the workspace
# with rm(list = ls()), which does not reset attached packages or options.)
library(plyr)
library(ggthemes)
library(tidyverse)
library(afex)
library(standardize)

# Both input files live under the experiment folder on this machine.
data_dir <- "F:/Google Drive/Experiments/Edinburgh Experiments/Experiment 4- Phonological Decoupling"

# stringsAsFactors = FALSE keeps text columns as character vectors on every
# R version (it is only the default from R 4.0.0 onwards).
vectors <- read.csv(file.path(data_dir, "PhonVectors.csv"),
                    stringsAsFactors = FALSE)
Exp4Data <- read.csv(file.path(data_dir, "Data", "Exp4OmnibusCSV.csv"),
                     stringsAsFactors = FALSE)
# Now we need to split up the data so that it is usable.
# Turn every trial's label into a numeric phonological feature vector.
#
# Reads:  vectors  (one row per phoneme: Representation + 11 feature columns)
#         Exp4Data (one row per trial, with a Label column)
# Writes: vectors$vector, and in Exp4Data the segment columns (LC1 ... LV3),
#         their feature strings (LC1Vec ... LV3Vec), LabelVector, and the
#         66 numeric columns produced by splitting LabelVector.

# Put the feature values for each phoneme into a single comma-separated
# column (for ease of data handling).
feature_cols <- c("Sonorant", "Consonantal", "Voice", "Nasal", "Degree",
                  "Labial", "Palatal", "Pharyngeal", "Round", "Tongue",
                  "Radical")
vectors$vector <- do.call(paste, c(unname(vectors[feature_cols]), sep = ","))

Exp4Data$Label <- tolower(Exp4Data$Label) # Make it lowercase

# Remove the empty columns X, X.1 and X.2 if the export produced them;
# unlike an unconditional drop this does not fail when they are absent.
Exp4Data <- Exp4Data[, setdiff(names(Exp4Data), c("X", "X.1", "X.2")),
                     drop = FALSE]

# Split the label into consonants and vowels by character position:
# consonants occupy one character, vowels two.
segment_spans <- list(
  LC1 = c(1, 1), LV1 = c(2, 3),
  LC2 = c(4, 4), LV2 = c(5, 6),
  LC3 = c(7, 7), LV3 = c(8, 9)
)
segment_names <- names(segment_spans)
for (seg in segment_names) {
  span <- segment_spans[[seg]]
  Exp4Data[[seg]] <- substr(Exp4Data$Label, span[1], span[2])
}

# mapvalues() leaves values it cannot find untouched, so a segment with no
# entry in the phoneme table would silently end up inside LabelVector and only
# show up later as NAs. Report such segments up front.
segments_seen <- unique(unlist(lapply(Exp4Data[segment_names], as.character),
                               use.names = FALSE))
unmapped <- setdiff(segments_seen, as.character(vectors$Representation))
if (length(unmapped) > 0) {
  warning("Label segments with no phonological feature vector: ",
          paste(unmapped, collapse = ", "), call. = FALSE)
}

# Map in the vectors: one feature string per segment.
for (seg in segment_names) {
  Exp4Data[[paste0(seg, "Vec")]] <- mapvalues(Exp4Data[[seg]],
                                              from = vectors$Representation,
                                              to = vectors$vector)
}

# Whole-word vectors in a single column.
segment_vec_cols <- paste0(segment_names, "Vec")
Exp4Data$LabelVector <- do.call(
  paste, c(unname(Exp4Data[segment_vec_cols]), sep = ",")
)

# Split this column back into separate numeric columns, one per feature of
# each segment (6 segments x 11 features = 66).
n_pieces <- length(segment_names) * length(feature_cols)
piece_names <- as.character(seq_len(n_pieces))
Exp4Data <- separate(data = Exp4Data, col = LabelVector, into = piece_names,
                     sep = ",", remove = FALSE)

# separate(remove = FALSE) inserts the new columns directly after LabelVector,
# so locate them relative to it instead of hard-coding positions. They are
# addressed by position because their final names (LV1 ... LV66) repeat the
# names of the vowel segment columns LV1-LV3 created above.
first_piece <- match("LabelVector", names(Exp4Data)) + 1L
piece_idx <- seq.int(first_piece, length.out = n_pieces)
names(Exp4Data)[piece_idx] <- paste0("LV", piece_names)
Exp4Data[piece_idx] <- lapply(Exp4Data[piece_idx], as.numeric)
# First we need a list of the correct label for every image. This is done on
# a by-participant basis, because the pairings are read off each
# participant's own target trials.
Exp4T <- subset(Exp4Data, TrialType == "T") # Target trials tell you the correct pairings of labels and images
participants <- unique(Exp4Data$ID)

# One row per target trial: participant, the label shown (which is the correct
# label on a target trial) and the image it was shown with.
CorrectPairs <- Exp4T[, c("ID", "Label", "Image")]
names(CorrectPairs) <- c("ID", "CorrectLabel", "Image")

# Group the rows by participant in order of first appearance in the data.
# order() leaves ties in their original order, so trial order within each
# participant is preserved.
CorrectPairs <- CorrectPairs[order(match(CorrectPairs$ID, participants)), ]

# Drop pairings with a missing participant, label or image; they cannot be
# used as a lookup key.
CorrectPairs <- CorrectPairs[complete.cases(CorrectPairs), ]
# Now we start comparisons: attach to every trial the label that is correct
# for its image, then compute the Euclidean distance between the feature
# vector of the label shown and that of the correct label.
#
# Reads:  Exp4Data, CorrectPairs, participants
# Writes: Exp4Data (reduced to the analysis columns), Exp4DataMod (analysis
#         columns plus the split LV*/CLV* feature columns and DiffList) and
#         Exp4DataClean (analysis columns with TargetDist filled in).
analysis_cols <- c("ID", "Sex", "Age", "Condition", "CondColl", "Sys", "Phon",
                   "Trial", "TrialType", "Image", "Label", "LabelVector",
                   "CorrectLabel", "CorrectLabelVector", "TargetDist",
                   "Response", "RespType", "RT", "Correctness", "CorrInd",
                   "CorrCat", "SigInd", "SigCat")

Exp4Data$CorrectLabel <- NA
Exp4Data$CorrectLabelVector <- NA
# Non-missing placeholder: the complete.cases() filter below runs before the
# real distances are computed and must not discard rows because of it.
Exp4Data$TargetDist <- 1
Exp4Data <- Exp4Data[, analysis_cols]

# Fill in CorrectLabel and CorrectLabelVector for one participant's trials.
add_correct_labels <- function(participant) {
  triallist <- Exp4Data[which(Exp4Data$ID == participant), , drop = FALSE]
  correctlist <- CorrectPairs[which(CorrectPairs$ID == participant), ,
                              drop = FALSE]

  # Label paired with each trial's image on this participant's target trials.
  # Images with no pairing are passed through unchanged by mapvalues().
  image_label <- as.character(mapvalues(triallist$Image,
                                        from = correctlist$Image,
                                        to = correctlist$CorrectLabel,
                                        warn_missing = FALSE))

  # On target trials the label shown is the correct one; otherwise look it up
  # from the image. The value of ifelse() is assigned: putting assignments
  # inside its branches makes the branch evaluated last overwrite every row.
  triallist$CorrectLabel <- ifelse(triallist$TrialType == "T",
                                   triallist$Label,
                                   image_label)

  # Feature vector of the correct label, looked up among the labels this
  # participant saw. Labels repeat across trials, so the "values not present"
  # message of mapvalues() would be noise here.
  triallist$CorrectLabelVector <- as.character(
    mapvalues(triallist$CorrectLabel,
              from = triallist$Label,
              to = triallist$LabelVector,
              warn_missing = FALSE)
  )
  triallist
}

# Bind all participants once instead of growing a data frame inside a loop.
per_participant <- lapply(participants, add_correct_labels)
Exp4DataMod <- if (length(per_participant) > 0) {
  do.call(rbind, per_participant)
} else {
  Exp4Data[0, ]
}

# NOTE(review): this removes every trial with an NA in any analysis column
# (e.g. a missing RT or Age), not only trials without a correct label. Kept
# because the original script applied the same filter; confirm it is intended.
Exp4DataMod <- Exp4DataMod[complete.cases(Exp4DataMod), ]
rownames(Exp4DataMod) <- NULL

# Split both vector strings into numeric columns: 6 segments x 11 features.
n_dims <- 66
lv_cols <- paste0("LV", seq_len(n_dims))
clv_cols <- paste0("CLV", seq_len(n_dims))
Exp4DataMod <- separate(data = Exp4DataMod, col = LabelVector, into = lv_cols,
                        sep = ",", remove = FALSE)
Exp4DataMod[lv_cols] <- lapply(Exp4DataMod[lv_cols], as.numeric)
Exp4DataMod <- separate(data = Exp4DataMod, col = CorrectLabelVector,
                        into = clv_cols, sep = ",", remove = FALSE)
Exp4DataMod[clv_cols] <- lapply(Exp4DataMod[clv_cols], as.numeric)

# Euclidean distance per trial, computed for all rows at once. A distance that
# cannot be computed stays NA on its own row; filtering NAs out of the distance
# vector would shift every later value onto the wrong trial.
label_matrix <- as.matrix(Exp4DataMod[lv_cols])
target_matrix <- as.matrix(Exp4DataMod[clv_cols])
Exp4DataMod$DiffList <- unname(sqrt(rowSums((label_matrix - target_matrix)^2)))
Exp4DataMod$TargetDist <- Exp4DataMod$DiffList

Exp4DataClean <- Exp4DataMod[, analysis_cols]
# So that's some ugliness out of the way; now we just save this clean data so that we can analyse it elsewhere.
# Save the cleaned trial-level data as CSV for the downstream analysis.
write.csv(
  x = Exp4DataClean,
  file = "F:/Google Drive/Experiments/Edinburgh Experiments/Experiment 4- Phonological Decoupling/Data/Exp4Clean.csv"
)