Euclidean Distance Calculator - Phonological Dispersion Manuscript

We need to take our data and calculate Euclidean distances between words for our participants so that we can include contrastiveness as a feature in our analysis.

First, we need to read in the requisite files: one that contains all of the phonological feature vectors and one that contains the data from our participants.

# NOTE: the workspace is deliberately not cleared with rm(list = ls()).
# Every object used by this script is created by the script itself, so stale
# objects cannot leak into the results, and rm() would not reset attached
# packages or options anyway. Restart R for a genuinely clean session.

library(ggthemes)
library(tidyverse)
library(afex)
library(standardize)
library(plyr)

# Phonological feature vectors.
vectors <- read.csv(
  file = "F:/Google Drive/Experiments/Edinburgh Experiments/Experiment 4- Phonological Decoupling/PhonVectors.csv"
)

# Participant data.
Exp4Data <- read.csv(
  file = "F:/Google Drive/Experiments/Edinburgh Experiments/Experiment 4- Phonological Decoupling/Data/Exp4OmnibusCSV.csv"
)

Now we need to split up the data so that it is usable.

# Put the feature vector for each phoneme into a single comma-separated string
# (for ease of data handling), giving one lookup value per phoneme.
feature_cols <- c(
  "Sonorant", "Consonantal", "Voice", "Nasal", "Degree", "Labial",
  "Palatal", "Pharyngeal", "Round", "Tongue", "Radical"
)
vectors$vector <- do.call(
  paste,
  c(unname(as.list(vectors[feature_cols])), sep = ",")
)

Exp4Data$Label <- tolower(Exp4Data$Label)  # Make it lowercase
Exp4Data <- subset(Exp4Data, select = -c(X, X.1, X.2))  # Remove empty columns

# Split each label into consonant (LC*) and vowel (LV*) segments by fixed
# character position: one character per consonant, two per vowel.
segment_spans <- list(
  LC1 = c(1, 1), LV1 = c(2, 3),
  LC2 = c(4, 4), LV2 = c(5, 6),
  LC3 = c(7, 7), LV3 = c(8, 9)
)
for (segment in names(segment_spans)) {
  span <- segment_spans[[segment]]
  Exp4Data[[segment]] <- substr(Exp4Data$Label, span[1], span[2])
}

# Map every segment onto its feature string (columns LC1Vec, LV1Vec, ...).
# mapvalues() leaves values it cannot find unchanged, which would silently
# produce a malformed word vector, so unknown segments are reported here.
known_segments <- as.character(vectors$Representation)
for (segment in names(segment_spans)) {
  segment_values <- Exp4Data[[segment]]
  unknown <- setdiff(unique(segment_values), known_segments)
  if (length(unknown) > 0) {
    warning(
      "Segments in ", segment, " with no phonological vector: ",
      paste(unknown, collapse = ", "),
      call. = FALSE
    )
  }
  # warn_missing = FALSE: not every phoneme occurs in every position, so the
  # "values not present" message would fire on each call without being useful.
  Exp4Data[[paste0(segment, "Vec")]] <- mapvalues(
    segment_values,
    from = vectors$Representation,
    to = vectors$vector,
    warn_missing = FALSE
  )
}

# Whole-word vector in a single column: the six segment vectors concatenated
# in label order (6 segments x 11 features = 66 comma-separated values).
vector_cols <- paste0(names(segment_spans), "Vec")
Exp4Data$LabelVector <- do.call(
  paste,
  c(unname(as.list(Exp4Data[vector_cols])), sep = ",")
)

# LabelVector is intentionally not split into per-dimension numeric columns
# here. Columns named LV1..LV66 would collide with the vowel columns
# LV1..LV3 above, and the split is carried out on the matched trial data
# immediately before the distances are computed.

# First we need the correct label for every image. This is done on a
# by-participant basis: each row of CorrectPairs is one (ID, CorrectLabel,
# Image) triple taken from that participant's target trials.

Exp4T <- subset(Exp4Data, TrialType == "T")   # Target trials tell you the correct pairings of labels and images

participants <- unique(Exp4Data$ID)

# Take the pairings straight from the target trials instead of growing a data
# frame with rbind() inside a loop seeded by a dummy NA row.
CorrectPairs <- Exp4T[, c("ID", "Label", "Image")]
names(CorrectPairs) <- c("ID", "CorrectLabel", "Image")

# Group rows by participant in order of first appearance. order() is stable,
# so the trial order within each participant is preserved.
CorrectPairs <- CorrectPairs[order(match(CorrectPairs$ID, participants)), ]

# Drop pairings with a missing ID, label or image.
CorrectPairs <- CorrectPairs[complete.cases(CorrectPairs), ]


# Now we start comparisons

# Placeholder columns, filled in per participant below. TargetDist is
# overwritten once the distances have been computed.
Exp4Data$CorrectLabel <- NA
Exp4Data$CorrectLabelVector <- NA
Exp4Data$TargetDist <- 1

Exp4Data <- subset(Exp4Data, select = c(ID, Sex, Age, Condition, CondColl, Sys, Phon, Trial, TrialType, Image, Label, LabelVector, CorrectLabel,
                                        CorrectLabelVector, TargetDist, Response, RespType, RT, Correctness, CorrInd, CorrCat, SigInd, SigCat ))

# Collect one data frame per participant and bind them once at the end,
# rather than growing Exp4DataMod with rbind() from a dummy seed row.
per_participant <- vector("list", length(participants))
slot <- 0

for (participant in participants) {

  triallist <- subset(Exp4Data, ID == participant)
  correctlist <- subset(CorrectPairs, ID == participant)

  # Label paired with each trial's image on this participant's target trials.
  # Images with no target pairing are left unchanged by mapvalues().
  image_label <- mapvalues(triallist$Image,
                           from = correctlist$Image,
                           to = correctlist$CorrectLabel,
                           warn_missing = FALSE)

  # Target trials show the correct label themselves; every other trial takes
  # the label looked up from its image. ifelse() is used for its value only:
  # assigning inside its branches evaluates both assignments, so the second
  # one would overwrite the first for all rows.
  triallist$CorrectLabel <- ifelse(triallist$TrialType == "T",
                                   as.character(triallist$Label),
                                   as.character(image_label))

  # Feature vector of the correct label, looked up among this participant's
  # own trials (first matching Label wins).
  triallist$CorrectLabelVector <- mapvalues(triallist$CorrectLabel,
                                            from = triallist$Label,
                                            to = triallist$LabelVector,
                                            warn_missing = FALSE)

  slot <- slot + 1
  per_participant[[slot]] <- triallist
}

Exp4DataMod <- if (length(per_participant) > 0) {
  do.call(rbind, per_participant)
} else {
  Exp4Data[0, ]
}

# Keeps only rows with no NA in any retained column.
# NOTE(review): this also discards trials with e.g. a missing RT or Response;
# confirm that this is intended.
Exp4DataMod <- Exp4DataMod[complete.cases(Exp4DataMod), ]

# Each whole-word vector has 66 comma-separated dimensions. The per-dimension
# columns are addressed by name (LV1..LV66 for the presented label,
# CLV1..CLV66 for the correct label) so the code does not depend on the
# position of any column in Exp4DataMod.
n_dims <- 66
lv_cols <- paste0("LV", seq_len(n_dims))
clv_cols <- paste0("CLV", seq_len(n_dims))

Exp4DataMod <- separate(data = Exp4DataMod, col = LabelVector, into = lv_cols, sep = ",", remove = FALSE)
Exp4DataMod[lv_cols] <- lapply(Exp4DataMod[lv_cols], as.numeric)

Exp4DataMod <- separate(data = Exp4DataMod, col = CorrectLabelVector, into = clv_cols, sep = ",", remove = FALSE)
Exp4DataMod[clv_cols] <- lapply(Exp4DataMod[clv_cols], as.numeric)

# Euclidean distance between the presented label and the correct label,
# computed for all rows at once. The result always has one entry per row: a
# row whose vectors could not be parsed gets an NA distance instead of
# shortening the result and misaligning it with the data.
label_matrix <- as.matrix(Exp4DataMod[lv_cols])
correct_matrix <- as.matrix(Exp4DataMod[clv_cols])
DiffList <- unname(sqrt(rowSums((label_matrix - correct_matrix)^2)))

Exp4DataMod$DiffList <- DiffList
Exp4DataMod$TargetDist <- DiffList

# Keep only the analysis columns; the per-dimension helper columns are dropped.
Exp4DataClean <- subset(Exp4DataMod, select = c(ID, Sex, Age, Condition, CondColl, Sys, Phon, Trial, TrialType, Image, Label, LabelVector, CorrectLabel,
                                                CorrectLabelVector, TargetDist, Response, RespType, RT, Correctness, CorrInd, CorrCat, SigInd,
                                                SigCat))

So that’s some ugliness out of the way. Now we just save this clean data so we can analyse it elsewhere.

# Save the cleaned data so it can be analysed elsewhere.
write.csv(
  x = Exp4DataClean,
  file = "F:/Google Drive/Experiments/Edinburgh Experiments/Experiment 4- Phonological Decoupling/Data/Exp4Clean.csv"
)

Euclidean Distance Calculator - Phonological Dispersion Manuscript

Alan Nielsen

November 21, 2017