Data Wrangling & Visualization

Load Data and stats packages

The data are fake (see the Excel file). I wanted to create a template that I can use immediately once we have finished collecting data. Here are the packages I used.

library(readxl)
# Read the placeholder (fake) trial data from the Excel workbook.
FakeDataVEE <- read_excel("~/Andrew Mojica/Projects/Visual Effects Experiment/FakeDataVEE.xlsx")

# Open the spreadsheet viewer only in an interactive session; View() is a
# GUI side effect that has no purpose when the document is knitted or the
# script is run in batch mode.
if (interactive()) {
  View(FakeDataVEE)
}

# packages I used
library(dplyr)

## Warning: package 'dplyr' was built under R version 3.6.2

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggplot2)

## Warning: package 'ggplot2' was built under R version 3.6.3

library(psycho)

## Warning: package 'psycho' was built under R version 3.6.3

## Note: Many functions of the 'psycho' package have been (improved and) moved to other packages of the new 'easystats' collection (https://github.com/easystats). If you don't find where a function is gone, please open an issue at: https://github.com/easystats/easystats/issues

library(tidyverse)

## Warning: package 'tidyverse' was built under R version 3.6.3

## -- Attaching packages ----------------------------------------------- tidyverse 1.3.0 --

## v tibble  2.1.3     v purrr   0.3.3
## v tidyr   1.0.0     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0

## Warning: package 'tidyr' was built under R version 3.6.2

## Warning: package 'purrr' was built under R version 3.6.2

## -- Conflicts -------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(statsr)

## Warning: package 'statsr' was built under R version 3.6.3

Define Factor Variables

Here are the variables I defined as factors (not numerical). This distinction is, of course, important in R.

# Columns that are categorical labels rather than quantities; each one is
# replaced in place by its factor version.
factor_columns <- c(
  "Color_Level",
  "Subject",
  "P_Color",
  "P_Present",
  "SameDifferent"
)
FakeDataVEE[factor_columns] <- lapply(FakeDataVEE[factor_columns], as.factor)

Determine hits, misses, correct rejections, and false alarms

The next step is to categorize the peripheral data into four categories (hit, false alarms, misses, and correct rejections). I used if-then statements in R.

Hits = Target Present Trial & Target Present Response; False Alarms = Target Absent Trial & Target Present Response; Misses = Target Present Trial & Target Absent Response; Correct Rejections = Target Absent Trial & Target Absent Response.

# Flag every trial with four 0/1 indicator columns, one per signal-detection
# outcome. P_Present is "P" or "A" and Peripheral is 1 or 0.
#
# NOTE(review): the pairing below (present & 1 = hit, absent & 0 = false
# alarm, present & 0 = miss, absent & 1 = correct rejection) is consistent
# only if Peripheral codes response accuracy (1 = correct) -- TODO confirm.
# If Peripheral instead codes the response itself (1 = "target present"),
# the false_alarms and cr conditions are swapped.
FakeDataVEE <- FakeDataVEE %>%
  mutate(
    hits         = ifelse(P_Present == "P" & Peripheral == 1, 1, 0),
    false_alarms = ifelse(P_Present == "A" & Peripheral == 0, 1, 0),
    misses       = ifelse(P_Present == "P" & Peripheral == 0, 1, 0),
    cr           = ifelse(P_Present == "A" & Peripheral == 1, 1, 0)
  )

Create a summary pivot table that captures counts

I created a data frame that counts the number of hits, false alarms, misses, and correct rejections for each subject at each color level.

# Count hits, false alarms, misses and correct rejections for every
# Color_Level x Subject combination by summing the 0/1 indicator columns.
# No select() is needed: summarize() keeps only the grouping columns and the
# summaries it is asked for.
peripheral_data <- FakeDataVEE %>%
  group_by(Color_Level, Subject) %>%
  summarize(
    n_hit  = sum(hits),
    n_fa   = sum(false_alarms),
    n_miss = sum(misses),
    n_cr   = sum(cr)
  ) %>%
  # summarize() peels off only the last grouping variable, so the result
  # would otherwise stay grouped by Color_Level; drop the grouping so later
  # verbs operate on the whole table.
  ungroup()

Calculate d’

To calculate d’, I used an R package called psycho. Its dprime() function calculates d’, beta, A’, and B’’D based on signal detection theory. Here is the documentation: https://www.rdocumentation.org/packages/psycho/versions/0.5.0/topics/dprime

# Compute the signal-detection indices from the per-row counts.
# The counts are passed by name rather than by position so that a different
# argument order in another version of psycho cannot silently swap the
# false-alarm and miss counts.
indices <- psycho::dprime(
  n_hit  = peripheral_data$n_hit,
  n_fa   = peripheral_data$n_fa,
  n_miss = peripheral_data$n_miss,
  n_cr   = peripheral_data$n_cr
)

# Append the returned indices as extra columns next to the counts.
peripheral_data <- cbind(peripheral_data, indices)

Calculate percentages

I calculated proportion scores, such as overall accuracy.

# Derive proportion scores from the four outcome counts in a single pass.
# Rates are conditional on trial type: hits and misses are divided by the
# hit + miss total, false alarms and correct rejections by the
# false alarm + correct rejection total.
peripheral_data <- peripheral_data %>%
  mutate(
    Overall_Accuracy = (n_hit + n_cr) / (n_hit + n_cr + n_fa + n_miss),
    p_hit            = n_hit / (n_hit + n_miss),
    p_fa             = n_fa / (n_fa + n_cr),
    p_miss           = n_miss / (n_miss + n_hit),
    p_cr             = n_cr / (n_cr + n_fa)
  )

Possible plots

Here are some possible plots. I found this helpful website (https://www.rapidtables.com/web/color/RGB_Color.html) that produces R code using RGB color. You can also change the level of saturation. I randomly picked different shades of red. Once we determine what colors (using this term loosely) we will use, we can use them in our figures.

# Scatter plot: p_hit against p_fa, one jittered point per row of
# peripheral_data, coloured by Color_Level.
# The manual palette supplies eight shades of red; it must contain at least
# as many colours as Color_Level has levels.
# NOTE(review): p_hit and p_fa are proportions, while the axis labels say
# "%" and the subtitle mentions d prime -- confirm the intended wording.
ggplot(peripheral_data, aes(x = p_fa, y = p_hit, color = Color_Level)) +
  geom_jitter(size = 3) +
  scale_color_manual(values = c("#FF0000",
                                "#FF4D4D",
                                "#FF5A5A",
                                "#FF6666",
                                "#FF8080",
                                "#FF9A9A",
                                "#FFB3B3",
                                "#FFE6E6")) +
  # Reference lines along both axes.
  geom_hline(yintercept = 0) +
  geom_vline(xintercept = 0) +
  # theme_dark() is a complete theme and resets every theme element, so the
  # legend tweak has to come after it to take effect.
  theme_dark() +
  theme(legend.position = "bottom") +
  labs(
    title    = "Visual Effects Experiment",
    subtitle = "Peripheral Data: d prime",
    y        = "% Hits",
    x        = "% False Alarms"
  )

# Box plot: distribution of dprime within each Color_Level, with each box
# filled from the eight-shade red palette (the palette must contain at least
# as many colours as Color_Level has levels).
peripheral_data %>%
  ggplot(aes(x = Color_Level, y = dprime, fill = Color_Level)) +
  geom_boxplot() +
  scale_fill_manual(values = c("#FF0000",
                               "#FF4D4D",
                               "#FF5A5A",
                               "#FF6666",
                               "#FF8080",
                               "#FF9A9A",
                               "#FFB3B3",
                               "#FFE6E6")) +
  # theme_dark() is a complete theme and resets every theme element, so the
  # legend tweak has to come after it to take effect.
  theme_dark() +
  theme(legend.position = "bottom") +
  # The x label is set once here; a separate xlab() call would only be
  # overridden by labs(x = ...).
  labs(
    title    = "Visual Effects Experiment",
    subtitle = "Peripheral Data: d prime",
    y        = "d prime",
    x        = "color level"
  )

# Density plot: kernel density estimate of dprime, one filled curve per
# Color_Level (the palette must contain at least as many colours as
# Color_Level has levels).
peripheral_data %>%
  ggplot(aes(x = dprime, fill = Color_Level)) +
  geom_density() +
  scale_fill_manual(values = c("#FF0000",
                               "#FF4D4D",
                               "#FF5A5A",
                               "#FF6666",
                               "#FF8080",
                               "#FF9A9A",
                               "#FFB3B3",
                               "#FFE6E6")) +
  # theme_dark() is a complete theme and resets every theme element, so the
  # legend tweak has to come after it to take effect.
  theme_dark() +
  theme(legend.position = "bottom") +
  labs(
    title    = "Visual Effects Experiment",
    subtitle = "Peripheral Data: d prime",
    # geom_density() draws a density estimate on the y axis, not a count.
    y        = "density",
    x        = "d prime"
  )

# Histogram plot: counts of dprime values in 50 bins, drawn in a separate
# facet row for each Color_Level, with a vertical reference line at 0.
# Bin settings are parameters of geom_histogram(), not aesthetics, so the
# stray `binwidth` mapping inside aes() is gone; bins = 50 is the setting
# that was actually in effect.
peripheral_data %>%
  ggplot(aes(x = dprime, fill = Color_Level)) +
  geom_histogram(bins = 50) +
  scale_fill_manual(values = c("#FF0000",
                               "#FF4D4D",
                               "#FF5A5A",
                               "#FF6666",
                               "#FF8080",
                               "#FF9A9A",
                               "#FFB3B3",
                               "#FFE6E6")) +
  geom_vline(xintercept = 0) +
  facet_grid(Color_Level ~ .) +
  # theme_dark() is a complete theme and resets every theme element, so the
  # legend tweak has to come after it to take effect.
  theme_dark() +
  theme(legend.position = "bottom") +
  labs(
    title    = "Visual Effects Experiment",
    subtitle = "Peripheral Data: d prime",
    y        = "count",
    x        = "d prime"
  )