Objective:

This lab assignment reinforces your understanding of data cleaning and descriptive analysis in R using dplyr and psych, and ends with a simple ggplot2 visualization.

Data Preparation:

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(psych)

# Build the example dataset: ten participants, with a few missing
# reaction-time and accuracy values (NA) to practise cleaning on
data <- data.frame(
  participant_id = seq_len(10),
  reaction_time = c(
    250, 340, 295, NA_real_, 310,
    275, 325, 290, 360, NA_real_
  ),
  gender = c(
    "M", "F", "F", "M", "M",
    "F", "M", "F", "M", "F"
  ),
  accuracy = c(
    95, 87, 92, 88, 94,
    91, 85, 89, 93, NA_real_
  )
)

# Show the raw data, including the incomplete rows
print(data)
##    participant_id reaction_time gender accuracy
## 1               1           250      M       95
## 2               2           340      F       87
## 3               3           295      F       92
## 4               4            NA      M       88
## 5               5           310      M       94
## 6               6           275      F       91
## 7               7           325      M       85
## 8               8           290      F       89
## 9               9           360      M       93
## 10             10            NA      F       NA

Data Cleaning:

# Listwise deletion: keep only the participants with no NA in any column
# (the original row names are retained, so gaps show which rows were dropped)
data_clean <- stats::na.omit(data)
print(data_clean)
##   participant_id reaction_time gender accuracy
## 1              1           250      M       95
## 2              2           340      F       87
## 3              3           295      F       92
## 5              5           310      M       94
## 6              6           275      F       91
## 7              7           325      M       85
## 8              8           290      F       89
## 9              9           360      M       93

Descriptive Statistics:

# Summary statistics for the measured variables only.
# participant_id is an identifier and gender is categorical, so moments such
# as mean, sd or skew are not meaningful for them (psych flags such coerced
# columns with a trailing "*"); select the numeric measures before describing.
measures <- dplyr::select(data_clean, reaction_time, accuracy)
descriptive_stats <- describe(measures)
print(descriptive_stats)
##               vars n   mean    sd median trimmed   mad min max range  skew
## reaction_time    1 8 305.62 35.70  302.5  305.62 37.06 250 360   110  0.01
## accuracy         2 8  90.75  3.49   91.5   90.75  3.71  85  95    10 -0.36
##               kurtosis    se
## reaction_time    -1.40 12.62
## accuracy         -1.52  1.24

Visualization:

library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
# Distribution of reaction time across the complete cases, in 20-unit bins
ggplot(data = data_clean) +
  geom_histogram(
    mapping = aes(x = reaction_time),
    binwidth = 20,
    fill = "blue",
    alpha = 0.7
  ) +
  ggtitle("Distribution of Reaction Time") +
  theme_minimal()

Submission Instructions:

  • Knit this document to HTML.
  • Submit your work on Canvas as per the instructions.