Source: Psi Chi R Contest

Will reveal code after contest deadline.

#Load packages and import data

#install.packages(c("ggplot2", "dplyr", "lubridate", "readr"))

library(ggplot2)
library(dplyr)
library(lubridate)
library(readr)

# Read the contest data set from the current working directory.
november <- read.csv("Nov_R_Data.csv")

# Make the black-and-white theme the ggplot2 default.
theme_set(theme_bw())

Data processing (level 1)

Write a script that will filter out participants who are missing values for the ‘Hoursmom’ and ‘Hoursdad’ variables

# Keep only the rows where both Hoursmom and Hoursdad are present.
november1 <- filter(november, !is.na(Hoursmom), !is.na(Hoursdad))

Create a variable called ‘PsyContM’ by summing together the following variables: DyadM1+ DyadM2+ DyadM3+ DyadM4+ DyadM5+ DyadM6+ DyadM7

# Add PsyContM as the total of the seven DyadM items. A row with any missing
# item gets NA because `+` propagates missing values.
november2 <- mutate(
  november1,
  PsyContM = DyadM1 + DyadM2 + DyadM3 + DyadM4 + DyadM5 + DyadM6 + DyadM7
)

Create a variable called ‘PsyContF’ by summing together the following variables: DyadF1+ DyadF2+ DyadF3+ DyadF4+ DyadF5+ DyadF6+ DyadF7

# Add PsyContF as the total of the seven DyadF items. A row with any missing
# item gets NA because `+` propagates missing values.
november3 <- mutate(
  november2,
  PsyContF = DyadF1 + DyadF2 + DyadF3 + DyadF4 + DyadF5 + DyadF6 + DyadF7
)

Descriptive Statistics (level 2)

Calculate the mean, standard deviation, median, and range for PsyContM and PsyContF

# Drop rows with a missing PsyContM or PsyContF so the statistics below do
# not need na.rm.
november3 <- filter(november3, !is.na(PsyContM), !is.na(PsyContF))

# PsyContM: mean = 11.77, median = 10
summary(november3$PsyContM)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    7.00    7.00   10.00   11.77   14.00   35.00
# PsyContM: standard deviation = 5.57
sd(november3$PsyContM)
## [1] 5.568655
# PsyContM: range = 7 to 35
range(november3$PsyContM)
## [1]  7 35

# PsyContF: mean = 11.54, median = 9
summary(november3$PsyContF)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    7.00    7.00    9.00   11.54   14.00   35.00
# PsyContF: standard deviation = 5.43
sd(november3$PsyContF)
## [1] 5.430595
# PsyContF: range = 7 to 35
range(november3$PsyContF)
## [1]  7 35

Data visualization (level 3)

Re-create the correlation matrix below:

# Build the 4 x 4 correlation matrix in a single call. The coefficients are
# entered by hand (nothing is computed from the data here) and are listed one
# column per line, which is the order matrix() fills in. The same variable
# names label both the rows and the columns.
# NOTE(review): the literals look like rounded cor() output -- confirm the
# source before relying on them.
cor_matrix <- matrix(
  c(
    1, 0.7086156, -0.1849406, -0.1902252,
    0.7086156, 1, -0.1535265, -0.1904273,
    -0.1849406, -0.1535265, 1, 0.5646881,
    -0.1902252, -0.1904273, 0.5646881, 1
  ),
  nrow = 4,
  ncol = 4,
  dimnames = rep(list(c("Hoursmom", "Hoursdad", "PsyContM", "PsyContF")), 2)
)

# Show the finished matrix.
print(cor_matrix)
##            Hoursmom   Hoursdad   PsyContM   PsyContF
## Hoursmom  1.0000000  0.7086156 -0.1849406 -0.1902252
## Hoursdad  0.7086156  1.0000000 -0.1535265 -0.1904273
## PsyContM -0.1849406 -0.1535265  1.0000000  0.5646881
## PsyContF -0.1902252 -0.1904273  0.5646881  1.0000000

Inferential statistics (level 4)

Is there a significant correlation between PsyContF and Hoursdad? Note the r and p values.

# Restrict to rows where both variables under test are present.
november4 <- filter(november3, !is.na(PsyContF), !is.na(Hoursdad))

# Inspect each distribution for departures from normality.
hist(november4$PsyContF)

hist(november4$Hoursdad)

# The data are not normally distributed, so use Spearman's rank correlation.
# Result: rho = -0.2010405, p-value = 0.000009257. Yes, there is a
# significant (negative) correlation between the two variables.
cor.test(
  november4$PsyContF,
  november4$Hoursdad,
  method = "spearman"
)
## 
##  Spearman's rank correlation rho
## 
## data:  november4$PsyContF and november4$Hoursdad
## S = 21999411, p-value = 0.000009257
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.2010405