Will reveal code after contest deadline.
#Load packages and import data
#install.packages(c("ggplot2", "dplyr", "lubridate", "readr"))
library(ggplot2)
library(dplyr)
library(lubridate)
library(readr)
# Read the raw data set; the relative path is resolved against the current
# working directory.
november <- read.csv(file = "Nov_R_Data.csv")
# Make the black-and-white theme the default for ggplot2 figures
theme_set(theme_bw())
Write a script that will filter out participants who are missing values for the ‘Hoursmom’ and ‘Hoursdad’ variables
# Keep only the rows where both Hoursmom and Hoursdad are present (non-NA)
november1 <- november %>%
  filter(!(is.na(Hoursmom) | is.na(Hoursdad)))
Create a variable called ‘PsyContM’ by summing together the following variables: DyadM1+ DyadM2+ DyadM3+ DyadM4+ DyadM5+ DyadM6+ DyadM7
# Add PsyContM: the row-wise total of the seven DyadM items.
# `+` propagates NA, so a row missing any item gets an NA total.
november2 <- mutate(
  november1,
  PsyContM = DyadM1 + DyadM2 + DyadM3 + DyadM4 + DyadM5 + DyadM6 + DyadM7
)
Create a variable called ‘PsyContF’ by summing together the following variables: DyadF1+ DyadF2+ DyadF3+ DyadF4+ DyadF5+ DyadF6+ DyadF7
# Add PsyContF: the row-wise total of the seven DyadF items.
# `+` propagates NA, so a row missing any item gets an NA total.
november3 <- mutate(
  november2,
  PsyContF = DyadF1 + DyadF2 + DyadF3 + DyadF4 + DyadF5 + DyadF6 + DyadF7
)
Calculate the mean, standard deviation, median, and range for PsyContM and PsyContF
# Drop rows where either composite score is missing, so the statistics
# below are computed on complete values only
november3 <- november3 %>%
  filter(!(is.na(PsyContM) | is.na(PsyContF)))

# PsyContM: mean = 11.77, median = 10
summary(november3[["PsyContM"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 7.00 7.00 10.00 11.77 14.00 35.00
# PsyContM: standard deviation = 5.57
sd(november3[["PsyContM"]])
## [1] 5.568655
# PsyContM: range = 7 to 35
range(november3[["PsyContM"]])
## [1] 7 35

# PsyContF: mean = 11.54, median = 9
summary(november3[["PsyContF"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 7.00 7.00 9.00 11.54 14.00 35.00
# PsyContF: standard deviation = 5.43
sd(november3[["PsyContF"]])
## [1] 5.430595
# PsyContF: range = 7 to 35
range(november3[["PsyContF"]])
## [1] 7 35
Re-create the correlation matrix below:
# Correlation matrix entered by hand: one row of values per variable,
# in the order Hoursmom, Hoursdad, PsyContM, PsyContF
cor_matrix <- matrix(
  c(
     1,          0.7086156, -0.1849406, -0.1902252,
     0.7086156,  1,         -0.1535265, -0.1904273,
    -0.1849406, -0.1535265,  1,          0.5646881,
    -0.1902252, -0.1904273,  0.5646881,  1
  ),
  nrow = 4,
  ncol = 4,
  byrow = TRUE
)
# Rows and columns carry the same variable labels
dimnames(cor_matrix) <- rep(
  list(c("Hoursmom", "Hoursdad", "PsyContM", "PsyContF")),
  times = 2
)
# Win!
print(cor_matrix)
## Hoursmom Hoursdad PsyContM PsyContF
## Hoursmom 1.0000000 0.7086156 -0.1849406 -0.1902252
## Hoursdad 0.7086156 1.0000000 -0.1535265 -0.1904273
## PsyContM -0.1849406 -0.1535265 1.0000000 0.5646881
## PsyContF -0.1902252 -0.1904273 0.5646881 1.0000000
Is there a significant correlation between PsyContF and Hoursdad? Note the R and P values.
# Check both variables for normality before choosing a correlation method.
# Rows with a missing PsyContF or Hoursdad are dropped first so that both
# vectors passed to hist() and cor.test() are complete.
november4 <- november3 %>%
  filter(!is.na(PsyContF), !is.na(Hoursdad))
hist(november4$PsyContF)
hist(november4$Hoursdad)
# Data not normally distributed, so use Spearman's rank correlation.
# PsyContF is a small integer total (7 to 35), so tied ranks are expected and
# an exact p-value cannot be computed. Requesting the asymptotic p-value
# explicitly (exact = FALSE) gives the same result without the
# "Cannot compute exact p-value with ties" warning.
cor.test(
  november4$PsyContF,
  november4$Hoursdad,
  method = "spearman",
  exact = FALSE
)
# p-value = 0.000009257 and correlation = -0.2010405 . Yes, there's a
# significant (negative) correlation between the two variables.
##
## Spearman's rank correlation rho
##
## data: november4$PsyContF and november4$Hoursdad
## S = 21999411, p-value = 0.000009257
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.2010405