The data were saved as HW_2.sav and the output as HW_2.spv. Both files were uploaded to Blackboard.
Example Code
# Load the package
library(haven)
# Import the dataset
WE_data <- read_sav("Sperman_slide7.sav")
# View your data set
head(WE_data)
## # A tibble: 6 x 4
## X Y Rank_X Rank_Y
## <dbl> <dbl> <dbl> <dbl>
## 1 12 36 4 8
## 2 16 25 7 4.5
## 3 10 35 2 7
## 4 11 38 3 9
## 5 15 24 6 3
## 6 9 40 1 10
# Run a scatterplot to check the relationship between X and Y
plot(WE_data$X,WE_data$Y, main="The Scatterplot of X and Y",xlab="Variable X",ylab="Variable Y")
# From the scatterplot we can tell that variables X and Y are negatively correlated.
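# Optional sketch (not part of the original output): overlaying a least-squares line
# on the scatterplot above makes the direction of the relationship easier to see.
# abline() draws on the plot that is currently open.
abline(lm(Y ~ X, data = WE_data), col = "red")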
# Load the Hmisc package
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
## Registered S3 methods overwritten by 'ggplot2':
## method from
## [.quosures rlang
## c.quosures rlang
## print.quosures rlang
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
# Subset the data we need
WE_data_Spearman <- cbind(WE_data$X,WE_data$Y)
# Calculate the Spearman correlation
rcorr(WE_data_Spearman, type="spearman")
## [,1] [,2]
## [1,] 1.00 -0.92
## [2,] -0.92 1.00
##
## n= 10
##
##
## P
## [,1] [,2]
## [1,] 1e-04
## [2,] 1e-04
# We reject the null hypothesis. There is a strong, statistically significant negative correlation between variable X and variable Y (rs = -.924, p < 0.001).
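# Optional cross-check (not part of the original output): base R's cor.test() gives the
# same Spearman coefficient without Hmisc. A warning about ties is possible because Y
# contains tied ranks; the estimated rho should match the -0.92 reported by rcorr() above.
cor.test(WE_data$X, WE_data$Y, method = "spearman")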
# Create the data set for the point-biserial correlation
gender <- c(1,1,1,1,1,0,0,0,0,0)
y <- c(16,22,18,19,22,15,12,18,17,14)
WE_data_Pointbiserial <- as.data.frame(cbind(gender,y))
# Calculate the point-biserial correlation
cor.test(WE_data_Pointbiserial$y,WE_data_Pointbiserial$gender)
##
## Pearson's product-moment correlation
##
## data: WE_data_Pointbiserial$y and WE_data_Pointbiserial$gender
## t = 2.6563, df = 8, p-value = 0.02897
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.09658527 0.91836431
## sample estimates:
## cor
## 0.6845804
# We reject the null hypothesis. There is a positive, statistically significant relationship between gender (X) and scores (Y) (r = .685, p < 0.05). Higher scores are associated with females (the group coded gender = 1).
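# Optional sketch: the point-biserial r is just a Pearson r with one dichotomous variable,
# so it can also be computed from the group means, the SD of y (with n in the denominator),
# and the group proportions; this should reproduce the 0.685 reported above.
m1 <- mean(y[gender == 1]); m0 <- mean(y[gender == 0])
sd_n <- sqrt(mean((y - mean(y))^2))   # SD computed with n rather than n - 1
p <- mean(gender == 1); q <- 1 - p
(m1 - m0) / sd_n * sqrt(p * q)        # approximately 0.685, matching cor.test()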
# Create the data set for the phi correlation
X <- c(1,1,0,1,1,0,1,1,0,0)
Y <- c(1,1,1,1,1,0,0,0,0,0)
WE_data_phi <- as.data.frame(cbind(X,Y))
# Calculate the phi correlation
cor.test(WE_data_phi$X,WE_data_phi$Y)
##
## Pearson's product-moment correlation
##
## data: WE_data_phi$X and WE_data_phi$Y
## t = 1.2649, df = 8, p-value = 0.2415
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.2979692 0.8256471
## sample estimates:
## cor
## 0.4082483
# We fail to reject the null hypothesis. There is no statistically significant relationship (at the 0.05 level) between gender (X) and success (Y) (φ = .408, p = .242).
# Note that the p-value here differs from the one in the SPSS homework because cor.test() uses a t-test rather than a chi-square test to compute it.
# We can use the chisq.test() function (in base R's stats package, so no extra package is needed) to calculate the chi-square statistic
chisq.test(WE_data_phi$X,WE_data_phi$Y,correct = FALSE)
## Warning in chisq.test(WE_data_phi$X, WE_data_phi$Y, correct = FALSE): Chi-
## squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: WE_data_phi$X and WE_data_phi$Y
## X-squared = 1.6667, df = 1, p-value = 0.1967
# Interpret again: We fail to reject the null hypothesis. There is no statistically significant relationship (at the 0.05 level) between gender (X) and success (Y) (φ = .408, p = .197).
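# Optional cross-check: phi is related to the chi-square statistic by phi = sqrt(X^2 / n),
# so the value below should reproduce the 0.408 reported by cor.test() above.
phi_chi <- chisq.test(WE_data_phi$X, WE_data_phi$Y, correct = FALSE)
sqrt(unname(phi_chi$statistic) / nrow(WE_data_phi))   # approximately 0.408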
# Import the data first
library(haven)
WE_data2 <- read_sav("asgusam5.sav")
head(WE_data2)
## # A tibble: 6 x 58
## id gender month year language book home_computer home_desk
## <dbl> <dbl+l> <dbl+lb> <dbl+l> <dbl+lb> <dbl+l> <dbl+lbl> <dbl+lbl>
## 1 1 1 [GIR… 1 [JAN… 5 [200… 1 [ALWA… 4 [TWO… 1 [YES] 0 [NO]
## 2 2 0 [BOY] 9 [SEP… 4 [200… 1 [ALWA… 3 [ONE… 1 [YES] 1 [YES]
## 3 3 0 [BOY] 10 [OCT… 4 [200… 1 [ALWA… 4 [TWO… 1 [YES] 1 [YES]
## 4 4 1 [GIR… 8 [AUG… 4 [200… 1 [ALWA… 3 [ONE… 1 [YES] 1 [YES]
## 5 5 0 [BOY] 8 [AUG… 4 [200… 1 [ALWA… 5 [THR… 1 [YES] 1 [YES]
## 6 6 0 [BOY] 11 [NOV… 4 [200… 1 [ALWA… 3 [ONE… 1 [YES] 1 [YES]
## # … with 50 more variables: home_book <dbl+lbl>, home_room <dbl+lbl>,
## # home_internet <dbl+lbl>, computer_home <dbl+lbl>,
## # computer_school <dbl+lbl>, computer_some <dbl+lbl>,
## # parentsupport1 <dbl+lbl>, parentsupport2 <dbl+lbl>,
## # parentsupport3 <dbl+lbl>, parentsupport4 <dbl+lbl>, school1 <dbl+lbl>,
## # school2 <dbl+lbl>, school3 <dbl+lbl>, studentbullied1 <dbl+lbl>,
## # studentbullied2 <dbl+lbl>, studentbullied3 <dbl+lbl>,
## # studentbullied4 <dbl+lbl>, studentbullied5 <dbl+lbl>,
## # studentbullied6 <dbl+lbl>, learning1 <dbl+lbl>, learning2 <dbl+lbl>,
## # learning3 <dbl+lbl>, learning4 <dbl+lbl>, learning5 <dbl+lbl>,
## # learning6 <dbl+lbl>, learning7 <dbl+lbl>, engagement1 <dbl+lbl>,
## # engagement2 <dbl+lbl>, engagement3 <dbl+lbl>, engagement4 <dbl+lbl>,
## # engagement5 <dbl+lbl>, confidence1 <dbl+lbl>, confidence2 <dbl+lbl>,
## # confidence3 <dbl+lbl>, confidence4 <dbl+lbl>, confidence5 <dbl+lbl>,
## # confidence6 <dbl+lbl>, score1 <dbl>, score2 <dbl>, score3 <dbl>,
## # score4 <dbl>, score5 <dbl>, ParentSupport <dbl>, Home <dbl>,
## # school <dbl>, StudentBullied <dbl>, learning <dbl>, engagement <dbl>,
## # confidence <dbl>, ScienceScore <dbl>
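# Optional sketch: read_sav() keeps the SPSS value labels, so the labels of any
# labelled column (e.g. gender) can be inspected before recoding.
attr(WE_data2$gender, "labels")   # value-label pairs, e.g. BOY = 0, GIRL = 1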
# Pull out the variables that we need for the test
WE_data2_PSP <- as.data.frame(cbind(WE_data2$learning,WE_data2$engagement,WE_data2$confidence))
WE_data2_PSP_cleaned <- na.omit(WE_data2_PSP)
names <- c("learning","engagement","confidence")
colnames(WE_data2_PSP_cleaned) <- names
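# Optional alternative (same result): subsetting the tibble by column name keeps the
# variable names, so the renaming step above is not needed.
WE_data2_PSP_alt <- na.omit(WE_data2[, c("learning", "engagement", "confidence")])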
# We can use the 'ppcor' package to calculate the partial and semi-partial correlations
library(ppcor)
# Perform partial correlation
pcor.test(WE_data2_PSP_cleaned$learning,WE_data2_PSP_cleaned$engagement,WE_data2_PSP_cleaned$confidence,method="pearson")
## estimate p.value statistic n gp Method
## 1 0.4283649 0 53.4766 12728 1 pearson
# Perform semi-partial correlation
spcor.test(WE_data2_PSP_cleaned$learning,WE_data2_PSP_cleaned$engagement,WE_data2_PSP_cleaned$confidence,method="pearson")
## estimate p.value statistic n gp Method
## 1 0.3317006 0 39.66306 12728 1 pearson
# Interpretation: We reject the null hypothesis. There is a positive, statistically significant correlation between engagement and learning after controlling for the effects of confidence (partial correlation = 0.428; part (semi-partial) correlation = 0.332, p < 0.01).
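# Optional cross-check (a sketch, not part of the original output): the first-order partial
# and semi-partial correlations can also be computed from the three pairwise Pearson
# correlations; here confidence is removed from engagement only, matching the questions below.
r <- cor(WE_data2_PSP_cleaned)
r_xy <- r["learning", "engagement"]; r_xz <- r["learning", "confidence"]; r_yz <- r["engagement", "confidence"]
(r_xy - r_xz * r_yz) / sqrt((1 - r_xz^2) * (1 - r_yz^2))   # partial, approximately 0.428
(r_xy - r_xz * r_yz) / sqrt(1 - r_yz^2)                     # semi-partial, approximately 0.332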
What proportion of variation in learning scores is not explained by confidence and is explained by engagement?
What proportion of variation in learning scores is explained by the part of engagement that is independent of confidence?
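# A minimal sketch of how both questions can be answered from the output above: each asks for
# the squared semi-partial correlation (assuming the semi-partial removes confidence from
# engagement only, as in spcor.test() above).
0.3317006^2   # approximately 0.11, i.e., about 11% of the variation in learning scores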