library(readxl)
# Load the raw survey export from a fixed location under the user's home directory.
MyData <- read.csv(
  file = "~/Documents/Fall 2022/PSY 211/2014-2022 ONLY.csv"
)
#setup
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.5
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(psych)
##
## Attaching package: 'psych'
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(stats)
library(dplyr)
library(ggplot2)
library(ggplot2)
library(ggeffects)
library(Rmisc)
## Loading required package: lattice
## Loading required package: plyr
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
##
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
##
## The following object is masked from 'package:purrr':
##
## compact
library(rcompanion)
##
## Attaching package: 'rcompanion'
##
## The following object is masked from 'package:psych':
##
## phi
#read in my data set, recode missing data coded as 99 to NA and create new data set
# Keep only the variables used in the analyses below.
WSself <- dplyr::select(
  MyData, CRISPST, SSIPST, GENDER, RACE, ROSPRE, GRADE, ROSPST, LEVEL
)
# Recode the sentinel with a base-R mask: calling na_if() on a whole data frame
# is rejected by dplyr >= 1.1.0. Comparing against "99" matches both numeric 99
# and character "99" cells.
WSselfGrades <- WSself
WSselfGrades[WSselfGrades == "99"] <- NA
# Complete-case analysis: drop every row with an NA in any selected column.
WSselfGrades <- na.omit(WSselfGrades)
# Make sure the scale scores are numeric before modelling.
WSselfGrades$ROSPST <- as.numeric(WSselfGrades$ROSPST)
class(WSselfGrades$ROSPST)
## [1] "numeric"
WSselfGrades$CRISPST <- as.numeric(WSselfGrades$CRISPST)
class(WSselfGrades$CRISPST)
## [1] "numeric"
WSselfGrades$SSIPST <- as.numeric(WSselfGrades$SSIPST)
class(WSselfGrades$SSIPST)
## [1] "numeric"
#visualizing variables
# Scatterplot of SSIPST against ROSPST with a fitted linear trend line.
# Columns are given as bare names inside aes() so they are looked up in `data`
# (ggplot2 discourages `df$col` inside aes()).
ggplot(data = WSselfGrades, aes(x = ROSPST, y = SSIPST)) +
  geom_point() +
  xlab("Self-Esteem Scores") +
  ylab("Attitude Towards School") +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'
#running Pearson's r
# Correlation between ROSPST and SSIPST via psych::corr.test(); the long print
# form (short = FALSE) also shows the confidence intervals.
library(psych)
r <- corr.test(x = WSselfGrades$ROSPST, y = WSselfGrades$SSIPST)
print(r, short = FALSE)
## Call:corr.test(x = WSselfGrades$ROSPST, y = WSselfGrades$SSIPST)
## Correlation matrix
## [1] 0.47
## Sample Size
## [1] 920
## These are the unadjusted probability values.
## The probability values adjusted for multiple tests are in the p.adj object.
## [1] 0
##
## Confidence intervals based upon normal theory. To get bootstrapped values, try cor.ci
## raw.lower raw.r raw.upper raw.p lower.adj upper.adj
## NA-NA 0.42 0.47 0.52 0 0.42 0.52
#computing regression
# Simple linear regression of SSIPST on ROSPST. Variables are named bare in the
# formula so that `data` is actually used and predict(newdata = ...) works.
# The recorded output that follows was produced with the earlier
# `WSselfGrades$...` spelling, so its term labels carry that prefix; the
# estimates are the same.
reg <- lm(SSIPST ~ ROSPST, data = WSselfGrades)
summary(reg)
##
## Call:
## lm(formula = WSselfGrades$SSIPST ~ WSselfGrades$ROSPST, data = WSselfGrades)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.9197 -3.0998 0.5898 3.4903 10.8197
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.26086 0.55274 14.95 <2e-16 ***
## WSselfGrades$ROSPST 0.40995 0.02546 16.10 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.534 on 918 degrees of freedom
## Multiple R-squared: 0.2202, Adjusted R-squared: 0.2194
## F-statistic: 259.3 on 1 and 918 DF, p-value: < 2.2e-16
#Confidence Intervals
# 95% confidence intervals for the coefficients of `reg`.
confint(reg, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 7.1760785 9.345636
## WSselfGrades$ROSPST 0.3599876 0.459921
#centering our data around the mean
# scale() returns a one-column matrix; as.numeric() stores the mean-centered
# scores as a plain numeric column instead.
WSselfGrades$cSSIPST <- as.numeric(
  scale(WSselfGrades$SSIPST, center = TRUE, scale = FALSE)
)
#RERUN regression
# NOTE(review): this model predicts ROSPST from centered SSIPST, i.e. the
# outcome and predictor are swapped relative to `reg` — confirm this is intended.
# The recorded output that follows labels the slope `WSselfGrades$cSSIPST`
# because it was produced with the earlier formula spelling.
reg2 <- lm(ROSPST ~ cSSIPST, data = WSselfGrades)
summary(reg2)
##
## Call:
## lm(formula = WSselfGrades$ROSPST ~ WSselfGrades$cSSIPST, data = WSselfGrades)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.307 -3.530 -0.030 4.082 14.916
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 20.90109 0.17111 122.1 <2e-16 ***
## WSselfGrades$cSSIPST 0.53720 0.03336 16.1 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.19 on 918 degrees of freedom
## Multiple R-squared: 0.2202, Adjusted R-squared: 0.2194
## F-statistic: 259.3 on 1 and 918 DF, p-value: < 2.2e-16
# One-sample t-test of ROSPST against a reference mean of 18.
# Null hypothesis: the sample average is not significantly different from the
# population value.
# Alternative hypothesis: the sample average is significantly different from
# the population value.
library(report)
modelON <- t.test(x = WSselfGrades$ROSPST, mu = 18)
print(modelON)
##
## One Sample t-test
##
## data: WSselfGrades$ROSPST
## t = 14.98, df = 919, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 18
## 95 percent confidence interval:
## 20.52100 21.28117
## sample estimates:
## mean of x
## 20.90109
# Narrative summary of the test, including an effect size.
report(modelON)
## Effect sizes were labelled following Cohen's (1988) recommendations.
##
## The One Sample t-test testing the difference between WSselfGrades$ROSPST (mean
## = 20.90) and mu = 18 suggests that the effect is positive, statistically
## significant, and small (difference = 2.90, 95% CI [20.52, 21.28], t(919) =
## 14.98, p < .001; Cohen's d = 0.49, 95% CI [0.43, 0.56])
# Testing variance: F test comparing the variances of ROSPRE and ROSPST.
# The p-value is not significant, so we retain the null hypothesis, treat the
# assumption as met, and move forward with running the t-test.
var.test(x = WSselfGrades$ROSPRE, y = WSselfGrades$ROSPST)
##
## F test to compare two variances
##
## data: WSselfGrades$ROSPRE and WSselfGrades$ROSPST
## F = 0.89612, num df = 919, denom df = 919, p-value = 0.09659
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.7873628 1.0199028
## sample estimates:
## ratio of variances
## 0.8961213
#two-sample/paired
# Paired t-test comparing ROSPRE with ROSPST within the same rows.
# `var.equal` is omitted: t.test() ignores it when paired = TRUE, so the result
# is identical to the recorded output below.
modelPT <- t.test(WSselfGrades$ROSPRE, WSselfGrades$ROSPST, paired = TRUE)
print(modelPT)
##
## Paired t-test
##
## data: WSselfGrades$ROSPRE and WSselfGrades$ROSPST
## t = -2.7402, df = 919, p-value = 0.006259
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -0.7722941 -0.1277059
## sample estimates:
## mean difference
## -0.45
# Narrative summary of the paired test, including an effect size.
report(modelPT)
## Effect sizes were labelled following Cohen's (1988) recommendations.
##
## The Paired t-test testing the difference between WSselfGrades$ROSPRE and
## WSselfGrades$ROSPST (mean difference = -0.45) suggests that the effect is
## negative, statistically significant, and very small (difference = -0.45, 95% CI
## [-0.77, -0.13], t(919) = -2.74, p = 0.006; Cohen's d = -0.09, 95% CI [-0.16,
## -0.03])
#describe
# Descriptive statistics for the post (ROSPST) and pre (ROSPRE) scores.
describe(WSselfGrades$ROSPST, na.rm = TRUE)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 920 20.9 5.87 21 21.17 5.93 3 30 27 -0.39 -0.31 0.19
describe(WSselfGrades$ROSPRE, na.rm = TRUE)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 920 20.45 5.56 21 20.62 5.93 0 30 30 -0.32 -0.19 0.18
# Keep only the rows whose LEVEL is 1 or 2.
Levels <- subset(WSselfGrades, LEVEL %in% c("1", "2"))
# F test for equal variance of ROSPST across the two LEVEL groups. Both
# variables are taken from `Levels`; the earlier `WSselfGrades$ROSPST ~ LEVEL`
# form mixed the full data with the subset and only worked because the subset
# happened to keep every row. The recorded output that follows still shows the
# old data label.
var.test(ROSPST ~ LEVEL, data = Levels)
##
## F test to compare two variances
##
## data: WSselfGrades$ROSPST by LEVEL
## F = 0.97475, num df = 446, denom df = 472, p-value = 0.785
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.8117163 1.1710858
## sample estimates:
## ratio of variances
## 0.9747469
#INDEPENDENT SAMPLE
# Welch two-sample t-test of ROSPST between the two LEVEL groups.
# Response and grouping variable both come from `Levels`; the earlier
# `WSselfGrades$ROSPST ~ LEVEL` spelling pulled the response from the full data
# set. The output recorded below was produced with that earlier spelling,
# hence its "data:" label.
modelIS <- t.test(ROSPST ~ LEVEL, data = Levels, var.equal = FALSE)
print(modelIS)
##
## Welch Two Sample t-test
##
## data: WSselfGrades$ROSPST by LEVEL
## t = -2.9986, df = 916.24, p-value = 0.002785
## alternative hypothesis: true difference in means between group 1 and group 2 is not equal to 0
## 95 percent confidence interval:
## -1.9134345 -0.3995944
## sample estimates:
## mean in group 1 mean in group 2
## 20.30649 21.46300
# Narrative summary of the test, including an effect size.
report(modelIS)
## Warning: Unable to retrieve data from htest object. Returning an approximate
## effect size using t_to_d().
## Effect sizes were labelled following Cohen's (1988) recommendations.
##
## The Welch Two Sample t-test testing the difference of WSselfGrades$ROSPST by
## LEVEL (mean in group 1 = 20.31, mean in group 2 = 21.46) suggests that the
## effect is negative, statistically significant, and very small (difference =
## -1.16, 95% CI [-1.91, -0.40], t(916.24) = -3.00, p = 0.003; Cohen's d = -0.20,
## 95% CI [-0.33, -0.07])
# NOTE(review): despite its name, `pairedtest` is a Welch two-sample test of
# ROSPST by LEVEL on the full data set, not a paired test — confirm whether it
# is still needed alongside modelIS.
pairedtest <- t.test(ROSPST ~ LEVEL, data = WSselfGrades, var.equal = FALSE)
# Standard deviation of ROSPST within each LEVEL group of `Levels`. The output
# recorded below labels the column `WSselfGrades$ROSPST` because it was
# produced with the earlier formula spelling.
aggregate(ROSPST ~ LEVEL, data = Levels, FUN = sd)
## LEVEL WSselfGrades$ROSPST
## 1 1 5.810345
## 2 2 5.885130
#G Power Analysis
#pwr.t.test(d = .3, sig.level = .05, power = .8, type = "two.sample", alternative = "two.sided")
## Two-sample t test power calculation
## n = 175.3847
## d = 0.3
## sig.level = 0.05
## power = 0.8
## alternative = two.sided
## NOTE: n is number in each group