library(readxl)

# Read the raw CSV into a base data frame; the path is resolved relative to
# the current user's home directory.
MyData <- read.csv(
  file = "~/Documents/Fall 2022/PSY 211/2014-2022 ONLY.csv"
)

#setup

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.5 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.2      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(psych)
## 
## Attaching package: 'psych'
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(stats)
library(dplyr)
library(ggplot2)
library(ggplot2)
library(ggeffects)
library(Rmisc)
## Loading required package: lattice
## Loading required package: plyr
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## 
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## 
## The following object is masked from 'package:purrr':
## 
##     compact
library(rcompanion)
## 
## Attaching package: 'rcompanion'
## 
## The following object is masked from 'package:psych':
## 
##     phi

#read in my data set, remove missing data coded as 99 and create new data set

# Keep only the eight columns of interest, in this order.
WSself <- MyData[
  ,
  c(
    "CRISPST", "SSIPST", "GENDER", "RACE",
    "ROSPRE", "GRADE", "ROSPST", "LEVEL"
  ),
  drop = FALSE
]

# Recode the missing-data sentinel 99 to NA in every column, then drop every
# row that still has a missing value.
# dplyr::na_if() is vector-only from dplyr 1.1.0 onward, so
# na_if(<data frame>, "99") errors on current dplyr; the base-R recode below
# gives the same result on any version.
WSselfGrades <- WSself
WSselfGrades[] <- lapply(
  WSselfGrades,
  # %in% never returns NA and matches both numeric 99 and the string "99"
  function(column) replace(column, column %in% "99", NA)
)
WSselfGrades <- na.omit(WSselfGrades)

# Coerce the three score columns to numeric and confirm the resulting class
WSselfGrades$ROSPST <- as.numeric(WSselfGrades$ROSPST)
class(WSselfGrades$ROSPST)
## [1] "numeric"
WSselfGrades$CRISPST <- as.numeric(WSselfGrades$CRISPST)
class(WSselfGrades$CRISPST)
## [1] "numeric"
WSselfGrades$SSIPST <- as.numeric(WSselfGrades$SSIPST)
class(WSselfGrades$SSIPST)
## [1] "numeric"

#visualizing variables

# Scatterplot of ROSPST (x) against SSIPST (y) with a fitted linear trend.
# aes() takes bare column names that are looked up in `data`; writing
# WSselfGrades$... inside aes() bypasses `data` and ggplot2 warns against it.
ggplot(data = WSselfGrades, aes(x = ROSPST, y = SSIPST)) +
  geom_point() +
  xlab("Self-Esteem Scores") +
  ylab("Attitude Towards School") +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'

#running pearson’s r

library(psych)
# Correlation test between ROSPST and SSIPST
r <- corr.test(x = WSselfGrades$ROSPST, y = WSselfGrades$SSIPST)
# short = FALSE also prints the confidence-interval table. FALSE is spelled
# out because F is an ordinary variable that can be reassigned.
print(r, short = FALSE)
## Call:corr.test(x = WSselfGrades$ROSPST, y = WSselfGrades$SSIPST)
## Correlation matrix 
## [1] 0.47
## Sample Size 
## [1] 920
## These are the unadjusted probability values.
##   The probability values  adjusted for multiple tests are in the p.adj object. 
## [1] 0
## 
##  Confidence intervals based upon normal theory.  To get bootstrapped values, try cor.ci
##       raw.lower raw.r raw.upper raw.p lower.adj upper.adj
## NA-NA      0.42  0.47      0.52     0      0.42      0.52

#computing regression

# Simple linear regression of SSIPST on ROSPST.
# The formula uses bare column names so that `data` actually supplies the
# variables. With WSselfGrades$... in the formula, `data` is ignored, the
# coefficient labels carry the data-frame prefix, and predict() would not
# use a `newdata` argument.
reg <- lm(SSIPST ~ ROSPST, data = WSselfGrades)
summary(reg)
## 
## Call:
## lm(formula = SSIPST ~ ROSPST, data = WSselfGrades)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -18.9197  -3.0998   0.5898   3.4903  10.8197 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  8.26086    0.55274   14.95   <2e-16 ***
## ROSPST       0.40995    0.02546   16.10   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.534 on 918 degrees of freedom
## Multiple R-squared:  0.2202, Adjusted R-squared:  0.2194 
## F-statistic: 259.3 on 1 and 918 DF,  p-value: < 2.2e-16

#Confidence Intervals

# 95% confidence intervals for the intercept and slope of `reg`
confint(reg)
##                 2.5 %   97.5 %
## (Intercept) 7.1760785 9.345636
## ROSPST      0.3599876 0.459921

#centering our data around the mean

# Mean-centre SSIPST (subtract the mean, do not rescale).
# scale() returns a one-column matrix with attributes; as.numeric() flattens
# it so the data frame stores a plain numeric vector.
WSselfGrades$cSSIPST <- as.numeric(
  scale(WSselfGrades$SSIPST, center = TRUE, scale = FALSE)
)

#RERUN regression

# NOTE(review): this model regresses ROSPST on centred SSIPST, i.e. outcome
# and predictor are swapped relative to `reg` — confirm this is intended.
# Bare column names are used so that `data` supplies the variables.
reg2 <- lm(ROSPST ~ cSSIPST, data = WSselfGrades)
summary(reg2)
## 
## Call:
## lm(formula = ROSPST ~ cSSIPST, data = WSselfGrades)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.307  -3.530  -0.030   4.082  14.916 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 20.90109    0.17111   122.1   <2e-16 ***
## cSSIPST      0.53720    0.03336    16.1   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.19 on 918 degrees of freedom
## Multiple R-squared:  0.2202, Adjusted R-squared:  0.2194 
## F-statistic: 259.3 on 1 and 918 DF,  p-value: < 2.2e-16

#one-sample t-test
#Null hypothesis: the sample average is not significantly different from the population value
#Alternative hypothesis: the sample average is significantly different from the population value

library(report)
# Two-sided one-sample t-test of the ROSPST mean against the value 18
modelON <- t.test(
  x = WSselfGrades$ROSPST,
  mu = 18,
  alternative = "two.sided"
)
print(modelON)
## 
##  One Sample t-test
## 
## data:  WSselfGrades$ROSPST
## t = 14.98, df = 919, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 18
## 95 percent confidence interval:
##  20.52100 21.28117
## sample estimates:
## mean of x 
##  20.90109
# Plain-language summary of the test, including an effect size
report(modelON)
## Effect sizes were labelled following Cohen's (1988) recommendations.
## 
## The One Sample t-test testing the difference between WSselfGrades$ROSPST (mean
## = 20.90) and mu = 18 suggests that the effect is positive, statistically
## significant, and small (difference = 2.90, 95% CI [20.52, 21.28], t(919) =
## 14.98, p < .001; Cohen's d = 0.49, 95% CI [0.43, 0.56])

#testing equality of variances
#the p-value is not significant, so we retain the null hypothesis; the assumption has been met and we can move forward with running the t-test

# F test for equality of the ROSPRE and ROSPST variances (null ratio = 1).
# NOTE(review): both columns come from the same rows, while var.test() is
# designed for two independent samples — confirm this check is appropriate.
var.test(
  x = WSselfGrades$ROSPRE,
  y = WSselfGrades$ROSPST,
  ratio = 1
)
## 
##  F test to compare two variances
## 
## data:  WSselfGrades$ROSPRE and WSselfGrades$ROSPST
## F = 0.89612, num df = 919, denom df = 919, p-value = 0.09659
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.7873628 1.0199028
## sample estimates:
## ratio of variances 
##          0.8961213

#two-sample/paired

# Paired t-test of ROSPRE against ROSPST, computed on row-wise differences.
# var.equal is not passed: t.test() only uses it for two independent samples,
# so supplying it together with paired = TRUE was misleading. TRUE replaces
# the reassignable alias T.
modelPT <- t.test(WSselfGrades$ROSPRE, WSselfGrades$ROSPST, paired = TRUE)
print(modelPT)
## 
##  Paired t-test
## 
## data:  WSselfGrades$ROSPRE and WSselfGrades$ROSPST
## t = -2.7402, df = 919, p-value = 0.006259
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -0.7722941 -0.1277059
## sample estimates:
## mean difference 
##           -0.45
# Plain-language summary of the test, including an effect size
report(modelPT)
## Effect sizes were labelled following Cohen's (1988) recommendations.
## 
## The Paired t-test testing the difference between WSselfGrades$ROSPRE and
## WSselfGrades$ROSPST (mean difference = -0.45) suggests that the effect is
## negative, statistically significant, and very small (difference = -0.45, 95% CI
## [-0.77, -0.13], t(919) = -2.74, p = 0.006; Cohen's d = -0.09, 95% CI [-0.16,
## -0.03])

#describe

# Descriptive statistics for the ROSPST and ROSPRE columns.
# na.rm is spelled TRUE because T is an ordinary variable that can be
# reassigned.
describe(WSselfGrades$ROSPST, na.rm = TRUE)
##    vars   n mean   sd median trimmed  mad min max range  skew kurtosis   se
## X1    1 920 20.9 5.87     21   21.17 5.93   3  30    27 -0.39    -0.31 0.19
describe(WSselfGrades$ROSPRE, na.rm = TRUE)
##    vars   n  mean   sd median trimmed  mad min max range  skew kurtosis   se
## X1    1 920 20.45 5.56     21   20.62 5.93   0  30    30 -0.32    -0.19 0.18
# Keep only the rows whose LEVEL is 1 or 2 (the two groups compared below).
Levels <- subset(WSselfGrades, LEVEL %in% c("1", "2"))
# F test for equality of ROSPST variances between the two LEVEL groups.
# Both variables now come from `Levels`. Taking the response from the
# unfiltered WSselfGrades while LEVEL came from the subset only worked
# because the subset kept every row; otherwise the lengths would differ.
var.test(ROSPST ~ LEVEL, data = Levels)
## 
##  F test to compare two variances
## 
## data:  ROSPST by LEVEL
## F = 0.97475, num df = 446, denom df = 472, p-value = 0.785
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.8117163 1.1710858
## sample estimates:
## ratio of variances 
##          0.9747469

#INDEPENDENT SAMPLE

# Welch two-sample t-test of ROSPST between the two LEVEL groups.
# The response is taken from `Levels`, the same data frame passed as `data`;
# mixing WSselfGrades$ROSPST with LEVEL from the subset only works while the
# subset keeps every row. FALSE replaces the reassignable alias F.
modelIS <- t.test(ROSPST ~ LEVEL, data = Levels, var.equal = FALSE)
print(modelIS)
## 
##  Welch Two Sample t-test
## 
## data:  ROSPST by LEVEL
## t = -2.9986, df = 916.24, p-value = 0.002785
## alternative hypothesis: true difference in means between group 1 and group 2 is not equal to 0
## 95 percent confidence interval:
##  -1.9134345 -0.3995944
## sample estimates:
## mean in group 1 mean in group 2 
##        20.30649        21.46300
# Plain-language summary of the test, including an approximate effect size
report(modelIS)
## Warning: Unable to retrieve data from htest object. Returning an approximate
##   effect size using t_to_d().
## Effect sizes were labelled following Cohen's (1988) recommendations.
## 
## The Welch Two Sample t-test testing the difference of ROSPST by LEVEL (mean in
## group 1 = 20.31, mean in group 2 = 21.46) suggests that the effect is negative,
## statistically significant, and very small (difference = -1.16, 95% CI [-1.91,
## -0.40], t(916.24) = -3.00, p = 0.003; Cohen's d = -0.20, 95% CI [-0.33, -0.07])
# NOTE(review): despite its name this is an unpaired Welch two-sample test of
# ROSPST by LEVEL on the full data frame, and the object is not used again in
# the visible part of the script — confirm whether it is still needed.
# Bare column names are used so that `data` supplies both variables.
pairedtest <- t.test(ROSPST ~ LEVEL, data = WSselfGrades, var.equal = FALSE)
# Standard deviation of ROSPST within each LEVEL group; the response now
# comes from `Levels`, the same data frame that supplies LEVEL.
aggregate(ROSPST ~ LEVEL, data = Levels, FUN = sd)
##   LEVEL   ROSPST
## 1     1 5.810345
## 2     2 5.885130

#G*Power-style power analysis: pwr.t.test(d = 0.3, sig.level = 0.05, power = 0.8, type = "two.sample", alternative = "two.sided")

## Two-sample t test power calculation
## 
##           n = 175.3847
##           d = 0.3
##   sig.level = 0.05
##       power = 0.8
## alternative = two.sided
## 
## NOTE: n is number in each group