library(knitr)
library(readxl)
# Read the raw CSV export into MyData; the analysis data set is built from it.
MyData <- read.csv(
  file = "~/Documents/Fall 2022/PSY 211/2014-2022 ONLY.csv"
)
#setup
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.5
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(psych)
##
## Attaching package: 'psych'
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(stats)
library(dplyr)
library(ggplot2)
library(ggplot2)
library(ggeffects)
library(Rmisc)
## Loading required package: lattice
## Loading required package: plyr
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
##
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
##
## The following object is masked from 'package:purrr':
##
## compact
# Build the analysis data set: keep the variables of interest, recode the
# missing-data code 99 as NA, and drop every row that has a missing value.
WSself <- dplyr::select(
  MyData, CRISPST, SSIPST, GENDER, RACE, GRADE, ROSPST, LEVEL
)
# Recode with base R instead of dplyr::na_if(): na_if() on a whole data frame
# with a character sentinel is rejected by dplyr >= 1.1.0. Comparing with
# `== 99` matches the code whether a column was read as numeric or character.
WSselfGrades <- WSself
WSselfGrades[WSselfGrades == 99] <- NA
WSselfGrades <- na.omit(WSselfGrades)
# Make sure the three scale scores are numeric before any modelling.
WSselfGrades$ROSPST <- as.numeric(WSselfGrades$ROSPST)
class(WSselfGrades$ROSPST)
## [1] "numeric"
WSselfGrades$CRISPST <- as.numeric(WSselfGrades$CRISPST)
class(WSselfGrades$CRISPST)
## [1] "numeric"
WSselfGrades$SSIPST <- as.numeric(WSselfGrades$SSIPST)
class(WSselfGrades$SSIPST)
## [1] "numeric"
# Visualizing variables: scatter plot of self-esteem (ROSPST) against attitude
# towards school (SSIPST) with a fitted linear trend line.
# Columns are given by bare name so ggplot2 looks them up in `data`;
# writing `df$col` inside aes() bypasses the data argument.
ggplot(data = WSselfGrades, aes(x = ROSPST, y = SSIPST)) +
  geom_point() +
  xlab("Self-Esteem Scores") +
  ylab("Attitude Towards School") +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'
# Running Pearson's r between ROSPST and SSIPST.
library(psych)
r <- corr.test(WSselfGrades$ROSPST, WSselfGrades$SSIPST)
# short = FALSE (spelled out, since `F` can be reassigned) also prints the
# confidence intervals shown at the end of the output.
print(r, short = FALSE)
## Call:corr.test(x = WSselfGrades$ROSPST, y = WSselfGrades$SSIPST)
## Correlation matrix
## [1] 0.47
## Sample Size
## [1] 920
## These are the unadjusted probability values.
## The probability values adjusted for multiple tests are in the p.adj object.
## [1] 0
##
## Confidence intervals based upon normal theory. To get bootstrapped values, try cor.ci
## raw.lower raw.r raw.upper raw.p lower.adj upper.adj
## NA-NA 0.42 0.47 0.52 0 0.42 0.52
# Computing regression: predict attitude towards school (SSIPST) from
# self-esteem (ROSPST).
# The formula uses bare column names resolved through `data`; with `df$col`
# terms the `data` argument is ignored and predict(newdata = ...) cannot work.
reg <- lm(SSIPST ~ ROSPST, data = WSselfGrades)
summary(reg)
##
## Call:
## lm(formula = SSIPST ~ ROSPST, data = WSselfGrades)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.9197 -3.0998 0.5898 3.4903 10.8197
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.26086 0.55274 14.95 <2e-16 ***
## ROSPST 0.40995 0.02546 16.10 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.534 on 918 degrees of freedom
## Multiple R-squared: 0.2202, Adjusted R-squared: 0.2194
## F-statistic: 259.3 on 1 and 918 DF, p-value: < 2.2e-16
# Confidence intervals for the regression coefficients
confint(reg)
## 2.5 % 97.5 %
## (Intercept) 7.1760785 9.345636
## ROSPST 0.3599876 0.459921
# Centering SSIPST around its mean (subtract the mean, do not rescale).
# scale() returns a one-column matrix, so it is flattened to a plain numeric
# vector before being stored as a data-frame column.
WSselfGrades$cSSIPST <- as.numeric(
  scale(WSselfGrades$SSIPST, center = TRUE, scale = FALSE)
)
# Rerun the regression using the centred variable as predictor.
# NOTE(review): this model predicts ROSPST from SSIPST, the reverse of `reg`
# (SSIPST predicted from ROSPST) -- confirm this direction is intended.
reg2 <- lm(ROSPST ~ cSSIPST, data = WSselfGrades)
summary(reg2)
##
## Call:
## lm(formula = ROSPST ~ cSSIPST, data = WSselfGrades)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.307 -3.530 -0.030 4.082 14.916
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 20.90109 0.17111 122.1 <2e-16 ***
## cSSIPST 0.53720 0.03336 16.1 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.19 on 918 degrees of freedom
## Multiple R-squared: 0.2202, Adjusted R-squared: 0.2194
## F-statistic: 259.3 on 1 and 918 DF, p-value: < 2.2e-16
# Same model on the uncentred predictor: ROSPST predicted from SSIPST.
# Only the intercept differs from the centred fit; the slope is unchanged.
# Bare column names are used so the terms are resolved through `data`.
fit <- lm(ROSPST ~ SSIPST, data = WSselfGrades)
summary(fit)
##
## Call:
## lm(formula = ROSPST ~ SSIPST, data = WSselfGrades)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.307 -3.530 -0.030 4.082 14.916
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.86028 0.58697 20.21 <2e-16 ***
## SSIPST 0.53720 0.03336 16.10 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.19 on 918 degrees of freedom
## Multiple R-squared: 0.2202, Adjusted R-squared: 0.2194
## F-statistic: 259.3 on 1 and 918 DF, p-value: < 2.2e-16
# Scatter plot of ROSPST (y) against SSIPST (x) with a fitted linear trend.
# Bare column names inside aes() are looked up in the data frame passed to
# ggplot(); `df$col` would bypass it.
ggplot(WSselfGrades, aes(x = SSIPST, y = ROSPST)) +
  geom_point() +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'
# One-sample t-test of mean ROSPST against the reference value 18.
# Null hypothesis: the sample average is not significantly different from
# the population value.
# Alternative hypothesis: the sample average is significantly different from
# the population value.
t.test(x = WSselfGrades$ROSPST, mu = 18)
##
## One Sample t-test
##
## data: WSselfGrades$ROSPST
## t = 14.98, df = 919, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 18
## 95 percent confidence interval:
## 20.52100 21.28117
## sample estimates:
## mean of x
## 20.90109
# Testing variance: F test comparing the variances of ROSPST and SSIPST.
# The p-value IS significant (p < .001), so the null hypothesis of equal
# variances is rejected; the equal-variance assumption is not met.
var.test(WSselfGrades$ROSPST, WSselfGrades$SSIPST)
##
## F test to compare two variances
##
## data: WSselfGrades$ROSPST and WSselfGrades$SSIPST
## F = 1.3104, num df = 919, denom df = 919, p-value = 4.309e-05
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 1.151364 1.491408
## sample estimates:
## ratio of variances
## 1.310402
# Two-sample / paired t-test on ROSPST versus SSIPST.
# `var.equal` is omitted: it has no effect when paired = TRUE, because the
# paired test is computed on the within-row differences.
t.test(WSselfGrades$ROSPST, WSselfGrades$SSIPST, paired = TRUE)
##
## Paired t-test
##
## data: WSselfGrades$ROSPST and WSselfGrades$SSIPST
## t = 21.648, df = 919, p-value < 2.2e-16
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## 3.702603 4.440875
## sample estimates:
## mean difference
## 4.071739
# Descriptive statistics (psych::describe) for the two scale scores.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
describe(WSselfGrades$ROSPST, na.rm = TRUE)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 920 20.9 5.87 21 21.17 5.93 3 30 27 -0.39 -0.31 0.19
describe(WSselfGrades$SSIPST, na.rm = TRUE)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 920 16.83 5.13 18 17.16 5.93 0 25 25 -0.52 -0.46 0.17
# Compare ROSPST between LEVEL 1 and LEVEL 2.
# Keep only rows in those two levels. The column name is spelled in full
# (the previous `LEVE` only worked through `$` partial matching).
Levels <- subset(WSselfGrades, LEVEL %in% c("1", "2"))
# Every formula below takes both ROSPST and LEVEL from `Levels`, so the
# outcome and the grouping variable always come from the same rows.
# F test for equality of variances between the two levels.
var.test(ROSPST ~ LEVEL, data = Levels)
##
## F test to compare two variances
##
## data: ROSPST by LEVEL
## F = 0.97475, num df = 446, denom df = 472, p-value = 0.785
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.8117163 1.1710858
## sample estimates:
## ratio of variances
## 0.9747469
# Welch two-sample t-test (does not assume equal variances).
t.test(ROSPST ~ LEVEL, data = Levels, var.equal = FALSE)
##
## Welch Two Sample t-test
##
## data: ROSPST by LEVEL
## t = -2.9986, df = 916.24, p-value = 0.002785
## alternative hypothesis: true difference in means between group 1 and group 2 is not equal to 0
## 95 percent confidence interval:
## -1.9134345 -0.3995944
## sample estimates:
## mean in group 1 mean in group 2
## 20.30649 21.46300
# Standard deviation of ROSPST within each level.
aggregate(ROSPST ~ LEVEL, data = Levels, FUN = sd)
## LEVEL ROSPST
## 1 1 5.810345
## 2 2 5.885130