library(knitr)

library(readxl)

# Load the 2014-2022 CSV file into MyData. The path is hard-coded to a
# location under the user's home directory.
MyData <- read.csv(
  file = "~/Documents/Fall 2022/PSY 211/2014-2022 ONLY.csv"
)

#setup

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.5 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.2      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(psych)
## 
## Attaching package: 'psych'
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(stats)
library(dplyr)
library(ggplot2)
library(ggplot2)
library(ggeffects)
library(Rmisc)
## Loading required package: lattice
## Loading required package: plyr
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## 
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## 
## The following object is masked from 'package:purrr':
## 
##     compact

# Build the analysis data set: keep the columns of interest, recode the
# missing-data code 99 as NA, and drop every row that has a missing value.

WSself <- dplyr::select(MyData, CRISPST, SSIPST, GENDER, RACE, GRADE,
                        ROSPST, LEVEL)

# Recode column by column with base R. dplyr::na_if() no longer accepts a
# whole data frame with a character sentinel (dplyr >= 1.1.0 raises a type
# error), and which() skips comparisons that are already NA.
WSselfGrades <- WSself
WSselfGrades[] <- lapply(WSselfGrades, function(column) {
  replace(column, which(column == 99), NA)
})
WSselfGrades <- na.omit(WSselfGrades)

# Make sure the three scale scores are numeric and confirm each conversion.
WSselfGrades$ROSPST <- as.numeric(WSselfGrades$ROSPST)
class(WSselfGrades$ROSPST)
## [1] "numeric"
WSselfGrades$CRISPST <- as.numeric(WSselfGrades$CRISPST)
class(WSselfGrades$CRISPST)
## [1] "numeric"
WSselfGrades$SSIPST <- as.numeric(WSselfGrades$SSIPST)
class(WSselfGrades$SSIPST)
## [1] "numeric"

# Visualizing variables: scatter plot of self-esteem (ROSPST) against
# attitude towards school (SSIPST) with a fitted linear trend line.
# Columns are named bare inside aes() so ggplot2 looks them up in `data`;
# writing WSselfGrades$... there bypasses the data argument and is
# discouraged by ggplot2.

ggplot(data = WSselfGrades, aes(x = ROSPST, y = SSIPST)) +
  geom_point() +
  xlab("Self-Esteem Scores") +
  ylab("Attitude Towards School") +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'

# Running Pearson's r between ROSPST and SSIPST.
# short = FALSE makes print() include the confidence-interval table; FALSE is
# spelled out because the alias F is an ordinary variable that can be
# reassigned.

library(psych)
r <- corr.test(WSselfGrades$ROSPST, WSselfGrades$SSIPST)
print(r, short = FALSE)
## Call:corr.test(x = WSselfGrades$ROSPST, y = WSselfGrades$SSIPST)
## Correlation matrix 
## [1] 0.47
## Sample Size 
## [1] 920
## These are the unadjusted probability values.
##   The probability values  adjusted for multiple tests are in the p.adj object. 
## [1] 0
## 
##  Confidence intervals based upon normal theory.  To get bootstrapped values, try cor.ci
##       raw.lower raw.r raw.upper raw.p lower.adj upper.adj
## NA-NA      0.42  0.47      0.52     0      0.42      0.52

# Computing regression: simple linear model predicting SSIPST from ROSPST.
# The formula uses bare column names so lm() resolves them through `data`.
# With WSselfGrades$... terms the data argument is ignored, and predict() or
# plotting helpers cannot substitute new data.

reg <- lm(SSIPST ~ ROSPST, data = WSselfGrades)
summary(reg)
## 
## Call:
## lm(formula = SSIPST ~ ROSPST, data = WSselfGrades)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -18.9197  -3.0998   0.5898   3.4903  10.8197 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  8.26086    0.55274   14.95   <2e-16 ***
## ROSPST       0.40995    0.02546   16.10   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.534 on 918 degrees of freedom
## Multiple R-squared:  0.2202, Adjusted R-squared:  0.2194 
## F-statistic: 259.3 on 1 and 918 DF,  p-value: < 2.2e-16

# Confidence Intervals (95%) for the intercept and the slope

confint(reg)
##                 2.5 %   97.5 %
## (Intercept) 7.1760785 9.345636
## ROSPST      0.3599876 0.459921

# Centering our data around the mean: cSSIPST is SSIPST minus its mean.
# scale() returns a one-column matrix, so the result is flattened with
# as.numeric() to store a plain numeric column in the data frame.

WSselfGrades$cSSIPST <- as.numeric(
  scale(WSselfGrades$SSIPST, center = TRUE, scale = FALSE)
)

# Rerun the regression with the centered variable as predictor.
# NOTE(review): outcome and predictor are swapped relative to `reg`
# (ROSPST is the outcome here, SSIPST was the outcome there) - confirm that
# this direction is intended.

reg2 <- lm(ROSPST ~ cSSIPST, data = WSselfGrades)
summary(reg2)
## 
## Call:
## lm(formula = ROSPST ~ cSSIPST, data = WSselfGrades)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.307  -3.530  -0.030   4.082  14.916 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 20.90109    0.17111   122.1   <2e-16 ***
## cSSIPST      0.53720    0.03336    16.1   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.19 on 918 degrees of freedom
## Multiple R-squared:  0.2202, Adjusted R-squared:  0.2194 
## F-statistic: 259.3 on 1 and 918 DF,  p-value: < 2.2e-16

# Plotting Regression: ROSPST regressed on the uncentered SSIPST, followed by
# a scatter plot with the fitted line.
# Author's note: is this correct? I got errors when running my data using
# the code.
# The heading is now a comment (bare text is a parse error in an R script),
# and the formula and aes() use bare column names so they are resolved
# through the data argument.

fit <- lm(ROSPST ~ SSIPST, data = WSselfGrades)
summary(fit)
## 
## Call:
## lm(formula = ROSPST ~ SSIPST, data = WSselfGrades)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.307  -3.530  -0.030   4.082  14.916 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 11.86028    0.58697   20.21   <2e-16 ***
## SSIPST       0.53720    0.03336   16.10   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.19 on 918 degrees of freedom
## Multiple R-squared:  0.2202, Adjusted R-squared:  0.2194 
## F-statistic: 259.3 on 1 and 918 DF,  p-value: < 2.2e-16
ggplot(WSselfGrades, aes(y = ROSPST, x = SSIPST)) +
  geom_point() +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'

# One-sample t-test of ROSPST against a population mean of 18.
# Null hypothesis: the sample average is not significantly different from
# the population value.
# Alternative hypothesis: the sample average is significantly different from
# the population value.

t.test(x = WSselfGrades$ROSPST, mu = 18, alternative = "two.sided")
## 
##  One Sample t-test
## 
## data:  WSselfGrades$ROSPST
## t = 14.98, df = 919, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 18
## 95 percent confidence interval:
##  20.52100 21.28117
## sample estimates:
## mean of x 
##  20.90109

# Testing variance: F test comparing the variances of ROSPST and SSIPST.
# If the p-value is not significant we retain the null (equal variances),
# the assumption has been met, and we can move forward with the t-test.
# NOTE(review): the output recorded below reports p-value = 4.309e-05, which
# is significant - revisit the conclusion that the assumption is met.

var.test(x = WSselfGrades$ROSPST, y = WSselfGrades$SSIPST)
## 
##  F test to compare two variances
## 
## data:  WSselfGrades$ROSPST and WSselfGrades$SSIPST
## F = 1.3104, num df = 919, denom df = 919, p-value = 4.309e-05
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  1.151364 1.491408
## sample estimates:
## ratio of variances 
##           1.310402

# Two-sample/paired: paired t-test comparing ROSPST with SSIPST row by row.
# The heading is now a comment (bare text is a parse error in an R script).
# var.equal is dropped because it only affects the unpaired two-sample test;
# the paired test works on the within-row differences.

t.test(WSselfGrades$ROSPST, WSselfGrades$SSIPST, paired = TRUE)
## 
##  Paired t-test
## 
## data:  WSselfGrades$ROSPST and WSselfGrades$SSIPST
## t = 21.648, df = 919, p-value < 2.2e-16
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  3.702603 4.440875
## sample estimates:
## mean difference 
##        4.071739

# Describe: descriptive statistics for the two scale scores.
# TRUE is spelled out because the alias T is an ordinary variable that can
# be reassigned.

describe(WSselfGrades$ROSPST, na.rm = TRUE)
##    vars   n mean   sd median trimmed  mad min max range  skew kurtosis   se
## X1    1 920 20.9 5.87     21   21.17 5.93   3  30    27 -0.39    -0.31 0.19
describe(WSselfGrades$SSIPST, na.rm = TRUE)
##    vars   n  mean   sd median trimmed  mad min max range  skew kurtosis   se
## X1    1 920 16.83 5.13     18   17.16 5.93   0  25    25 -0.52    -0.46 0.17
# Compare ROSPST between LEVEL 1 and LEVEL 2.
# Keep only rows whose LEVEL is "1" or "2". The column name is spelled out in
# full; the earlier `LEVE` only worked through partial matching of `$`.
Levels <- subset(WSselfGrades, LEVEL %in% c("1", "2"))

# Every formula below takes both the response and the grouping variable from
# `Levels`. Pulling the response from WSselfGrades$ROSPST only works while the
# subset happens to keep every row; otherwise the lengths no longer match.

# F test for equality of variances between the two levels.
var.test(ROSPST ~ LEVEL, data = Levels)
## 
##  F test to compare two variances
## 
## data:  ROSPST by LEVEL
## F = 0.97475, num df = 446, denom df = 472, p-value = 0.785
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.8117163 1.1710858
## sample estimates:
## ratio of variances 
##          0.9747469

# Welch two-sample t-test (does not assume equal variances).
t.test(ROSPST ~ LEVEL, data = Levels, var.equal = FALSE)
## 
##  Welch Two Sample t-test
## 
## data:  ROSPST by LEVEL
## t = -2.9986, df = 916.24, p-value = 0.002785
## alternative hypothesis: true difference in means between group 1 and group 2 is not equal to 0
## 95 percent confidence interval:
##  -1.9134345 -0.3995944
## sample estimates:
## mean in group 1 mean in group 2 
##        20.30649        21.46300

# Standard deviation of ROSPST within each level.
aggregate(ROSPST ~ LEVEL, Levels, sd)
##   LEVEL   ROSPST
## 1     1 5.810345
## 2     2 5.885130