This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
academic_performance <- read_excel("data_academic_performance.xlsx")
#First check that the required packages are installed; install any that are missing
# Specify your packages (including those called later with the :: operator)
needed_packages <- c("pastecs", "ggplot2", "semTools", "FSA", "stargazer", "sjstats", "gmodels", "psych", "car", "userfriendlyscience")
# Extract not installed packages
not_installed <- needed_packages[!(needed_packages %in% installed.packages()[ , "Package"])]
# Install not installed packages
if(length(not_installed)) install.packages(not_installed)
library(pastecs) #For creating descriptive statistic summaries
##
## Attaching package: 'pastecs'
## The following objects are masked from 'package:dplyr':
##
## first, last
library(ggplot2) #For creating histograms with more detail than plot
library(semTools) #For skewness and kurtosis
## Warning: package 'semTools' was built under R version 4.0.3
## Loading required package: lavaan
## This is lavaan 0.6-7
## lavaan is BETA software! Please report any bugs.
##
## ###############################################################################
## This is semTools 0.5-3
## All users of R (or SEM) are invited to submit functions or ideas for functions.
## ###############################################################################
library(stargazer)
## Warning: package 'stargazer' was built under R version 4.0.3
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(sjstats)#chi-square effect size
## Warning: package 'sjstats' was built under R version 4.0.3
library(gmodels) #For producing cross tabulations (CrossTable)
## Warning: package 'gmodels' was built under R version 4.0.3
##
## Attaching package: 'gmodels'
## The following object is masked from 'package:sjstats':
##
## ci
##Checking the representativeness of the data ##
academic_performance$GENDER<- as.factor(academic_performance$GENDER)
table(academic_performance$GENDER)
##
## F M
## 5043 7368
academic_performance$SCHOOL_NAT<- as.factor(academic_performance$SCHOOL_NAT)
table(academic_performance$SCHOOL_NAT)
##
## PRIVATE PUBLIC
## 6565 5846
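#As a quick illustrative check (a minimal base-R sketch, not part of the original
#output), the same tables can be expressed as proportions to judge balance
prop.table(table(academic_performance$GENDER))
prop.table(table(academic_performance$SCHOOL_NAT))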
##missing data handling
df_2<- academic_performance %>% dplyr::na_if(0)
df_2<- df_2 %>% dplyr::na_if('Not sure')
df_2<- df_2 %>% dplyr::na_if('Not apply')
myData<- na.omit(df_2)
myData$PEOPLE_HOUSE=as.factor(ifelse(myData$PEOPLE_HOUSE=="Once","One",myData$PEOPLE_HOUSE))
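#A quick sanity check (illustrative, not shown in the original output) on how many
#cases the missing-data handling above removed
nrow(academic_performance) - nrow(myData)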
#We will assign the histogram to a variable to allow us to manipulate it
# for maths
gg <- ggplot(myData, aes(x=MAT_S11))
#Change the label of the x axis
gg <- gg + labs(x="maths_score")
#manage binwidth and colours
gg <- gg + geom_histogram(binwidth=2, colour="black", aes(y=..density.., fill=..count..))
gg <- gg + scale_fill_gradient("Count", low="#DCDCDC", high="#7C7C7C")
#adding a normal curve
gg <- gg + stat_function(fun=dnorm, color="red",args=list(mean=mean(myData$MAT_S11, na.rm=TRUE), sd=sd(myData$MAT_S11, na.rm=TRUE)))
#to display the graph request the contents of the variable be shown
gg
#Create a qqplot
qqnorm(myData$MAT_S11)
qqline(myData$MAT_S11, col=2) #show a line on the plot
### Generate Summary Statistics
pastecs::stat.desc(myData$MAT_S11, basic=F)
## median mean SE.mean CI.mean.0.95 var std.dev
## 64.0000000 64.4024564 0.1156878 0.2267700 140.5686361 11.8561645
## coef.var
## 0.1840949
#We can make our decision based on the value of the standardised score for skew and kurtosis
#We divide the skew statistic by the standard error to get the standardised score
tpskew<-semTools::skew(myData$MAT_S11)
tpkurt<-semTools::kurtosis(myData$MAT_S11)
tpskew[1]/tpskew[2]
## skew (g1)
## 16.11356
tpkurt[1]/tpkurt[2]
## Excess Kur (g2)
## 1.847239
#and by calculating the percentage of standardised scores for the variable itself that are outside our acceptable range
#This will tell us how big a problem we have
# Calculate the percentage of standardised scores that are greater than 1.96
zMathscore<- abs(scale(myData$MAT_S11))
FSA::perc(as.numeric(zMathscore), 1.96, "gt")
## [1] 4.246406
FSA::perc(as.numeric(zMathscore), 3.29, "gt")
## [1] 0
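#As a cross-check (a minimal base-R sketch, not part of the original output), the
#same percentages can be computed directly from the absolute standardised scores;
#the 1.96 and 3.29 cutoffs follow the convention used above
mean(zMathscore > 1.96) * 100
mean(zMathscore > 3.29) * 100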
#For critical reading
gg <- ggplot(myData, aes(x=CR_S11))
#Change the label of the x axis
gg <- gg + labs(x="Citcal_Reading_score")
#manage binwidth and colours
gg <- gg + geom_histogram(binwidth=2, colour="black", aes(y=..density.., fill=..count..))
gg <- gg + scale_fill_gradient("Count", low="#DCDCDC", high="#7C7C7C")
#adding a normal curve
#use stat_function to overlay a normal curve based on the sample mean and standard deviation
gg <- gg + stat_function(fun=dnorm, color="red",args=list(mean=mean(myData$CR_S11, na.rm=TRUE), sd=sd(myData$CR_S11, na.rm=TRUE)))
#to display the graph request the contents of the variable be shown
gg
#Create a qqplot
qqnorm(myData$CR_S11)
qqline(myData$CR_S11, col=2) #show a line on the plot
### Generate Summary Statistics
pastecs::stat.desc(myData$CR_S11, basic=F)
## median mean SE.mean CI.mean.0.95 var std.dev
## 61.00000000 60.86956108 0.09795809 0.19201645 100.78454567 10.03915065
## coef.var
## 0.16492891
#We can make our decision based on the value of the standardised score for skew and kurtosis
#We divide the skew statistic by the standard error to get the standardised score
tpskew<-semTools::skew(myData$CR_S11)
tpkurt<-semTools::kurtosis(myData$CR_S11)
tpskew[1]/tpskew[2]
## skew (g1)
## 9.572668
tpkurt[1]/tpkurt[2]
## Excess Kur (g2)
## 9.651906
#and by calculating the percentage of standardised scores for the variable itself that are outside our acceptable range
#This will tell us how big a problem we have
# Calculate the percentage of standardised scores that are greater than 1.96
zCRscore<- abs(scale(myData$CR_S11))
FSA::perc(as.numeric(zCRscore), 1.96, "gt")
## [1] 5.560316
FSA::perc(as.numeric(zCRscore), 3.29, "gt")
## [1] 0.3903647
# t-test#
#for CR_S11#
#difference in critical reading score based on school nature#
#Get descriptive statistics by group - output as a matrix
psych::describeBy(myData$CR_S11, myData$SCHOOL_NAT, mat=TRUE)
## item group1 vars n mean sd median trimmed mad min max
## X11 1 PRIVATE 1 5361 62.74408 9.835085 62.0 62.61203 8.8956 28 100
## X12 2 PUBLIC 1 5142 58.91521 9.876647 58.5 58.69397 9.6369 26 100
## range skew kurtosis se
## X11 72 0.2129527 0.6212798 0.1343245
## X12 74 0.2770183 0.4232489 0.1377347
#Conduct Levene's test for homogeneity of variance
car::leveneTest(CR_S11 ~ SCHOOL_NAT, data=myData)
## Registered S3 methods overwritten by 'car':
## method from
## influence.merMod lme4
## cooks.distance.influence.merMod lme4
## dfbeta.influence.merMod lme4
## dfbetas.influence.merMod lme4
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.563 0.4531
## 10501
#Conduct the t-test from package stats
res <- stats::t.test(CR_S11 ~ SCHOOL_NAT,var.equal=TRUE,data=myData)
res
##
## Two Sample t-test
##
## data: CR_S11 by SCHOOL_NAT
## t = 19.903, df = 10501, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 3.451782 4.205957
## sample estimates:
## mean in group PRIVATE mean in group PUBLIC
## 62.74408 58.91521
#Eta squared calculation
effes=round((res$statistic*res$statistic)/((res$statistic*res$statistic)+(res$parameter)),3)
effes
## t
## 0.036
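#Illustrative helper for labelling the effect size (an assumption based on Cohen's
#1988 benchmarks for eta squared: .01 small, .06 medium, .14 large; not part of the
#original code)
effect_label <- function(eta) {
  if (eta >= 0.14) "large" else if (eta >= 0.06) "medium" else if (eta >= 0.01) "small" else "negligible"
}
effect_label(as.numeric(effes)) #returns "small" for eta squared = 0.036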
#for MAT_S11#
#difference in math score based on school nature#
#Get descriptive statistics by group - output as a matrix
psych::describeBy(myData$MAT_S11, myData$SCHOOL_NAT, mat=TRUE)
## item group1 vars n mean sd median trimmed mad min max
## X11 1 PRIVATE 1 5361 67.04346 12.10437 67 66.66146 11.8608 32 100
## X12 2 PUBLIC 1 5142 61.64897 10.93338 61 61.22460 10.3782 26 100
## range skew kurtosis se
## X11 68 0.3009775 -0.04596034 0.1653177
## X12 74 0.4078466 0.25677717 0.1524715
#Conduct Levene's test for homogeneity of variance
car::leveneTest(MAT_S11 ~ SCHOOL_NAT, data=myData)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 53.986 2.17e-13 ***
## 10501
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Does not meet the assumption of homogeneity of variance, i.e. variances are unequal
#Conduct the t-test from package stats
#In this case we can use the var.equal = FALSE option to specify non-equal variances
res <- stats::t.test(MAT_S11 ~ SCHOOL_NAT,var.equal=FALSE,data=myData)
res
##
## Welch Two Sample t-test
##
## data: MAT_S11 by SCHOOL_NAT
## t = 23.987, df = 10463, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 4.953657 5.835329
## sample estimates:
## mean in group PRIVATE mean in group PUBLIC
## 67.04346 61.64897
#Eta squared calculation
effes=round((res$statistic*res$statistic)/((res$statistic*res$statistic)+(res$parameter)),3)
effes
## t
## 0.052
#### One-way ANOVA ####
#for CR_S11#
#Get descriptive statistics by group - output as a matrix
psych::describeBy(myData$CR_S11, myData$SCHOOL_TYPE, mat=TRUE)
## item group1 vars n mean sd median trimmed mad
## X11 1 ACADEMIC 1 6547 61.92103 9.976892 62 61.77400 10.3782
## X12 2 TECHNICAL 1 895 58.74637 9.719524 58 58.64575 8.8956
## X13 3 TECHNICAL/ACADEMIC 1 3061 59.24142 9.951165 59 59.04655 10.3782
## min max range skew kurtosis se
## X11 28 100 72 0.2308671 0.4930983 0.1233031
## X12 32 100 68 0.2280661 0.4947679 0.3248879
## X13 26 100 74 0.2383928 0.4350684 0.1798632
#Conduct Bartlett's test for homogeneity of variance
stats::bartlett.test(CR_S11 ~ SCHOOL_TYPE, data=myData)
##
## Bartlett test of homogeneity of variances
##
## data: CR_S11 by SCHOOL_TYPE
## Bartlett's K-squared = 1.0611, df = 2, p-value = 0.5883
#In this case we can use Tukey as the post-hoc test option since variances in the groups are equal
userfriendlyscience::oneway(as.factor(myData$SCHOOL_TYPE),y=myData$CR_S11,posthoc='Tukey')
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
## ### Oneway Anova for y=CR_S11 and x=SCHOOL_TYPE (groups: ACADEMIC, TECHNICAL, TECHNICAL/ACADEMIC)
## Registered S3 methods overwritten by 'ufs':
## method from
## grid.draw.ggProportionPlot userfriendlyscience
## pander.associationMatrix userfriendlyscience
## pander.dataShape userfriendlyscience
## pander.descr userfriendlyscience
## pander.normalityAssessment userfriendlyscience
## print.CramersV userfriendlyscience
## print.associationMatrix userfriendlyscience
## print.confIntOmegaSq userfriendlyscience
## print.confIntV userfriendlyscience
## print.dataShape userfriendlyscience
## print.descr userfriendlyscience
## print.ggProportionPlot userfriendlyscience
## print.meanConfInt userfriendlyscience
## print.multiVarFreq userfriendlyscience
## print.normalityAssessment userfriendlyscience
## print.regrInfluential userfriendlyscience
## print.scaleDiagnosis userfriendlyscience
## print.scaleStructure userfriendlyscience
## print.scatterMatrix userfriendlyscience
## Omega squared: 95% CI = [.01; .02], point estimate = .02
## Eta Squared: 95% CI = [.01; .02], point estimate = .02
##
## SS Df MS F p
## Between groups (error + effect) 19387.11 2 9693.56 97.96 <.001
## Within groups (error only) 1039052.19 10500 98.96
##
##
## ### Post hoc test: Tukey
##
## diff lwr upr p adj
## TECHNICAL-ACADEMIC -3.17 -4.01 -2.34 <.001
## TECHNICAL/ACADEMIC-ACADEMIC -2.68 -3.19 -2.17 <.001
## TECHNICAL/ACADEMIC-TECHNICAL 0.5 -0.39 1.38 .39
#using AOV function
res2<-stats::aov(CR_S11 ~ SCHOOL_TYPE, data = myData)
res2
## Call:
## stats::aov(formula = CR_S11 ~ SCHOOL_TYPE, data = myData)
##
## Terms:
## SCHOOL_TYPE Residuals
## Sum of Squares 19387.1 1039052.2
## Deg. of Freedom 2 10500
##
## Residual standard error: 9.947731
## Estimated effects may be unbalanced
#Get the F statistic into a variable to make reporting easier
fstat<-summary(res2)[[1]][["F value"]][[1]]
fstat
## [1] 97.95691
#Get the p value into a variable to make reporting easier
aovpvalue<-summary(res2)[[1]][["Pr(>F)"]][[1]]
aovpvalue
## [1] 7.07739e-43
#Calculate effect
aoveta<-sjstats::eta_sq(res2)[2]
aoveta
## etasq
## 1 0.018
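#As an illustrative cross-check on the Tukey post hoc reported above (a minimal
#sketch, not part of the original output), base R's TukeyHSD can be applied to the
#aov fit directly
stats::TukeyHSD(res2)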
#for MAT_S11#
#Get descriptive statistics by group - output as a matrix
psych::describeBy(myData$MAT_S11, myData$SCHOOL_TYPE, mat=TRUE)
## item group1 vars n mean sd median trimmed mad
## X11 1 ACADEMIC 1 6547 65.98503 12.05318 65 65.56270 11.8608
## X12 2 TECHNICAL 1 895 61.31620 11.04250 60 60.82845 10.3782
## X13 3 TECHNICAL/ACADEMIC 1 3061 61.91996 11.03259 61 61.53165 10.3782
## min max range skew kurtosis se
## X11 26 100 74 0.3356335 -0.02428624 0.1489636
## X12 31 100 69 0.4588160 0.39263192 0.3691101
## X13 32 100 68 0.4024213 0.27914741 0.1994095
#Conduct Bartlett's test for homogeneity of variance
stats::bartlett.test(MAT_S11 ~ SCHOOL_TYPE, data=myData)
##
## Bartlett test of homogeneity of variances
##
## data: MAT_S11 by SCHOOL_TYPE
## Bartlett's K-squared = 37.809, df = 2, p-value = 6.165e-09
#In this case we can use Games-Howell as the post-hoc test option since variances in the groups are not equal
userfriendlyscience::oneway(as.factor(myData$SCHOOL_TYPE),y=myData$MAT_S11,posthoc='Games-Howell')
## ### Oneway Anova for y=MAT_S11 and x=SCHOOL_TYPE (groups: ACADEMIC, TECHNICAL, TECHNICAL/ACADEMIC)
##
## Omega squared: 95% CI = [.02; .04], point estimate = .03
## Eta Squared: 95% CI = [.02; .04], point estimate = .03
##
## SS Df MS F p
## Between groups (error + effect) 43786.38 2 21893.19 160.48 <.001
## Within groups (error only) 1432465.44 10500 136.43
##
##
## ### Post hoc test: Games-Howell
##
## diff ci.lo ci.hi t df p
## TECHNICAL-ACADEMIC -4.67 -5.60 -3.73 11.73 1204.57 <.001
## TECHNICAL/ACADEMIC-ACADEMIC -4.07 -4.65 -3.48 16.33 6484.22 <.001
## TECHNICAL/ACADEMIC-TECHNICAL 0.60 -0.38 1.59 1.44 1455.78 .321
#using AOV function
res2<-stats::aov(MAT_S11 ~ SCHOOL_TYPE, data = myData)
res2
## Call:
## stats::aov(formula = MAT_S11 ~ SCHOOL_TYPE, data = myData)
##
## Terms:
## SCHOOL_TYPE Residuals
## Sum of Squares 43786.4 1432465.4
## Deg. of Freedom 2 10500
##
## Residual standard error: 11.68012
## Estimated effects may be unbalanced
#Get the F statistic into a variable to make reporting easier
fstat<-summary(res2)[[1]][["F value"]][[1]]
fstat
## [1] 160.4775
#Get the p value into a variable to make reporting easier
aovpvalue<-summary(res2)[[1]][["Pr(>F)"]][[1]]
aovpvalue
## [1] 2.235959e-69
#Calculate effect
aoveta<-sjstats::eta_sq(res2)[2]
aoveta
## etasq
## 1 0.03
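#Since Bartlett's test indicated unequal variances, a Welch one-way test from base R
#offers an illustrative cross-check on the Games-Howell analysis above (a minimal
#sketch, not part of the original output)
stats::oneway.test(MAT_S11 ~ SCHOOL_TYPE, data = myData, var.equal = FALSE)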
### Chi-square test ###
## Comparing Nominal Variables
### Contingency table
#Use the CrossTable function
#CrossTable(predictor, outcome, chisq = TRUE, expected = TRUE)
gmodels::CrossTable(myData$PEOPLE_HOUSE, myData$SCHOOL_NAT, chisq = TRUE, expected = TRUE, sresid = TRUE, format = "SPSS")
##
## Cell Contents
## |-------------------------|
## | Count |
## | Expected Values |
## | Chi-square contribution |
## | Row Percent |
## | Column Percent |
## | Total Percent |
## | Std Residual |
## |-------------------------|
##
## Total Observations in Table: 10503
##
## | myData$SCHOOL_NAT
## myData$PEOPLE_HOUSE | PRIVATE | PUBLIC | Row Total |
## --------------------|-----------|-----------|-----------|
## Eight | 44 | 94 | 138 |
## | 70.439 | 67.561 | |
## | 9.924 | 10.346 | |
## | 31.884% | 68.116% | 1.314% |
## | 0.821% | 1.828% | |
## | 0.419% | 0.895% | |
## | -3.150 | 3.217 | |
## --------------------|-----------|-----------|-----------|
## Five | 1169 | 1307 | 2476 |
## | 1263.814 | 1212.186 | |
## | 7.113 | 7.416 | |
## | 47.213% | 52.787% | 23.574% |
## | 21.806% | 25.418% | |
## | 11.130% | 12.444% | |
## | -2.667 | 2.723 | |
## --------------------|-----------|-----------|-----------|
## Four | 2201 | 1821 | 4022 |
## | 2052.932 | 1969.068 | |
## | 10.679 | 11.134 | |
## | 54.724% | 45.276% | 38.294% |
## | 41.056% | 35.414% | |
## | 20.956% | 17.338% | |
## | 3.268 | -3.337 | |
## --------------------|-----------|-----------|-----------|
## Nueve | 19 | 43 | 62 |
## | 31.646 | 30.354 | |
## | 5.054 | 5.269 | |
## | 30.645% | 69.355% | 0.590% |
## | 0.354% | 0.836% | |
## | 0.181% | 0.409% | |
## | -2.248 | 2.295 | |
## --------------------|-----------|-----------|-----------|
## One | 14 | 12 | 26 |
## | 13.271 | 12.729 | |
## | 0.040 | 0.042 | |
## | 53.846% | 46.154% | 0.248% |
## | 0.261% | 0.233% | |
## | 0.133% | 0.114% | |
## | 0.200 | -0.204 | |
## --------------------|-----------|-----------|-----------|
## Seven | 121 | 205 | 326 |
## | 166.399 | 159.601 | |
## | 12.386 | 12.914 | |
## | 37.117% | 62.883% | 3.104% |
## | 2.257% | 3.987% | |
## | 1.152% | 1.952% | |
## | -3.519 | 3.594 | |
## --------------------|-----------|-----------|-----------|
## Six | 377 | 575 | 952 |
## | 485.925 | 466.075 | |
## | 24.417 | 25.457 | |
## | 39.601% | 60.399% | 9.064% |
## | 7.032% | 11.182% | |
## | 3.589% | 5.475% | |
## | -4.941 | 5.045 | |
## --------------------|-----------|-----------|-----------|
## Ten | 17 | 27 | 44 |
## | 22.459 | 21.541 | |
## | 1.327 | 1.383 | |
## | 38.636% | 61.364% | 0.419% |
## | 0.317% | 0.525% | |
## | 0.162% | 0.257% | |
## | -1.152 | 1.176 | |
## --------------------|-----------|-----------|-----------|
## Three | 1132 | 836 | 1968 |
## | 1004.518 | 963.482 | |
## | 16.179 | 16.868 | |
## | 57.520% | 42.480% | 18.738% |
## | 21.115% | 16.258% | |
## | 10.778% | 7.960% | |
## | 4.022 | -4.107 | |
## --------------------|-----------|-----------|-----------|
## Twelve or more | 8 | 23 | 31 |
## | 15.823 | 15.177 | |
## | 3.868 | 4.033 | |
## | 25.806% | 74.194% | 0.295% |
## | 0.149% | 0.447% | |
## | 0.076% | 0.219% | |
## | -1.967 | 2.008 | |
## --------------------|-----------|-----------|-----------|
## Two | 259 | 199 | 458 |
## | 233.775 | 224.225 | |
## | 2.722 | 2.838 | |
## | 56.550% | 43.450% | 4.361% |
## | 4.831% | 3.870% | |
## | 2.466% | 1.895% | |
## | 1.650 | -1.685 | |
## --------------------|-----------|-----------|-----------|
## Column Total | 5361 | 5142 | 10503 |
## | 51.043% | 48.957% | |
## --------------------|-----------|-----------|-----------|
##
##
## Statistics for All Table Factors
##
##
## Pearson's Chi-squared test
## ------------------------------------------------------------
## Chi^2 = 191.4071 d.f. = 10 p = 9.961248e-36
##
##
##
## Minimum expected frequency: 12.72893
#Create your contingency table
mytable<-xtabs(~SCHOOL_NAT+PEOPLE_HOUSE, data=myData)
mytable
## PEOPLE_HOUSE
## SCHOOL_NAT Eight Five Four Nueve One Seven Six Ten Three Twelve or more Two
## PRIVATE 44 1169 2201 19 14 121 377 17 1132 8 259
## PUBLIC 94 1307 1821 43 12 205 575 27 836 23 199
ctest<-stats::chisq.test(mytable, correct=TRUE) #chi-square test (the continuity correction only applies to 2x2 tables, so it has no effect here)
ctest
##
## Pearson's Chi-squared test
##
## data: mytable
## X-squared = 191.41, df = 10, p-value < 2.2e-16
ctest$expected#expected frequencies
## PEOPLE_HOUSE
## SCHOOL_NAT Eight Five Four Nueve One Seven Six
## PRIVATE 70.43873 1263.814 2052.932 31.64639 13.27107 166.3987 485.9252
## PUBLIC 67.56127 1212.186 1969.068 30.35361 12.72893 159.6013 466.0748
## PEOPLE_HOUSE
## SCHOOL_NAT Ten Three Twelve or more Two
## PRIVATE 22.45873 1004.5176 15.82319 233.7749
## PUBLIC 21.54127 963.4824 15.17681 224.2251
ctest$observed#observed frequencies
## PEOPLE_HOUSE
## SCHOOL_NAT Eight Five Four Nueve One Seven Six Ten Three Twelve or more Two
## PRIVATE 44 1169 2201 19 14 121 377 17 1132 8 259
## PUBLIC 94 1307 1821 43 12 205 575 27 836 23 199
ctest$p.value
## [1] 9.961248e-36
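#The Pearson residuals stored on the test object show which cells drive the
#association (illustrative, not part of the original output); cells with values
#beyond roughly +/-2 contribute most to the chi-square statistic
round(ctest$residuals, 2)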
#effect size
sjstats::phi(mytable)
## [1] 0.1349965
cramer<-sjstats::cramer(mytable)
cramer
## [1] 0.1349965
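#As an illustrative check on the value reported by sjstats, Cramer's V can be computed
#directly from the chi-square statistic: V = sqrt(chi-squared / (n * (k - 1))), where k
#is the smaller of the number of rows and columns (here k = 2, so V equals phi)
sqrt(as.numeric(ctest$statistic) / (sum(mytable) * (min(dim(mytable)) - 1)))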
myData$SCHOOL_NAT=ifelse(myData$SCHOOL_NAT== "PRIVATE", 0, ifelse(myData$SCHOOL_NAT == "PUBLIC", 1, NA))
myData$GENDER=ifelse(myData$GENDER== "M", 0, ifelse(myData$GENDER == "F", 1, NA))
#Homoscedasticity check (visual inspection with scatterplots)#
##scatterplot
#aes(x,y)
scatter <- ggplot(myData, aes(SCHOOL_NAT, MAT_S11))
#Add a regression line
scatter + geom_point() + geom_smooth(method = "lm", colour = "Red", se = F) + labs(x = "school nature", y = "total grades in mathematics")
## `geom_smooth()` using formula 'y ~ x'
#Simple scatterplot
#aes(x,y)
scatter <- ggplot(myData, aes(SCHOOL_NAT, CR_S11))
#Add a regression line
scatter + geom_point() + geom_smooth(method = "lm", colour = "Red", se = F) + labs(x = "school nature", y = "total grades in critical reading")
## `geom_smooth()` using formula 'y ~ x'
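#Illustrative alternative view (a sketch, not part of the original analysis): with a
#binary predictor, a grouped boxplot can show the spread within each group more clearly
ggplot(myData, aes(x = factor(SCHOOL_NAT), y = MAT_S11)) +
  geom_boxplot() +
  labs(x = "school nature (0 = PRIVATE, 1 = PUBLIC)", y = "total grades in mathematics")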
##correlation test##
stats::cor.test( myData$SCHOOL_NAT, myData$CR_S11,method='spearman')
## Warning in cor.test.default(myData$SCHOOL_NAT, myData$CR_S11, method =
## "spearman"): Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: myData$SCHOOL_NAT and myData$CR_S11
## S = 2.3082e+11, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.1952958
stats::cor.test( myData$SCHOOL_NAT, myData$MAT_S11,method='spearman')
## Warning in cor.test.default(myData$SCHOOL_NAT, myData$MAT_S11, method =
## "spearman"): Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: myData$SCHOOL_NAT and myData$MAT_S11
## S = 2.3673e+11, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.2259508
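#Since SCHOOL_NAT is now a 0/1 indicator, a Pearson (point-biserial) correlation offers
#an illustrative cross-check on the Spearman estimates above (a sketch, not part of the
#original output)
stats::cor.test(myData$SCHOOL_NAT, myData$MAT_S11, method = "pearson")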