This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readr)
library(knitr)
library(kableExtra)
##
## Attaching package: 'kableExtra'
##
## The following object is masked from 'package:dplyr':
##
## group_rows
library(ggplot2)
library(ggpubr)
library(modelr)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(broom)
##
## Attaching package: 'broom'
##
## The following object is masked from 'package:modelr':
##
## bootstrap
# Seed the random number generator (no random step is visible in this section).
set.seed(123)

# Read the data through an explicit path rather than calling setwd(): setwd()
# changes the working directory for everything that runs afterwards, whereas
# file.path() keeps the location in one variable that is easy to adjust.
data_dir <- "/Users/saitejaravulapalli/Documents/IUPUI_SEM 01/Intro to Statistic in R/DATA SET"
student_dropout <- read.csv(
  file.path(data_dir, "student dropout.csv"),
  sep = ";",
  header = TRUE
)

# One-way ANOVA of admission grade by gender.
# NOTE(review): Gender appears to be stored as a number (the regression output
# further down reports a single `Gender` coefficient). With only two groups the
# F test is the same either way, but consider factor(Gender) if the column can
# take more than two values -- confirm against the data dictionary.
anova_model <- aov(Admission.grade ~ Gender, data = student_dropout)
summary(anova_model)
## Df Sum Sq Mean Sq F value Pr(>F)
## Gender 1 64 64.13 0.306 0.58
## Residuals 4422 927564 209.76
# Scatter plot of admission grade against age at enrollment, with a fitted
# regression line, its confidence band, and the Pearson correlation overlaid.
ggscatter(
  data = student_dropout,
  x = "Age.at.enrollment",
  y = "Admission.grade",
  xlab = "Age.at.enrollment",
  ylab = "Admission.grade",
  add = "reg.line",
  conf.int = TRUE,
  cor.coef = TRUE,
  cor.method = "pearson"
)

# Simple linear regression: admission grade explained by age at enrollment.
mod1 <- lm(
  formula = Admission.grade ~ Age.at.enrollment,
  data = student_dropout
)
summary(mod1)
##
## Call:
## lm(formula = Admission.grade ~ Age.at.enrollment, data = student_dropout)
##
## Residuals:
## Min 1Q Median 3Q Max
## -32.222 -9.222 -0.979 7.735 64.149
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 128.30647 0.70204 182.76 <2e-16 ***
## Age.at.enrollment -0.05710 0.02869 -1.99 0.0466 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.48 on 4422 degrees of freedom
## Multiple R-squared: 0.0008949, Adjusted R-squared: 0.000669
## F-statistic: 3.961 on 1 and 4422 DF, p-value: 0.04663
# Diagnostic plots for the age-only model.
plot(mod1)

# Multiple linear regression: add Gender as a second predictor alongside age.
mod2 <- lm(
  formula = Admission.grade ~ Age.at.enrollment + Gender,
  data = student_dropout
)
summary(mod2)
##
## Call:
## lm(formula = Admission.grade ~ Age.at.enrollment + Gender, data = student_dropout)
##
## Residuals:
## Min 1Q Median 3Q Max
## -32.494 -9.163 -1.003 7.797 63.965
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 128.25350 0.70476 181.982 <2e-16 ***
## Age.at.enrollment -0.06080 0.02901 -2.096 0.0362 *
## Gender 0.39594 0.46097 0.859 0.3904
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.48 on 4421 degrees of freedom
## Multiple R-squared: 0.001062, Adjusted R-squared: 0.0006097
## F-statistic: 2.349 on 2 and 4421 DF, p-value: 0.09556
# Diagnostic plots for the age + gender model.
plot(mod2)