library(readxl)
# Read the training workbook into `Training`.
# NOTE(review): the path is absolute and machine-specific, so this script only
# runs where this exact file exists -- consider a project-relative path.
Training <- read_excel(
  "C:/Users/celin/OneDrive/Desktop/Applied Quant Class/Training.xls"
)
# Open the data viewer only in interactive sessions: View() is a GUI side
# effect that is not wanted when the script is run in batch mode.
if (interactive()) {
  View(Training)
}
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(pastecs)
##
## Attaching package: 'pastecs'
##
## The following objects are masked from 'package:dplyr':
##
## first, last
##
## The following object is masked from 'package:tidyr':
##
## extract
# Work on a copy so the imported `Training` object is left unchanged.
Training_Cleaned <- Training

# Coerce the `All Students` column to numeric. Values that cannot be parsed
# as numbers become NA, which is what the recorded warning below reports.
Training_Cleaned[["All Students (7/1/2018 - 6/30/2022)"]] <- as.numeric(
  Training_Cleaned[["All Students (7/1/2018 - 6/30/2022)"]]
)
## Warning: NAs introduced by coercion

# Same coercion for the credential percentage column.
Training_Cleaned[["Percent Received Credential"]] <- as.numeric(
  Training_Cleaned[["Percent Received Credential"]]
)
## Warning: NAs introduced by coercion

# Store a numeric version of the "Successfully Completed Program" column under
# a new name; the source column itself is left as imported.
# NOTE(review): the new column is labelled "Percent" but is copied from
# "All Students Successfully Completed Program(...)" -- confirm whether that
# source column holds a percentage or a count.
Training_Cleaned[["All Students Percent Successfully Completed"]] <- as.numeric(
  Training_Cleaned[["All Students Successfully Completed Program(7/1/2018 - 6/30/2022)"]]
)
## Warning: NAs introduced by coercion

# Keep only the five analysis columns, then drop every row with an NA in any
# of them.
# NOTE(review): the backtick-quoted provider column name spans two lines, so
# the name as written contains a line break -- presumably matching the
# spreadsheet header; confirm before reformatting it.
Training_Cleaned_Filtered <- na.omit(
  select(
    Training_Cleaned,
    County,
    `Provider
Name`,
    `All Students (7/1/2018 - 6/30/2022)`,
    `Percent Received Credential`,
    `All Students Percent Successfully Completed`
  )
)
# Descriptive statistics for the `All Students` column of the filtered data.
pastecs::stat.desc(Training_Cleaned_Filtered$`All Students (7/1/2018 - 6/30/2022)`)
## nbr.val nbr.null nbr.na min max range
## 1.912000e+03 0.000000e+00 0.000000e+00 5.000000e+00 6.366000e+03 6.361000e+03
## sum median mean SE.mean CI.mean.0.95 var
## 4.066900e+05 1.030000e+02 2.127040e+02 8.621951e+00 1.690942e+01 1.421343e+05
## std.dev coef.var
## 3.770071e+02 1.772450e+00

# Histogram on the density scale so the kernel density curve drawn by lines()
# shares the same y-axis. `TRUE` is spelled out because `T` is an ordinary
# variable that can be reassigned.
hist(
  Training_Cleaned_Filtered$`All Students (7/1/2018 - 6/30/2022)`,
  breaks = 10,
  probability = TRUE
)
lines(
  density(Training_Cleaned_Filtered$`All Students (7/1/2018 - 6/30/2022)`),
  col = "red",
  lwd = 2
)

# Scatter plot of the `All Students` column (x) against the
# `All Students Percent Successfully Completed` column (y), titled "Previous".
ggplot(
  Training_Cleaned_Filtered,
  aes(
    x = `All Students (7/1/2018 - 6/30/2022)`,
    y = `All Students Percent Successfully Completed`
  )
) +
  geom_point() +
  labs(title = "Previous")

# Add a square-root transform of the `All Students` column and keep only the
# original and transformed columns for side-by-side comparison.
# NOTE(review): presumably the transform is meant to reduce the right skew of
# the raw values -- confirm the intent.
Previous_sqrt <- Training_Cleaned_Filtered %>%
  mutate(ALLSTUDENTSSQRT = sqrt(`All Students (7/1/2018 - 6/30/2022)`)) %>%
  select(`All Students (7/1/2018 - 6/30/2022)`, ALLSTUDENTSSQRT)
head(Previous_sqrt)
## # A tibble: 6 × 2
## `All Students (7/1/2018 - 6/30/2022)` ALLSTUDENTSSQRT
## <dbl> <dbl>
## 1 38 6.16
## 2 27 5.20
## 3 12 3.46
## 4 198 14.1
## 5 134 11.6
## 6 20 4.47

# Density-scaled histogram of the transformed values with a kernel density
# overlay. `TRUE` is spelled out because `T` is an ordinary variable that can
# be reassigned.
hist(Previous_sqrt$ALLSTUDENTSSQRT, breaks = 10, probability = TRUE)
lines(density(Previous_sqrt$ALLSTUDENTSSQRT), col = "red", lwd = 2)
