library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
# Load the district-level workbook; the path is resolved relative to the
# current working directory.
District_data <- read_excel(path = "district.xls")
library(pastecs)
##
## Attaching package: 'pastecs'
## The following objects are masked from 'package:dplyr':
##
## first, last
## The following object is masked from 'package:tidyr':
##
## extract
# Descriptive statistics (count, min/max, mean, median, variance, ...) for the
# DPSTTOSA column. Single-bracket indexing keeps a one-column data frame,
# which is the input shape stat.desc() works on.
Teach_Salary_Avg <- stat.desc(District_data["DPSTTOSA"])

# Print the stored table directly. The earlier version called stat.desc() a
# second time on this result, which described the 14 summary statistics
# themselves rather than the salary column.
# NOTE(review): any previously rendered output for this chunk was produced by
# that double application and must be regenerated.
Teach_Salary_Avg
## DPSTTOSA
## nbr.val 1.400000e+01
## nbr.null 1.000000e+00
## nbr.na 0.000000e+00
## min 0.000000e+00
## max 6.492759e+07
## range 6.492759e+07
## sum 9.378960e+07
## median 2.071101e+04
## mean 6.699257e+06
## SE.mean 4.917483e+06
## CI.mean.0.95 1.062358e+07
## var 3.385430e+14
## std.dev 1.839954e+07
## coef.var 2.746504e+00
This variable explores the average teacher salary across the school districts in the dataset. These statistics tell us that 75% of the districts report an average teacher salary of less than $69k. The minimum here is zero because a few districts did not provide an average salary for their teachers.
# Five-number summary (plus mean) of the raw DPSTTOSA column. The earlier call
# summarised the stat.desc() result table, so its quartiles were quartiles of
# summary statistics, not of district salaries.
# NOTE(review): previously rendered output for this chunk must be regenerated.
summary(District_data["DPSTTOSA"])
## DPSTTOSA
## Min. : 0
## 1st Qu.: 191
## Median : 20711
## Mean : 6699257
## 3rd Qu.: 69352
## Max. :64927589
# Histogram of DPSTTOSA on the density scale (probability = TRUE), requesting
# roughly 15 bins. TRUE is spelled out because T is an ordinary variable that
# can be reassigned.
hist(District_data$DPSTTOSA, breaks = 15, probability = TRUE)
# Build a two-column table: district name plus the natural log of DPSTTOSA.
# Selecting first and letting mutate() drop the column it consumed leaves
# DISTNAME followed by LOG_DPSTTOSA.
Teach_Salary_log <- District_data %>%
  select(DISTNAME, DPSTTOSA) %>%
  mutate(LOG_DPSTTOSA = log(DPSTTOSA), .keep = "unused")

# Preview the first rows of the transformed table.
head(Teach_Salary_log)
## # A tibble: 6 × 2
## DISTNAME LOG_DPSTTOSA
## <chr> <dbl>
## 1 CAYUGA ISD 10.9
## 2 ELKHART ISD 10.8
## 3 FRANKSTON ISD 10.8
## 4 NECHES ISD 10.9
## 5 PALESTINE ISD 10.8
## 6 WESTWOOD ISD 10.7
# Histogram of the log-transformed DPSTTOSA values on the density scale,
# requesting roughly 15 bins. TRUE is spelled out because T is an ordinary
# variable that can be reassigned.
hist(Teach_Salary_log$LOG_DPSTTOSA, breaks = 15, probability = TRUE)