library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)

# Load the district-level workbook from the working directory.
District_data <- read_excel(path = "district.xls")
library(pastecs)
## 
## Attaching package: 'pastecs'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## The following object is masked from 'package:tidyr':
## 
##     extract
# Descriptive statistics for the DPSTTOSA column (district average teacher
# salary). stat.desc() is applied exactly once, to the raw column: passing its
# result back through stat.desc() would describe the 14 statistics themselves
# (nbr.val = 14) instead of the districts.
Teach_Salary_Avg <- stat.desc(District_data["DPSTTOSA"])
print(Teach_Salary_Avg)
##                  DPSTTOSA
## nbr.val      1.400000e+01
## nbr.null     1.000000e+00
## nbr.na       0.000000e+00
## min          0.000000e+00
## max          6.492759e+07
## range        6.492759e+07
## sum          9.378960e+07
## median       2.071101e+04
## mean         6.699257e+06
## SE.mean      4.917483e+06
## CI.mean.0.95 1.062358e+07
## var          3.385430e+14
## std.dev      1.839954e+07
## coef.var     2.746504e+00

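This variable explores the average teacher salary across the different school districts in the dataset. These statistics suggest that 75% of districts have an average teacher salary below roughly $69k. The minimum here is zero due to a few districts not providing the average salary for district teachers. Note, however, that the printed output reports nbr.val = 14, i.e. it summarises the 14 statistics returned by stat.desc() rather than the districts themselves, so these figures should be re-checked against the raw DPSTTOSA column.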

# Quartiles and mean of the raw salary column. Teach_Salary_Avg holds the
# stat.desc() table, so summarising it would describe the statistics rather
# than the districts. The one-column data-frame form keeps the DPSTTOSA header
# in the printed summary.
summary(District_data["DPSTTOSA"])
##     DPSTTOSA       
##  Min.   :       0  
##  1st Qu.:     191  
##  Median :   20711  
##  Mean   : 6699257  
##  3rd Qu.:   69352  
##  Max.   :64927589
# Density-scaled histogram of the raw salary column.
# TRUE is spelled out because the shorthand T is an ordinary, reassignable
# variable; breaks = 15 is a suggested bin count, not an exact one.
hist(
  District_data$DPSTTOSA,
  breaks = 15,
  probability = TRUE,
  main = "District average teacher salary",
  xlab = "DPSTTOSA"
)

# Natural-log transform of the salary column, keeping only the district name
# and the transformed value. Non-positive salaries are mapped to NA before
# logging so that log() cannot produce -Inf or NaN in the new column.
Teach_Salary_log <- District_data %>%
  mutate(
    LOG_DPSTTOSA = log(if_else(DPSTTOSA > 0, DPSTTOSA, NA_real_))
  ) %>%
  select(DISTNAME, LOG_DPSTTOSA)

# Preview the first rows of the transformed table.
head(Teach_Salary_log)
## # A tibble: 6 × 2
##   DISTNAME      LOG_DPSTTOSA
##   <chr>                <dbl>
## 1 CAYUGA ISD            10.9
## 2 ELKHART ISD           10.8
## 3 FRANKSTON ISD         10.8
## 4 NECHES ISD            10.9
## 5 PALESTINE ISD         10.8
## 6 WESTWOOD ISD          10.7
# Density-scaled histogram of the log-transformed salary column.
# TRUE is spelled out because the shorthand T is an ordinary, reassignable
# variable; hist() ignores non-finite values such as NA.
hist(
  Teach_Salary_log$LOG_DPSTTOSA,
  breaks = 15,
  probability = TRUE,
  main = "Log of district average teacher salary",
  xlab = "log(DPSTTOSA)"
)