# Load the district-level spreadsheet into a tibble (one row per district).
data <- read_excel(path = "district.xls")
# Preview the first six rows to check column names and types.
head(data, n = 6L)
## # A tibble: 6 × 137
##   DISTNAME DISTRICT DZCNTYNM REGION DZRATING DZCAMPUS DPETALLC DPETBLAP DPETHISP
##   <chr>    <chr>    <chr>    <chr>  <chr>       <dbl>    <dbl>    <dbl>    <dbl>
## 1 CAYUGA … 001902   001 AND… 07     A               3      574      4.4     11.5
## 2 ELKHART… 001903   001 AND… 07     A               4     1150      4       11.8
## 3 FRANKST… 001904   001 AND… 07     A               3      808      8.5     11.3
## 4 NECHES … 001906   001 AND… 07     A               2      342      8.2     13.5
## 5 PALESTI… 001907   001 AND… 07     B               6     3360     25.1     42.9
## 6 WESTWOO… 001908   001 AND… 07     B               4     1332     19.7     26.2
## # ℹ 128 more variables: DPETWHIP <dbl>, DPETINDP <dbl>, DPETASIP <dbl>,
## #   DPETPCIP <dbl>, DPETTWOP <dbl>, DPETECOP <dbl>, DPETLEPP <dbl>,
## #   DPETSPEP <dbl>, DPETBILP <dbl>, DPETVOCP <dbl>, DPETGIFP <dbl>,
## #   DA0AT21R <dbl>, DA0912DR21R <dbl>, DAGC4X21R <dbl>, DAGC5X20R <dbl>,
## #   DAGC6X19R <dbl>, DA0GR21N <dbl>, DA0GS21N <dbl>, DDA00A001S22R <dbl>,
## #   DDA00A001222R <dbl>, DDA00A001322R <dbl>, DDA00AR01S22R <dbl>,
## #   DDA00AR01222R <dbl>, DDA00AR01322R <dbl>, DDA00AM01S22R <dbl>, …
# Descriptive statistics for DPSTEXPA (average teacher experience per
# district): counts of values/nulls/NAs, range, centre, and spread.
summary_stats <- stat.desc(data[["DPSTEXPA"]])
print(summary_stats)
##      nbr.val     nbr.null       nbr.na          min          max        range 
## 1.204000e+03 5.000000e+00 3.000000e+00 0.000000e+00 2.290000e+01 2.290000e+01 
##          sum       median         mean      SE.mean CI.mean.0.95          var 
## 1.414260e+04 1.200000e+01 1.174635e+01 9.749004e-02 1.912694e-01 1.144319e+01 
##      std.dev     coef.var 
## 3.382778e+00 2.879856e-01

DPSTEXPA is the average years of teaching experience in each school district. The mean is 11.75 years and the median is 12 years. The minimum value is 0 years, while the maximum is 22.9 years. Because the median is 12, about half of the districts have an average teacher experience of 12 years or more.

# Keep only the districts that report an average teacher experience value.
data_clean <- filter(data, !is.na(DPSTEXPA))
# Sanity check: count of missing values left in the column (should be 0).
sum(is.na(data_clean[["DPSTEXPA"]]))
## [1] 0
# Distribution of average teacher experience, in one-year bins.
ggplot(data = data_clean, mapping = aes(x = DPSTEXPA)) +
  geom_histogram(binwidth = 1, fill = "blue", color = "black") +
  labs(
    title = "Histogram of Teacher Experience",
    x = "Years of Experience",
    y = "Frequency"
  ) +
  theme_minimal()

# Install e1071 only when it is missing. Reinstalling on every run is slow
# and, on Windows, cannot overwrite the DLL of a package that is already
# loaded (the install then fails with "Permission denied" warnings).
if (!requireNamespace("e1071", quietly = TRUE)) {
  install.packages("e1071")
}
library(e1071) 
## Warning: package 'e1071' was built under R version 4.4.3
# Sample skewness of average teacher experience. A negative value means the
# distribution has a longer left tail; a positive value, a longer right tail.
skew_value <- skewness(data_clean[["DPSTEXPA"]], na.rm = TRUE)
print(skew_value)
## [1] -0.5878227
# Add a square-root-transformed copy of the experience variable.
# NOTE(review): the skewness printed above is negative (left skew). A plain
# square root mainly compresses the right tail, so confirm this is the
# intended transform; a reflected transform is the usual choice for left skew.
data_clean <- dplyr::mutate(data_clean, sqrt_experience = sqrt(DPSTEXPA))
# Preview the result; the new column is appended as the last variable.
head(data_clean, n = 6L)
## # A tibble: 6 × 138
##   DISTNAME DISTRICT DZCNTYNM REGION DZRATING DZCAMPUS DPETALLC DPETBLAP DPETHISP
##   <chr>    <chr>    <chr>    <chr>  <chr>       <dbl>    <dbl>    <dbl>    <dbl>
## 1 CAYUGA … 001902   001 AND… 07     A               3      574      4.4     11.5
## 2 ELKHART… 001903   001 AND… 07     A               4     1150      4       11.8
## 3 FRANKST… 001904   001 AND… 07     A               3      808      8.5     11.3
## 4 NECHES … 001906   001 AND… 07     A               2      342      8.2     13.5
## 5 PALESTI… 001907   001 AND… 07     B               6     3360     25.1     42.9
## 6 WESTWOO… 001908   001 AND… 07     B               4     1332     19.7     26.2
## # ℹ 129 more variables: DPETWHIP <dbl>, DPETINDP <dbl>, DPETASIP <dbl>,
## #   DPETPCIP <dbl>, DPETTWOP <dbl>, DPETECOP <dbl>, DPETLEPP <dbl>,
## #   DPETSPEP <dbl>, DPETBILP <dbl>, DPETVOCP <dbl>, DPETGIFP <dbl>,
## #   DA0AT21R <dbl>, DA0912DR21R <dbl>, DAGC4X21R <dbl>, DAGC5X20R <dbl>,
## #   DAGC6X19R <dbl>, DA0GR21N <dbl>, DA0GS21N <dbl>, DDA00A001S22R <dbl>,
## #   DDA00A001222R <dbl>, DDA00A001322R <dbl>, DDA00AR01S22R <dbl>,
## #   DDA00AR01222R <dbl>, DDA00AR01322R <dbl>, DDA00AM01S22R <dbl>, …
# Distribution of the square-root-transformed experience variable.
# The transformed values span only about 0 to 4.8 (the square root of the
# 22.9-year maximum), so the one-unit bins used on the raw scale would
# collapse the plot to a handful of bars. Use quarter-unit bins instead.
ggplot(data_clean, aes(x = sqrt_experience)) +
  geom_histogram(binwidth = 0.25, fill = "gray70", color = "black") +
  theme_minimal() +
  labs(title = "Histogram of Square Root Transformed Teacher Experience", 
       x = "Square Root of Experience", 
       y = "Frequency")

# Install ggpubr only when it is missing, so the document does not download
# and reinstall the package every time it is run or knitted.
if (!requireNamespace("ggpubr", quietly = TRUE)) {
  install.packages("ggpubr")
}
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 4.4.3
# Raw-scale histogram: one-year bins over average teacher experience.
hist_original <- ggplot(data_clean, aes(x = DPSTEXPA)) +
  geom_histogram(binwidth = 1, fill = "gray70", color = "black") +
  theme_minimal() +
  labs(title = "Original Teacher Experience Histogram", 
       x = "Years of Experience", 
       y = "Frequency")
# Transformed-scale histogram. The square-root values span only about
# 0 to 4.8, so the bin width is scaled down (0.25 instead of 1) to keep a
# resolution comparable to the raw-scale panel.
hist_transformed <- ggplot(data_clean, aes(x = sqrt_experience)) +
  geom_histogram(binwidth = 0.25, fill = "gray70", color = "black") +
  theme_minimal() +
  labs(title = "Transformed (Square Root) Histogram", 
       x = "Square Root of Experience", 
       y = "Frequency")
# Show both histograms side by side for direct comparison.
ggarrange(hist_original, hist_transformed, ncol = 2, nrow = 1)

`ggarrange(hist_original, hist_transformed, ncol = 2, nrow = 1)`


## R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see <http://rmarkdown.rstudio.com>.

When you click the **Knit** button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:


``` r
# Template example: per-column summary statistics for the `cars` data set.
summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

```

## Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.