# Read the district-level workbook into `data`, then preview the first rows
# to confirm the columns and their types were parsed as expected.
data <- read_excel(path = "district.xls")
head(x = data)
## # A tibble: 6 × 137
## DISTNAME DISTRICT DZCNTYNM REGION DZRATING DZCAMPUS DPETALLC DPETBLAP DPETHISP
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 CAYUGA … 001902 001 AND… 07 A 3 574 4.4 11.5
## 2 ELKHART… 001903 001 AND… 07 A 4 1150 4 11.8
## 3 FRANKST… 001904 001 AND… 07 A 3 808 8.5 11.3
## 4 NECHES … 001906 001 AND… 07 A 2 342 8.2 13.5
## 5 PALESTI… 001907 001 AND… 07 B 6 3360 25.1 42.9
## 6 WESTWOO… 001908 001 AND… 07 B 4 1332 19.7 26.2
## # ℹ 128 more variables: DPETWHIP <dbl>, DPETINDP <dbl>, DPETASIP <dbl>,
## # DPETPCIP <dbl>, DPETTWOP <dbl>, DPETECOP <dbl>, DPETLEPP <dbl>,
## # DPETSPEP <dbl>, DPETBILP <dbl>, DPETVOCP <dbl>, DPETGIFP <dbl>,
## # DA0AT21R <dbl>, DA0912DR21R <dbl>, DAGC4X21R <dbl>, DAGC5X20R <dbl>,
## # DAGC6X19R <dbl>, DA0GR21N <dbl>, DA0GS21N <dbl>, DDA00A001S22R <dbl>,
## # DDA00A001222R <dbl>, DDA00A001322R <dbl>, DDA00AR01S22R <dbl>,
## # DDA00AR01222R <dbl>, DDA00AR01322R <dbl>, DDA00AM01S22R <dbl>, …
# Descriptive statistics for DPSTEXPA (counts of values/nulls/NAs, range,
# central tendency and dispersion), computed on the unfiltered data.
summary_stats <- stat.desc(data[["DPSTEXPA"]])
print(summary_stats)
## nbr.val nbr.null nbr.na min max range
## 1.204000e+03 5.000000e+00 3.000000e+00 0.000000e+00 2.290000e+01 2.290000e+01
## sum median mean SE.mean CI.mean.0.95 var
## 1.414260e+04 1.200000e+01 1.174635e+01 9.749004e-02 1.912694e-01 1.144319e+01
## std.dev coef.var
## 3.382778e+00 2.879856e-01
DPSTEXPA is the average years of teaching experience for teachers in each school district. Across districts, the mean is 11.75 years and the median is 12 years. The minimum value is 0 years, while the maximum is 22.9 years. Because the median is 12, about half of the districts have an average teacher experience of 12 years or more.
# Keep only the rows where DPSTEXPA is present.
data_clean <- dplyr::filter(data, !is.na(DPSTEXPA))

# Sanity check: count of missing DPSTEXPA values remaining after filtering.
sum(is.na(data_clean[["DPSTEXPA"]]))
## [1] 0
# Distribution of DPSTEXPA on its original scale, in one-unit bins.
ggplot(data = data_clean, mapping = aes(x = DPSTEXPA)) +
  geom_histogram(binwidth = 1, fill = "blue", color = "black") +
  labs(
    title = "Histogram of Teacher Experience",
    x = "Years of Experience",
    y = "Frequency"
  ) +
  theme_minimal()
# Install e1071 only when it is not already available. Reinstalling on every
# run is slow, needs network access, and can fail when the package is already
# installed and in use.
if (!requireNamespace("e1071", quietly = TRUE)) {
  install.packages("e1071")
}
## Installing package into 'C:/Users/wamel/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)
## package 'e1071' successfully unpacked and MD5 sums checked
## Warning: cannot remove prior installation of package 'e1071'
## Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
## C:\Users\wamel\AppData\Local\R\win-library\4.4\00LOCK\e1071\libs\x64\e1071.dll
## to C:\Users\wamel\AppData\Local\R\win-library\4.4\e1071\libs\x64\e1071.dll:
## Permission denied
## Warning: restored 'e1071'
##
## The downloaded binary packages are in
## C:\Users\wamel\AppData\Local\Temp\RtmpugV2rj\downloaded_packages
library(e1071)
## Warning: package 'e1071' was built under R version 4.4.3
# Sample skewness of DPSTEXPA; the sign indicates the direction of the
# longer tail (negative = left tail, positive = right tail).
skew_value <- e1071::skewness(x = data_clean[["DPSTEXPA"]], na.rm = TRUE)
print(skew_value)
## [1] -0.5878227
# Append a square-root-transformed copy of DPSTEXPA as `sqrt_experience`.
# NOTE(review): the skewness reported for DPSTEXPA is negative; a plain
# square root is normally applied to positively skewed data, so confirm this
# transform is the intended one (a reflected transform may be more suitable).
data_clean <- dplyr::mutate(data_clean, sqrt_experience = sqrt(DPSTEXPA))

# Preview the result; the new column is added after the existing ones.
head(x = data_clean)
## # A tibble: 6 × 138
## DISTNAME DISTRICT DZCNTYNM REGION DZRATING DZCAMPUS DPETALLC DPETBLAP DPETHISP
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 CAYUGA … 001902 001 AND… 07 A 3 574 4.4 11.5
## 2 ELKHART… 001903 001 AND… 07 A 4 1150 4 11.8
## 3 FRANKST… 001904 001 AND… 07 A 3 808 8.5 11.3
## 4 NECHES … 001906 001 AND… 07 A 2 342 8.2 13.5
## 5 PALESTI… 001907 001 AND… 07 B 6 3360 25.1 42.9
## 6 WESTWOO… 001908 001 AND… 07 B 4 1332 19.7 26.2
## # ℹ 129 more variables: DPETWHIP <dbl>, DPETINDP <dbl>, DPETASIP <dbl>,
## # DPETPCIP <dbl>, DPETTWOP <dbl>, DPETECOP <dbl>, DPETLEPP <dbl>,
## # DPETSPEP <dbl>, DPETBILP <dbl>, DPETVOCP <dbl>, DPETGIFP <dbl>,
## # DA0AT21R <dbl>, DA0912DR21R <dbl>, DAGC4X21R <dbl>, DAGC5X20R <dbl>,
## # DAGC6X19R <dbl>, DA0GR21N <dbl>, DA0GS21N <dbl>, DDA00A001S22R <dbl>,
## # DDA00A001222R <dbl>, DDA00A001322R <dbl>, DDA00AR01S22R <dbl>,
## # DDA00AR01222R <dbl>, DDA00AR01322R <dbl>, DDA00AM01S22R <dbl>, …
# Distribution of the square-root-transformed experience values.
# The transformed variable spans only a few units, so a bin width of 1 would
# collapse it into a handful of bars; 0.25 keeps the shape visible.
ggplot(data_clean, aes(x = sqrt_experience)) +
  geom_histogram(binwidth = 0.25, fill = "gray70", color = "black") +
  theme_minimal() +
  labs(
    title = "Histogram of Square Root Transformed Teacher Experience",
    x = "Square Root of Experience",
    y = "Frequency"
  )
# Install ggpubr only when it is not already available, so re-running the
# analysis does not re-download the package or require network access.
if (!requireNamespace("ggpubr", quietly = TRUE)) {
  install.packages("ggpubr")
}
## Installing package into 'C:/Users/wamel/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)
## package 'ggpubr' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\wamel\AppData\Local\Temp\RtmpugV2rj\downloaded_packages
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 4.4.3
# Left-hand panel: DPSTEXPA on its original scale, in one-unit bins.
hist_original <- ggplot(data = data_clean, mapping = aes(x = DPSTEXPA)) +
  geom_histogram(binwidth = 1, fill = "gray70", color = "black") +
  labs(
    title = "Original Teacher Experience Histogram",
    x = "Years of Experience",
    y = "Frequency"
  ) +
  theme_minimal()
# Right-hand panel: the square-root-transformed values.
# The transformed scale spans only a few units, so the bin width is reduced
# from 1 to 0.25; otherwise the histogram has too few bars to compare shapes.
hist_transformed <- ggplot(data_clean, aes(x = sqrt_experience)) +
  geom_histogram(binwidth = 0.25, fill = "gray70", color = "black") +
  theme_minimal() +
  labs(
    title = "Transformed (Square Root) Histogram",
    x = "Square Root of Experience",
    y = "Frequency"
  )
# Place the original and transformed histograms side by side (one row, two
# columns) for visual comparison.
ggpubr::ggarrange(
  plotlist = list(hist_original, hist_transformed),
  ncol = 2,
  nrow = 1
)
`ggarrange(hist_original, hist_transformed, ncol = 2, nrow = 1){r}
## R Markdown
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see <http://rmarkdown.rstudio.com>.
When you click the **Knit** button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
``` r
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
```
You can also embed plots, for example:
Note that the `echo = FALSE` parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.