library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
# Load the district-level data set.
# The file path is built explicitly instead of calling setwd(), so running
# this script does not change the working directory of the R session.
data_dir <- path.expand("~/Desktop/Monday Class")
district <- read_excel(file.path(data_dir, "district.xls"))
# Question 1: variable
# Print every value of the chosen variable, DA0912DR21R (dropout rate).
district[["DA0912DR21R"]]
## [1] 0.0 0.3 0.4 0.0 0.0 0.0 0.0 0.4 0.4 0.7 2.2 0.2 0.7 2.2
## [15] 0.5 1.1 0.6 0.0 0.0 0.0 0.8 0.4 2.8 2.2 4.1 0.0 0.7 0.0
## [29] 0.0 0.0 4.4 0.3 2.0 1.6 0.0 1.1 NA 3.5 NA 0.9 2.6 30.8
## [43] 1.1 0.0 1.0 0.0 0.4 0.0 2.8 0.3 0.1 2.6 0.0 22.1 0.0 0.0
## [57] 2.1 4.1 0.6 NA 22.6 0.0 2.0 0.0 0.0 0.4 0.4 0.8 4.7 0.0
## [71] 0.8 NA 5.0 NA NA NA NA NA 0.3 3.5 0.5 0.0 3.6 5.2
## [85] 2.2 2.7 3.4 1.1 0.0 0.0 1.4 3.8 2.6 0.0 0.5 0.0 0.7 0.8
## [99] 0.0 0.5 0.0 0.0 7.4 0.0 0.4 0.0 0.0 1.4 0.5 8.2 3.0 0.0
## [113] NA NA 0.3 NA NA 0.1 0.2 0.7 2.8 0.2 0.0 0.1 NA NA
## [127] NA 1.3 1.8 0.0 0.9 0.0 NA 0.0 2.2 0.7 0.9 0.0 1.0 0.0
## [141] 0.0 0.0 0.2 1.2 1.3 1.1 0.2 3.6 0.5 7.2 0.4 0.0 0.2 0.0
## [155] 0.0 0.9 1.8 2.5 1.4 3.4 3.6 2.8 2.7 2.1 0.1 0.0 0.0 0.0
## [169] 0.0 0.4 0.0 0.0 2.4 0.0 0.0 0.0 0.9 0.0 0.0 0.0 0.1 0.7
## [183] 0.0 1.0 0.3 0.0 0.0 4.2 1.4 0.7 0.0 0.0 2.7 0.0 0.0 0.0
## [197] 0.4 0.0 0.0 0.4 NA 0.8 1.2 0.2 0.8 0.4 0.6 0.1 0.8 0.8
## [211] 0.1 0.4 0.3 0.2 0.0 0.0 1.0 2.5 0.0 0.0 1.2 0.7 0.5 0.0
## [225] 0.0 0.0 0.0 0.0 0.1 0.0 0.0 0.3 0.0 0.0 NA NA 16.8 2.1
## [239] 0.0 0.0 1.9 0.0 0.9 0.0 0.0 0.0 0.7 1.0 0.0 0.0 0.0 1.1
## [253] 31.8 NA 2.8 0.3 0.5 NA NA NA 40.9 NA 0.0 NA 11.8 0.2
## [267] 1.3 0.0 NA 8.6 0.0 NA NA 0.0 NA 1.0 0.0 1.2 0.0 0.1
## [281] 0.0 NA 1.8 2.8 4.5 0.9 0.4 0.9 2.5 0.0 1.8 0.1 1.2 2.7
## [295] 0.2 0.2 0.0 0.0 0.5 0.0 0.7 NA 1.6 0.0 NA 0.0 0.0 0.3
## [309] 1.0 0.8 1.0 0.4 0.4 1.7 0.0 0.9 0.0 1.3 0.1 0.0 0.2 0.0
## [323] NA NA 0.0 0.0 5.4 0.7 0.0 NA 2.0 2.8 0.9 0.0 0.0 1.1
## [337] 3.4 0.0 0.0 0.5 2.1 0.0 0.0 2.1 0.0 0.9 0.9 1.5 1.0 0.0
## [351] 0.5 2.3 1.1 1.8 0.0 1.0 15.1 0.4 NA NA NA 1.1 3.1 4.4
## [365] 2.6 1.8 1.6 1.6 2.1 1.7 4.9 13.3 0.0 0.3 0.5 0.0 0.0 0.0
## [379] NA 0.0 0.0 NA 1.3 1.5 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.5
## [393] 0.6 0.4 0.0 0.0 0.0 1.4 0.5 0.6 0.0 0.9 0.2 1.6 1.8 0.2
## [407] 1.0 0.2 0.0 NA 1.4 0.6 0.0 0.0 0.2 0.0 NA 1.7 3.5 0.0
## [421] 5.7 0.2 1.2 0.2 0.0 2.7 0.0 NA 0.6 0.0 0.0 0.0 1.6 0.3
## [435] 0.0 0.0 0.0 2.0 NA 0.0 0.0 1.5 0.3 1.3 0.0 0.0 1.1 1.1
## [449] 0.2 0.3 0.6 0.9 1.2 3.4 2.4 0.9 0.1 0.2 0.3 0.2 0.0 0.0
## [463] 1.5 0.0 0.6 1.1 1.5 0.2 0.0 0.0 0.5 0.0 0.8 0.0 0.0 0.0
## [477] 0.0 0.0 0.0 0.0 3.1 0.0 0.6 0.2 0.8 0.6 1.1 NA 1.9 1.7
## [491] 0.7 NA 50.5 NA NA NA 7.0 1.4 0.0 6.2 NA 0.0 1.2 0.4
## [505] NA NA NA NA NA 0.3 NA NA 0.1 NA 7.4 0.0 NA NA
## [519] 0.0 0.0 NA NA NA NA 5.2 3.0 2.1 1.4 1.5 2.1 2.4 1.5
## [533] 4.0 1.1 0.9 1.9 1.0 2.7 3.9 3.0 0.5 2.7 0.4 NA 4.3 0.3
## [547] 3.9 0.0 0.0 0.0 0.0 0.6 0.0 0.0 16.7 NA 1.8 4.2 0.8 0.6
## [561] 2.1 0.0 1.1 0.0 0.0 0.0 0.0 0.0 NA 0.0 NA 0.5 0.6 0.0
## [575] NA 1.1 5.5 2.6 2.1 0.1 2.0 1.5 0.9 4.8 0.2 5.5 3.1 3.5
## [589] 1.8 1.7 0.0 0.0 0.0 0.2 1.6 3.9 NA 0.0 1.1 0.0 0.0 0.0
## [603] 0.0 0.6 0.0 0.0 0.0 0.0 -1.0 1.2 0.7 0.0 3.6 2.1 0.0 0.0
## [617] 0.0 0.0 0.0 0.3 1.6 0.0 2.1 0.0 1.4 0.0 0.9 0.0 4.4 11.8
## [631] 0.0 0.0 0.6 1.5 0.6 1.1 0.0 0.0 0.0 0.6 0.6 2.2 0.5 0.0
## [645] 0.0 0.0 0.0 0.0 0.4 0.9 0.3 3.5 0.7 0.7 0.2 0.0 0.0 0.0
## [659] 8.4 NA 0.0 1.3 3.9 1.2 6.5 0.0 1.5 0.9 3.2 0.6 0.3 0.9
## [673] NA 0.3 1.6 1.3 0.0 0.3 0.6 0.0 1.6 0.7 1.3 1.5 0.0 0.0
## [687] 0.5 15.7 2.3 1.4 0.0 0.3 0.9 0.2 1.6 0.5 2.3 1.6 18.8 0.2
## [701] 0.0 NA 0.0 0.0 NA 0.1 0.0 NA 0.0 0.0 0.6 8.5 NA 0.0
## [715] 0.0 0.0 0.0 0.0 0.3 0.1 1.9 2.3 0.0 0.5 0.0 0.0 0.0 0.1
## [729] 0.0 2.8 0.8 0.0 0.0 NA NA NA 1.2 1.5 4.8 0.0 0.0 0.0
## [743] 0.0 1.7 0.3 1.3 NA 0.0 0.0 2.4 1.1 0.0 0.2 3.5 1.1 0.0
## [757] 0.0 0.6 0.0 0.0 NA NA 1.4 NA 3.1 0.0 0.5 0.5 0.3 0.6
## [771] 0.0 0.0 2.4 0.6 0.0 0.0 0.0 1.1 0.3 0.0 0.0 0.0 0.8 2.1
## [785] NA 1.3 0.0 1.5 0.0 0.0 0.0 NA 0.0 0.5 0.0 0.2 1.2 0.0
## [799] 1.3 0.7 0.0 0.8 4.6 0.5 0.5 1.3 0.0 2.9 0.0 0.0 NA 1.6
## [813] 0.0 0.5 0.0 1.5 1.3 0.1 1.1 NA 3.1 0.0 0.6 NA 0.6 0.0
## [827] 0.0 0.0 0.6 0.0 0.0 0.8 2.3 0.0 0.6 3.2 0.0 NA 0.0 0.0
## [841] 1.1 NA 0.5 0.5 1.0 1.3 1.3 0.9 0.0 0.0 0.0 0.8 0.0 NA
## [855] 0.9 0.0 0.0 1.3 0.4 0.0 0.0 NA 0.0 1.0 5.5 0.0 0.8 0.0
## [869] 1.2 0.7 5.4 0.0 1.8 0.6 2.2 0.0 0.0 NA NA NA 0.0 0.7
## [883] 0.8 1.9 NA 0.0 0.0 0.0 0.1 1.4 3.1 2.3 0.4 0.0 0.0 0.0
## [897] 0.0 0.6 0.2 4.1 2.2 0.8 0.0 0.0 1.2 0.0 0.0 NA 0.0 1.0
## [911] 1.8 0.0 0.0 0.0 2.0 0.7 0.3 0.3 0.2 0.0 NA 0.0 0.0 1.0
## [925] 2.0 0.0 2.1 1.3 0.0 0.0 0.8 0.0 1.9 0.0 1.7 1.1 0.0 0.2
## [939] 0.0 1.7 0.2 0.6 2.6 9.6 2.2 0.9 0.0 1.5 0.7 2.0 0.0 0.0
## [953] 0.0 0.0 0.0 0.7 3.8 0.3 2.4 0.0 0.2 0.6 0.7 0.0 0.0 0.0
## [967] 0.8 0.0 0.0 2.4 0.0 0.0 0.5 0.0 1.4 0.0 4.4 0.0 2.8 1.0
## [981] 0.7 2.0 0.0 1.5 0.0 0.6 4.5 0.5 0.0 2.0 0.0 0.0 0.1 0.0
## [995] 0.0 0.0 0.0 0.5 0.0 0.0 0.0 NA NA 0.0 1.3 0.0 0.0 0.1
## [1009] 0.5 0.6 0.2 1.0 0.3 0.6 1.4 0.0 0.5 0.0 2.9 0.6 0.0 1.4
## [1023] 0.0 0.0 1.3 2.2 0.0 NA 0.5 0.9 NA NA NA 0.3 0.0 3.9
## [1037] 2.0 2.3 4.3 0.4 1.4 1.2 3.6 2.4 0.4 0.3 0.5 1.7 1.1 0.0
## [1051] 3.2 1.2 1.3 0.3 0.0 0.0 1.7 0.0 0.4 0.0 0.0 0.0 0.0 0.1
## [1065] 0.0 0.0 0.7 0.0 1.7 0.0 0.3 0.0 1.4 5.1 0.7 1.9 0.0 5.6
## [1079] 1.7 1.2 0.0 NA 1.3 NA 0.4 1.5 NA 41.8 0.0 1.2 1.3 2.6
## [1093] 0.4 0.9 0.7 0.6 1.6 1.0 0.0 0.0 0.7 2.0 0.5 5.1 0.0 0.0
## [1107] 3.0 1.0 0.0 0.0 0.6 0.8 1.3 0.0 1.0 3.4 3.5 0.0 2.5 0.0
## [1121] 10.2 0.0 0.0 0.0 0.0 0.1 1.6 0.0 4.2 0.4 NA 4.2 NA 1.3
## [1135] 2.5 3.3 1.6 0.3 1.3 0.0 1.8 0.0 0.0 3.1 1.2 2.4 0.3 0.0
## [1149] 1.2 1.3 0.0 0.0 0.7 0.0 0.0 1.4 7.8 0.5 0.2 0.6 0.0 0.8
## [1163] 1.4 0.0 0.8 0.7 1.3 0.0 NA 1.9 0.9 0.0 1.1 1.8 1.4 0.8
## [1177] 1.3 0.0 0.2 NA 2.5 0.4 0.4 0.4 0.0 0.0 0.0 0.7 3.1 1.2
## [1191] 0.8 0.0 0.0 0.5 1.4 0.0 2.3 0.8 0.0 1.4 0.0 0.4 0.0 0.5
## [1205] 1.7 2.0 2.0
# Question 2: describe variable
# Descriptive statistics (counts, range, centre, spread) for the dropout rate.
district %>%
  pull(DA0912DR21R) %>%
  pastecs::stat.desc()
## nbr.val nbr.null nbr.na min max range
## 1095.0000000 421.0000000 112.0000000 -1.0000000 50.5000000 51.5000000
## sum median mean SE.mean CI.mean.0.95 var
## 1361.3000000 0.4000000 1.2431963 0.1003733 0.1969460 11.0319075
## std.dev coef.var
## 3.3214315 2.6716870
# Question 3: remove NAs
# Five-number summary first; it also reports how many values are NA.
summary(district[["DA0912DR21R"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -1.000 0.000 0.400 1.243 1.400 50.500 112
# Keep only the rows that have a dropout rate recorded.
district <- drop_na(district, DA0912DR21R)
# Double check: the summary should no longer list any NA's.
summary(district[["DA0912DR21R"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.000 0.000 0.400 1.243 1.400 50.500
# Question 4: graphs
# Histogram of the raw (untransformed) dropout rate. No title or axis label is
# supplied, so hist() builds both from the argument expression.
hist(district$DA0912DR21R)
# Question 5: transformation
# A plain log() is not appropriate for this variable: it contains many exact
# zeros (log(0) = -Inf) and one negative value (log(-1) = NaN, which is what
# produced the "NaNs produced" warning). A negative dropout rate is not
# meaningful -- presumably a masking/sentinel code; confirm against the data
# dictionary -- so negative values are set to NA before transforming.
# log1p(x) = log(1 + x) maps a rate of 0 to 0 instead of -Inf, so districts
# with no dropouts stay in the data.
district <- district %>%
  mutate(
    DA0912DR21Rlog = log1p(replace(DA0912DR21R, DA0912DR21R < 0, NA_real_))
  )

# Question 6: histogram of the transformed variable
# Plot the stored column rather than recomputing the transformation, so the
# figure always matches DA0912DR21Rlog. hist() drops the NA values itself.
hist(
  district$DA0912DR21Rlog,
  breaks = 10,
  probability = TRUE,
  main = "Histogram of log(1 + dropout rate)",
  xlab = "log(1 + dropout rate)"
)
From the data you have chosen, select a variable that you are interested in. The variable is DA0912DR21R (Dropout Rate).
Use pastecs::stat.desc to describe the variable. Include a few sentences about what the variable is and what it’s measuring.
Dropout Rate is the percentage of students who drop out. It measures the percentage of students who drop out annually from school districts in Texas.
district %>% filter(!is.na(DA0912DR21R))
Provide a histogram of the variable (as shown in this lesson)
Transform the variable using the log transformation or the square root transformation (whichever is more appropriate) using dplyr::mutate or something similar.
Provide a histogram of the transformed variable.
Submit via RPubs on Canvas.