Load in Data

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.0     ✔ readr     2.1.6
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
# Load district.xls through an explicit path instead of calling setwd(), so
# reading the data does not change the session's working directory as a side
# effect. path.expand() resolves the leading "~" to the user's home directory.
data_dir <- path.expand("~/Desktop/Monday Class")
district <- read_excel(file.path(data_dir, "district.xls"))

# Question 1: variable

# Print every value of the chosen column.
district[["DA0912DR21R"]] # Dropout Rate
##    [1]  0.0  0.3  0.4  0.0  0.0  0.0  0.0  0.4  0.4  0.7  2.2  0.2  0.7  2.2
##   [15]  0.5  1.1  0.6  0.0  0.0  0.0  0.8  0.4  2.8  2.2  4.1  0.0  0.7  0.0
##   [29]  0.0  0.0  4.4  0.3  2.0  1.6  0.0  1.1   NA  3.5   NA  0.9  2.6 30.8
##   [43]  1.1  0.0  1.0  0.0  0.4  0.0  2.8  0.3  0.1  2.6  0.0 22.1  0.0  0.0
##   [57]  2.1  4.1  0.6   NA 22.6  0.0  2.0  0.0  0.0  0.4  0.4  0.8  4.7  0.0
##   [71]  0.8   NA  5.0   NA   NA   NA   NA   NA  0.3  3.5  0.5  0.0  3.6  5.2
##   [85]  2.2  2.7  3.4  1.1  0.0  0.0  1.4  3.8  2.6  0.0  0.5  0.0  0.7  0.8
##   [99]  0.0  0.5  0.0  0.0  7.4  0.0  0.4  0.0  0.0  1.4  0.5  8.2  3.0  0.0
##  [113]   NA   NA  0.3   NA   NA  0.1  0.2  0.7  2.8  0.2  0.0  0.1   NA   NA
##  [127]   NA  1.3  1.8  0.0  0.9  0.0   NA  0.0  2.2  0.7  0.9  0.0  1.0  0.0
##  [141]  0.0  0.0  0.2  1.2  1.3  1.1  0.2  3.6  0.5  7.2  0.4  0.0  0.2  0.0
##  [155]  0.0  0.9  1.8  2.5  1.4  3.4  3.6  2.8  2.7  2.1  0.1  0.0  0.0  0.0
##  [169]  0.0  0.4  0.0  0.0  2.4  0.0  0.0  0.0  0.9  0.0  0.0  0.0  0.1  0.7
##  [183]  0.0  1.0  0.3  0.0  0.0  4.2  1.4  0.7  0.0  0.0  2.7  0.0  0.0  0.0
##  [197]  0.4  0.0  0.0  0.4   NA  0.8  1.2  0.2  0.8  0.4  0.6  0.1  0.8  0.8
##  [211]  0.1  0.4  0.3  0.2  0.0  0.0  1.0  2.5  0.0  0.0  1.2  0.7  0.5  0.0
##  [225]  0.0  0.0  0.0  0.0  0.1  0.0  0.0  0.3  0.0  0.0   NA   NA 16.8  2.1
##  [239]  0.0  0.0  1.9  0.0  0.9  0.0  0.0  0.0  0.7  1.0  0.0  0.0  0.0  1.1
##  [253] 31.8   NA  2.8  0.3  0.5   NA   NA   NA 40.9   NA  0.0   NA 11.8  0.2
##  [267]  1.3  0.0   NA  8.6  0.0   NA   NA  0.0   NA  1.0  0.0  1.2  0.0  0.1
##  [281]  0.0   NA  1.8  2.8  4.5  0.9  0.4  0.9  2.5  0.0  1.8  0.1  1.2  2.7
##  [295]  0.2  0.2  0.0  0.0  0.5  0.0  0.7   NA  1.6  0.0   NA  0.0  0.0  0.3
##  [309]  1.0  0.8  1.0  0.4  0.4  1.7  0.0  0.9  0.0  1.3  0.1  0.0  0.2  0.0
##  [323]   NA   NA  0.0  0.0  5.4  0.7  0.0   NA  2.0  2.8  0.9  0.0  0.0  1.1
##  [337]  3.4  0.0  0.0  0.5  2.1  0.0  0.0  2.1  0.0  0.9  0.9  1.5  1.0  0.0
##  [351]  0.5  2.3  1.1  1.8  0.0  1.0 15.1  0.4   NA   NA   NA  1.1  3.1  4.4
##  [365]  2.6  1.8  1.6  1.6  2.1  1.7  4.9 13.3  0.0  0.3  0.5  0.0  0.0  0.0
##  [379]   NA  0.0  0.0   NA  1.3  1.5  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.5
##  [393]  0.6  0.4  0.0  0.0  0.0  1.4  0.5  0.6  0.0  0.9  0.2  1.6  1.8  0.2
##  [407]  1.0  0.2  0.0   NA  1.4  0.6  0.0  0.0  0.2  0.0   NA  1.7  3.5  0.0
##  [421]  5.7  0.2  1.2  0.2  0.0  2.7  0.0   NA  0.6  0.0  0.0  0.0  1.6  0.3
##  [435]  0.0  0.0  0.0  2.0   NA  0.0  0.0  1.5  0.3  1.3  0.0  0.0  1.1  1.1
##  [449]  0.2  0.3  0.6  0.9  1.2  3.4  2.4  0.9  0.1  0.2  0.3  0.2  0.0  0.0
##  [463]  1.5  0.0  0.6  1.1  1.5  0.2  0.0  0.0  0.5  0.0  0.8  0.0  0.0  0.0
##  [477]  0.0  0.0  0.0  0.0  3.1  0.0  0.6  0.2  0.8  0.6  1.1   NA  1.9  1.7
##  [491]  0.7   NA 50.5   NA   NA   NA  7.0  1.4  0.0  6.2   NA  0.0  1.2  0.4
##  [505]   NA   NA   NA   NA   NA  0.3   NA   NA  0.1   NA  7.4  0.0   NA   NA
##  [519]  0.0  0.0   NA   NA   NA   NA  5.2  3.0  2.1  1.4  1.5  2.1  2.4  1.5
##  [533]  4.0  1.1  0.9  1.9  1.0  2.7  3.9  3.0  0.5  2.7  0.4   NA  4.3  0.3
##  [547]  3.9  0.0  0.0  0.0  0.0  0.6  0.0  0.0 16.7   NA  1.8  4.2  0.8  0.6
##  [561]  2.1  0.0  1.1  0.0  0.0  0.0  0.0  0.0   NA  0.0   NA  0.5  0.6  0.0
##  [575]   NA  1.1  5.5  2.6  2.1  0.1  2.0  1.5  0.9  4.8  0.2  5.5  3.1  3.5
##  [589]  1.8  1.7  0.0  0.0  0.0  0.2  1.6  3.9   NA  0.0  1.1  0.0  0.0  0.0
##  [603]  0.0  0.6  0.0  0.0  0.0  0.0 -1.0  1.2  0.7  0.0  3.6  2.1  0.0  0.0
##  [617]  0.0  0.0  0.0  0.3  1.6  0.0  2.1  0.0  1.4  0.0  0.9  0.0  4.4 11.8
##  [631]  0.0  0.0  0.6  1.5  0.6  1.1  0.0  0.0  0.0  0.6  0.6  2.2  0.5  0.0
##  [645]  0.0  0.0  0.0  0.0  0.4  0.9  0.3  3.5  0.7  0.7  0.2  0.0  0.0  0.0
##  [659]  8.4   NA  0.0  1.3  3.9  1.2  6.5  0.0  1.5  0.9  3.2  0.6  0.3  0.9
##  [673]   NA  0.3  1.6  1.3  0.0  0.3  0.6  0.0  1.6  0.7  1.3  1.5  0.0  0.0
##  [687]  0.5 15.7  2.3  1.4  0.0  0.3  0.9  0.2  1.6  0.5  2.3  1.6 18.8  0.2
##  [701]  0.0   NA  0.0  0.0   NA  0.1  0.0   NA  0.0  0.0  0.6  8.5   NA  0.0
##  [715]  0.0  0.0  0.0  0.0  0.3  0.1  1.9  2.3  0.0  0.5  0.0  0.0  0.0  0.1
##  [729]  0.0  2.8  0.8  0.0  0.0   NA   NA   NA  1.2  1.5  4.8  0.0  0.0  0.0
##  [743]  0.0  1.7  0.3  1.3   NA  0.0  0.0  2.4  1.1  0.0  0.2  3.5  1.1  0.0
##  [757]  0.0  0.6  0.0  0.0   NA   NA  1.4   NA  3.1  0.0  0.5  0.5  0.3  0.6
##  [771]  0.0  0.0  2.4  0.6  0.0  0.0  0.0  1.1  0.3  0.0  0.0  0.0  0.8  2.1
##  [785]   NA  1.3  0.0  1.5  0.0  0.0  0.0   NA  0.0  0.5  0.0  0.2  1.2  0.0
##  [799]  1.3  0.7  0.0  0.8  4.6  0.5  0.5  1.3  0.0  2.9  0.0  0.0   NA  1.6
##  [813]  0.0  0.5  0.0  1.5  1.3  0.1  1.1   NA  3.1  0.0  0.6   NA  0.6  0.0
##  [827]  0.0  0.0  0.6  0.0  0.0  0.8  2.3  0.0  0.6  3.2  0.0   NA  0.0  0.0
##  [841]  1.1   NA  0.5  0.5  1.0  1.3  1.3  0.9  0.0  0.0  0.0  0.8  0.0   NA
##  [855]  0.9  0.0  0.0  1.3  0.4  0.0  0.0   NA  0.0  1.0  5.5  0.0  0.8  0.0
##  [869]  1.2  0.7  5.4  0.0  1.8  0.6  2.2  0.0  0.0   NA   NA   NA  0.0  0.7
##  [883]  0.8  1.9   NA  0.0  0.0  0.0  0.1  1.4  3.1  2.3  0.4  0.0  0.0  0.0
##  [897]  0.0  0.6  0.2  4.1  2.2  0.8  0.0  0.0  1.2  0.0  0.0   NA  0.0  1.0
##  [911]  1.8  0.0  0.0  0.0  2.0  0.7  0.3  0.3  0.2  0.0   NA  0.0  0.0  1.0
##  [925]  2.0  0.0  2.1  1.3  0.0  0.0  0.8  0.0  1.9  0.0  1.7  1.1  0.0  0.2
##  [939]  0.0  1.7  0.2  0.6  2.6  9.6  2.2  0.9  0.0  1.5  0.7  2.0  0.0  0.0
##  [953]  0.0  0.0  0.0  0.7  3.8  0.3  2.4  0.0  0.2  0.6  0.7  0.0  0.0  0.0
##  [967]  0.8  0.0  0.0  2.4  0.0  0.0  0.5  0.0  1.4  0.0  4.4  0.0  2.8  1.0
##  [981]  0.7  2.0  0.0  1.5  0.0  0.6  4.5  0.5  0.0  2.0  0.0  0.0  0.1  0.0
##  [995]  0.0  0.0  0.0  0.5  0.0  0.0  0.0   NA   NA  0.0  1.3  0.0  0.0  0.1
## [1009]  0.5  0.6  0.2  1.0  0.3  0.6  1.4  0.0  0.5  0.0  2.9  0.6  0.0  1.4
## [1023]  0.0  0.0  1.3  2.2  0.0   NA  0.5  0.9   NA   NA   NA  0.3  0.0  3.9
## [1037]  2.0  2.3  4.3  0.4  1.4  1.2  3.6  2.4  0.4  0.3  0.5  1.7  1.1  0.0
## [1051]  3.2  1.2  1.3  0.3  0.0  0.0  1.7  0.0  0.4  0.0  0.0  0.0  0.0  0.1
## [1065]  0.0  0.0  0.7  0.0  1.7  0.0  0.3  0.0  1.4  5.1  0.7  1.9  0.0  5.6
## [1079]  1.7  1.2  0.0   NA  1.3   NA  0.4  1.5   NA 41.8  0.0  1.2  1.3  2.6
## [1093]  0.4  0.9  0.7  0.6  1.6  1.0  0.0  0.0  0.7  2.0  0.5  5.1  0.0  0.0
## [1107]  3.0  1.0  0.0  0.0  0.6  0.8  1.3  0.0  1.0  3.4  3.5  0.0  2.5  0.0
## [1121] 10.2  0.0  0.0  0.0  0.0  0.1  1.6  0.0  4.2  0.4   NA  4.2   NA  1.3
## [1135]  2.5  3.3  1.6  0.3  1.3  0.0  1.8  0.0  0.0  3.1  1.2  2.4  0.3  0.0
## [1149]  1.2  1.3  0.0  0.0  0.7  0.0  0.0  1.4  7.8  0.5  0.2  0.6  0.0  0.8
## [1163]  1.4  0.0  0.8  0.7  1.3  0.0   NA  1.9  0.9  0.0  1.1  1.8  1.4  0.8
## [1177]  1.3  0.0  0.2   NA  2.5  0.4  0.4  0.4  0.0  0.0  0.0  0.7  3.1  1.2
## [1191]  0.8  0.0  0.0  0.5  1.4  0.0  2.3  0.8  0.0  1.4  0.0  0.4  0.0  0.5
## [1205]  1.7  2.0  2.0
# Question 2: describe variable
# Descriptive statistics (counts, range, centre, spread) for the dropout rate.
district[["DA0912DR21R"]] %>%
  pastecs::stat.desc()
##      nbr.val     nbr.null       nbr.na          min          max        range 
## 1095.0000000  421.0000000  112.0000000   -1.0000000   50.5000000   51.5000000 
##          sum       median         mean      SE.mean CI.mean.0.95          var 
## 1361.3000000    0.4000000    1.2431963    0.1003733    0.1969460   11.0319075 
##      std.dev     coef.var 
##    3.3214315    2.6716870
# Question 3: remove NAs
# Summarise first so the number of missing values is visible.
summary(district[["DA0912DR21R"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  -1.000   0.000   0.400   1.243   1.400  50.500     112
# Keep only the rows that have a recorded dropout rate.
district <- filter(district, !is.na(DA0912DR21R))

# Re-run the summary to confirm the NA count is gone.
summary(district[["DA0912DR21R"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  -1.000   0.000   0.400   1.243   1.400  50.500
# Question 4: graphs
# Histogram of the untransformed dropout rate. The title and x-axis label are
# spelled out so they do not depend on how the column is extracted.
hist(
  x = district[["DA0912DR21R"]],
  main = "Histogram of district$DA0912DR21R",
  xlab = "district$DA0912DR21R"
)

# Question 5: Logs
# The dropout rate has 421 exact zeros and a minimum of -1 (see stat.desc
# above), so a plain log() returns -Inf for the zeros and NaN for the negative
# entry. log1p(), i.e. log(1 + x), is defined at zero and keeps those districts.
# A negative rate is not a valid percentage, so it is set to NA before
# transforming.
# NOTE(review): the -1 looks like a masking/sentinel code -- confirm against
# the data dictionary.
district <- district %>%
  mutate(
    DA0912DR21Rlog = log1p(
      if_else(DA0912DR21R < 0, NA_real_, DA0912DR21R)
    )
  )
# Question 6: log histogram

# hist() discards non-finite values, so plotting log() of the rate would
# silently drop every district with a 0% dropout rate (log(0) is -Inf) and warn
# about the negative entry. log1p() keeps the zeros in the plot; negative rates
# are excluded because they are not valid percentages.
hist(
  log1p(district$DA0912DR21R[district$DA0912DR21R >= 0]),
  breaks = 10,
  probability = TRUE,
  main = "Histogram of log(1 + dropout rate)",
  xlab = "log1p(DA0912DR21R)"
)

HOMEWORK

  1. From the data you have chosen, select a variable that you are interested in. The variable is DA0912DR21R (Dropout Rate).

  2. Use pastecs::stat.desc to describe the variable. Include a few sentences about what the variable is and what it’s measuring.

Dropout Rate is the percentage of students who drop out. It measures the percentage of students who drop out annually from school districts in Texas.

  3. Remove NA’s if needed using dplyr::filter (or anything similar)

district %>% filter(!is.na(DA0912DR21R))

  4. Provide a histogram of the variable (as shown in this lesson)

  5. Transform the variable using the log transformation or square root transformation (whatever is more appropriate) using dplyr::mutate or something similar

  6. Provide a histogram of the transformed variable

  7. Submit via RPubs on Canvas