# Load the NYSES class data set; the relative path is resolved against the
# current working directory.

nyses <- read.csv(file = "NYSES data for class.csv", header = TRUE)

# Keep only the categorical variables: the seven columns that remain after
# dropping column 1 and the two continuous columns (medinc, pbelowpv).
# Selecting by name instead of by position keeps the subset correct if the
# column order of the CSV ever changes.

subdf_ca <- nyses[, c(
  "cd", "agecat", "racecat", "edcat", "inc3cat", "binge", "boro"
)]

# Frequency table for each categorical variable.
# lapply() walks the data frame column by column and always returns a named
# list; apply() would first coerce the whole data frame to a matrix and only
# returns a list when the tables happen to differ in length.
# Note: table() leaves out NA by default; add useNA = "ifany" to count them.

lapply(subdf_ca, table)
## $cd
## 
## 101 102 103 104 105 106 107 108 109 110 111 112 201 202 203 204 205 206 
##  41  22  30  64  55  30  64  53  84  56  48  61  66  69  83  50  84  75 
## 207 208 209 210 211 212 213 214 215 216 217 218 301 302 303 304 305 306 
##  57  52  49  50  64  90  43  95  67  49  64  91  19  58  73  62  27  77 
## 307 308 309 310 311 312 401 402 403 404 405 406 407 408 409 410 411 412 
## 131 144  53  56  52  94 113  44  74  61  82  73 106  77  78  66  57 136 
## 413 414 501 502 503 
## 100  67  74  67  73 
## 
## $agecat
## 
##   1   2   3   4   5   6 
## 350 685 815 808 612 690 
## 
## $racecat
## 
##    1    2    3    4    5 
## 1616 1055  164  958   95 
## 
## $edcat
## 
##   1   2   3   4   5 
## 508 923 879 883 730 
## 
## $inc3cat
## 
##    1    2    3 
## 1605 1093  722 
## 
## $binge
## 
##    0    1 
## 3562  438 
## 
## $boro
## 
##    1    2    3    4    5 
##  608 1198  846 1134  214

#Of the respondents, 438 reported binge drinking, compared to 3,562 who did not. The largest share of respondents (1,198 of 4,000) came from Brooklyn, and the fewest came from Staten Island. The largest age group among respondents was 35-44 year olds.

# Keep only the two continuous variables, selected by name rather than by
# position (they are columns 9 and 10 of nyses).

subdf_c <- nyses[, c("medinc", "pbelowpv")]

# Summary statistics (min, quartiles, mean, max) for each continuous variable,
# returned as a matrix with one column per variable.
# vapply() iterates over the columns directly instead of coercing the data
# frame to a matrix as apply() does, and numeric(6) pins the result to the six
# statistics summary() returns for a numeric vector without NAs. A column
# containing NA adds an "NA's" entry and fails here instead of silently
# changing the shape of the result.

vapply(subdf_c, summary, FUN.VALUE = numeric(6))
##           medinc  pbelowpv
## Min.    16000.00  4.900159
## 1st Qu. 28780.00 11.404857
## Median  38965.00 19.114095
## Mean    40379.44 20.756889
## 3rd Qu. 48085.00 29.077967
## Max.    79475.00 45.665443

#The mean neighborhood median income is 40,379, and the mean neighborhood percentage below poverty is 21%. Neighborhood median income ranges from a minimum of 16k to a maximum of 79k. The neighborhood percentage below poverty ranges from a minimum of 4.9% to a maximum of 45.7%.

# Spread of neighborhood median income: standard deviation, then a boxplot.

income <- nyses[["medinc"]]
sd(income)
## [1] 15000.96
boxplot(income)

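#The median income appears to be approximately normally distributed, with the exception of one outlier around the $79k mark. A boxplot only shows symmetry and outliers, so a histogram or Q-Q plot would be needed to confirm normality.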

# Spread of the neighborhood percentage below poverty: standard deviation,
# then a boxplot.

bp <- nyses[["pbelowpv"]]
sd(bp)
## [1] 10.71602
boxplot(bp)

#The percent of neighborhood below poverty also appears to be normally distributed.