#Read the NYSES class dataset (CSV, path relative to the working directory) into a data frame
nyses <- read.csv(file = "NYSES data for class.csv")
#Keep only the categorical variables for the descriptive statistics.
#Columns are selected by name rather than by position so the subset does not
#silently change if the CSV columns are reordered or extended.
subdf_ca <- nyses[, c(
  "cd", "agecat", "racecat", "edcat", "inc3cat", "binge", "boro"
)]
#The frequency for each of the categorical variables.
#lapply() iterates over the data frame's columns directly, whereas apply()
#would first coerce the whole data frame to a matrix.
#Note: table() omits NA by default, so per-variable totals can differ when
#values are missing; pass useNA = "ifany" to also show the missing counts.
lapply(subdf_ca, table)
## $cd
##
## 101 102 103 104 105 106 107 108 109 110 111 112 201 202 203 204 205 206
## 41 22 30 64 55 30 64 53 84 56 48 61 66 69 83 50 84 75
## 207 208 209 210 211 212 213 214 215 216 217 218 301 302 303 304 305 306
## 57 52 49 50 64 90 43 95 67 49 64 91 19 58 73 62 27 77
## 307 308 309 310 311 312 401 402 403 404 405 406 407 408 409 410 411 412
## 131 144 53 56 52 94 113 44 74 61 82 73 106 77 78 66 57 136
## 413 414 501 502 503
## 100 67 74 67 73
##
## $agecat
##
## 1 2 3 4 5 6
## 350 685 815 808 612 690
##
## $racecat
##
## 1 2 3 4 5
## 1616 1055 164 958 95
##
## $edcat
##
## 1 2 3 4 5
## 508 923 879 883 730
##
## $inc3cat
##
## 1 2 3
## 1605 1093 722
##
## $binge
##
## 0 1
## 3562 438
##
## $boro
##
## 1 2 3 4 5
## 608 1198 846 1134 214
#Of the respondents, 438 reported binge drinking, compared to 3,562 who reported no binge drinking. The largest share of respondents was from Brooklyn (1,198), and the least represented borough was Staten Island (214). The largest age group among respondents was 35-44 year olds (815).
#Keep only the continuous variables (medinc, pbelowpv), selected by name
#rather than by column position.
subdf_c <- nyses[, c("medinc", "pbelowpv")]
#this is the summary data for continuous variables
#vapply() walks the columns without coercing the data frame to a matrix and
#pins the result shape: one column per variable, six rows (Min., 1st Qu.,
#Median, Mean, 3rd Qu., Max.). It stops with an error if a summary is not six
#numbers long, e.g. when a column contains NA and summary() adds an NA count.
vapply(subdf_c, summary, FUN.VALUE = numeric(6))
## medinc pbelowpv
## Min. 16000.00 4.900159
## 1st Qu. 28780.00 11.404857
## Median 38965.00 19.114095
## Mean 40379.44 20.756889
## 3rd Qu. 48085.00 29.077967
## Max. 79475.00 45.665443
#The mean neighborhood median income is $40,379 and the mean neighborhood percentage below poverty is 20.8%. Neighborhood median income ranges from $16,000 to $79,475. The neighborhood percentage below poverty ranges from 4.9% to 45.7%.
#Spread of neighborhood median income: standard deviation, then a boxplot
income <- nyses[["medinc"]]
sd(income)
## [1] 15000.96
boxplot(income)
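#Neighborhood median income appears to be approximately normally distributed, with the exception of one outlier around the $79k mark. Note that a boxplot only shows symmetry and outliers, so a histogram or normal Q-Q plot would be needed to confirm normality.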
#Spread of the neighborhood percentage below poverty: standard deviation, then a boxplot
bp <- nyses[["pbelowpv"]]
sd(bp)
## [1] 10.71602
boxplot(bp)
#The percentage of the neighborhood below poverty also appears to be approximately normally distributed, with no outliers (again judged only from the symmetry of the boxplot).