#' ---
#' title: "Homework 4"
#' author: "Ylonda Ambrose"
#' date: "2026-05-10"
#' output: html_document
#' ---
# Setup: packages ----
# dplyr and tidyr are attached before pastecs, matching the masking order
# recorded in the attach message below.
library(readxl)   # read_excel()
library(dplyr)    # %>%, select(), mutate()
library(tidyr)    # drop_na()
library(pastecs)  # stat.desc()
##
## Attaching package: ‘pastecs’
##
## The following objects are masked from ‘package:dplyr’:
##
##     first, last
##
## The following object is masked from ‘package:tidyr’:
##
##     extract

# Load data ----
# The workbook is addressed by its full path rather than via setwd(), so the
# session's working directory is left untouched, and it is read only once.
data_dir <- file.path(
  "C:/Users/miche/OneDrive/Desktop",
  "My Class Stuff/Wednesday Class/Data Diabetes"
)
Diabetes_Data <- read_excel(file.path(data_dir, "Diabetes Data.xlsx"))

# Clean ----
# Keep only the two analysis columns and drop every row that is missing
# either of them.
Cleaned_Diabetes_Data <- Diabetes_Data %>%
  select(Diagnosed, SNAP) %>%
  drop_na()

# Describe Diagnosed ----
# norm = TRUE adds the skewness, kurtosis and normality-test entries.
pastecs::stat.desc(Cleaned_Diabetes_Data$Diagnosed, norm = TRUE)
##       nbr.val      nbr.null        nbr.na           min           max
##  3.710000e+02  0.000000e+00  0.000000e+00  2.500000e+00  2.950000e+01
##         range           sum        median          mean       SE.mean
##  2.700000e+01  6.106900e+03  1.530000e+01  1.646065e+01  2.512205e-01
##  CI.mean.0.95           var       std.dev      coef.var      skewness
##  4.939989e-01  2.341445e+01  4.838848e+00  2.939646e-01  2.808935e-01
##      skew.2SE      kurtosis      kurt.2SE    normtest.W    normtest.p
##  1.108840e+00 -5.697677e-01 -1.127562e+00  9.700359e-01  6.371567e-07

summary(Cleaned_Diabetes_Data$Diagnosed)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
##    2.50   12.70   15.30   16.46   20.15   29.50

#' Observation: The variable is how many SNAP recipients have been diagnosed
#' with diabetes. The summary shows that mean prevalence is about 16.5%, with
#' values ranging from 2.5% to nearly 30%. Skewness is slightly positive,
#' meaning more census tracts cluster on the lower-to-mid end, with fewer
#' extreme high-prevalence tracts.

# NOTE(review): the statistics above are computed on the Diagnosed column for
# every cleaned row, with no filtering on SNAP -- confirm that the first
# sentence of the observation describes the variable correctly.
# Histogram of the SNAP column of the cleaned data on its original scale,
# using hist()'s default breaks, title and axis label.
hist(Cleaned_Diabetes_Data$SNAP)
# Add SNAP_log, the natural log of SNAP, as a new column and plot its
# distribution.
# NOTE(review): log() yields -Inf for SNAP == 0 and NaN for negative values;
# confirm SNAP is strictly positive before interpreting this histogram.
UpdatedData <- Cleaned_Diabetes_Data %>%
  mutate(SNAP_log = log(SNAP))

hist(UpdatedData$SNAP_log)