library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
First, set your working directory to the folder that contains your data file and your R script.
# Make the data folder the working directory. This absolute path is
# machine-specific; replace it with the folder that holds your own files.
setwd("/Users/se776257/OneDrive - University of Central Florida/Desktop/Prof. An/02 Teaching/2024 Spring/PAD 7754 Quantitative Methods/Class Materials/R data/")
To check what your current working directory is:
# Print the path of the current working directory.
getwd()
## [1] "C:/Users/se776257/OneDrive - University of Central Florida/Desktop/Prof. An/02 Teaching/2024 Spring/PAD 7754 Quantitative Methods/R script"
Import data from your working folder. Make sure that you have the
dataset in your working directory. I will import 2022 Housing Inventory
Count (HIC) data.
* You may need to install packages to import specific types of files.
For instance, to import an xlsx file, you need to load the “readxl” package
and use read_excel("filename.xlsx").
# Read the 2022 HIC file from the working directory, keeping text columns as
# character vectors. FALSE is spelled out because TRUE/FALSE are reserved
# words, whereas T/F are ordinary variables that can be reassigned.
db <- read.csv("2022_HIC.csv", stringsAsFactors = FALSE)
To describe a dataset and get information on the number of cases and variables
# Compactly display the structure of db: row/column counts plus each
# column's type and first few values.
str(db)
## 'data.frame': 29496 obs. of 103 variables:
## $ Row.. : int 539061 539070 539085 539119 539111 567738 545466 545469 539592 539593 ...
## $ CocState : chr "OH" "OH" "OH" "OH" ...
## $ CoC : chr "Akron, Barberton/Summit County CoC" "Akron, Barberton/Summit County CoC" "Akron, Barberton/Summit County CoC" "Akron, Barberton/Summit County CoC" ...
## $ Coc.ID : int 1350 1350 1350 1350 1350 1350 1380 1380 1380 1380 ...
## $ HudNum : chr "OH-506" "OH-506" "OH-506" "OH-506" ...
## $ Status : chr "Submitted" "Submitted" "Submitted" "Submitted" ...
## $ year : int 2022 2022 2022 2022 2022 2022 2022 2022 2022 2022 ...
## $ Organization.ID : int 495 495 495 45277 6182 6182 45629 45629 45477 45477 ...
## $ Organization.Name : chr "Battered Women's Shelter" "Battered Women's Shelter" "Battered Women's Shelter" "CoC Direct Services" ...
## $ HMIS.Org.ID : chr "194" "194" "194" "222" ...
## $ useHmisDb : chr "Yes" "Yes" "Yes" "" ...
## $ Project.ID : int 3313 11648 26680 150406 138239 157898 153524 153526 151415 151416 ...
## $ Project.Name : chr "Crisis Center" "Step II" "Step III" "CoC ESG COVID Hotel Assistance" ...
## $ HMIS.Project.ID : chr "195" "196" "197" "223" ...
## $ HIC.Date : chr "1/25/2022 12:00:00 AM" "1/25/2022 12:00:00 AM" "1/25/2022 12:00:00 AM" "1/25/2022 12:00:00 AM" ...
## $ Project.Type : chr "ES" "TH" "RRH" "ES" ...
## $ Bed.Type : chr "F" "" "" "V" ...
## $ Geo.Code : int 390042 390042 390042 390042 399153 399153 19109 19109 19105 19105 ...
## $ HMIS.Participating : int 0 0 0 1 1 1 1 1 1 1 ...
## $ Inventory.Type : chr "C" "C" "C" "C" ...
## $ beginsOperationsWithinYear : int NA NA NA NA NA NA NA NA NA NA ...
## $ Target.Population : chr "DV" "DV" "DV" NA ...
## $ mcKinneyVentoEsg : int 0 0 0 0 0 0 0 0 0 0 ...
## $ mcKinneyVentoEsgEs : int NA NA NA NA NA NA NA NA NA NA ...
## $ mcKinneyVentoEsgRrh : int NA NA NA NA NA NA NA NA NA NA ...
## $ mcKinneyVentoEsgCov : int 1 0 0 1 0 1 1 1 1 1 ...
## $ mcKinneyVentoEsgEsCov : int 1 NA NA 1 NA 0 1 0 1 0 ...
## $ mcKinneyVentoEsgRrhCov : int 0 NA NA 0 NA 1 0 1 0 1 ...
## $ mcKinneyVentoCoc : int 0 0 1 0 1 0 0 0 0 0 ...
## $ mcKinneyVentoCocSh : int NA NA 0 NA 0 NA NA NA NA NA ...
## $ mcKinneyVentoCocTh : int NA NA 0 NA 0 NA NA NA NA NA ...
## $ mcKinneyVentoCocPsh : int NA NA 0 NA 0 NA NA NA NA NA ...
## $ mcKinneyVentoCocRrh : int NA NA 1 NA 1 NA NA NA NA NA ...
## $ mcKinneyVentoCocSro : int NA NA 0 NA 0 NA NA NA NA NA ...
## $ mcKinneyVentoCocThRrh : int NA NA 0 NA 0 NA NA NA NA NA ...
## $ mcKinneyVentoSpC : int 0 0 0 0 0 0 0 0 0 0 ...
## $ mcKinneyVentoS8 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ mcKinneyVentoShp : int 0 0 0 0 0 0 0 0 0 0 ...
## $ mcKinneyVentoYhdp : int 0 0 0 0 0 0 0 0 0 0 ...
## $ mcKinneyVentoYhdpRenewals : int 0 0 0 0 0 0 0 0 0 0 ...
## $ federalFundingVash : int 0 0 0 0 0 0 0 0 0 0 ...
## $ federalFundingSsvf : int 0 0 0 0 0 0 0 0 0 0 ...
## $ federalFundingGpd : int 0 0 0 0 0 0 0 0 0 0 ...
## $ federalFundingGpdBh : int NA NA NA NA NA NA NA NA NA NA ...
## $ federalFundingGpdLd : int NA NA NA NA NA NA NA NA NA NA ...
## $ federalFundingGpdHh : int NA NA NA NA NA NA NA NA NA NA ...
## $ federalFundingGpdCt : int NA NA NA NA NA NA NA NA NA NA ...
## $ federalFundingGpdSith : int NA NA NA NA NA NA NA NA NA NA ...
## $ federalFundingGpdTp : int NA NA NA NA NA NA NA NA NA NA ...
## $ federalFundingHchv : int 0 0 0 0 0 0 0 0 0 0 ...
## $ federalFundingHchvCrs : int NA NA NA NA NA NA NA NA NA NA ...
## $ federalFundingHchvSh : int NA NA NA NA NA NA NA NA NA NA ...
## $ federalFundingBcp : int 0 0 0 0 0 0 0 0 0 0 ...
## $ federalFundingTlp : int 0 0 0 0 0 0 0 0 0 0 ...
## $ federalFundingMgh : int 0 0 0 0 0 0 0 0 0 0 ...
## $ federalFundingRhyDp : int 0 0 0 0 0 0 0 0 0 0 ...
## $ federalFundingHopwa : int 0 0 0 0 0 0 0 0 0 0 ...
## $ federalFundingHopwaHmv : int NA NA NA NA NA NA NA NA NA NA ...
## $ federalFundingHopwaPh : int NA NA NA NA NA NA NA NA NA NA ...
## $ federalFundingHopwaStsf : int NA NA NA NA NA NA NA NA NA NA ...
## $ federalFundingHopwaTh : int NA NA NA NA NA NA NA NA NA NA ...
## $ federalFundingHopwaCovid : int NA NA NA NA NA NA NA NA NA NA ...
## $ federalFundingPih : int 0 0 0 0 0 0 0 0 0 0 ...
## $ federalFundingHome : int 0 0 0 0 0 0 0 0 0 0 ...
## $ federalFundingHomeArp : int 0 0 0 0 0 0 0 0 0 0 ...
## $ federalFundingIndianEhv : int 0 0 0 0 0 0 0 0 0 0 ...
## $ federalFundingOther : int 0 0 0 0 0 0 0 0 0 0 ...
## $ federalFundingOtherSpecify : chr "" "" "" "" ...
## $ housingType : chr "Site-based – single site" "Site-based – single site" "Tenant-based – scattered site" "Tenant-based – scattered site" ...
## $ Victim.Service.Provider : int 1 1 1 0 0 0 0 0 0 0 ...
## $ address1 : chr "" "" "" "" ...
## $ address2 : chr "" "" "" "" ...
## $ city : chr "" "" "" "" ...
## $ state : chr "" "" "" "" ...
## $ zip : int NA NA NA 44311 44278 44278 36079 36079 36786 36786 ...
## $ Beds.HH.w..Children : int 45 40 59 NA 3 6 19 8 11 0 ...
## $ Units.HH.w..Children : int 12 11 19 NA 1 2 5 2 3 0 ...
## $ Veteran.Beds.HH.w..Children : int 0 0 0 NA 0 0 0 0 0 NA ...
## $ Youth.Beds.HH.w..Children : int 0 0 0 NA 3 6 0 0 0 NA ...
## $ CH.Beds.HH.w..Children : int NA NA NA NA NA NA NA NA NA NA ...
## $ Beds.HH.w.o.Children : int 32 28 NA 6 6 4 33 10 2 1 ...
## $ Veteran.Beds.HH.w.o.Children : int 0 0 NA 0 0 0 0 0 0 0 ...
## $ Youth.Beds.HH.w.o.Children : int 0 0 NA 0 6 4 0 0 0 0 ...
## $ CH.Beds.HH.w.o.Children : int NA NA NA NA NA NA NA NA NA NA ...
## $ Beds.HH.w..only.Children : int NA 0 NA NA NA NA 0 0 0 0 ...
## $ CH.Beds.HH.w.only.Children : int NA NA NA NA NA NA NA NA NA NA ...
## $ Year.Round.Beds : int 77 68 59 6 9 10 52 18 13 1 ...
## $ DV.Beds : int 77 68 59 NA NA NA NA NA NA NA ...
## $ Total.Seasonal.Beds : int NA NA NA NA NA NA NA NA NA NA ...
## $ Availability.Start.Date : chr "" "" "" "" ...
## $ Availability.End.Date : chr "" "" "" "" ...
## $ O.V.Beds : int NA NA NA NA NA NA NA NA NA NA ...
## $ PIT.Count : int 24 20 59 6 9 10 52 18 13 1 ...
## $ Total.Beds : int 77 68 59 6 9 10 52 18 13 1 ...
## $ Updated.On : chr "8/16/2022 7:56:52 PM" "8/16/2022 7:56:30 PM" "4/19/2022 8:49:55 PM" "4/26/2022 3:56:00 PM" ...
## $ mergedDefunctYear : logi NA NA NA NA NA NA ...
## $ questionUsesDescriptorElements : int 1 1 1 1 1 1 1 1 1 1 ...
## $ questionDesiresToUseDescriptorElements: int NA NA NA NA NA NA NA NA NA NA ...
## $ notes : chr "BWS has ES, TH and RRH projects." "BWS has ES, TH and RRH projects. Due to staff turnover and eligibility requirements, the TH project was not uti"| __truncated__ "" "" ...
## [list output truncated]
How many housing projects are operated in Florida?
# Count the projects recorded for each state code.
table(db[["CocState"]])
##
## AK AL AR AZ CA CO CT DC DE FL GA GU HI IA ID IL
## 156 203 100 409 4666 476 525 360 109 1257 646 29 169 251 115 869
## IN KS KY LA MA MD ME MI MN MO MS MT NC ND NE NH
## 388 161 336 307 1004 454 252 954 1213 566 101 106 731 91 211 186
## NJ NM NV NY OH OK OR PA PR RI SC SD TN TX UT VA
## 798 200 206 2374 907 205 819 1226 138 141 230 87 387 1015 199 630
## VI VT WA WI WV WY
## 6 227 1559 497 168 76
To select only the cases in Florida:
# Keep only the rows whose state code is Florida.
df <- filter(db, CocState == "FL")
Let’s focus on the type of shelter/housing services
# Frequency of each project type among the Florida rows.
table(df[["Project.Type"]])
##
## ES OPH PSH RRH SH TH
## 339 68 298 329 7 216
Simple bar chart of frequencies:
# Bar chart of counts: one bar per project type.
ggplot(df, aes(x = Project.Type)) +
  geom_bar()
If instead we want to have proportions:
# Bar chart of proportions. group = 1 places every row in a single group so
# that prop is computed over all rows rather than within each bar.
ggplot(df, aes(x = Project.Type, y = after_stat(prop), group = 1)) +
  geom_bar()
If we want to change the order of the bars in the bar chart:
# Desired left-to-right order of the project types on the x-axis.
MyVar_order <- c("ES", "SH", "TH", "PSH", "OPH", "RRH")

# Same count bar chart, with the discrete x-axis limits set to that order.
ggplot(df, aes(x = Project.Type)) +
  geom_bar() +
  scale_x_discrete(limits = MyVar_order)
Now let’s go with number of beds
# Summary statistics (min, quartiles, mean, max) of total beds.
summary(df[["Total.Beds"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 10.00 20.00 39.43 44.00 1010.00
Let’s plot a histogram
# Histogram of total beds using bins that are 5 beds wide.
ggplot(df, aes(x = Total.Beds)) +
  geom_histogram(binwidth = 5)
The binwidth argument controls the width of each bin. We can cap the
number of beds shown in the graph at 500 to see the pattern without
extreme outliers.
# Histogram with 15-bed bins and blue bar outlines. coord_cartesian() zooms
# the view to 0-500 on both axes without dropping any observations.
ggplot(df, aes(x = Total.Beds)) +
  geom_histogram(colour = "blue", binwidth = 15) +
  coord_cartesian(xlim = c(0, 500), ylim = c(0, 500))
# Density-scaled histogram of total beds with a kernel density curve on top.
# The zoom is applied with coord_cartesian() because geom_histogram() has no
# xlim/ylim parameters and ignores them with a warning. Only the x-axis is
# limited: a 0-500 range on the y-axis is not meaningful on a density scale.
# after_stat(density) replaces the deprecated ..density.. notation.
ggplot(df, aes(x = Total.Beds)) +
  geom_histogram(
    aes(y = after_stat(density)),
    binwidth = 1, col = "grey", fill = "white"
  ) +
  geom_density(alpha = .2, fill = "#FF6666") +
  coord_cartesian(xlim = c(0, 500))