Homelessness Graph Analytics

# Histogram of EntryDateMonth over all rows of ml_data, using bins of width 1.
ggplot(data = ml_data, mapping = aes(x = EntryDateMonth)) +
  geom_histogram(binwidth = 1)

# Same histogram, with bars filled by Destination treated as a factor.
ggplot(
  data = ml_data,
  mapping = aes(x = EntryDateMonth, fill = as.factor(Destination))
) +
  geom_histogram(binwidth = 1)

# Same histogram, with bars filled by TypeProvided treated as a factor.
ggplot(
  data = ml_data,
  mapping = aes(x = EntryDateMonth, fill = as.factor(TypeProvided))
) +
  geom_histogram(binwidth = 1)

# Same histogram, with bars filled by VADisabilityService treated as a factor.
ggplot(
  data = ml_data,
  mapping = aes(x = EntryDateMonth, fill = as.factor(VADisabilityService))
) +
  geom_histogram(binwidth = 1)

Heat Map Graph Analytics

# Display g2 (defined elsewhere). The warning echoed below shows that it has a
# stat_density2d layer and that 69 rows with non-finite values were dropped.
g2
## Warning: Removed 69 rows containing non-finite values (stat_density2d).

# Display g1 (defined elsewhere). The warning echoed below shows that it has a
# geom_point layer and that 1110 rows with missing values were dropped.
g1
## Warning: Removed 1110 rows containing missing values (geom_point).

# Print a caption for the ZIP code listing that follows.
print("Top 10 the Most Homelessness Area")
## [1] "Top 10 the Most Homelessness Area"
# Show the LastPermanentZIP column of top10_zip (ten ZIP codes in the output
# below). How top10_zip was ranked is not visible here -- presumably by number
# of records per ZIP; TODO confirm against the code that builds it.
top10_zip$LastPermanentZIP
##  [1] 63101 63136 63118 63103 63111 63125 63114 63104 63107 63143

Predictive modeling

# Plot the filter values held in var_imp.
plotFilterValues(var_imp, feat.type.cols = TRUE)

# Rank the rows of var_imp$data by information.gain (highest first) and keep
# the names of the top 7. top_n() is given no weighting column, so it reports
# the column it chose in the message echoed below.
imp_feat <- (
  var_imp$data %>%
    arrange(desc(information.gain)) %>%
    top_n(7)
)$name
## Selecting by information.gain
imp_feat
## [1] "EntryDateMonth"      "Discharge_Status.1"  "Discharge_Status.2" 
## [4] "Age"                 "Employed"            "TypeProvided"       
## [7] "VADisabilityService"
# Show the first three rows of `test`.
head(test, n = 3)
##   TypeProvided VADisabilityService EntryDateMonth Discharge_Status.1
## 2           B1                   0              1                  0
## 5           B1                   0              1                  0
## 7           B1                   0              1                  0
##   Discharge_Status.2 Age Employed Destination
## 2                  1   4        1           3
## 5                  1   4        1           3
## 7                  1   4        1           3
# Predict on `test` with the fitted model mdl.rf, then tabulate the predictions
# against the observed Destination values.
pred.rf.test <- predict(mdl.rf, test)
conf.mtx <- confusionMatrix(
  data = pred.rf.test,
  reference = test$Destination
)
# Overall summary statistics of the confusion matrix (accuracy, kappa, ...).
conf.mtx$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.8656172      0.7352898      0.8639407      0.8672805      0.6662625 
## AccuracyPValue  McnemarPValue 
##      0.0000000            NaN

Improvement TODO:

Challenges of this project