# Load the animal-bite records. Text columns are read as factors so that
# str() and summary() report per-level counts.
# NOTE(review): the path below is machine-specific; adjust it before running
# this script on another computer.
HAB <- read.csv(
  "C:/Users/Ozili Nwokobia/OneDrive/Desktop/Health_AnimalBites.csv",
  stringsAsFactors = TRUE
)

# The spreadsheet viewer needs a GUI, so open it only in an interactive
# session; sourced or knitted runs skip it.
if (interactive()) {
  View(HAB)
}
str(HAB)
## 'data.frame':    9003 obs. of  15 variables:
##  $ bite_date        : Factor w/ 2703 levels "","1952-05-28 00:00:00",..: 4 5 6 7 8 9 10 11 12 13 ...
##  $ SpeciesIDDesc    : Factor w/ 10 levels "","BAT","CAT",..: 4 4 4 4 4 4 4 4 4 4 ...
##  $ BreedIDDesc      : Factor w/ 102 levels "","AAUST. TERR.",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ GenderIDDesc     : Factor w/ 4 levels "","FEMALE","MALE",..: 2 4 4 3 2 4 2 3 3 4 ...
##  $ color            : Factor w/ 714 levels "","APRICOT","B.MERLE-GR",..: 412 244 1 17 72 1 39 72 250 201 ...
##  $ vaccination_yrs  : int  1 NA NA NA NA NA 1 NA NA NA ...
##  $ vaccination_date : Factor w/ 2108 levels "","1985-06-20 00:00:00",..: 2 1 1 1 1 1 3 1 1 1 ...
##  $ victim_zip       : Factor w/ 234 levels "","11215","13440",..: 109 101 102 1 1 94 86 1 1 1 ...
##  $ AdvIssuedYNDesc  : Factor w/ 3 levels "","NO","YES": 2 2 2 2 2 2 2 2 2 2 ...
##  $ WhereBittenIDDesc: Factor w/ 4 levels "","BODY","HEAD",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ quarantine_date  : Factor w/ 603 levels "","1985-05-05 00:00:00",..: 2 3 6 10 1 1 4 5 7 8 ...
##  $ DispositionIDDesc: Factor w/ 5 levels "","DIED","KILLED",..: 5 5 5 5 5 5 5 4 5 4 ...
##  $ head_sent_date   : Factor w/ 326 levels "","2010-01-10 00:00:00",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ release_date     : Factor w/ 583 levels "","2011-05-09 00:00:00",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ ResultsIDDesc    : Factor w/ 4 levels "","NEGATIVE",..: 4 4 4 4 4 4 4 4 4 4 ...
# Per-column overview of the raw data: level counts for the factor columns
# (note how often the blank "" level dominates) and quartiles plus the NA
# count for vaccination_yrs.
summary(HAB)
##                bite_date    SpeciesIDDesc           BreedIDDesc  
##                     : 317   DOG    :7029                  :5244  
##  2017-08-15 00:00:00:  13   CAT    :1568   PIT BULL       :1102  
##  2010-04-01 00:00:00:  12   BAT    : 237   GERM SHEPHERD  : 327  
##  2014-04-26 00:00:00:  12          : 118   LABRADOR RETRIV: 253  
##  2015-03-17 00:00:00:  12   RACCOON:  27   BOXER          : 181  
##  2011-08-26 00:00:00:  11   OTHER  :  11   CHICHAUHUA     : 165  
##  (Other)            :8626   (Other):  13   (Other)        :1731  
##   GenderIDDesc      color      vaccination_yrs             vaccination_date
##         :2526          :2576   Min.   : 1.000                      :4888   
##  FEMALE :2016   BLACK  : 671   1st Qu.: 1.000   2015-10-01 00:00:00:   8   
##  MALE   :3832   BROWN  : 433   Median : 1.000   2011-07-08 00:00:00:   7   
##  UNKNOWN: 629   WHITE  : 394   Mean   : 1.452   2012-05-14 00:00:00:   7   
##                 BLK WHT: 277   3rd Qu.: 1.000   2013-04-01 00:00:00:   7   
##                 TAN    : 223   Max.   :11.000   2013-05-08 00:00:00:   7   
##                 (Other):4429   NA's   :5265     (Other)            :4079   
##    victim_zip   AdvIssuedYNDesc WhereBittenIDDesc            quarantine_date
##         :1838      :6438               : 616                         :6983  
##  40272  : 376   NO :1914        BODY   :6213      2010-04-05 00:00:00:  18  
##  40291  : 368   YES: 651        HEAD   :1244      2010-11-08 00:00:00:  14  
##  40216  : 362                   UNKNOWN: 930      2011-06-01 00:00:00:  14  
##  40215  : 356                                     2010-02-01 00:00:00:  13  
##  40214  : 348                                     2011-06-27 00:00:00:  13  
##  (Other):5355                                     (Other)            :1948  
##  DispositionIDDesc             head_sent_date              release_date 
##          :7468                        :8608                      :7558  
##  DIED    :   4     2010-05-21 00:00:00:   4   2016-07-05 00:00:00:  13  
##  KILLED  :  16     2010-07-12 00:00:00:   3   2017-09-05 00:00:00:  12  
##  RELEASED: 912     2015-06-02 00:00:00:   3   2016-04-04 00:00:00:  11  
##  UNKNOWN : 603     2015-06-16 00:00:00:   3   2016-05-23 00:00:00:   9  
##                    2015-08-17 00:00:00:   3   2017-05-08 00:00:00:   9  
##                    (Other)            : 379   (Other)            :1391  
##   ResultsIDDesc 
##          :7460  
##  NEGATIVE: 299  
##  POSITIVE:   4  
##  UNKNOWN :1240  
##                 
##                 
## 
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
# CLEANING THE DATA ----
# Drop every column that is not used by the analysis below. A single select()
# call replaces eleven separate reassignments of HAB.
HAB <- select(
  HAB,
  -c(
    BreedIDDesc, color, head_sent_date, release_date, quarantine_date,
    vaccination_date, bite_date, WhereBittenIDDesc, DispositionIDDesc,
    AdvIssuedYNDesc, GenderIDDesc
  )
)

# Impute missing vaccination_yrs with the median of the observed values.
HAB$vaccination_yrs[is.na(HAB$vaccination_yrs)] <-
  median(HAB$vaccination_yrs, na.rm = TRUE)

# The viewer needs a GUI, so open it only in an interactive session.
if (interactive()) {
  View(HAB)
}
str(HAB)
## 'data.frame':    9003 obs. of  4 variables:
##  $ SpeciesIDDesc  : Factor w/ 10 levels "","BAT","CAT",..: 4 4 4 4 4 4 4 4 4 4 ...
##  $ vaccination_yrs: num  1 1 1 1 1 1 1 1 1 1 ...
##  $ victim_zip     : Factor w/ 234 levels "","11215","13440",..: 109 101 102 1 1 94 86 1 1 1 ...
##  $ ResultsIDDesc  : Factor w/ 4 levels "","NEGATIVE",..: 4 4 4 4 4 4 4 4 4 4 ...
# REMOVING ROWS WITH MISSING OR EMPTY VALUES ----
# Each step narrows HAB1 further; str() after every step records how many
# rows survive.

# 1. Keep rows that have a test result at all (neither NA nor blank).
HAB1 <- subset(HAB, !is.na(ResultsIDDesc) & ResultsIDDesc != "")
str(HAB1)
## 'data.frame':    1543 obs. of  4 variables:
##  $ SpeciesIDDesc  : Factor w/ 10 levels "","BAT","CAT",..: 4 4 4 4 4 4 4 4 4 4 ...
##  $ vaccination_yrs: num  1 1 1 1 1 1 1 1 1 1 ...
##  $ victim_zip     : Factor w/ 234 levels "","11215","13440",..: 109 101 102 1 1 94 86 1 1 1 ...
##  $ ResultsIDDesc  : Factor w/ 4 levels "","NEGATIVE",..: 4 4 4 4 4 4 4 4 4 4 ...

# 2. Discard results recorded as UNKNOWN.
HAB1 <- subset(HAB1, ResultsIDDesc != "UNKNOWN")
str(HAB1)
## 'data.frame':    303 obs. of  4 variables:
##  $ SpeciesIDDesc  : Factor w/ 10 levels "","BAT","CAT",..: 4 4 4 3 3 2 2 2 9 4 ...
##  $ vaccination_yrs: num  1 1 1 1 1 1 1 1 1 1 ...
##  $ victim_zip     : Factor w/ 234 levels "","11215","13440",..: 97 123 99 98 95 117 102 124 117 109 ...
##  $ ResultsIDDesc  : Factor w/ 4 levels "","NEGATIVE",..: 2 2 2 2 2 2 2 2 2 2 ...

# 3. Keep rows with a recorded species.
HAB1 <- subset(HAB1, !is.na(SpeciesIDDesc) & SpeciesIDDesc != "")
str(HAB1)
## 'data.frame':    300 obs. of  4 variables:
##  $ SpeciesIDDesc  : Factor w/ 10 levels "","BAT","CAT",..: 4 4 4 3 3 2 2 2 9 4 ...
##  $ vaccination_yrs: num  1 1 1 1 1 1 1 1 1 1 ...
##  $ victim_zip     : Factor w/ 234 levels "","11215","13440",..: 97 123 99 98 95 117 102 124 117 109 ...
##  $ ResultsIDDesc  : Factor w/ 4 levels "","NEGATIVE",..: 2 2 2 2 2 2 2 2 2 2 ...

# 4. Keep rows with a recorded victim zip code.
HAB1 <- subset(HAB1, !is.na(victim_zip) & victim_zip != "")
str(HAB1)
## 'data.frame':    279 obs. of  4 variables:
##  $ SpeciesIDDesc  : Factor w/ 10 levels "","BAT","CAT",..: 4 4 4 3 3 2 2 2 9 4 ...
##  $ vaccination_yrs: num  1 1 1 1 1 1 1 1 1 1 ...
##  $ victim_zip     : Factor w/ 234 levels "","11215","13440",..: 97 123 99 98 95 117 102 124 117 109 ...
##  $ ResultsIDDesc  : Factor w/ 4 levels "","NEGATIVE",..: 2 2 2 2 2 2 2 2 2 2 ...
# Inspect the fully filtered data. The viewer needs a GUI, so it is opened
# only in an interactive session; the summary is printed either way.
if (interactive()) {
  View(HAB1)
}
summary(HAB1)
##  SpeciesIDDesc vaccination_yrs   victim_zip   ResultsIDDesc
##  BAT    :158   Min.   :1.000   40207  : 17           :  0  
##  CAT    : 57   1st Qu.:1.000   40211  : 17   NEGATIVE:275  
##  DOG    : 52   Median :1.000   40205  : 16   POSITIVE:  4  
##  RACCOON:  7   Mean   :1.014   40214  : 16   UNKNOWN :  0  
##  OTHER  :  5   3rd Qu.:1.000   40223  : 16                 
##         :  0   Max.   :3.000   40299  : 16                 
##  (Other):  0                   (Other):181
# Rows whose test result is POSITIVE.
positive_cases <- HAB1[HAB1$ResultsIDDesc == "POSITIVE", ]

# Restrict the species axis to species that actually occur in the filtered
# data. SpeciesIDDesc still carries every level of the raw file, so a plain
# table() would add zero-count rows for the blank "" level and for species
# that were filtered out, producing empty or unlabelled bars.
tested_species <- levels(droplevels(HAB1$SpeciesIDDesc))
species_counts <- table(
  factor(positive_cases$SpeciesIDDesc, levels = tested_species)
)

species_summary <- as.data.frame(species_counts)
names(species_summary) <- c("Species", "Count")

# geom_col() draws bars whose heights are the values already in the data
# (the idiomatic form of geom_bar(stat = "identity")).
ggplot(species_summary, aes(x = Species, y = Count, fill = Species)) +
  geom_col(color = "black") +
  labs(
    title = "Positive Rabies Cases by Species",
    x = "Species",
    y = "Number of Positive Cases"
  ) +
  theme_minimal() +
  # Rotate x-axis labels for clarity
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Fix the RNG seed so the kmeans() run later in this script (which tries
# several random starts) gives the same result on every execution.
set.seed(123)

library(factoextra)
## Warning: package 'factoextra' was built under R version 4.3.3
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(caret)
## Loading required package: lattice
library(rpart)
## Warning: package 'rpart' was built under R version 4.3.3
library(rpart.plot)
## Warning: package 'rpart.plot' was built under R version 4.3.3
library(rattle)
## Warning: package 'rattle' was built under R version 4.3.3
## Loading required package: tibble
## Loading required package: bitops
## Rattle: A free graphical interface for data science with R.
## Version 5.5.1 Copyright (c) 2006-2021 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.
# Replace the two categorical columns with their integer factor codes so that
# every clustering input is numeric.
# NOTE(review): unused factor levels were never dropped, so the codes index
# the level set of the raw file. Zip code and species are nominal, so
# distances between these codes have no obvious meaning; confirm this
# encoding is intended.
HAB1[c("victim_zip", "SpeciesIDDesc")] <- lapply(
  HAB1[c("victim_zip", "SpeciesIDDesc")],
  function(column) as.numeric(as.factor(column))
)

# The three features fed to the clustering algorithms.
data_clustering <- HAB1[, c("vaccination_yrs", "victim_zip", "SpeciesIDDesc")]

# Sanity check: every feature must be numeric before scaling.
vapply(data_clustering, is.numeric, logical(1))
## vaccination_yrs      victim_zip   SpeciesIDDesc 
##            TRUE            TRUE            TRUE
summary(data_clustering)
##  vaccination_yrs   victim_zip    SpeciesIDDesc  
##  Min.   :1.000   Min.   : 43.0   Min.   :2.000  
##  1st Qu.:1.000   1st Qu.: 90.0   1st Qu.:2.000  
##  Median :1.000   Median : 97.0   Median :2.000  
##  Mean   :1.014   Mean   :100.2   Mean   :2.842  
##  3rd Qu.:1.000   3rd Qu.:108.0   3rd Qu.:3.000  
##  Max.   :3.000   Max.   :192.0   Max.   :9.000
# Centre and scale every feature so that no column dominates the Euclidean
# distance purely through its units. The scale() generic is used rather than
# calling the scale.default() method directly.
data_scaled <- scale(data_clustering)
distance_mat <- dist(data_scaled, method = "euclidean")

# Hierarchical clustering with average linkage, cut into three groups.
hclust_avg <- hclust(distance_mat, method = "average")
cut_age <- cutree(hclust_avg, k = 3)

# Use utils::View() as elsewhere in this script. The lower-case view() only
# resolved because loading rattle attaches tibble. The viewer needs a GUI,
# so it is opened only in an interactive session.
if (interactive()) {
  View(cut_age)
}

# Cluster label of each observation, in row order.
plot(cut_age)

# Attach the hierarchical cluster label to the data and draw the scatterplot
# matrix of all columns.
HAB1_cl <- mutate(HAB1, cluster = cut_age)
plot(HAB1_cl)

# k-means on the same scaled features; nstart = 25 tries 25 starts and keeps
# the best solution.
kmeans_result <- kmeans(data_scaled, centers = 3, nstart = 25)

# Cluster centres, expressed in scaled (standard-deviation) units.
print(kmeans_result$centers)
##   vaccination_yrs   victim_zip SpeciesIDDesc
## 1     -0.08481944 -0.008170624     3.8378609
## 2     -0.08481944 -0.005921326    -0.1800879
## 3     11.74749268  0.833599390     0.8344861