About The Project

The mean numbers of West Nile virus disease cases and neuroinvasive cases per state were calculated. The percentage of positive cases that developed into neuroinvasive disease, by year and by state, and the percentage of West Nile disease cases, by state and by year, were also calculated.

Read datasets from GitHub

# West Nile disease case counts, part 1: one row per State, with the yearly
# counts in columns X1999-X2009.
pt1 <- read.csv(
  "https://raw.githubusercontent.com/nnaemeka-git/global-datasets/main/west_nile_pt1.csv",
  header = TRUE
)
# Preview the first six rows.
head(pt1, n = 6L)
##        State X1999 X2000 X2001 X2002 X2003 X2004 X2005 X2006 X2007 X2008 X2009
## 1    Alabama     0     0     2    49    37    16    10     8    24    18     0
## 2     Alaska     0     0     0     0     0     0     0     0     0     0     0
## 3    Arizona     0     0     0     0    13   391   113   150    97   114    20
## 4   Arkansas     0     0     0    43    25    28    28    29    20     9     6
## 5 California     0     0     0     1     3   779   880   278   380   445   112
## 6   Colorado     0     0     0    14  2947   291   106   345   576    71   103
# West Nile disease case counts, part 2: yearly counts in columns X2010-X2019
# plus a Total column. Total contains thousands separators (e.g. "1,928").
pt2 <- read.csv(
  "https://raw.githubusercontent.com/nnaemeka-git/global-datasets/main/west_nile_pt2.csv",
  header = TRUE
)
# Preview the first six rows.
head(pt2, n = 6L)
##        State X2010 X2011 X2012 X2013 X2014 X2015 X2016 X2017 X2018 X2019 Total
## 1    Alabama     3     5    62     9     2     9    19    60    28     5   366
## 2     Alaska     0     0     0     0     0     0     0     0     1     1     2
## 3    Arizona   167    69   133    62   107   103    78   111    26   174 1,928
## 4   Arkansas     7     1    64    18    11    18     9    18     8     9   351
## 5 California   111   158   479   379   801   783   442   553   217   225 7,026
## 6   Colorado    81     7   131   322   118   101   149    68    96   122 5,648
# West Nile neuroinvasive case counts, part 1: one row per State, with the
# yearly counts in columns X1999-X2009.
npt1 <- read.csv(
  "https://raw.githubusercontent.com/nnaemeka-git/global-datasets/main/westnile%20neuroinvasive%20pt1.csv",
  header = TRUE
)
# Preview the first six rows.
head(npt1, n = 6L)
##        State X1999 X2000 X2001 X2002 X2003 X2004 X2005 X2006 X2007 X2008 X2009
## 1    Alabama     0     0     2    34    25    15     6     8    17    11     0
## 2     Alaska     0     0     0     0     0     0     0     0     0     0     0
## 3    Arizona     0     0     0     0     7   215    52    68    50    62    12
## 4   Arkansas     0     0     0    32    23    17    13    24    13     7     6
## 5 California     0     0     0     1     2   291   305    81   154   292    67
## 6   Colorado     0     0     0     6   621    41    21    66    99    17    36
# West Nile neuroinvasive case counts, part 2: yearly counts in columns
# X2010-X2019 plus a Total column. Total contains thousands separators
# (e.g. "1,218").
npt2 <- read.csv(
  "https://raw.githubusercontent.com/nnaemeka-git/global-datasets/main/westnile%20neuroinvasive%20pt2.csv",
  header = TRUE
)
# Preview the first six rows.
head(npt2, n = 6L)
##        State X2010 X2011 X2012 X2013 X2014 X2015 X2016 X2017 X2018 X2019 Total
## 1    Alabama     1     5    38     3     0     5    13    40    16     4   243
## 2     Alaska     0     0     0     0     0     0     0     0     1     0     1
## 3    Arizona   107    49    87    50    80    67    57    98    25   132 1,218
## 4   Arkansas     6     1    44    16     9    16     8    15     6     7   263
## 5 California    72   110   297   237   561   585   335   401   154   147 4,092
## 6   Colorado    26     2    62    90    46    57    59    29    52    52 1,382

Join datasets

# Stitch the two halves of each dataset together on State, so every row
# carries the 1999-2009 columns from part 1 followed by the 2010-2019 columns
# and Total from part 2. A left join keeps every row of the part-1 table.
nile_dt <- left_join(x = pt1, y = pt2, by = "State")
neuro_dt <- left_join(x = npt1, y = npt2, by = "State")
# Preview the combined West Nile disease table.
head(nile_dt, n = 6L)
##        State X1999 X2000 X2001 X2002 X2003 X2004 X2005 X2006 X2007 X2008 X2009
## 1    Alabama     0     0     2    49    37    16    10     8    24    18     0
## 2     Alaska     0     0     0     0     0     0     0     0     0     0     0
## 3    Arizona     0     0     0     0    13   391   113   150    97   114    20
## 4   Arkansas     0     0     0    43    25    28    28    29    20     9     6
## 5 California     0     0     0     1     3   779   880   278   380   445   112
## 6   Colorado     0     0     0    14  2947   291   106   345   576    71   103
##   X2010 X2011 X2012 X2013 X2014 X2015 X2016 X2017 X2018 X2019 Total
## 1     3     5    62     9     2     9    19    60    28     5   366
## 2     0     0     0     0     0     0     0     0     1     1     2
## 3   167    69   133    62   107   103    78   111    26   174 1,928
## 4     7     1    64    18    11    18     9    18     8     9   351
## 5   111   158   479   379   801   783   442   553   217   225 7,026
## 6    81     7   131   322   118   101   149    68    96   122 5,648
head(neuro_dt)
##        State X1999 X2000 X2001 X2002 X2003 X2004 X2005 X2006 X2007 X2008 X2009
## 1    Alabama     0     0     2    34    25    15     6     8    17    11     0
## 2     Alaska     0     0     0     0     0     0     0     0     0     0     0
## 3    Arizona     0     0     0     0     7   215    52    68    50    62    12
## 4   Arkansas     0     0     0    32    23    17    13    24    13     7     6
## 5 California     0     0     0     1     2   291   305    81   154   292    67
## 6   Colorado     0     0     0     6   621    41    21    66    99    17    36
##   X2010 X2011 X2012 X2013 X2014 X2015 X2016 X2017 X2018 X2019 Total
## 1     1     5    38     3     0     5    13    40    16     4   243
## 2     0     0     0     0     0     0     0     0     1     0     1
## 3   107    49    87    50    80    67    57    98    25   132 1,218
## 4     6     1    44    16     9    16     8    15     6     7   263
## 5    72   110   297   237   561   585   335   401   154   147 4,092
## 6    26     2    62    90    46    57    59    29    52    52 1,382

Replace 0 with NA

# Treat a zero count as "nothing recorded": every cell equal to 0 becomes NA.
# The comparison runs over all columns of the table, not only the year
# columns.
nile_dt <- replace(nile_dt, nile_dt == 0, NA)
neuro_dt <- replace(neuro_dt, neuro_dt == 0, NA)

Remove the last row

# Drop the final row of each table (presumably a non-state totals row --
# TODO confirm against the source CSVs).
# head(x, -1) keeps all rows but the last. Unlike 1:(nrow - 1) indexing, it
# yields an empty table when only one row is present (1:0 would re-select
# row 1) and does not error on an already-empty table.
nile_dt <- head(nile_dt, -1)
neuro_dt <- head(neuro_dt, -1)

Remove comma from Total column values

# Strip thousands separators from Total (e.g. "1,928" -> "1928"). The result
# is still a character vector; it is converted to numeric where it is used.
nile_dt$Total <- gsub(",", "", nile_dt$Total, fixed = TRUE)
neuro_dt$Total <- gsub(",", "", neuro_dt$Total, fixed = TRUE)

Derive number of years and mean number of westnile infections recorded

# For each state, count the years that have a recorded (non-NA) value and
# divide Total by that count to get the mean number of cases per such year.
# Year columns are picked by name ("X" followed by four digits, the form
# read.csv gives numeric headers) rather than by fixed positions 2:22, so the
# count stays correct if columns are added, removed or reordered.
nile_dt$NumOfYears <- rowSums(
  !is.na(nile_dt[grepl("^X[0-9]{4}$", names(nile_dt))])
)
nile_dt$AvgNumOfNile <- as.numeric(nile_dt$Total) / nile_dt$NumOfYears

neuro_dt$NumOfYears <- rowSums(
  !is.na(neuro_dt[grepl("^X[0-9]{4}$", names(neuro_dt))])
)
neuro_dt$AvgNumOfNeuro <- as.numeric(neuro_dt$Total) / neuro_dt$NumOfYears

head(neuro_dt)
##        State X1999 X2000 X2001 X2002 X2003 X2004 X2005 X2006 X2007 X2008 X2009
## 1    Alabama    NA    NA     2    34    25    15     6     8    17    11    NA
## 2     Alaska    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
## 3    Arizona    NA    NA    NA    NA     7   215    52    68    50    62    12
## 4   Arkansas    NA    NA    NA    32    23    17    13    24    13     7     6
## 5 California    NA    NA    NA     1     2   291   305    81   154   292    67
## 6   Colorado    NA    NA    NA     6   621    41    21    66    99    17    36
##   X2010 X2011 X2012 X2013 X2014 X2015 X2016 X2017 X2018 X2019 Total NumOfYears
## 1     1     5    38     3    NA     5    13    40    16     4   243         17
## 2    NA    NA    NA    NA    NA    NA    NA    NA     1    NA     1          1
## 3   107    49    87    50    80    67    57    98    25   132  1218         17
## 4     6     1    44    16     9    16     8    15     6     7   263         18
## 5    72   110   297   237   561   585   335   401   154   147  4092         18
## 6    26     2    62    90    46    57    59    29    52    52  1382         18
##   AvgNumOfNeuro
## 1      14.29412
## 2       1.00000
## 3      71.64706
## 4      14.61111
## 5     227.33333
## 6      76.77778
head(nile_dt)
##        State X1999 X2000 X2001 X2002 X2003 X2004 X2005 X2006 X2007 X2008 X2009
## 1    Alabama    NA    NA     2    49    37    16    10     8    24    18    NA
## 2     Alaska    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
## 3    Arizona    NA    NA    NA    NA    13   391   113   150    97   114    20
## 4   Arkansas    NA    NA    NA    43    25    28    28    29    20     9     6
## 5 California    NA    NA    NA     1     3   779   880   278   380   445   112
## 6   Colorado    NA    NA    NA    14  2947   291   106   345   576    71   103
##   X2010 X2011 X2012 X2013 X2014 X2015 X2016 X2017 X2018 X2019 Total NumOfYears
## 1     3     5    62     9     2     9    19    60    28     5   366         18
## 2    NA    NA    NA    NA    NA    NA    NA    NA     1     1     2          2
## 3   167    69   133    62   107   103    78   111    26   174  1928         17
## 4     7     1    64    18    11    18     9    18     8     9   351         18
## 5   111   158   479   379   801   783   442   553   217   225  7026         18
## 6    81     7   131   322   118   101   149    68    96   122  5648         18
##   AvgNumOfNile
## 1     20.33333
## 2      1.00000
## 3    113.41176
## 4     19.50000
## 5    390.33333
## 6    313.77778

Reshape the year columns into long format with pivot_longer

# Reshape to long format: every column except State, Total, NumOfYears and
# the per-state average becomes one (year, count) row.
nile_long <- nile_dt %>%
  pivot_longer(
    !c("State", "Total", "NumOfYears", "AvgNumOfNile"),
    names_to = "DiseaseYear",
    values_to = "DiseaseCount"
  )
# Column names look like "X1999"; keep only the digits and store the year as
# a number. str_extract() returns exactly one value per row (NA when a name
# has no digits), so the result always lines up with the rows.
nile_long$DiseaseYear <- as.numeric(str_extract(nile_long$DiseaseYear, "\\d+"))


neuro_long <- neuro_dt %>%
  pivot_longer(
    !c("State", "Total", "NumOfYears", "AvgNumOfNeuro"),
    names_to = "NeuroYear",
    values_to = "NeuroCount"
  )
# Same year parsing as above, for the neuroinvasive table.
neuro_long$NeuroYear <- as.numeric(str_extract(neuro_long$NeuroYear, "\\d+"))

Percent of Disease and positive cases that developed into neuroinvasive disease per year

# West Nile disease: total cases per year across all states, then each year's
# share (%) of the all-years total, largest share first.
nile_yr <- summarise(
  group_by(nile_long, DiseaseYear),
  YearTotal = sum(DiseaseCount, na.rm = TRUE)
)

nile_perc <- nile_yr %>%
  mutate(
    Total = sum(YearTotal, na.rm = TRUE),
    PercCases = round((YearTotal / Total) * 100, 3)
  ) %>%
  arrange(desc(PercCases))


# Neuroinvasive cases: total cases per year across all states, then each
# year's share (%) of the all-years neuroinvasive total, largest share first.
neuro_yr <- summarise(
  group_by(neuro_long, NeuroYear),
  YearTotal = sum(NeuroCount, na.rm = TRUE)
)

neuro_perc <- neuro_yr %>%
  mutate(
    Total = sum(YearTotal, na.rm = TRUE),
    PercCases = round((YearTotal / Total) * 100, 3)
  ) %>%
  arrange(desc(PercCases))

Percent of West Nile disease cases per year (all states combined)

nile_perc
## # A tibble: 21 x 4
##    DiseaseYear YearTotal Total PercCases
##          <dbl>     <int> <int>     <dbl>
##  1        2003      9862 51801     19.0 
##  2        2012      5674 51801     11.0 
##  3        2006      4269 51801      8.24
##  4        2002      4156 51801      8.02
##  5        2007      3630 51801      7.01
##  6        2005      3000 51801      5.79
##  7        2018      2647 51801      5.11
##  8        2004      2539 51801      4.90
##  9        2013      2469 51801      4.77
## 10        2014      2205 51801      4.26
## # ... with 11 more rows

Percent of positive cases that developed into neuroinvasive disease per year

neuro_perc
## # A tibble: 21 x 4
##    NeuroYear YearTotal Total PercCases
##        <dbl>     <int> <int>     <dbl>
##  1      2002      2946 25290     11.6 
##  2      2012      2873 25290     11.4 
##  3      2003      2866 25290     11.3 
##  4      2018      1658 25290      6.56
##  5      2006      1495 25290      5.91
##  6      2015      1455 25290      5.75
##  7      2017      1425 25290      5.64
##  8      2014      1347 25290      5.33
##  9      2005      1309 25290      5.18
## 10      2016      1309 25290      5.18
## # ... with 11 more rows

Show Graph

Disease Percent per year

# Horizontal bar chart of each year's share of all West Nile disease cases;
# bars are ordered by share and labelled with the percentage.
ggplot(
  nile_perc,
  aes(x = reorder(DiseaseYear, PercCases), y = PercCases)
) +
  geom_col(fill = "#D77E1A") +
  geom_text(aes(label = PercCases), color = "blue") +
  coord_flip() +
  labs(
    x = "Year",
    y = "Percent of west Nile Disease (%)",
    title = "Majority of the westnile disease infections happened 2003 followed by 2012.\n The year 2000, 1999 and 2001 had the least share of the infection"
  ) +
  theme_bw()

Neuroinvasive cases Percent per year

# Horizontal bar chart of each year's share of all neuroinvasive cases; bars
# are ordered by share and labelled with the percentage.
ggplot(
  neuro_perc,
  aes(x = reorder(NeuroYear, PercCases), y = PercCases)
) +
  geom_col(fill = "#B92CA4") +
  geom_text(aes(label = PercCases), color = "blue") +
  coord_flip() +
  labs(
    x = "Year",
    y = "Percent of Positive cases (%)",
    title = "Majority of the Neuroinvasive westnile virus infections happened year 2002, \nfollowed by 2012 and 2003 with the approximately 11.6%, 11.36% and\n 11.33% respectively. The year 2000, 1999 and 2001 had the least share of the\n infection"
  ) +
  theme_bw()

Percent of Disease and positive cases that developed into neuroinvasive disease per state

# West Nile disease: total cases per state across all years, then each
# state's share (%) of the overall total, largest share first.
nile_st <- summarise(
  group_by(nile_long, State),
  StateTotal = sum(DiseaseCount, na.rm = TRUE)
)

nile_perc_st <- nile_st %>%
  mutate(
    Total = sum(StateTotal, na.rm = TRUE),
    PercCases = round((StateTotal / Total) * 100, 3)
  ) %>%
  arrange(desc(PercCases))


# Neuroinvasive cases: total cases per state across all years, then each
# state's share (%) of the overall neuroinvasive total, largest share first.
neuro_st <- summarise(
  group_by(neuro_long, State),
  StateTotal = sum(NeuroCount, na.rm = TRUE)
)

neuro_perc_st <- neuro_st %>%
  mutate(
    Total = sum(StateTotal, na.rm = TRUE),
    PercCases = round((StateTotal / Total) * 100, 3)
  ) %>%
  arrange(desc(PercCases))

Percent of West Nile disease cases per state (all years combined)

nile_perc_st
## # A tibble: 52 x 4
##    State        StateTotal Total PercCases
##    <chr>             <int> <int>     <dbl>
##  1 California         7026 51801     13.6 
##  2 Colorado           5648 51801     10.9 
##  3 Texas              5590 51801     10.8 
##  4 Nebraska           4000 51801      7.72
##  5 Illinois           2662 51801      5.14
##  6 South Dakota       2613 51801      5.04
##  7 Arizona            1928 51801      3.72
##  8 North Dakota       1917 51801      3.70
##  9 Louisiana          1841 51801      3.55
## 10 Mississippi        1441 51801      2.78
## # ... with 42 more rows

Percent of positive cases that developed into neuroinvasive disease per state

neuro_perc_st
## # A tibble: 52 x 4
##    State       StateTotal Total PercCases
##    <chr>            <int> <int>     <dbl>
##  1 California        4092 25290     16.2 
##  2 Texas             3390 25290     13.4 
##  3 Illinois          1701 25290      6.73
##  4 Colorado          1382 25290      5.46
##  5 Arizona           1218 25290      4.82
##  6 Louisiana         1114 25290      4.40
##  7 Michigan          1113 25290      4.40
##  8 Nebraska           799 25290      3.16
##  9 Mississippi        789 25290      3.12
## 10 Ohio               741 25290      2.93
## # ... with 42 more rows

Show Graph

Disease Percent per State

# Horizontal bar chart of each state's share of all West Nile disease cases;
# bars are ordered by share and labelled with the percentage.
# The title text is fixed: state names were misspelled ("Califonia",
# "colorado").
nile_perc_st %>%
  ggplot(aes(reorder(State, PercCases), PercCases)) +
  geom_col(fill = "#8CD71A") +
  geom_text(aes(label = PercCases), color = "blue") +
  coord_flip() +
  labs(
    x = "State",
    y = "Percent of westnile disease (%)",
    title = "California and Colorado top the list of states with highest infections of\n 13.6% and 10.9% respectively while Puerto Rico and Hawaii had the\n least infections of 0.002% approximately"
  ) +
  theme_bw()

Neuroinvasive cases Percent per State

# Horizontal bar chart of each state's share of all neuroinvasive cases; bars
# are ordered by share and labelled with the percentage.
# The title text is fixed: state names were misspelled ("Califonia", "Aaska").
neuro_perc_st %>%
  ggplot(aes(reorder(State, PercCases), PercCases)) +
  geom_col(fill = "#8A8F80") +
  geom_text(aes(label = PercCases), color = "blue") +
  coord_flip() +
  labs(
    x = "State",
    y = "Percent of Positive cases (%)",
    title = "California and Texas top the list of states with highest Neuroinvasive\n westnile cases of 16.18% and 13.4% respectively while Puerto Rico,\n Alaska and Hawaii had the least infections of 0.004%, 0.004% and 0.0%\n approximately"
  ) +
  theme_bw()