The mean numbers of West Nile virus disease cases and of neuroinvasive cases per state were calculated. The percent of positive cases that developed into neuroinvasive disease per year and per state, and the percent of West Nile disease cases per state and per year, were also calculated.
Read the datasets from GitHub
# West Nile disease dataset
# Part 1: first half of the per-state yearly counts, read straight from GitHub.
pt1 <- read.csv(
  "https://raw.githubusercontent.com/nnaemeka-git/global-datasets/main/west_nile_pt1.csv"
)
# Preview the first rows.
head(pt1)
## State X1999 X2000 X2001 X2002 X2003 X2004 X2005 X2006 X2007 X2008 X2009
## 1 Alabama 0 0 2 49 37 16 10 8 24 18 0
## 2 Alaska 0 0 0 0 0 0 0 0 0 0 0
## 3 Arizona 0 0 0 0 13 391 113 150 97 114 20
## 4 Arkansas 0 0 0 43 25 28 28 29 20 9 6
## 5 California 0 0 0 1 3 779 880 278 380 445 112
## 6 Colorado 0 0 0 14 2947 291 106 345 576 71 103
# Part 2: second half of the West Nile disease counts, read straight from GitHub.
pt2 <- read.csv(
  "https://raw.githubusercontent.com/nnaemeka-git/global-datasets/main/west_nile_pt2.csv"
)
# Preview the first rows.
head(pt2)
## State X2010 X2011 X2012 X2013 X2014 X2015 X2016 X2017 X2018 X2019 Total
## 1 Alabama 3 5 62 9 2 9 19 60 28 5 366
## 2 Alaska 0 0 0 0 0 0 0 0 1 1 2
## 3 Arizona 167 69 133 62 107 103 78 111 26 174 1,928
## 4 Arkansas 7 1 64 18 11 18 9 18 8 9 351
## 5 California 111 158 479 379 801 783 442 553 217 225 7,026
## 6 Colorado 81 7 131 322 118 101 149 68 96 122 5,648
# West Nile neuroinvasive dataset
# Part 1: first half of the per-state yearly counts, read straight from GitHub.
npt1 <- read.csv(
  "https://raw.githubusercontent.com/nnaemeka-git/global-datasets/main/westnile%20neuroinvasive%20pt1.csv"
)
# Preview the first rows.
head(npt1)
## State X1999 X2000 X2001 X2002 X2003 X2004 X2005 X2006 X2007 X2008 X2009
## 1 Alabama 0 0 2 34 25 15 6 8 17 11 0
## 2 Alaska 0 0 0 0 0 0 0 0 0 0 0
## 3 Arizona 0 0 0 0 7 215 52 68 50 62 12
## 4 Arkansas 0 0 0 32 23 17 13 24 13 7 6
## 5 California 0 0 0 1 2 291 305 81 154 292 67
## 6 Colorado 0 0 0 6 621 41 21 66 99 17 36
# Part 2: second half of the neuroinvasive counts, read straight from GitHub.
npt2 <- read.csv(
  "https://raw.githubusercontent.com/nnaemeka-git/global-datasets/main/westnile%20neuroinvasive%20pt2.csv"
)
# Preview the first rows.
head(npt2)
## State X2010 X2011 X2012 X2013 X2014 X2015 X2016 X2017 X2018 X2019 Total
## 1 Alabama 1 5 38 3 0 5 13 40 16 4 243
## 2 Alaska 0 0 0 0 0 0 0 0 1 0 1
## 3 Arizona 107 49 87 50 80 67 57 98 25 132 1,218
## 4 Arkansas 6 1 44 16 9 16 8 15 6 7 263
## 5 California 72 110 297 237 561 585 335 401 154 147 4,092
## 6 Colorado 26 2 62 90 46 57 59 29 52 52 1,382
Join datasets
# Stitch the two halves of each dataset together on the shared State column,
# keeping every row of the first half.
nile_dt <- pt1 %>% left_join(pt2, by = "State")
neuro_dt <- npt1 %>% left_join(npt2, by = "State")
# Preview the combined West Nile disease table.
head(nile_dt)
## State X1999 X2000 X2001 X2002 X2003 X2004 X2005 X2006 X2007 X2008 X2009
## 1 Alabama 0 0 2 49 37 16 10 8 24 18 0
## 2 Alaska 0 0 0 0 0 0 0 0 0 0 0
## 3 Arizona 0 0 0 0 13 391 113 150 97 114 20
## 4 Arkansas 0 0 0 43 25 28 28 29 20 9 6
## 5 California 0 0 0 1 3 779 880 278 380 445 112
## 6 Colorado 0 0 0 14 2947 291 106 345 576 71 103
## X2010 X2011 X2012 X2013 X2014 X2015 X2016 X2017 X2018 X2019 Total
## 1 3 5 62 9 2 9 19 60 28 5 366
## 2 0 0 0 0 0 0 0 0 1 1 2
## 3 167 69 133 62 107 103 78 111 26 174 1,928
## 4 7 1 64 18 11 18 9 18 8 9 351
## 5 111 158 479 379 801 783 442 553 217 225 7,026
## 6 81 7 131 322 118 101 149 68 96 122 5,648
# Preview the combined neuroinvasive table.
head(neuro_dt)
## State X1999 X2000 X2001 X2002 X2003 X2004 X2005 X2006 X2007 X2008 X2009
## 1 Alabama 0 0 2 34 25 15 6 8 17 11 0
## 2 Alaska 0 0 0 0 0 0 0 0 0 0 0
## 3 Arizona 0 0 0 0 7 215 52 68 50 62 12
## 4 Arkansas 0 0 0 32 23 17 13 24 13 7 6
## 5 California 0 0 0 1 2 291 305 81 154 292 67
## 6 Colorado 0 0 0 6 621 41 21 66 99 17 36
## X2010 X2011 X2012 X2013 X2014 X2015 X2016 X2017 X2018 X2019 Total
## 1 1 5 38 3 0 5 13 40 16 4 243
## 2 0 0 0 0 0 0 0 0 1 0 1
## 3 107 49 87 50 80 67 57 98 25 132 1,218
## 4 6 1 44 16 9 16 8 15 6 7 263
## 5 72 110 297 237 561 585 335 401 154 147 4,092
## 6 26 2 62 90 46 57 59 29 52 52 1,382
Replace 0 with NA
# Turn every cell equal to 0 into NA so that the later !is.na() row counts
# only include years with at least one recorded case.
# NOTE(review): the comparison runs over the whole data frame, not just the
# year columns, so a zero in any other column is replaced as well. It also
# makes a true zero indistinguishable from a missing value - confirm intended.
nile_dt[nile_dt==0] <- NA
neuro_dt[neuro_dt==0] <- NA
Remove the last row
# Drop the final row of each table.
# NOTE(review): presumably a totals/summary row from the source CSV - confirm.
# head(x, -1) is used instead of x[1:(n - 1), ] because 1:(n - 1) counts
# backwards (1, 0) for a one-row table and would keep that row, and it errors
# for an empty table; head() returns an empty table in both cases.
nile_dt <- head(nile_dt, -1)
neuro_dt <- head(neuro_dt, -1)
Remove comma from Total column values
# Strip thousands separators (e.g. "1,928" -> "1928") from the Total column.
# The result stays a character vector; it is converted to numeric where used.
nile_dt[["Total"]] <- str_remove_all(nile_dt[["Total"]], ",")
neuro_dt[["Total"]] <- str_remove_all(neuro_dt[["Total"]], ",")
Derive number of years and mean number of westnile infections recorded
# Number of years with a recorded (non-NA) count, per state. The year columns
# are picked by name (X1999, X2000, ...) rather than by fixed positions 2:22,
# so the count stays correct if columns are added, removed or reordered.
# drop = FALSE keeps a data frame even if only one year column matches.
nile_dt$NumOfYears <- rowSums(
  !is.na(nile_dt[, grepl("^X[0-9]{4}$", names(nile_dt)), drop = FALSE])
)
# Mean cases per recorded year; Total is text at this point, hence as.numeric().
nile_dt$AvgNumOfNile <- as.numeric(nile_dt$Total) / nile_dt$NumOfYears

# Same two derived columns for the neuroinvasive table.
neuro_dt$NumOfYears <- rowSums(
  !is.na(neuro_dt[, grepl("^X[0-9]{4}$", names(neuro_dt)), drop = FALSE])
)
neuro_dt$AvgNumOfNeuro <- as.numeric(neuro_dt$Total) / neuro_dt$NumOfYears
# Preview the neuroinvasive table with the new columns.
head(neuro_dt)
## State X1999 X2000 X2001 X2002 X2003 X2004 X2005 X2006 X2007 X2008 X2009
## 1 Alabama NA NA 2 34 25 15 6 8 17 11 NA
## 2 Alaska NA NA NA NA NA NA NA NA NA NA NA
## 3 Arizona NA NA NA NA 7 215 52 68 50 62 12
## 4 Arkansas NA NA NA 32 23 17 13 24 13 7 6
## 5 California NA NA NA 1 2 291 305 81 154 292 67
## 6 Colorado NA NA NA 6 621 41 21 66 99 17 36
## X2010 X2011 X2012 X2013 X2014 X2015 X2016 X2017 X2018 X2019 Total NumOfYears
## 1 1 5 38 3 NA 5 13 40 16 4 243 17
## 2 NA NA NA NA NA NA NA NA 1 NA 1 1
## 3 107 49 87 50 80 67 57 98 25 132 1218 17
## 4 6 1 44 16 9 16 8 15 6 7 263 18
## 5 72 110 297 237 561 585 335 401 154 147 4092 18
## 6 26 2 62 90 46 57 59 29 52 52 1382 18
## AvgNumOfNeuro
## 1 14.29412
## 2 1.00000
## 3 71.64706
## 4 14.61111
## 5 227.33333
## 6 76.77778
# Preview the West Nile disease table with the derived NumOfYears and
# AvgNumOfNile columns.
head(nile_dt)
## State X1999 X2000 X2001 X2002 X2003 X2004 X2005 X2006 X2007 X2008 X2009
## 1 Alabama NA NA 2 49 37 16 10 8 24 18 NA
## 2 Alaska NA NA NA NA NA NA NA NA NA NA NA
## 3 Arizona NA NA NA NA 13 391 113 150 97 114 20
## 4 Arkansas NA NA NA 43 25 28 28 29 20 9 6
## 5 California NA NA NA 1 3 779 880 278 380 445 112
## 6 Colorado NA NA NA 14 2947 291 106 345 576 71 103
## X2010 X2011 X2012 X2013 X2014 X2015 X2016 X2017 X2018 X2019 Total NumOfYears
## 1 3 5 62 9 2 9 19 60 28 5 366 18
## 2 NA NA NA NA NA NA NA NA 1 1 2 2
## 3 167 69 133 62 107 103 78 111 26 174 1928 17
## 4 7 1 64 18 11 18 9 18 8 9 351 18
## 5 111 158 479 379 801 783 442 553 217 225 7026 18
## 6 81 7 131 322 118 101 149 68 96 122 5648 18
## AvgNumOfNile
## 1 20.33333
## 2 1.00000
## 3 113.41176
## 4 19.50000
## 5 390.33333
## 6 313.77778
Transform the year columns to long format with pivot_longer
# Reshape to long format: one row per state and year. The four identifier /
# summary columns are kept; every remaining (year) column becomes a
# DiseaseYear / DiseaseCount pair.
nile_long <- nile_dt %>%
  pivot_longer(
    !c("State", "Total", "NumOfYears", "AvgNumOfNile"),
    names_to = "DiseaseYear",
    values_to = "DiseaseCount"
  )
# Year column names look like "X1999": extract the four-digit year explicitly.
# str_extract() returns exactly one value per element (NA if none), so the
# result always has the same length as the column, unlike
# unlist(str_match_all(...)) with the wildcard pattern "\\d+..".
nile_long$DiseaseYear <- as.numeric(str_extract(nile_long$DiseaseYear, "\\d{4}"))

# Same reshape for the neuroinvasive table.
neuro_long <- neuro_dt %>%
  pivot_longer(
    !c("State", "Total", "NumOfYears", "AvgNumOfNeuro"),
    names_to = "NeuroYear",
    values_to = "NeuroCount"
  )
neuro_long$NeuroYear <- as.numeric(str_extract(neuro_long$NeuroYear, "\\d{4}"))
Percent of Disease and positive cases that developed into neuroinvasive disease per year
# West Nile disease: total cases per year across all states, then each year's
# share (in percent, 3 decimals) of the overall total, largest share first.
nile_yr <- summarise(
  group_by(nile_long, DiseaseYear),
  YearTotal = sum(DiseaseCount, na.rm = TRUE)
)
nile_perc <- nile_yr %>%
  mutate(
    Total = sum(YearTotal, na.rm = TRUE),
    PercCases = round((YearTotal / Total) * 100, 3)
  ) %>%
  arrange(desc(PercCases))

# Neuroinvasive cases: same computation on the neuroinvasive counts.
# NOTE(review): PercCases here is each year's share of all neuroinvasive cases
# (YearTotal / Total), not neuroinvasive cases divided by that year's positive
# West Nile cases - confirm this matches the intended metric.
neuro_yr <- summarise(
  group_by(neuro_long, NeuroYear),
  YearTotal = sum(NeuroCount, na.rm = TRUE)
)
neuro_perc <- neuro_yr %>%
  mutate(
    Total = sum(YearTotal, na.rm = TRUE),
    PercCases = round((YearTotal / Total) * 100, 3)
  ) %>%
  arrange(desc(PercCases))
Percent of West Nile disease cases per year
# Print the per-year West Nile disease table (sorted by PercCases, descending).
nile_perc
## # A tibble: 21 x 4
## DiseaseYear YearTotal Total PercCases
## <dbl> <int> <int> <dbl>
## 1 2003 9862 51801 19.0
## 2 2012 5674 51801 11.0
## 3 2006 4269 51801 8.24
## 4 2002 4156 51801 8.02
## 5 2007 3630 51801 7.01
## 6 2005 3000 51801 5.79
## 7 2018 2647 51801 5.11
## 8 2004 2539 51801 4.90
## 9 2013 2469 51801 4.77
## 10 2014 2205 51801 4.26
## # ... with 11 more rows
Percent of positive cases that developed into neuroinvasive disease per year
# Print the per-year neuroinvasive table (sorted by PercCases, descending).
neuro_perc
## # A tibble: 21 x 4
## NeuroYear YearTotal Total PercCases
## <dbl> <int> <int> <dbl>
## 1 2002 2946 25290 11.6
## 2 2012 2873 25290 11.4
## 3 2003 2866 25290 11.3
## 4 2018 1658 25290 6.56
## 5 2006 1495 25290 5.91
## 6 2015 1455 25290 5.75
## 7 2017 1425 25290 5.64
## 8 2014 1347 25290 5.33
## 9 2005 1309 25290 5.18
## 10 2016 1309 25290 5.18
## # ... with 11 more rows
Disease Percent per year
# Horizontal bar chart of each year's share of West Nile disease cases.
# Years are ordered by their share and each bar is labelled with its percent.
ggplot(nile_perc, aes(x = reorder(DiseaseYear, PercCases), y = PercCases)) +
  geom_col(fill = "#D77E1A") +
  geom_text(aes(label = PercCases), color = "blue") +
  coord_flip() +
  labs(
    x = "Year",
    y = "Percent of west Nile Disease (%)",
    title = "Majority of the westnile disease infections happened 2003 followed by 2012.\n The year 2000, 1999 and 2001 had the least share of the infection"
  ) +
  theme_bw()
Neuroinvasive cases Percent per year
# Horizontal bar chart of each year's share of neuroinvasive cases.
# Years are ordered by their share and each bar is labelled with its percent.
ggplot(neuro_perc, aes(x = reorder(NeuroYear, PercCases), y = PercCases)) +
  geom_col(fill = "#B92CA4") +
  geom_text(aes(label = PercCases), color = "blue") +
  coord_flip() +
  labs(
    x = "Year",
    y = "Percent of Positive cases (%)",
    title = "Majority of the Neuroinvasive westnile virus infections happened year 2002, \nfollowed by 2012 and 2003 with the approximately 11.6%, 11.36% and\n 11.33% respectively. The year 2000, 1999 and 2001 had the least share of the\n infection"
  ) +
  theme_bw()
Percent of Disease and positive cases that developed into neuroinvasive disease per state
# West Nile disease: total cases per state across all years, then each state's
# share (in percent, 3 decimals) of the overall total, largest share first.
nile_st <- summarise(
  group_by(nile_long, State),
  StateTotal = sum(DiseaseCount, na.rm = TRUE)
)
nile_perc_st <- nile_st %>%
  mutate(
    Total = sum(StateTotal, na.rm = TRUE),
    PercCases = round((StateTotal / Total) * 100, 3)
  ) %>%
  arrange(desc(PercCases))

# Neuroinvasive cases: same computation on the neuroinvasive counts.
# NOTE(review): PercCases here is each state's share of all neuroinvasive cases
# (StateTotal / Total), not neuroinvasive cases divided by that state's
# positive West Nile cases - confirm this matches the intended metric.
neuro_st <- summarise(
  group_by(neuro_long, State),
  StateTotal = sum(NeuroCount, na.rm = TRUE)
)
neuro_perc_st <- neuro_st %>%
  mutate(
    Total = sum(StateTotal, na.rm = TRUE),
    PercCases = round((StateTotal / Total) * 100, 3)
  ) %>%
  arrange(desc(PercCases))
Percent of West Nile disease cases per state
# Print the per-state West Nile disease table (sorted by PercCases, descending).
nile_perc_st
## # A tibble: 52 x 4
## State StateTotal Total PercCases
## <chr> <int> <int> <dbl>
## 1 California 7026 51801 13.6
## 2 Colorado 5648 51801 10.9
## 3 Texas 5590 51801 10.8
## 4 Nebraska 4000 51801 7.72
## 5 Illinois 2662 51801 5.14
## 6 South Dakota 2613 51801 5.04
## 7 Arizona 1928 51801 3.72
## 8 North Dakota 1917 51801 3.70
## 9 Louisiana 1841 51801 3.55
## 10 Mississippi 1441 51801 2.78
## # ... with 42 more rows
Percent of positive cases that developed into neuroinvasive disease per state
# Print the per-state neuroinvasive table (sorted by PercCases, descending).
neuro_perc_st
## # A tibble: 52 x 4
## State StateTotal Total PercCases
## <chr> <int> <int> <dbl>
## 1 California 4092 25290 16.2
## 2 Texas 3390 25290 13.4
## 3 Illinois 1701 25290 6.73
## 4 Colorado 1382 25290 5.46
## 5 Arizona 1218 25290 4.82
## 6 Louisiana 1114 25290 4.40
## 7 Michigan 1113 25290 4.40
## 8 Nebraska 799 25290 3.16
## 9 Mississippi 789 25290 3.12
## 10 Ohio 741 25290 2.93
## # ... with 42 more rows
Disease Percent per State
# Horizontal bar chart of each state's share of West Nile disease cases.
# States are ordered by their share and each bar is labelled with its percent.
# The title text is unchanged apart from the spelling of the state names
# ("California", "Colorado").
ggplot(nile_perc_st, aes(x = reorder(State, PercCases), y = PercCases)) +
  geom_col(fill = "#8CD71A") +
  geom_text(aes(label = PercCases), color = "blue") +
  coord_flip() +
  labs(
    x = "State",
    y = "Percent of westnile disease (%)",
    title = "California and Colorado top the list of states with highest infections of\n 13.6% and 10.9% respectively while Puerto Rico and Hawaii had the\n least infections of 0.002% approximately"
  ) +
  theme_bw()
Neuroinvasive cases Percent per State
# Horizontal bar chart of each state's share of neuroinvasive cases.
# States are ordered by their share and each bar is labelled with its percent.
# The title text is unchanged apart from the spelling of the state names
# ("California", "Alaska").
ggplot(neuro_perc_st, aes(x = reorder(State, PercCases), y = PercCases)) +
  geom_col(fill = "#8A8F80") +
  geom_text(aes(label = PercCases), color = "blue") +
  coord_flip() +
  labs(
    x = "State",
    y = "Percent of Positive cases (%)",
    title = "California and Texas top the list of states with highest Neuroinvasive\n westnile cases of 16.18% and 13.4% respectively while Puerto Rico,\n Alaska and Hawaii had the least infections of 0.004%, 0.004% and 0.0%\n approximately"
  ) +
  theme_bw()