covid_url<- "https://raw.githubusercontent.com/jayleecunysps/AssignmentforSPS/main/United_States_COVID-19_Cases_and_Deaths_by_State_over_Time.csv"
covid_url <-read.csv(covid_url)
rawcovid <- covid_url
covid_url[is.na(covid_url)] = 0
covid_url$submission_date <- as.Date(covid_url$submission_date, "%m/%d/%Y")
covid_url$submission_Year<-year(covid_url$submission_date)
covid_url$submission_Year <-as.character(covid_url$submission_Year)
covid_url$tot_death <-as.numeric(covid_url$tot_death)
covid_url$tot_cases <-as.numeric(covid_url$tot_cases)
statedeath <- aggregate(tot_death ~ state, covid_url, sum)
statecase <- aggregate(tot_cases ~ state, covid_url, sum)
covid_url2 <- covid_url %>%
select("submission_Year","state","tot_cases","tot_death")
newdata2020 <- subset(covid_url2, submission_Year == "2020",
select=c(submission_Year, state, tot_cases,tot_death))
statedeath2020 <- aggregate(tot_death ~ state, newdata2020, sum)
statecase2020 <- aggregate(tot_cases ~ state, newdata2020, sum)
newdata2021 <- subset(covid_url2, submission_Year == "2021",
select=c(submission_Year, state, tot_cases,tot_death))
statedeath2021 <- aggregate(tot_death ~ state, newdata2021, sum)
statecase2021 <- aggregate(tot_cases ~ state, newdata2021, sum)
Find the state with the highest and lowest deaths. Although NYC is not a state, it is still representative since New York City has more people than 40 U.S. states. New York City comprises over two-fifths of New York State’s entire population.
We can see NYC and NJ has the highest death rate, and both NYC and NJ has the highest population density. I think this is the major reason of high death rate. NYC/NYS and NJ may need to reconsider the budget of healthcare in upcoming years.
The lowest death rate is UT excluding the small islands.
https://worldpopulationreview.com/state-rankings/state-densities
Compare the death rate both before and after the vaccine was released.
2022 was excluded since the death rate of Omicron is low as well as case is dropping in 2022.
I compare 2020 and 2021 because vaccine released in late 2020 and early 2021. it takes time to let people get vaccinated so 2020 vs 2021 is fair enough to show the result.
The mean of the death rate drop from 2.5 to 1.5 after vaccine released in late 2020 and early 2021.At the same time, median drop from 2.048 to 1.537
some additional math:
NYC death rate drop 62% while vaccination percentage is 77.2% FL death rate drop 25% while vaccination percentage is 66.2%
#ans 1
total <- merge(statedeath,statecase,by="state")
total$deathrate <- total$tot_death/total$tot_cases*100
total <- total[order(total$deathrate, decreasing = TRUE), ]
head(total,10)
## state tot_death tot_cases deathrate
## 40 NYC 20499031 561724705 3.649302
## 36 NJ 15396408 572248244 2.690512
## 23 MA 10238244 407472441 2.512622
## 8 CT 4787009 190925550 2.507265
## 44 PA 14816813 683591905 2.167494
## 30 MS 4433196 208495430 2.126280
## 22 LA 6496628 315633987 2.058279
## 9 DC 638435 31393330 2.033664
## 24 MD 5383676 266423255 2.020723
## 26 MI 11681656 585973614 1.993546
tail(total,10)
## state tot_death tot_cases deathrate
## 34 NE 1268761 129967236 0.9762160
## 55 VI 27131 2907817 0.9330367
## 56 VT 151036 16296859 0.9267798
## 1 AK 262671 47773253 0.5498286
## 53 UT 1310876 241175659 0.5435358
## 29 MP 3123 592583 0.5270148
## 46 PW 150 141591 0.1059389
## 4 AS 0 5225 0.0000000
## 12 FSM 0 1898 0.0000000
## 48 RMI 0 2066 0.0000000
#ans 2
total2020 <- merge(statedeath2020,statecase2020,by="state")
total2021 <- merge(statedeath2021,statecase2021,by="state")
total2020$deathrate <- total2020$tot_death/total2020$tot_cases*100
total2021$deathrate <- total2021$tot_death/total2021$tot_cases*100
total2020$deathrate <- str_remove_all(total2020$deathrate,"Inf")
total2020$deathrate <-as.numeric(total2020$deathrate)
total2020[is.na(total2020)] = 0
total2021[is.na(total2021)] = 0
total2020
## state tot_death tot_cases deathrate
## 1 AK 20473 2717469 0.7533849
## 2 AL 790736 32227193 2.4536298
## 3 AR 285708 18405550 1.5522927
## 4 AS 0 159 0.0000000
## 5 AZ 1047749 47085893 2.2251866
## 6 CA 3015768 171639237 1.7570388
## 7 CO 539308 24249202 2.2240237
## 8 CT 1119855 17213832 6.5055532
## 9 DC 146404 3644833 4.0167547
## 10 DE 146126 5272333 2.7715624
## 11 FL 2960021 136182541 2.1735686
## 12 FSM 0 0 0.0000000
## 13 GA 1394107 65939590 2.1142185
## 14 GU 10133 615585 1.6460765
## 15 HI 27310 2113991 1.2918693
## 16 IA 334206 23168716 1.4424882
## 17 ID 108598 10555689 1.0288102
## 18 IL 2206778 81900195 2.6944722
## 19 IN 915492 35489288 2.5796291
## 20 KS 186330 15658968 1.1899252
## 21 KY 387667 19250542 2.0137978
## 22 LA 1184366 35105987 3.3736867
## 23 MA 2265434 36800376 6.1560078
## 24 MD 949159 29041682 3.2682646
## 25 ME 35909 1649297 2.1772307
## 26 MI 1923045 61428107 3.1305620
## 27 MN 532365 29929745 1.7787155
## 28 MO 623276 33830563 1.8423459
## 29 MP 541 15419 3.5086581
## 30 MS 687046 21566880 3.1856532
## 31 MT 58808 5010619 1.1736674
## 32 NC 802694 46550483 1.7243516
## 33 ND 87243 6765011 1.2896210
## 34 NE 130184 13131749 0.9913683
## 35 NH 102974 2724523 3.7795240
## 36 NJ 3817302 59506997 6.4148792
## 37 NM 207114 9585260 2.1607552
## 38 NV 323089 18263502 1.7690419
## 39 NY 2301126 60254386 3.8190183
## 40 NYC 6028173 66532310 9.0605196
## 41 OH 1062208 44872380 2.3671755
## 42 OK 345324 22962642 1.5038513
## 43 OR 127845 8340435 1.5328337
## 44 PA 1983222 47572446 4.1688460
## 45 PR 133473 5671330 2.3534691
## 46 PW 0 0 0.0000000
## 47 RI 262146 7015594 3.7366187
## 48 RMI 0 207 0.0000000
## 49 SC 614010 29712384 2.0665121
## 50 SD 86844 7219654 1.2028831
## 51 TN 538831 43830162 1.2293612
## 52 TX 3281869 157034957 2.0898971
## 53 UT 111526 20060636 0.5559445
## 54 VA 673734 33192580 2.0297729
## 55 VI 3509 217062 1.6165888
## 56 VT 16595 523941 3.1673414
## 57 WA 487233 21289735 2.2885818
## 58 WI 410066 37967887 1.0800338
## 59 WV 84123 4681178 1.7970477
## 60 WY 20735 2591294 0.8001794
total2021
## state tot_death tot_cases deathrate
## 1 AK 169476 30977973 0.5470855
## 2 AL 4582061 227596967 2.0132346
## 3 AR 2367676 144793479 1.6352090
## 4 AS 0 871 0.0000000
## 5 AZ 6589996 350452837 1.8804231
## 6 CA 22226510 1472475446 1.5094656
## 7 CO 2561972 213838444 1.1980877
## 8 CT 2976674 126931906 2.3450952
## 9 DC 403386 18974952 2.1258868
## 10 DE 626854 41717301 1.5026236
## 11 FL 15894059 974996694 1.6301654
## 12 FSM 0 1415 0.0000000
## 13 GA 8059841 457894070 1.7601977
## 14 GU 61179 4058437 1.5074523
## 15 HI 221997 18060015 1.2292182
## 16 IA 2238773 146421723 1.5289897
## 17 ID 902069 79242692 1.1383624
## 18 IL 9307643 526709410 1.7671306
## 19 IN 5161081 298637568 1.7282089
## 20 KS 1941543 128162938 1.5149021
## 21 KY 3163319 196796292 1.6074078
## 22 LA 4217664 205929048 2.0481151
## 23 MA 6443316 263687704 2.4435406
## 24 MD 3509225 171849572 2.0420330
## 25 ME 329793 27366796 1.2050844
## 26 MI 7514300 373501215 2.0118542
## 27 MN 2821300 233228156 1.2096738
## 28 MO 3762293 250277309 1.5032497
## 29 MP 942 135958 0.6928610
## 30 MS 2968984 137482900 2.1595297
## 31 MT 652286 46807290 1.3935564
## 32 NC 5292601 404340310 1.3089472
## 33 ND 575847 43760496 1.3159060
## 34 NE 881050 87306429 1.0091468
## 35 NH 499852 38617785 1.2943570
## 36 NJ 9407178 370909011 2.5362495
## 37 NM 1565267 82114798 1.9061936
## 38 NV 2202484 131314441 1.6772595
## 39 NY 7172948 431719701 1.6614827
## 40 NYC 11828547 345747620 3.4211507
## 41 OH 7330858 442495546 1.6567077
## 42 OK 2954628 186913216 1.5807486
## 43 OR 1147500 88800645 1.2922203
## 44 PA 10017116 457469178 2.1896811
## 45 PR 938651 53292640 1.7613145
## 46 PW 0 1272 0.0000000
## 47 RI 973993 53646552 1.8155743
## 48 RMI 0 1460 0.0000000
## 49 SC 3804873 244000977 1.5593679
## 50 SD 743898 47906702 1.5528057
## 51 TN 4829336 354564281 1.3620481
## 52 TX 20007291 1191617013 1.6790035
## 53 UT 911564 162672795 0.5603666
## 54 VA 4089312 264698353 1.5448951
## 55 VI 16684 1699734 0.9815654
## 56 VT 98731 9415192 1.0486350
## 57 WA 2341020 184524785 1.2686751
## 58 WI 2973152 267338308 1.1121309
## 59 WV 1154412 68568419 1.6835914
## 60 WY 316182 26502914 1.1930084
NYCdeathchange <- (3.4211507-9.0605196)/9.0605196*100
FLdeathchange <-(1.6301654-2.1735686)/2.1735686*100
NYCdeathchange
## [1] -62.24112
FLdeathchange
## [1] -25.00051
summary(total2020)
## state tot_death tot_cases deathrate
## Length:60 Min. : 0 Min. : 0 Min. :0.000
## Class :character 1st Qu.: 87143 1st Qu.: 4928259 1st Qu.:1.291
## Mode :character Median : 339765 Median : 19655589 Median :2.048
## Mean : 799098 Mean : 29096337 Mean :2.310
## 3rd Qu.: 973806 3rd Qu.: 37092254 3rd Qu.:2.861
## Max. :6028173 Max. :171639237 Max. :9.061
summary(total2021)
## state tot_death tot_cases deathrate
## Length:60 Min. : 0 Min. :8.710e+02 Min. :0.000
## Class :character 1st Qu.: 614102 1st Qu.:4.325e+07 1st Qu.:1.209
## Mode :character Median : 2354348 Median :1.456e+08 Median :1.537
## Mean : 3762520 Mean :2.202e+08 Mean :1.481
## 3rd Qu.: 4912272 3rd Qu.:2.752e+08 3rd Qu.:1.763
## Max. :22226510 Max. :1.472e+09 Max. :3.421