Upload the file to Github and read the file from Github

covid_url<- "https://raw.githubusercontent.com/jayleecunysps/AssignmentforSPS/main/United_States_COVID-19_Cases_and_Deaths_by_State_over_Time.csv"
covid_url <-read.csv(covid_url)
rawcovid <- covid_url

Clean, select and join the data

covid_url[is.na(covid_url)] = 0
covid_url$submission_date <- as.Date(covid_url$submission_date, "%m/%d/%Y")
covid_url$submission_Year<-year(covid_url$submission_date)
covid_url$submission_Year <-as.character(covid_url$submission_Year)
covid_url$tot_death <-as.numeric(covid_url$tot_death)
covid_url$tot_cases <-as.numeric(covid_url$tot_cases)
statedeath <- aggregate(tot_death ~ state, covid_url, sum)
statecase  <- aggregate(tot_cases ~ state, covid_url, sum)

covid_url2 <- covid_url %>%  
select("submission_Year","state","tot_cases","tot_death")

newdata2020 <- subset(covid_url2, submission_Year == "2020",
select=c(submission_Year, state, tot_cases,tot_death))

statedeath2020 <- aggregate(tot_death ~ state, newdata2020, sum)
statecase2020  <- aggregate(tot_cases ~ state, newdata2020, sum)

newdata2021 <- subset(covid_url2, submission_Year == "2021",
select=c(submission_Year, state, tot_cases,tot_death))

statedeath2021 <- aggregate(tot_death ~ state, newdata2021, sum)
statecase2021  <- aggregate(tot_cases ~ state, newdata2021, sum)

Answer of the Moiya Josephs’s suggested analysis

Find the state with the highest and lowest deaths. Although NYC is not a state, it is still representative since New York City has more people than 40 U.S. states. New York City comprises over two-fifths of New York State’s entire population.

We can see NYC and NJ has the highest death rate, and both NYC and NJ has the highest population density. I think this is the major reason of high death rate. NYC/NYS and NJ may need to reconsider the budget of healthcare in upcoming years.

The lowest death rate is UT excluding the small islands.

https://worldpopulationreview.com/state-rankings/state-densities

Compare the death rate both before and after the vaccine was released.

2022 was excluded since the death rate of Omicron is low as well as case is dropping in 2022.

https://centerforneurologyandspine.com/what-is-the-death-rate-of-omicron-its-way-less-than-you-think/

I compare 2020 and 2021 because vaccine released in late 2020 and early 2021. it takes time to let people get vaccinated so 2020 vs 2021 is fair enough to show the result.

The mean of the death rate drop from 2.5 to 1.5 after vaccine released in late 2020 and early 2021.At the same time, median drop from 2.048 to 1.537

some additional math:

NYC death rate drop 62% while vaccination percentage is 77.2% FL death rate drop 25% while vaccination percentage is 66.2%

#ans 1
total <- merge(statedeath,statecase,by="state")  
total$deathrate <- total$tot_death/total$tot_cases*100
total <- total[order(total$deathrate, decreasing = TRUE), ]

head(total,10)
##    state tot_death tot_cases deathrate
## 40   NYC  20499031 561724705  3.649302
## 36    NJ  15396408 572248244  2.690512
## 23    MA  10238244 407472441  2.512622
## 8     CT   4787009 190925550  2.507265
## 44    PA  14816813 683591905  2.167494
## 30    MS   4433196 208495430  2.126280
## 22    LA   6496628 315633987  2.058279
## 9     DC    638435  31393330  2.033664
## 24    MD   5383676 266423255  2.020723
## 26    MI  11681656 585973614  1.993546
tail(total,10)
##    state tot_death tot_cases deathrate
## 34    NE   1268761 129967236 0.9762160
## 55    VI     27131   2907817 0.9330367
## 56    VT    151036  16296859 0.9267798
## 1     AK    262671  47773253 0.5498286
## 53    UT   1310876 241175659 0.5435358
## 29    MP      3123    592583 0.5270148
## 46    PW       150    141591 0.1059389
## 4     AS         0      5225 0.0000000
## 12   FSM         0      1898 0.0000000
## 48   RMI         0      2066 0.0000000
#ans 2

total2020 <- merge(statedeath2020,statecase2020,by="state") 
total2021 <- merge(statedeath2021,statecase2021,by="state") 

total2020$deathrate <- total2020$tot_death/total2020$tot_cases*100
total2021$deathrate <- total2021$tot_death/total2021$tot_cases*100

total2020$deathrate <- str_remove_all(total2020$deathrate,"Inf")
total2020$deathrate <-as.numeric(total2020$deathrate)

total2020[is.na(total2020)] = 0
total2021[is.na(total2021)] = 0

total2020
##    state tot_death tot_cases deathrate
## 1     AK     20473   2717469 0.7533849
## 2     AL    790736  32227193 2.4536298
## 3     AR    285708  18405550 1.5522927
## 4     AS         0       159 0.0000000
## 5     AZ   1047749  47085893 2.2251866
## 6     CA   3015768 171639237 1.7570388
## 7     CO    539308  24249202 2.2240237
## 8     CT   1119855  17213832 6.5055532
## 9     DC    146404   3644833 4.0167547
## 10    DE    146126   5272333 2.7715624
## 11    FL   2960021 136182541 2.1735686
## 12   FSM         0         0 0.0000000
## 13    GA   1394107  65939590 2.1142185
## 14    GU     10133    615585 1.6460765
## 15    HI     27310   2113991 1.2918693
## 16    IA    334206  23168716 1.4424882
## 17    ID    108598  10555689 1.0288102
## 18    IL   2206778  81900195 2.6944722
## 19    IN    915492  35489288 2.5796291
## 20    KS    186330  15658968 1.1899252
## 21    KY    387667  19250542 2.0137978
## 22    LA   1184366  35105987 3.3736867
## 23    MA   2265434  36800376 6.1560078
## 24    MD    949159  29041682 3.2682646
## 25    ME     35909   1649297 2.1772307
## 26    MI   1923045  61428107 3.1305620
## 27    MN    532365  29929745 1.7787155
## 28    MO    623276  33830563 1.8423459
## 29    MP       541     15419 3.5086581
## 30    MS    687046  21566880 3.1856532
## 31    MT     58808   5010619 1.1736674
## 32    NC    802694  46550483 1.7243516
## 33    ND     87243   6765011 1.2896210
## 34    NE    130184  13131749 0.9913683
## 35    NH    102974   2724523 3.7795240
## 36    NJ   3817302  59506997 6.4148792
## 37    NM    207114   9585260 2.1607552
## 38    NV    323089  18263502 1.7690419
## 39    NY   2301126  60254386 3.8190183
## 40   NYC   6028173  66532310 9.0605196
## 41    OH   1062208  44872380 2.3671755
## 42    OK    345324  22962642 1.5038513
## 43    OR    127845   8340435 1.5328337
## 44    PA   1983222  47572446 4.1688460
## 45    PR    133473   5671330 2.3534691
## 46    PW         0         0 0.0000000
## 47    RI    262146   7015594 3.7366187
## 48   RMI         0       207 0.0000000
## 49    SC    614010  29712384 2.0665121
## 50    SD     86844   7219654 1.2028831
## 51    TN    538831  43830162 1.2293612
## 52    TX   3281869 157034957 2.0898971
## 53    UT    111526  20060636 0.5559445
## 54    VA    673734  33192580 2.0297729
## 55    VI      3509    217062 1.6165888
## 56    VT     16595    523941 3.1673414
## 57    WA    487233  21289735 2.2885818
## 58    WI    410066  37967887 1.0800338
## 59    WV     84123   4681178 1.7970477
## 60    WY     20735   2591294 0.8001794
total2021
##    state tot_death  tot_cases deathrate
## 1     AK    169476   30977973 0.5470855
## 2     AL   4582061  227596967 2.0132346
## 3     AR   2367676  144793479 1.6352090
## 4     AS         0        871 0.0000000
## 5     AZ   6589996  350452837 1.8804231
## 6     CA  22226510 1472475446 1.5094656
## 7     CO   2561972  213838444 1.1980877
## 8     CT   2976674  126931906 2.3450952
## 9     DC    403386   18974952 2.1258868
## 10    DE    626854   41717301 1.5026236
## 11    FL  15894059  974996694 1.6301654
## 12   FSM         0       1415 0.0000000
## 13    GA   8059841  457894070 1.7601977
## 14    GU     61179    4058437 1.5074523
## 15    HI    221997   18060015 1.2292182
## 16    IA   2238773  146421723 1.5289897
## 17    ID    902069   79242692 1.1383624
## 18    IL   9307643  526709410 1.7671306
## 19    IN   5161081  298637568 1.7282089
## 20    KS   1941543  128162938 1.5149021
## 21    KY   3163319  196796292 1.6074078
## 22    LA   4217664  205929048 2.0481151
## 23    MA   6443316  263687704 2.4435406
## 24    MD   3509225  171849572 2.0420330
## 25    ME    329793   27366796 1.2050844
## 26    MI   7514300  373501215 2.0118542
## 27    MN   2821300  233228156 1.2096738
## 28    MO   3762293  250277309 1.5032497
## 29    MP       942     135958 0.6928610
## 30    MS   2968984  137482900 2.1595297
## 31    MT    652286   46807290 1.3935564
## 32    NC   5292601  404340310 1.3089472
## 33    ND    575847   43760496 1.3159060
## 34    NE    881050   87306429 1.0091468
## 35    NH    499852   38617785 1.2943570
## 36    NJ   9407178  370909011 2.5362495
## 37    NM   1565267   82114798 1.9061936
## 38    NV   2202484  131314441 1.6772595
## 39    NY   7172948  431719701 1.6614827
## 40   NYC  11828547  345747620 3.4211507
## 41    OH   7330858  442495546 1.6567077
## 42    OK   2954628  186913216 1.5807486
## 43    OR   1147500   88800645 1.2922203
## 44    PA  10017116  457469178 2.1896811
## 45    PR    938651   53292640 1.7613145
## 46    PW         0       1272 0.0000000
## 47    RI    973993   53646552 1.8155743
## 48   RMI         0       1460 0.0000000
## 49    SC   3804873  244000977 1.5593679
## 50    SD    743898   47906702 1.5528057
## 51    TN   4829336  354564281 1.3620481
## 52    TX  20007291 1191617013 1.6790035
## 53    UT    911564  162672795 0.5603666
## 54    VA   4089312  264698353 1.5448951
## 55    VI     16684    1699734 0.9815654
## 56    VT     98731    9415192 1.0486350
## 57    WA   2341020  184524785 1.2686751
## 58    WI   2973152  267338308 1.1121309
## 59    WV   1154412   68568419 1.6835914
## 60    WY    316182   26502914 1.1930084
NYCdeathchange <- (3.4211507-9.0605196)/9.0605196*100
FLdeathchange <-(1.6301654-2.1735686)/2.1735686*100

NYCdeathchange
## [1] -62.24112
FLdeathchange
## [1] -25.00051
summary(total2020)
##     state             tot_death         tot_cases           deathrate    
##  Length:60          Min.   :      0   Min.   :        0   Min.   :0.000  
##  Class :character   1st Qu.:  87143   1st Qu.:  4928259   1st Qu.:1.291  
##  Mode  :character   Median : 339765   Median : 19655589   Median :2.048  
##                     Mean   : 799098   Mean   : 29096337   Mean   :2.310  
##                     3rd Qu.: 973806   3rd Qu.: 37092254   3rd Qu.:2.861  
##                     Max.   :6028173   Max.   :171639237   Max.   :9.061
summary(total2021)
##     state             tot_death          tot_cases           deathrate    
##  Length:60          Min.   :       0   Min.   :8.710e+02   Min.   :0.000  
##  Class :character   1st Qu.:  614102   1st Qu.:4.325e+07   1st Qu.:1.209  
##  Mode  :character   Median : 2354348   Median :1.456e+08   Median :1.537  
##                     Mean   : 3762520   Mean   :2.202e+08   Mean   :1.481  
##                     3rd Qu.: 4912272   3rd Qu.:2.752e+08   3rd Qu.:1.763  
##                     Max.   :22226510   Max.   :1.472e+09   Max.   :3.421