This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.

plot(cars)

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Cmd+Option+I.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Cmd+Shift+K to preview the HTML file).

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.

#Resources

#Plans convert to date type make data skinny. mutate import county pop get per capita infection and death rates. get population density per sq mile. merge demographics : age, do all counties have same male/female ratio? income

#PACKAGES
library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
  method         from
  print.tbl_lazy     
  print.tbl_sql      
── Attaching packages ──────────────────────────── tidyverse 1.3.0 ──
✓ ggplot2 3.3.0     ✓ dplyr   0.8.5
✓ tibble  3.0.0     ✓ stringr 1.4.0
✓ tidyr   1.0.2     ✓ forcats 0.5.0
✓ purrr   0.3.3     
── Conflicts ─────────────────────────────── tidyverse_conflicts() ──
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()
library(ggplot2)
library(lubridate)

Attaching package: ‘lubridate’

The following object is masked from ‘package:base’:

    date
getwd()
[1] "/Users/joefoley/Covid/Covid"
NYS_Covid_Data <- read_csv("NYS_Statewide_COVID-19_Testing.csv")
Parsed with column specification:
cols(
  Test_Date = col_character(),
  County = col_character(),
  New_Positives = col_double(),
  Cumulative_Number_of_Positives = col_double(),
  Total_Number_of_Tests_Performed = col_double(),
  Cumulative_Number_of_Tests_Performed = col_double()
)
head(NYS_Covid_Data,3)
View(NYS_Covid_Data)
dim(NYS_Covid_Data)
[1] 2232    6
str(NYS_Covid_Data)
tibble [2,232 × 6] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
 $ Test_Date                           : chr [1:2232] "04/06/2020" "04/06/2020" "04/06/2020" "04/06/2020" ...
 $ County                              : chr [1:2232] "Albany" "Allegany" "Bronx" "Broome" ...
 $ New_Positives                       : num [1:2232] 14 0 1021 10 1 ...
 $ Cumulative_Number_of_Positives      : num [1:2232] 333 17 15348 86 13 ...
 $ Total_Number_of_Tests_Performed     : num [1:2232] 86 3 1996 47 14 ...
 $ Cumulative_Number_of_Tests_Performed: num [1:2232] 4330 199 29367 654 265 ...
 - attr(*, "spec")=
  .. cols(
  ..   Test_Date = col_character(),
  ..   County = col_character(),
  ..   New_Positives = col_double(),
  ..   Cumulative_Number_of_Positives = col_double(),
  ..   Total_Number_of_Tests_Performed = col_double(),
  ..   Cumulative_Number_of_Tests_Performed = col_double()
  .. )
#convert Test Date from character to date type
NYS_Covid_Data$Test_Date <- as.Date(NYS_Covid_Data$Test_Date , format= "%m/%d/%Y")
head(NYS_Covid_Data)

Add population data from the Census

county_pop <- read_csv("NY_counties.csv")
Parsed with column specification:
cols(
  .default = col_double(),
  SUMLEV = col_character(),
  COUNTY = col_character(),
  STNAME = col_character(),
  CTYNAME = col_character()
)
See spec(...) for full column specifications.
dim(county_pop)
[1] 12958    80

Since there is only a single row we are intersted in let us filter this down. Each county has many rows, age groups , but AGEGRP = 0 is the total population for that county

pop <- county_pop%>%
          filter(AGEGRP==0) %>%
              select(c(CTYNAME, TOT_POP, AGEGRP))
head(pop)
dim(pop)
[1] 682   3
head(pop,10)
pop <- pop%>%
          group_by(CTYNAME)%>%
            top_n(n=1 ,wt=TOT_POP)
head(pop)

Make col names uniform new col anme = exisitng col name

pop <- pop %>% 
          rename(County = CTYNAME)

Next remove the word COUNTY from the name of each county. use SEPARATE

pop<- pop%>%
          separate(County, c("County", NA))       #df %>% separate(x, c("a", NA))
Expected 2 pieces. Additional pieces discarded in 2 rows [31, 46].
head(pop)   
head(NYS_Covid_Data)

Next join the two datasets on the county name.

pop_and_count <- full_join(NYS_Covid_Data,pop, by ="County")
View(pop_and_count)
write_csv(pop_and_count,"Pop_And_Count.csv")

######################3

Lets add the days of the week to our Df

pop_and_count<- pop_and_count %>%
mutate(day_week = wday(Test_Date,label = TRUE))

Now let us look at every Monday

pop_and_count<-filter(pop_and_count, day_week == "Mon")
View(pop_and_count)

########3

pop1<- pop1 %>%
mutate(day_week = wday(Test_Date,label = TRUE))
View(pop1)
pop2<-filter(pop1, day_week == "Mon")
View(pop2)

#Next we will import 3 census tables: #Race & Hispanics to get ethnicity and Income to our Covide data.

#RACE

Race<- read_csv("Race.csv")
Parsed with column specification:
cols(
  County = col_character(),
  Total = col_character(),
  White = col_character(),
  African_American = col_character(),
  `American_Indian_and_ Alaska_Native` = col_character(),
  Asian = col_character(),
  Hawaiian_Pacific_Islander = col_character(),
  Other_race = col_character()
)
View(Race)

###Hispanics

Hispanic_Age_NYS<- read_csv("Hispanic_Age_NYS.csv")
Duplicated column names deduplicated: 'Female_5_to_9' => 'Female_5_to_9_1' [21], 'Female_10_to_14' => 'Female_10_to_14_1' [23], 'Female_75_to_84' => 'Female_75_to_84_1' [34]Parsed with column specification:
cols(
  .default = col_double(),
  County = col_character()
)
See spec(...) for full column specifications.

#INCOME

NYS_Income <- read_csv("Income_NYS.csv")
Parsed with column specification:
cols(
  .default = col_double(),
  County = col_character(),
  Married_couple_families_Mean_income_dollars = col_character()
)
See spec(...) for full column specifications.
View(NYS_Income)

Next join all of the tables: Covid+Race+Hispanics+Income

LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKVGhpcyBpcyBhbiBbUiBNYXJrZG93bl0oaHR0cDovL3JtYXJrZG93bi5yc3R1ZGlvLmNvbSkgTm90ZWJvb2suIFdoZW4geW91IGV4ZWN1dGUgY29kZSB3aXRoaW4gdGhlIG5vdGVib29rLCB0aGUgcmVzdWx0cyBhcHBlYXIgYmVuZWF0aCB0aGUgY29kZS4gCgpUcnkgZXhlY3V0aW5nIHRoaXMgY2h1bmsgYnkgY2xpY2tpbmcgdGhlICpSdW4qIGJ1dHRvbiB3aXRoaW4gdGhlIGNodW5rIG9yIGJ5IHBsYWNpbmcgeW91ciBjdXJzb3IgaW5zaWRlIGl0IGFuZCBwcmVzc2luZyAqQ21kK1NoaWZ0K0VudGVyKi4gCgpgYGB7cn0KcGxvdChjYXJzKQpgYGAKCkFkZCBhIG5ldyBjaHVuayBieSBjbGlja2luZyB0aGUgKkluc2VydCBDaHVuayogYnV0dG9uIG9uIHRoZSB0b29sYmFyIG9yIGJ5IHByZXNzaW5nICpDbWQrT3B0aW9uK0kqLgoKV2hlbiB5b3Ugc2F2ZSB0aGUgbm90ZWJvb2ssIGFuIEhUTUwgZmlsZSBjb250YWluaW5nIHRoZSBjb2RlIGFuZCBvdXRwdXQgd2lsbCBiZSBzYXZlZCBhbG9uZ3NpZGUgaXQgKGNsaWNrIHRoZSAqUHJldmlldyogYnV0dG9uIG9yIHByZXNzICpDbWQrU2hpZnQrSyogdG8gcHJldmlldyB0aGUgSFRNTCBmaWxlKS4gCgpUaGUgcHJldmlldyBzaG93cyB5b3UgYSByZW5kZXJlZCBIVE1MIGNvcHkgb2YgdGhlIGNvbnRlbnRzIG9mIHRoZSBlZGl0b3IuIENvbnNlcXVlbnRseSwgdW5saWtlICpLbml0KiwgKlByZXZpZXcqIGRvZXMgbm90IHJ1biBhbnkgUiBjb2RlIGNodW5rcy4gSW5zdGVhZCwgdGhlIG91dHB1dCBvZiB0aGUgY2h1bmsgd2hlbiBpdCB3YXMgbGFzdCBydW4gaW4gdGhlIGVkaXRvciBpcyBkaXNwbGF5ZWQuCgojUmVzb3VyY2VzCgojUGxhbnMKY29udmVydCB0byBkYXRlIHR5cGUKbWFrZSBkYXRhIHNraW5ueS4KbXV0YXRlIGltcG9ydCBjb3VudHkgcG9wCmdldCBwZXIgY2FwaXRhIGluZmVjdGlvbiBhbmQgZGVhdGggcmF0ZXMuCmdldCBwb3B1bGF0aW9uIGRlbnNpdHkgcGVyIHNxIG1pbGUuCm1lcmdlIGRlbW9ncmFwaGljcyA6IGFnZSwgZG8gYWxsIGNvdW50aWVzIGhhdmUgc2FtZSBtYWxlL2ZlbWFsZSByYXRpbz8gIGluY29tZQoKYGBge3J9CiNQQUNLQUdFUwpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KGx1YnJpZGF0ZSkKYGBgCgpgYGB7cn0KZ2V0d2QoKQpgYGAKCmBgYHtyfQpOWVNfQ292aWRfRGF0YSA8LSByZWFkX2NzdigiTllTX1N0YXRld2lkZV9DT1ZJRC0xOV9UZXN0aW5nLmNzdiIpCmBgYAoKYGBge3J9CmhlYWQoTllTX0NvdmlkX0RhdGEsMykKYGBgCgoKYGBge3J9ClZpZXcoTllTX0NvdmlkX0RhdGEpCmBgYAoKYGBge3J9CmRpbShOWVNfQ292aWRfRGF0YSkKYGBgCgoKYGBge3J9CnN0cihOWVNfQ292aWRfRGF0YSkKYGBgCgpgYGB7cn0KI2NvbnZlcnQgVGVzdCBEYXRlIGZyb20gY2hhcmFjdGVyIHRvIGRhdGUgdHlwZQpOWVNfQ292aWRfRGF0YSRUZXN0X0RhdGUgPC0gYXMuRGF0ZShOWVNfQ292aWRfRGF0YSRUZXN0X0RhdGUgLCBmb3JtYXQ9ICIlbS8lZC8lWSIpCgpgYGAKCmBgYHtyfQpoZWFkKE5ZU19Db3ZpZF9EYXRhKQpgYGAKCkFkZCBwb3B1bGF0aW9uIGRhdGEgZnJvbSB0aGUgQ2Vuc3VzCmBgYHtyfQpjb3VudHlfcG9wIDwtIHJlYWRfY3N2KCJOWV9jb3VudGllcy5jc3YiKQpgYGAKCgpgYGB7cn0KZGltKGNvdW50eV9wb3ApCmBgYAoKU2luY2UgdGhlcmUgaXMgb25seSBhIHNpbmdsZSByb3cgd2UgYXJlIGludGVyc3RlZCBpbiBsZXQgdXMgZmlsdGVyIHRoaXMgZG93bi4KRWFjaCBjb3VudHkgaGFzIG1hbnkgcm93cywgYWdlIGdyb3VwcyAsIGJ1dCAgQUdFR1JQID0gMCBpcwp0aGUgdG90YWwgcG9wdWxhdGlvbiBmb3IgdGhhdCBjb3VudHkKYGBge3J9CnBvcCA8LSBjb3VudHlfcG9wJT4lCiAgICAgICAgICBmaWx0ZXIoQUdFR1JQPT0wKSAlPiUKICAgICAgICAgICAgICBzZWxlY3QoYyhDVFlOQU1FLCBUT1RfUE9QLCBBR0VHUlApKQpgYGAKCmBgYHtyfQpoZWFkKHBvcCkKYGBgCmBgYHtyfQpkaW0ocG9wKQpgYGAKCmBgYHtyfQpoZWFkKHBvcCwxMCkKYGBgCgpgYGB7cn0KcG9wIDwtIHBvcCU+JQogICAgICAgICAgZ3JvdXBfYnkoQ1RZTkFNRSklPiUKICAgICAgICAgICAgdG9wX24obj0xICx3dD1UT1RfUE9QKQpgYGAKCgpgYGB7cn0KaGVhZChwb3ApCmBgYAoKTWFrZSBjb2wgbmFtZXMgdW5pZm9ybQpuZXcgY29sIGFubWUgPSBleGlzaXRuZyBjb2wgbmFtZQpgYGB7cn0KcG9wIDwtIHBvcCAlPiUgCiAgICAgICAgICByZW5hbWUoQ291bnR5ID0gQ1RZTkFNRSkKYGBgIAoKYGBge3J9CmhlYWQocG9wLDMpCmBgYAoKTmV4dCByZW1vdmUgdGhlIHdvcmQgQ09VTlRZIGZyb20gdGhlIG5hbWUgb2YgZWFjaCBjb3VudHkuCnVzZSBTRVBBUkFURQpgYGB7cn0KcG9wPC0gcG9wJT4lCiAgICAgICAgICBzZXBhcmF0ZShDb3VudHksIGMoIkNvdW50eSIsIE5BKSkgICAgICAgI2RmICU+JSBzZXBhcmF0ZSh4LCBjKCJhIiwgTkEpKQpgYGAKCgpgYGB7cn0KaGVhZChwb3ApICAgCmBgYAoKYGBge3J9CmhlYWQoTllTX0NvdmlkX0RhdGEpCmBgYAoKCk5leHQgam9pbiB0aGUgdHdvIGRhdGFzZXRzIG9uIHRoZSBjb3VudHkgbmFtZS4KYGBge3J9CnBvcF9hbmRfY291bnQgPC0gZnVsbF9qb2luKE5ZU19Db3ZpZF9EYXRhLHBvcCwgYnkgPSJDb3VudHkiKQpgYGAKCmBgYHtyfQpWaWV3KHBvcF9hbmRfY291bnQpCmBgYAoKYGBge3J9CndyaXRlX2Nzdihwb3BfYW5kX2NvdW50LCJQb3BfQW5kX0NvdW50LmNzdiIpCmBgYAoKCiMjIyMjIyMjIyMjIyMjIyMjIyMjIyMzCgpMZXRzIGFkZCB0aGUgZGF5cyBvZiB0aGUgd2VlayB0byBvdXIgRGYKYGBge3J9CnBvcF9hbmRfY291bnQ8LSBwb3BfYW5kX2NvdW50ICU+JQptdXRhdGUoZGF5X3dlZWsgPSB3ZGF5KFRlc3RfRGF0ZSxsYWJlbCA9IFRSVUUpKQpgYGAKCk5vdyBsZXQgdXMgbG9vayBhdCBldmVyeSBNb25kYXkKYGBge3J9CnBvcF9hbmRfY291bnQ8LWZpbHRlcihwb3BfYW5kX2NvdW50LCBkYXlfd2VlayA9PSAiTW9uIikKYGBgCgoKYGBge3J9ClZpZXcocG9wX2FuZF9jb3VudCkKYGBgCgojIyMjIyMjIzMKCgoKYGBge3J9CnBvcDE8LSBwb3AxICU+JQptdXRhdGUoZGF5X3dlZWsgPSB3ZGF5KFRlc3RfRGF0ZSxsYWJlbCA9IFRSVUUpKQpgYGAKCmBgYHtyfQpWaWV3KHBvcDEpCmBgYAoKYGBge3J9CnBvcDI8LWZpbHRlcihwb3AxLCBkYXlfd2VlayA9PSAiTW9uIikKYGBgCgpgYGB7cn0KVmlldyhwb3AyKQpgYGAKCiNOZXh0IHdlIHdpbGwgaW1wb3J0IDMgY2Vuc3VzIHRhYmxlczoKI1JhY2UgJiBIaXNwYW5pY3MgdG8gZ2V0IGV0aG5pY2l0eSBhbmQgSW5jb21lIHRvIG91ciBDb3ZpZGUgZGF0YS4KCiNSQUNFCmBgYHtyfQpSYWNlPC0gcmVhZF9jc3YoIlJhY2UuY3N2IikKYGBgCgpgYGB7cn0KVmlldyhSYWNlKQpgYGAKCiMjI0hpc3BhbmljcwpgYGB7cn0KSGlzcGFuaWNfQWdlX05ZUzwtIHJlYWRfY3N2KCJIaXNwYW5pY19BZ2VfTllTLmNzdiIpCmBgYAoKYGBge3J9ClZpZXcoSGlzcGFuaWNfQWdlX05ZUykKYGBgCgojSU5DT01FCmBgYHtyfQpOWVNfSW5jb21lIDwtIHJlYWRfY3N2KCJJbmNvbWVfTllTLmNzdiIpCmBgYAoKYGBge3J9ClZpZXcoTllTX0luY29tZSkKYGBgCgojIyMgTmV4dCBqb2luIGFsbCBvZiB0aGUgdGFibGVzOiBDb3ZpZCtSYWNlK0hpc3BhbmljcytJbmNvbWUKCgoKCgoKCgo=