library(ggplot2)
library(tidyr)
library(dplyr)
library(readxl)
library(stringr)
#cleaning and isolating data from food insecurity dataset

  #isolate ny in the data
fi_bystate = food_insecurity_df %>% filter(State=='NY')

  #isolate 2021 in the data
fi_bystate_2021 = fi_bystate %>% filter(Year == 2021)

#colnames(fi_bystate_2021)

  #split county, state into seperate columsn to isolate county
  #https://www.geeksforgeeks.org/how-to-split-column-into-multiple-columns-in-r-dataframe/
fi_bystate_2021[c('County', 'State')] <- str_split_fixed(fi_bystate_2021$`County, State`, ',', 2)

  #drop unnecessary columns
  #https://stackoverflow.com/questions/4605206/drop-data-frame-columns-by-name
drops = c("FIPS", "State", "County, State", "SNAP Threshold", "% FI ≤ SNAP Threshold", 
                   "% FI > SNAP Threshold" )
fi_bystate_2021 = fi_bystate_2021[ , !(names(fi_bystate_2021) %in% drops)]

  #drop "county" from county identifier
fi_bystate_2021[c('County', 'drop')] <- str_split_fixed(fi_bystate_2021$County, ' ', 2)
fi_bycounty_2021 = fi_bystate_2021[ , !(names(fi_bystate_2021) %in% c("drop", "Year"))]

  #rearrange columns so county is is the first column
fi_bycounty_2021 = fi_bycounty_2021 %>% select("County", everything())

  #rename 2 columns
fi_bycounty_2021$County[31] <- "New York"
fi_bycounty_2021$County[45] <- "St. Lawrence"

  #fix variable types
#str(fi_bycounty_2021)

fi_bycounty_2021$`Food Insecurity Rate among Black Persons (all ethnicities)` = as.numeric(fi_bycounty_2021$`Food Insecurity Rate among Black Persons (all ethnicities)`)
Warning: NAs introduced by coercion
fi_bycounty_2021$`Food Insecurity Rate among Hispanic Persons (any race)` = as.numeric(fi_bycounty_2021$`Food Insecurity Rate among Hispanic Persons (any race)`)
Warning: NAs introduced by coercion
fi_bycounty_2021$`Food Insecurity Rate among White, non-Hispanic Persons` = as.numeric(fi_bycounty_2021$`Food Insecurity Rate among White, non-Hispanic Persons`)

#str(fi_bycounty_2021)

  #cleaned and adjusted dataset before joining
fi_bycounty_2021
NA
NA
#splitting health issues dataframe into county and region dataframes
health_issues_df["County"]

  #identify columns that seperate the two
regions = c("REGION: Capital Region", "REGION: Central NY", "REGION: Finger Lakes", "REGION: Long Island", "REGION: Mid-Hudson", "REGION: Mohawk Valley", "REGION: New York City", "REGION: North Country", "REGION: Southern Tier", "REGION: Tug Hill Seaway", "REGION: Western NY", "New York City", 
"NYS Exclusive of NYC", "Statewide")

hi_bycounty = health_issues_df[!health_issues_df$County %in% regions, ]
hi_byregion = health_issues_df[health_issues_df$County %in% regions, ]

  #divide all values by 100 so they appear as decimals
hi_columns = colnames(hi_bycounty)
hi_columns <- hi_columns[-1]

hi_bycounty[hi_columns] <- hi_bycounty[hi_columns] / 100


  #cleaned and adjusted dtaset before joining
hi_bycounty
#joining the datasets

  #initial test
df_merged <- merge(x = hi_bycounty, y = fi_bycounty_2021, by = "County", all = TRUE)

  #realized that "Statewide" was in the hi data but not fi data
    #went back to the hi cleaning data, removed "Statewide from the hi_bycounty, and moved it to     hi_byregion

df_merged

```

LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3J9CmxpYnJhcnkoZ2dwbG90MikKbGlicmFyeSh0aWR5cikKbGlicmFyeShkcGx5cikKbGlicmFyeShyZWFkeGwpCmxpYnJhcnkoc3RyaW5ncikKYGBgCgoKYGBge3J9CmhlYWx0aF9pc3N1ZXNfZGY8LXJlYWRfeGxzeCgiSGVhbHRoX0lzc3Vlc19OWV8yMDIxLnhsc3giLCByYW5nZT0gIkE0Okc4MCIpCmZvb2RfaW5zZWN1cml0eV9kZjwtcmVhZF94bHN4KCJDb3B5IG9mIEZvb2RTZWN1cml0eS54bHN4IikKCmZvb2RfaW5zZWN1cml0eV9kZgpgYGAKYGBge3J9CiNjbGVhbmluZyBhbmQgaXNvbGF0aW5nIGRhdGEgZnJvbSBmb29kIGluc2VjdXJpdHkgZGF0YXNldAoKICAjaXNvbGF0ZSBueSBpbiB0aGUgZGF0YQpmaV9ieXN0YXRlID0gZm9vZF9pbnNlY3VyaXR5X2RmICU+JSBmaWx0ZXIoU3RhdGU9PSdOWScpCgogICNpc29sYXRlIDIwMjEgaW4gdGhlIGRhdGEKZmlfYnlzdGF0ZV8yMDIxID0gZmlfYnlzdGF0ZSAlPiUgZmlsdGVyKFllYXIgPT0gMjAyMSkKCiNjb2xuYW1lcyhmaV9ieXN0YXRlXzIwMjEpCgogICNzcGxpdCBjb3VudHksIHN0YXRlIGludG8gc2VwZXJhdGUgY29sdW1ucyB0byBpc29sYXRlIGNvdW50eQogICNodHRwczovL3d3dy5nZWVrc2ZvcmdlZWtzLm9yZy9ob3ctdG8tc3BsaXQtY29sdW1uLWludG8tbXVsdGlwbGUtY29sdW1ucy1pbi1yLWRhdGFmcmFtZS8KZmlfYnlzdGF0ZV8yMDIxW2MoJ0NvdW50eScsICdTdGF0ZScpXSA8LSBzdHJfc3BsaXRfZml4ZWQoZmlfYnlzdGF0ZV8yMDIxJGBDb3VudHksIFN0YXRlYCwgJywnLCAyKQoKICAjZHJvcCB1bm5lY2Vzc2FyeSBjb2x1bW5zCiAgI2h0dHBzOi8vc3RhY2tvdmVyZmxvdy5jb20vcXVlc3Rpb25zLzQ2MDUyMDYvZHJvcC1kYXRhLWZyYW1lLWNvbHVtbnMtYnktbmFtZQpkcm9wcyA9IGMoIkZJUFMiLCAiU3RhdGUiLCAiQ291bnR5LCBTdGF0ZSIsICJTTkFQIFRocmVzaG9sZCIsICIlIEZJIOKJpCBTTkFQIFRocmVzaG9sZCIsIAogICAgICAgICAgICAgICAgICAgIiUgRkkgPiBTTkFQIFRocmVzaG9sZCIgKQpmaV9ieXN0YXRlXzIwMjEgPSBmaV9ieXN0YXRlXzIwMjFbICwgIShuYW1lcyhmaV9ieXN0YXRlXzIwMjEpICVpbiUgZHJvcHMpXQoKICAjZHJvcCAiY291bnR5IiBmcm9tIGNvdW50eSBpZGVudGlmaWVyCmZpX2J5c3RhdGVfMjAyMVtjKCdDb3VudHknLCAnZHJvcCcpXSA8LSBzdHJfc3BsaXRfZml4ZWQoZmlfYnlzdGF0ZV8yMDIxJENvdW50eSwgJyAnLCAyKQpmaV9ieWNvdW50eV8yMDIxID0gZmlfYnlzdGF0ZV8yMDIxWyAsICEobmFtZXMoZmlfYnlzdGF0ZV8yMDIxKSAlaW4lIGMoImRyb3AiLCAiWWVhciIpKV0KCiAgI3JlYXJyYW5nZSBjb2x1bW5zIHNvIGNvdW50eSBpcyBpcyB0aGUgZmlyc3QgY29sdW1uCmZpX2J5Y291bnR5XzIwMjEgPSBmaV9ieWNvdW50eV8yMDIxICU+JSBzZWxlY3QoIkNvdW50eSIsIGV2ZXJ5dGhpbmcoKSkKCiAgI3JlbmFtZSAyIGNvbHVtbnMKZmlfYnljb3VudHlfMjAyMSRDb3VudHlbMzFdIDwtICJOZXcgWW9yayIKZmlfYnljb3VudHlfMjAyMSRDb3VudHlbNDVdIDwtICJTdC4gTGF3cmVuY2UiCgogICNmaXggdmFyaWFibGUgdHlwZXMKI3N0cihmaV9ieWNvdW50eV8yMDIxKQoKZmlfYnljb3VudHlfMjAyMSRgRm9vZCBJbnNlY3VyaXR5IFJhdGUgYW1vbmcgQmxhY2sgUGVyc29ucyAoYWxsIGV0aG5pY2l0aWVzKWAgPSBhcy5udW1lcmljKGZpX2J5Y291bnR5XzIwMjEkYEZvb2QgSW5zZWN1cml0eSBSYXRlIGFtb25nIEJsYWNrIFBlcnNvbnMgKGFsbCBldGhuaWNpdGllcylgKQoKZmlfYnljb3VudHlfMjAyMSRgRm9vZCBJbnNlY3VyaXR5IFJhdGUgYW1vbmcgSGlzcGFuaWMgUGVyc29ucyAoYW55IHJhY2UpYCA9IGFzLm51bWVyaWMoZmlfYnljb3VudHlfMjAyMSRgRm9vZCBJbnNlY3VyaXR5IFJhdGUgYW1vbmcgSGlzcGFuaWMgUGVyc29ucyAoYW55IHJhY2UpYCkKCmZpX2J5Y291bnR5XzIwMjEkYEZvb2QgSW5zZWN1cml0eSBSYXRlIGFtb25nIFdoaXRlLCBub24tSGlzcGFuaWMgUGVyc29uc2AgPSBhcy5udW1lcmljKGZpX2J5Y291bnR5XzIwMjEkYEZvb2QgSW5zZWN1cml0eSBSYXRlIGFtb25nIFdoaXRlLCBub24tSGlzcGFuaWMgUGVyc29uc2ApCgojc3RyKGZpX2J5Y291bnR5XzIwMjEpCgogICNjbGVhbmVkIGFuZCBhZGp1c3RlZCBkYXRhc2V0IGJlZm9yZSBqb2luaW5nCmZpX2J5Y291bnR5XzIwMjEKCgpgYGAKCmBgYHtyfQojc3BsaXR0aW5nIGhlYWx0aCBpc3N1ZXMgZGF0YWZyYW1lIGludG8gY291bnR5IGFuZCByZWdpb24gZGF0YWZyYW1lcwpoZWFsdGhfaXNzdWVzX2RmWyJDb3VudHkiXQoKICAjaWRlbnRpZnkgY29sdW1ucyB0aGF0IHNlcGVyYXRlIHRoZSB0d28KcmVnaW9ucyA9IGMoIlJFR0lPTjogQ2FwaXRhbCBSZWdpb24iLCAiUkVHSU9OOiBDZW50cmFsIE5ZIiwgIlJFR0lPTjogRmluZ2VyIExha2VzIiwgIlJFR0lPTjogTG9uZyBJc2xhbmQiLCAiUkVHSU9OOiBNaWQtSHVkc29uIiwgIlJFR0lPTjogTW9oYXdrIFZhbGxleSIsICJSRUdJT046IE5ldyBZb3JrIENpdHkiLCAiUkVHSU9OOiBOb3J0aCBDb3VudHJ5IiwgIlJFR0lPTjogU291dGhlcm4gVGllciIsICJSRUdJT046IFR1ZyBIaWxsIFNlYXdheSIsICJSRUdJT046IFdlc3Rlcm4gTlkiLCAiTmV3IFlvcmsgQ2l0eSIsIAoiTllTIEV4Y2x1c2l2ZSBvZiBOWUMiLCAiU3RhdGV3aWRlIikgCgpoaV9ieWNvdW50eSA9IGhlYWx0aF9pc3N1ZXNfZGZbIWhlYWx0aF9pc3N1ZXNfZGYkQ291bnR5ICVpbiUgcmVnaW9ucywgXQpoaV9ieXJlZ2lvbiA9IGhlYWx0aF9pc3N1ZXNfZGZbaGVhbHRoX2lzc3Vlc19kZiRDb3VudHkgJWluJSByZWdpb25zLCBdCgogICNkaXZpZGUgYWxsIHZhbHVlcyBieSAxMDAgc28gdGhleSBhcHBlYXIgYXMgZGVjaW1hbHMKaGlfY29sdW1ucyA9IGNvbG5hbWVzKGhpX2J5Y291bnR5KQpoaV9jb2x1bW5zIDwtIGhpX2NvbHVtbnNbLTFdCgpoaV9ieWNvdW50eVtoaV9jb2x1bW5zXSA8LSBoaV9ieWNvdW50eVtoaV9jb2x1bW5zXSAvIDEwMAoKCiAgI2NsZWFuZWQgYW5kIGFkanVzdGVkIGR0YXNldCBiZWZvcmUgam9pbmluZwpoaV9ieWNvdW50eQpgYGAKCmBgYHtyfQojam9pbmluZyB0aGUgZGF0YXNldHMKCiAgI2luaXRpYWwgdGVzdApkZl9tZXJnZWQgPC0gbWVyZ2UoeCA9IGhpX2J5Y291bnR5LCB5ID0gZmlfYnljb3VudHlfMjAyMSwgYnkgPSAiQ291bnR5IiwgYWxsID0gVFJVRSkKCiAgI3JlYWxpemVkIHRoYXQgIlN0YXRld2lkZSIgd2FzIGluIHRoZSBoaSBkYXRhIGJ1dCBub3QgZmkgZGF0YQogICAgI3dlbnQgYmFjayB0byB0aGUgaGkgY2xlYW5pbmcgZGF0YSwgcmVtb3ZlZCAiU3RhdGV3aWRlIGZyb20gdGhlIGhpX2J5Y291bnR5LCBhbmQgbW92ZWQgaXQgdG8gICAgIGhpX2J5cmVnaW9uCgpkZl9tZXJnZWQKYGBgCgpgYGAKCg==