# Report the current working directory; the CSV files read later in this
# script are referenced by bare file name, so they are looked up relative to it.
getwd()
## [1] "C:/Users/Dano/Documents"
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.4
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.0.5 v dplyr 1.0.3
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Make the folder that holds the CSV files the working directory so that the
# bare file names passed to read_csv() resolve against it.
# NOTE(review): this is a machine-specific absolute path; the script stops here
# on any computer without this folder. Consider an RStudio project or paths
# relative to the script instead of setwd().
setwd("C:/Users/Dano/Documents")
# Pharmacy data: used here as the example of an already-tidy table, so it is
# imported as-is and not reshaped.
# NOTE(review): readr guesses `Zip Code` as double (see the column
# specification printed below); a ZIP code with a leading zero would lose it.
# Confirm that numeric ZIP codes are acceptable for this data.
pharmacy <- read_csv(file = "Pilot_Pharmaceuticals.csv")
##
## -- Column specification --------------------------------------------------------
## cols(
## Name = col_character(),
## `Address Line 1` = col_character(),
## `Address Line 2` = col_character(),
## City = col_character(),
## State = col_character(),
## `Zip Code` = col_double(),
## County = col_character(),
## `Available to Public?` = col_character(),
## `Location 1` = col_character()
## )
# Print the tibble to inspect the imported columns and the first rows.
pharmacy
## # A tibble: 127 x 9
## Name `Address Line 1` `Address Line 2` City State `Zip Code` County
## <chr> <chr> <chr> <chr> <chr> <dbl> <chr>
## 1 Fair~ 4747-10 Nescons~ <NA> Port~ NY 11776 Suffo~
## 2 Bart~ 2 Elm Street <NA> Fran~ NY 14737 Catta~
## 3 Gero~ 130 South Main ~ <NA> Elmi~ NY 14904 Chemu~
## 4 Gras~ 640 Tuckahoe Rd <NA> Yonk~ NY 10710 Westc~
## 5 Mark~ 78 W. Market St <NA> Corn~ NY 14830 Steub~
## 6 Kenb~ 6024 5th Avenue <NA> Broo~ NY 11220 Kings
## 7 Summ~ 2578 Niagara Fa~ <NA> Niag~ NY 14304 Niaga~
## 8 Esta~ 3001 Clarendon ~ <NA> Broo~ NY 11226 Kings
## 9 JAK ~ 2343 Arthur Ave <NA> Bronx NY 10458 Bronx
## 10 Eliz~ 75 Park St PO Box 277 Eliz~ NY 12932 Essex
## # ... with 117 more rows, and 2 more variables: `Available to Public?` <chr>,
## # `Location 1` <chr>
# Public cemetery corporations.
#
# Left to its own guessing, readr typed the last four columns as logical from
# the rows it sampled, and every real value further down the file (for example
# the ID 4036 in `Taken Over By Cemetery`) became a parsing failure and was
# read as NA -- 486 failures in total. Declaring these columns explicitly
# keeps the values. Character is used because it can never fail to parse;
# convert to numbers or dates afterwards where that is appropriate.
#
# `Taken Over By Cemetery` and `Abandoned To Town MuniCode` are deliberately
# given the same type because they are later stacked into a single column by
# pivot_longer(), which requires compatible types.
#
# All other columns are still guessed by readr. The column specification and
# warning recorded in the output below predate this change.
Cemetery <- read_csv(
  "Public_Cemetery_Corporations.csv",
  col_types = cols(
    `Abandoned Or Merged On` = col_character(),
    `Taken Over By Cemetery` = col_character(),
    `Abandoned To Town MuniCode` = col_character(),
    `Abandoned To Town Name` = col_character()
  )
)
##
## -- Column specification --------------------------------------------------------
## cols(
## CEMID = col_double(),
## `Cemetery Name` = col_character(),
## County = col_character(),
## Crematory = col_double(),
## `Non-Traditional` = col_double(),
## `Community Mausoleum` = col_double(),
## Status = col_character(),
## `Active Since` = col_character(),
## `Abandoned Or Merged On` = col_logical(),
## `Taken Over By Cemetery` = col_logical(),
## `Abandoned To Town MuniCode` = col_logical(),
## `Abandoned To Town Name` = col_logical()
## )
## Warning: 486 parsing failures.
## row col expected actual file
## 1690 Taken Over By Cemetery 1/0/T/F/TRUE/FALSE 4036 'Public_Cemetery_Corporations.csv'
## 1691 Taken Over By Cemetery 1/0/T/F/TRUE/FALSE 7020 'Public_Cemetery_Corporations.csv'
## 1692 Taken Over By Cemetery 1/0/T/F/TRUE/FALSE 13047 'Public_Cemetery_Corporations.csv'
## 1693 Taken Over By Cemetery 1/0/T/F/TRUE/FALSE 41007 'Public_Cemetery_Corporations.csv'
## 1694 Taken Over By Cemetery 1/0/T/F/TRUE/FALSE 29003 'Public_Cemetery_Corporations.csv'
## .... ...................... .................. ...... ..................................
## See problems(...) for more details.
# Print the tibble to inspect the imported columns and the first rows.
Cemetery
## # A tibble: 1,938 x 12
## CEMID `Cemetery Name` County Crematory `Non-Traditiona~ `Community Maus~
## <dbl> <chr> <chr> <dbl> <dbl> <dbl>
## 1 1025 Rensselaervill~ Albany 0 0 0
## 2 1026 Westerlo Centr~ Albany 0 0 0
## 3 1027 Westerlo Rural~ Albany 0 0 0
## 4 1028 Woodlawn Cemet~ Albany 0 0 0
## 5 2001 Alfred Center ~ Alleg~ 0 0 0
## 6 2002 Alger Rural Ce~ Alleg~ 0 0 0
## 7 2003 Almond Cemeter~ Alleg~ 0 0 0
## 8 2004 Bates Family C~ Alleg~ 0 0 0
## 9 2005 Bellville Ceme~ Alleg~ 0 0 0
## 10 2006 Black Creek Ce~ Alleg~ 0 0 0
## # ... with 1,928 more rows, and 6 more variables: Status <chr>, `Active
## # Since` <chr>, `Abandoned Or Merged On` <lgl>, `Taken Over By
## # Cemetery` <lgl>, `Abandoned To Town MuniCode` <lgl>, `Abandoned To Town
## # Name` <lgl>
# Reshape the three service indicator columns (Crematory, Non-Traditional and
# Community Mausoleum) into long form: `Services` holds the former column name
# and `1=Y_0=N` holds its value (per the column name, 1 = yes and 0 = no).
# Each cemetery therefore appears on three rows, one per service.
cemetery1 <- pivot_longer(
  Cemetery,
  cols = c(Crematory, `Non-Traditional`, `Community Mausoleum`),
  names_to = "Services",
  values_to = "1=Y_0=N"
)
cemetery1
## # A tibble: 5,814 x 11
## CEMID `Cemetery Name` County Status `Active Since` `Abandoned Or M~
## <dbl> <chr> <chr> <chr> <chr> <lgl>
## 1 1025 Rensselaervill~ Albany Active <NA> NA
## 2 1025 Rensselaervill~ Albany Active <NA> NA
## 3 1025 Rensselaervill~ Albany Active <NA> NA
## 4 1026 Westerlo Centr~ Albany Active 01/20/1882 NA
## 5 1026 Westerlo Centr~ Albany Active 01/20/1882 NA
## 6 1026 Westerlo Centr~ Albany Active 01/20/1882 NA
## 7 1027 Westerlo Rural~ Albany Active 10/27/1871 NA
## 8 1027 Westerlo Rural~ Albany Active 10/27/1871 NA
## 9 1027 Westerlo Rural~ Albany Active 10/27/1871 NA
## 10 1028 Woodlawn Cemet~ Albany Active <NA> NA
## # ... with 5,804 more rows, and 5 more variables: `Taken Over By
## # Cemetery` <lgl>, `Abandoned To Town MuniCode` <lgl>, `Abandoned To Town
## # Name` <lgl>, Services <chr>, `1=Y_0=N` <dbl>
# Second reshape, applied on top of cemetery1: stack `Taken Over By Cemetery`
# and `Abandoned To Town MuniCode` into long form. `Taken/Abandoned_Status`
# holds the former column name and `Code` holds its value.
# Because cemetery1 already has three rows per cemetery, the result has six
# rows per cemetery (3 services x 2 status columns).
cemetery2 <- pivot_longer(
  cemetery1,
  cols = c(`Taken Over By Cemetery`, `Abandoned To Town MuniCode`),
  names_to = "Taken/Abandoned_Status",
  values_to = "Code"
)
cemetery2
## # A tibble: 11,628 x 11
## CEMID `Cemetery Name` County Status `Active Since` `Abandoned Or M~
## <dbl> <chr> <chr> <chr> <chr> <lgl>
## 1 1025 Rensselaervill~ Albany Active <NA> NA
## 2 1025 Rensselaervill~ Albany Active <NA> NA
## 3 1025 Rensselaervill~ Albany Active <NA> NA
## 4 1025 Rensselaervill~ Albany Active <NA> NA
## 5 1025 Rensselaervill~ Albany Active <NA> NA
## 6 1025 Rensselaervill~ Albany Active <NA> NA
## 7 1026 Westerlo Centr~ Albany Active 01/20/1882 NA
## 8 1026 Westerlo Centr~ Albany Active 01/20/1882 NA
## 9 1026 Westerlo Centr~ Albany Active 01/20/1882 NA
## 10 1026 Westerlo Centr~ Albany Active 01/20/1882 NA
## # ... with 11,618 more rows, and 5 more variables: `Abandoned To Town
## # Name` <lgl>, Services <chr>, `1=Y_0=N` <dbl>,
## # `Taken/Abandoned_Status` <chr>, Code <lgl>
# Import the tourism centers table; column types are left to readr's guessing.
tourism <- read_csv(file = "Tourism_Centers.csv")
##
## -- Column specification --------------------------------------------------------
## cols(
## `Plaza Name` = col_character(),
## Route = col_character(),
## Milepost = col_double(),
## Direction = col_character(),
## Operation = col_character(),
## Location = col_character(),
## Latitude = col_double(),
## Longitude = col_double(),
## `Mapping Location` = col_character()
## )
# Open the imported table in the data viewer for a manual look.
# NOTE(review): this is an interactive aid only; presumably it can be dropped
# when the script is run non-interactively -- confirm.
view(tourism)
# Import the village census table; column types are left to readr's guessing.
census <- read_csv(file = "Census_Villages.csv")
##
## -- Column specification --------------------------------------------------------
## cols(
## `Geographic Area` = col_character(),
## `2000` = col_double(),
## `2010` = col_double(),
## `Population Change Number` = col_double(),
## `Population Change Percent` = col_double(),
## SUMLEV = col_double(),
## State = col_double(),
## County = col_logical(),
## `Sub County` = col_logical(),
## Place = col_double(),
## FuncStat = col_character()
## )
# Open the imported table in the data viewer for a manual look.
# NOTE(review): this is an interactive aid only; presumably it can be dropped
# when the script is run non-interactively -- confirm.
view(census)
# Widen the tourism table: every distinct value of `Route` becomes its own
# column, filled with the matching `Plaza Name`, so the plazas can be read
# side by side per route.
tourism1 <- pivot_wider(
  tourism,
  names_from = "Route",
  values_from = `Plaza Name`
)
tourism1
## # A tibble: 8 x 9
## Milepost Direction Operation Location Latitude Longitude `Mapping Locati~
## <dbl> <chr> <chr> <chr> <dbl> <dbl> <chr>
## 1 33 Northbou~ Open All~ Exit 15~ 41.2 -74.2 (41.15128, -74.~
## 2 65 Northbou~ Open All~ Exit 17~ 41.6 -74.1 (41.57777, -74.~
## 3 397 Eastbound Seasonal~ Exit 48~ 43.0 -78.3 (43.01187, -78.~
## 4 127 Northbou~ Open All~ Exit 21~ 42.4 -73.8 (42.42772, -73.~
## 5 412 Westbound Open All~ Exit 48~ 43.0 -78.6 (42.95248, -78.~
## 6 447 Eastboun~ Open All~ Exit 57~ 42.6 -79.0 (42.63576, -78.~
## 7 292 Westbound Seasonal~ Exit 39~ 43.1 -76.3 (43.09199, -76.~
## 8 350 Westbound Seasonal~ Exit 44~ 43.0 -77.4 (42.998759, -77~
## # ... with 2 more variables: `I-87 - NYS Thruway` <chr>, `I-90 - NYS
## # Thruway` <chr>
# Lengthen the census table: the two yearly columns `2000` and `2010` are
# stacked so that `year` holds the former column name and `Population` holds
# the count. Each village therefore appears on two rows.
# NOTE(review): `year` comes out as character because it is built from column
# names; convert it if it is needed as a number.
census1 <- pivot_longer(
  census,
  cols = c(`2000`, `2010`),
  names_to = "year",
  values_to = "Population"
)
census1
## # A tibble: 1,234 x 11
## `Geographic Are~ `Population Cha~ `Population Cha~ SUMLEV State County
## <chr> <dbl> <dbl> <dbl> <dbl> <lgl>
## 1 Adams village 159 9.8 160 36 NA
## 2 Adams village 159 9.8 160 36 NA
## 3 Addison village -34 -1.9 160 36 NA
## 4 Addison village -34 -1.9 160 36 NA
## 5 Afton village -14 -1.7 160 36 NA
## 6 Afton village -14 -1.7 160 36 NA
## 7 Airmont village 829 10.6 160 36 NA
## 8 Airmont village 829 10.6 160 36 NA
## 9 Akron village -217 -7 160 36 NA
## 10 Akron village -217 -7 160 36 NA
## # ... with 1,224 more rows, and 5 more variables: `Sub County` <lgl>,
## # Place <dbl>, FuncStat <chr>, year <chr>, Population <dbl>