Tidy and Untidy

# Report the current working directory before any files are read.
getwd()

## [1] "C:/Users/Dano/Documents"

library(tidyverse)

## Warning: package 'tidyverse' was built under R version 4.0.4

## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --

## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.0.5     v dplyr   1.0.3
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.1

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

# Point the session at the folder holding the CSV files; every read_csv() call
# in this script uses a bare file name, so it resolves against this directory.
# NOTE(review): this absolute path exists on one machine only - anyone else
# running the script has to edit it (or run from a project directory instead).
setwd("C:/Users/Dano/Documents")

# The pharmacy dataset serves as the example of a table that is already tidy.
pharmacy <- read_csv(file = "Pilot_Pharmaceuticals.csv")

## 
## -- Column specification --------------------------------------------------------
## cols(
##   Name = col_character(),
##   `Address Line 1` = col_character(),
##   `Address Line 2` = col_character(),
##   City = col_character(),
##   State = col_character(),
##   `Zip Code` = col_double(),
##   County = col_character(),
##   `Available to Public?` = col_character(),
##   `Location 1` = col_character()
## )

# Auto-print the pharmacy tibble to inspect its columns and first rows.
pharmacy

## # A tibble: 127 x 9
##    Name  `Address Line 1` `Address Line 2` City  State `Zip Code` County
##    <chr> <chr>            <chr>            <chr> <chr>      <dbl> <chr> 
##  1 Fair~ 4747-10 Nescons~ <NA>             Port~ NY         11776 Suffo~
##  2 Bart~ 2 Elm Street     <NA>             Fran~ NY         14737 Catta~
##  3 Gero~ 130 South Main ~ <NA>             Elmi~ NY         14904 Chemu~
##  4 Gras~ 640 Tuckahoe Rd  <NA>             Yonk~ NY         10710 Westc~
##  5 Mark~ 78 W. Market St  <NA>             Corn~ NY         14830 Steub~
##  6 Kenb~ 6024 5th Avenue  <NA>             Broo~ NY         11220 Kings 
##  7 Summ~ 2578 Niagara Fa~ <NA>             Niag~ NY         14304 Niaga~
##  8 Esta~ 3001 Clarendon ~ <NA>             Broo~ NY         11226 Kings 
##  9 JAK ~ 2343 Arthur Ave  <NA>             Bronx NY         10458 Bronx 
## 10 Eliz~ 75 Park St       PO Box 277       Eliz~ NY         12932 Essex 
## # ... with 117 more rows, and 2 more variables: `Available to Public?` <chr>,
## #   `Location 1` <chr>

# Load the public cemetery corporations table.
# With default type guessing, readr treated the four columns listed below as
# logical and reported 486 parsing failures: values such as 4036 and 7020 in
# `Taken Over By Cemetery` were rejected and became NA. Reading these columns
# as text keeps every value exactly as it appears in the file.
# NOTE(review): only `Taken Over By Cemetery` is confirmed by the failure rows
# that readr displayed; the other three were guessed as logical in the same
# way. Check problems() and convert to numbers/dates once the formats are
# confirmed.
Cemetery <- read_csv(
  "Public_Cemetery_Corporations.csv",
  col_types = cols(
    `Abandoned Or Merged On` = col_character(),
    `Taken Over By Cemetery` = col_character(),
    `Abandoned To Town MuniCode` = col_character(),
    `Abandoned To Town Name` = col_character()
  )
)

## 
## -- Column specification --------------------------------------------------------
## cols(
##   CEMID = col_double(),
##   `Cemetery Name` = col_character(),
##   County = col_character(),
##   Crematory = col_double(),
##   `Non-Traditional` = col_double(),
##   `Community Mausoleum` = col_double(),
##   Status = col_character(),
##   `Active Since` = col_character(),
##   `Abandoned Or Merged On` = col_logical(),
##   `Taken Over By Cemetery` = col_logical(),
##   `Abandoned To Town MuniCode` = col_logical(),
##   `Abandoned To Town Name` = col_logical()
## )

## Warning: 486 parsing failures.
##  row                    col           expected actual                               file
## 1690 Taken Over By Cemetery 1/0/T/F/TRUE/FALSE  4036  'Public_Cemetery_Corporations.csv'
## 1691 Taken Over By Cemetery 1/0/T/F/TRUE/FALSE  7020  'Public_Cemetery_Corporations.csv'
## 1692 Taken Over By Cemetery 1/0/T/F/TRUE/FALSE  13047 'Public_Cemetery_Corporations.csv'
## 1693 Taken Over By Cemetery 1/0/T/F/TRUE/FALSE  41007 'Public_Cemetery_Corporations.csv'
## 1694 Taken Over By Cemetery 1/0/T/F/TRUE/FALSE  29003 'Public_Cemetery_Corporations.csv'
## .... ...................... .................. ...... ..................................
## See problems(...) for more details.

# Auto-print the cemetery tibble to inspect its columns and first rows.
Cemetery

## # A tibble: 1,938 x 12
##    CEMID `Cemetery Name` County Crematory `Non-Traditiona~ `Community Maus~
##    <dbl> <chr>           <chr>      <dbl>            <dbl>            <dbl>
##  1  1025 Rensselaervill~ Albany         0                0                0
##  2  1026 Westerlo Centr~ Albany         0                0                0
##  3  1027 Westerlo Rural~ Albany         0                0                0
##  4  1028 Woodlawn Cemet~ Albany         0                0                0
##  5  2001 Alfred Center ~ Alleg~         0                0                0
##  6  2002 Alger Rural Ce~ Alleg~         0                0                0
##  7  2003 Almond Cemeter~ Alleg~         0                0                0
##  8  2004 Bates Family C~ Alleg~         0                0                0
##  9  2005 Bellville Ceme~ Alleg~         0                0                0
## 10  2006 Black Creek Ce~ Alleg~         0                0                0
## # ... with 1,928 more rows, and 6 more variables: Status <chr>, `Active
## #   Since` <chr>, `Abandoned Or Merged On` <lgl>, `Taken Over By
## #   Cemetery` <lgl>, `Abandoned To Town MuniCode` <lgl>, `Abandoned To Town
## #   Name` <lgl>

# Use pivot_longer() to gather the three service columns (Crematory,
# Non-Traditional, Community Mausoleum) into a single "Services" column;
# the value that was in each column moves to the "1=Y_0=N" column.
cemetery1 <- pivot_longer(
  Cemetery,
  cols = c("Crematory", "Non-Traditional", "Community Mausoleum"),
  names_to = "Services",
  values_to = "1=Y_0=N"
)
cemetery1

## # A tibble: 5,814 x 11
##    CEMID `Cemetery Name` County Status `Active Since` `Abandoned Or M~
##    <dbl> <chr>           <chr>  <chr>  <chr>          <lgl>           
##  1  1025 Rensselaervill~ Albany Active <NA>           NA              
##  2  1025 Rensselaervill~ Albany Active <NA>           NA              
##  3  1025 Rensselaervill~ Albany Active <NA>           NA              
##  4  1026 Westerlo Centr~ Albany Active 01/20/1882     NA              
##  5  1026 Westerlo Centr~ Albany Active 01/20/1882     NA              
##  6  1026 Westerlo Centr~ Albany Active 01/20/1882     NA              
##  7  1027 Westerlo Rural~ Albany Active 10/27/1871     NA              
##  8  1027 Westerlo Rural~ Albany Active 10/27/1871     NA              
##  9  1027 Westerlo Rural~ Albany Active 10/27/1871     NA              
## 10  1028 Woodlawn Cemet~ Albany Active <NA>           NA              
## # ... with 5,804 more rows, and 5 more variables: `Taken Over By
## #   Cemetery` <lgl>, `Abandoned To Town MuniCode` <lgl>, `Abandoned To Town
## #   Name` <lgl>, Services <chr>, `1=Y_0=N` <dbl>

# Use pivot_longer() again, this time gathering "Taken Over By Cemetery" and
# "Abandoned To Town MuniCode" into one "Taken/Abandoned_Status" column, with
# the corresponding values stored in "Code".
cemetery2 <- pivot_longer(
  cemetery1,
  cols = c("Taken Over By Cemetery", "Abandoned To Town MuniCode"),
  names_to = "Taken/Abandoned_Status",
  values_to = "Code"
)
cemetery2

## # A tibble: 11,628 x 11
##    CEMID `Cemetery Name` County Status `Active Since` `Abandoned Or M~
##    <dbl> <chr>           <chr>  <chr>  <chr>          <lgl>           
##  1  1025 Rensselaervill~ Albany Active <NA>           NA              
##  2  1025 Rensselaervill~ Albany Active <NA>           NA              
##  3  1025 Rensselaervill~ Albany Active <NA>           NA              
##  4  1025 Rensselaervill~ Albany Active <NA>           NA              
##  5  1025 Rensselaervill~ Albany Active <NA>           NA              
##  6  1025 Rensselaervill~ Albany Active <NA>           NA              
##  7  1026 Westerlo Centr~ Albany Active 01/20/1882     NA              
##  8  1026 Westerlo Centr~ Albany Active 01/20/1882     NA              
##  9  1026 Westerlo Centr~ Albany Active 01/20/1882     NA              
## 10  1026 Westerlo Centr~ Albany Active 01/20/1882     NA              
## # ... with 11,618 more rows, and 5 more variables: `Abandoned To Town
## #   Name` <lgl>, Services <chr>, `1=Y_0=N` <dbl>,
## #   `Taken/Abandoned_Status` <chr>, Code <lgl>

# Load the tourism centers table.
tourism <- read_csv(file = "Tourism_Centers.csv")

## 
## -- Column specification --------------------------------------------------------
## cols(
##   `Plaza Name` = col_character(),
##   Route = col_character(),
##   Milepost = col_double(),
##   Direction = col_character(),
##   Operation = col_character(),
##   Location = col_character(),
##   Latitude = col_double(),
##   Longitude = col_double(),
##   `Mapping Location` = col_character()
## )

# Open the tourism table in the data viewer for a look at the raw rows
# (presumably only useful in an interactive session).
view(tourism)

# Load the census table of village populations.
census <- read_csv(file = "Census_Villages.csv")

## 
## -- Column specification --------------------------------------------------------
## cols(
##   `Geographic Area` = col_character(),
##   `2000` = col_double(),
##   `2010` = col_double(),
##   `Population Change Number` = col_double(),
##   `Population Change Percent` = col_double(),
##   SUMLEV = col_double(),
##   State = col_double(),
##   County = col_logical(),
##   `Sub County` = col_logical(),
##   Place = col_double(),
##   FuncStat = col_character()
## )

# Open the census table in the data viewer for a look at the raw rows
# (presumably only useful in an interactive session).
view(census)

# Use pivot_wider() to spread the Route values into separate columns, each
# holding the plaza names for that route, so the table is easier to read.
tourism1 <- pivot_wider(
  tourism,
  names_from = Route,
  values_from = "Plaza Name"
)
tourism1

## # A tibble: 8 x 9
##   Milepost Direction Operation Location Latitude Longitude `Mapping Locati~
##      <dbl> <chr>     <chr>     <chr>       <dbl>     <dbl> <chr>           
## 1       33 Northbou~ Open All~ Exit 15~     41.2     -74.2 (41.15128, -74.~
## 2       65 Northbou~ Open All~ Exit 17~     41.6     -74.1 (41.57777, -74.~
## 3      397 Eastbound Seasonal~ Exit 48~     43.0     -78.3 (43.01187, -78.~
## 4      127 Northbou~ Open All~ Exit 21~     42.4     -73.8 (42.42772, -73.~
## 5      412 Westbound Open All~ Exit 48~     43.0     -78.6 (42.95248, -78.~
## 6      447 Eastboun~ Open All~ Exit 57~     42.6     -79.0 (42.63576, -78.~
## 7      292 Westbound Seasonal~ Exit 39~     43.1     -76.3 (43.09199, -76.~
## 8      350 Westbound Seasonal~ Exit 44~     43.0     -77.4 (42.998759, -77~
## # ... with 2 more variables: `I-87 - NYS Thruway` <chr>, `I-90 - NYS
## #   Thruway` <chr>

# Use pivot_longer() to gather the "2000" and "2010" columns into a single
# "year" column, with the matching counts stored in "Population".
census1 <- pivot_longer(
  census,
  cols = c("2000", "2010"),
  names_to = "year",
  values_to = "Population"
)
census1

## # A tibble: 1,234 x 11
##    `Geographic Are~ `Population Cha~ `Population Cha~ SUMLEV State County
##    <chr>                       <dbl>            <dbl>  <dbl> <dbl> <lgl> 
##  1 Adams village                 159              9.8    160    36 NA    
##  2 Adams village                 159              9.8    160    36 NA    
##  3 Addison village               -34             -1.9    160    36 NA    
##  4 Addison village               -34             -1.9    160    36 NA    
##  5 Afton village                 -14             -1.7    160    36 NA    
##  6 Afton village                 -14             -1.7    160    36 NA    
##  7 Airmont village               829             10.6    160    36 NA    
##  8 Airmont village               829             10.6    160    36 NA    
##  9 Akron village                -217             -7      160    36 NA    
## 10 Akron village                -217             -7      160    36 NA    
## # ... with 1,224 more rows, and 5 more variables: `Sub County` <lgl>,
## #   Place <dbl>, FuncStat <chr>, year <chr>, Population <dbl>

Tidy and Untidy

MT

3/4/2021