Import your data

library(readxl)
## Warning: package 'readxl' was built under R version 4.4.2
MyData <- read_excel("C:/Users/owner/OneDrive - USNH/Desktop/PSU_DAT3000_IntroToDA/00_data/MyData.xlsx")
View(MyData)

Pivoting

long to wide form

MyData %>% 
  pivot_longer(c("office", "state"), names_to = "representative", values_to = "homestate")
## # A tibble: 64,904 × 20
##     year state_po state_fips state_cen state_ic district stage runoff special
##    <dbl> <chr>         <dbl>     <dbl>    <dbl>    <dbl> <chr> <lgl>  <lgl>  
##  1  1976 AL                1        63       41        1 GEN   FALSE  FALSE  
##  2  1976 AL                1        63       41        1 GEN   FALSE  FALSE  
##  3  1976 AL                1        63       41        1 GEN   FALSE  FALSE  
##  4  1976 AL                1        63       41        1 GEN   FALSE  FALSE  
##  5  1976 AL                1        63       41        1 GEN   FALSE  FALSE  
##  6  1976 AL                1        63       41        1 GEN   FALSE  FALSE  
##  7  1976 AL                1        63       41        2 GEN   FALSE  FALSE  
##  8  1976 AL                1        63       41        2 GEN   FALSE  FALSE  
##  9  1976 AL                1        63       41        2 GEN   FALSE  FALSE  
## 10  1976 AL                1        63       41        2 GEN   FALSE  FALSE  
## # ℹ 64,894 more rows
## # ℹ 11 more variables: candidate <chr>, party <chr>, writein <lgl>, mode <chr>,
## #   candidatevotes <dbl>, totalvotes <dbl>, unofficial <lgl>, version <dbl>,
## #   fusion_ticket <lgl>, representative <chr>, homestate <chr>

wide to long form

MyData %>%
     pivot_wider(names_from = totalvotes, values_from = candidatevotes)
## Warning: Values from `candidatevotes` are not uniquely identified; output will contain
## list-cols.
## • Use `values_fn = list` to suppress this warning.
## • Use `values_fn = {summary_fun}` to summarise duplicates.
## • Use the following dplyr code to identify duplicates.
##   {data} |>
##   dplyr::summarise(n = dplyr::n(), .by = c(year, state, state_po, state_fips,
##   state_cen, state_ic, office, district, stage, runoff, special, candidate,
##   party, writein, mode, unofficial, version, fusion_ticket, totalvotes)) |>
##   dplyr::filter(n > 1L)
## # A tibble: 32,347 × 10,062
##     year state   state_po state_fips state_cen state_ic office   district stage
##    <dbl> <chr>   <chr>         <dbl>     <dbl>    <dbl> <chr>       <dbl> <chr>
##  1  1976 ALABAMA AL                1        63       41 US HOUSE        1 GEN  
##  2  1976 ALABAMA AL                1        63       41 US HOUSE        1 GEN  
##  3  1976 ALABAMA AL                1        63       41 US HOUSE        1 GEN  
##  4  1976 ALABAMA AL                1        63       41 US HOUSE        2 GEN  
##  5  1976 ALABAMA AL                1        63       41 US HOUSE        2 GEN  
##  6  1976 ALABAMA AL                1        63       41 US HOUSE        2 GEN  
##  7  1976 ALABAMA AL                1        63       41 US HOUSE        3 GEN  
##  8  1976 ALABAMA AL                1        63       41 US HOUSE        3 GEN  
##  9  1976 ALABAMA AL                1        63       41 US HOUSE        3 GEN  
## 10  1976 ALABAMA AL                1        63       41 US HOUSE        4 GEN  
## # ℹ 32,337 more rows
## # ℹ 10,053 more variables: runoff <lgl>, special <lgl>, candidate <chr>,
## #   party <chr>, writein <lgl>, mode <chr>, unofficial <lgl>, version <dbl>,
## #   fusion_ticket <lgl>, `157170` <list>, `156362` <list>, `108048` <list>,
## #   `176022` <list>, `113560` <list>, `162518` <list>, `110501` <list>,
## #   `118208` <list>, `168119` <list>, `182128` <list>, `187165` <list>,
## #   `191590` <list>, `168782` <list>, `167607` <list>, `1` <list>, …

Separating and Uniting

Unite two columns

MyData %>%
    unite(col = "congressional district", c(state,district), sep = " ") 
## # A tibble: 32,452 × 19
##     year `congressional district` state_po state_fips state_cen state_ic office 
##    <dbl> <chr>                    <chr>         <dbl>     <dbl>    <dbl> <chr>  
##  1  1976 ALABAMA 1                AL                1        63       41 US HOU…
##  2  1976 ALABAMA 1                AL                1        63       41 US HOU…
##  3  1976 ALABAMA 1                AL                1        63       41 US HOU…
##  4  1976 ALABAMA 2                AL                1        63       41 US HOU…
##  5  1976 ALABAMA 2                AL                1        63       41 US HOU…
##  6  1976 ALABAMA 2                AL                1        63       41 US HOU…
##  7  1976 ALABAMA 3                AL                1        63       41 US HOU…
##  8  1976 ALABAMA 3                AL                1        63       41 US HOU…
##  9  1976 ALABAMA 3                AL                1        63       41 US HOU…
## 10  1976 ALABAMA 4                AL                1        63       41 US HOU…
## # ℹ 32,442 more rows
## # ℹ 12 more variables: stage <chr>, runoff <lgl>, special <lgl>,
## #   candidate <chr>, party <chr>, writein <lgl>, mode <chr>,
## #   candidatevotes <dbl>, totalvotes <dbl>, unofficial <lgl>, version <dbl>,
## #   fusion_ticket <lgl>

Separate a column

MyData %>%
    separate(col = "office", into = c("coutry", "position"), sep = "/")
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 32452 rows [1, 2, 3, 4,
## 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## # A tibble: 32,452 × 21
##     year state   state_po state_fips state_cen state_ic coutry position district
##    <dbl> <chr>   <chr>         <dbl>     <dbl>    <dbl> <chr>  <chr>       <dbl>
##  1  1976 ALABAMA AL                1        63       41 US HO… <NA>            1
##  2  1976 ALABAMA AL                1        63       41 US HO… <NA>            1
##  3  1976 ALABAMA AL                1        63       41 US HO… <NA>            1
##  4  1976 ALABAMA AL                1        63       41 US HO… <NA>            2
##  5  1976 ALABAMA AL                1        63       41 US HO… <NA>            2
##  6  1976 ALABAMA AL                1        63       41 US HO… <NA>            2
##  7  1976 ALABAMA AL                1        63       41 US HO… <NA>            3
##  8  1976 ALABAMA AL                1        63       41 US HO… <NA>            3
##  9  1976 ALABAMA AL                1        63       41 US HO… <NA>            3
## 10  1976 ALABAMA AL                1        63       41 US HO… <NA>            4
## # ℹ 32,442 more rows
## # ℹ 12 more variables: stage <chr>, runoff <lgl>, special <lgl>,
## #   candidate <chr>, party <chr>, writein <lgl>, mode <chr>,
## #   candidatevotes <dbl>, totalvotes <dbl>, unofficial <lgl>, version <dbl>,
## #   fusion_ticket <lgl>

Missing Values