Import data
# excel file
data <- read_excel("../00_data/MyData.xlsx")
data
## # A tibble: 32,452 × 20
## year state state_po state_fips state_cen state_ic office district stage
## <dbl> <chr> <chr> <dbl> <dbl> <dbl> <chr> <dbl> <chr>
## 1 1976 ALABAMA AL 1 63 41 US HOUSE 1 GEN
## 2 1976 ALABAMA AL 1 63 41 US HOUSE 1 GEN
## 3 1976 ALABAMA AL 1 63 41 US HOUSE 1 GEN
## 4 1976 ALABAMA AL 1 63 41 US HOUSE 2 GEN
## 5 1976 ALABAMA AL 1 63 41 US HOUSE 2 GEN
## 6 1976 ALABAMA AL 1 63 41 US HOUSE 2 GEN
## 7 1976 ALABAMA AL 1 63 41 US HOUSE 3 GEN
## 8 1976 ALABAMA AL 1 63 41 US HOUSE 3 GEN
## 9 1976 ALABAMA AL 1 63 41 US HOUSE 3 GEN
## 10 1976 ALABAMA AL 1 63 41 US HOUSE 4 GEN
## # ℹ 32,442 more rows
## # ℹ 11 more variables: runoff <lgl>, special <lgl>, candidate <chr>,
## # party <chr>, writein <lgl>, mode <chr>, candidatevotes <dbl>,
## # totalvotes <dbl>, unofficial <lgl>, version <dbl>, fusion_ticket <lgl>
Filter rows
filter(data, runoff == "true" )
## # A tibble: 0 × 20
## # ℹ 20 variables: year <dbl>, state <chr>, state_po <chr>, state_fips <dbl>,
## # state_cen <dbl>, state_ic <dbl>, office <chr>, district <dbl>, stage <chr>,
## # runoff <lgl>, special <lgl>, candidate <chr>, party <chr>, writein <lgl>,
## # mode <chr>, candidatevotes <dbl>, totalvotes <dbl>, unofficial <lgl>,
## # version <dbl>, fusion_ticket <lgl>
Arrange rows
arrange(data, desc(year) )
## # A tibble: 32,452 × 20
## year state state_po state_fips state_cen state_ic office district stage
## <dbl> <chr> <chr> <dbl> <dbl> <dbl> <chr> <dbl> <chr>
## 1 2022 ALABAMA AL 1 63 41 US HOUSE 1 GEN
## 2 2022 ALABAMA AL 1 63 41 US HOUSE 1 GEN
## 3 2022 ALABAMA AL 1 63 41 US HOUSE 1 GEN
## 4 2022 ALABAMA AL 1 63 41 US HOUSE 2 GEN
## 5 2022 ALABAMA AL 1 63 41 US HOUSE 2 GEN
## 6 2022 ALABAMA AL 1 63 41 US HOUSE 2 GEN
## 7 2022 ALABAMA AL 1 63 41 US HOUSE 2 GEN
## 8 2022 ALABAMA AL 1 63 41 US HOUSE 3 GEN
## 9 2022 ALABAMA AL 1 63 41 US HOUSE 3 GEN
## 10 2022 ALABAMA AL 1 63 41 US HOUSE 3 GEN
## # ℹ 32,442 more rows
## # ℹ 11 more variables: runoff <lgl>, special <lgl>, candidate <chr>,
## # party <chr>, writein <lgl>, mode <chr>, candidatevotes <dbl>,
## # totalvotes <dbl>, unofficial <lgl>, version <dbl>, fusion_ticket <lgl>
Select columns
select(data,year,state,district, party, candidatevotes)
## # A tibble: 32,452 × 5
## year state district party candidatevotes
## <dbl> <chr> <dbl> <chr> <dbl>
## 1 1976 ALABAMA 1 DEMOCRAT 58906
## 2 1976 ALABAMA 1 REPUBLICAN 98257
## 3 1976 ALABAMA 1 <NA> 7
## 4 1976 ALABAMA 2 DEMOCRAT 66288
## 5 1976 ALABAMA 2 REPUBLICAN 90069
## 6 1976 ALABAMA 2 <NA> 5
## 7 1976 ALABAMA 3 DEMOCRAT 106935
## 8 1976 ALABAMA 3 PROHIBITION 1111
## 9 1976 ALABAMA 3 <NA> 2
## 10 1976 ALABAMA 4 REPUBLICAN 34531
## # ℹ 32,442 more rows
Add columns
mutate(data,candidatevotes_thousands = candidatevotes / 1000)
## # A tibble: 32,452 × 21
## year state state_po state_fips state_cen state_ic office district stage
## <dbl> <chr> <chr> <dbl> <dbl> <dbl> <chr> <dbl> <chr>
## 1 1976 ALABAMA AL 1 63 41 US HOUSE 1 GEN
## 2 1976 ALABAMA AL 1 63 41 US HOUSE 1 GEN
## 3 1976 ALABAMA AL 1 63 41 US HOUSE 1 GEN
## 4 1976 ALABAMA AL 1 63 41 US HOUSE 2 GEN
## 5 1976 ALABAMA AL 1 63 41 US HOUSE 2 GEN
## 6 1976 ALABAMA AL 1 63 41 US HOUSE 2 GEN
## 7 1976 ALABAMA AL 1 63 41 US HOUSE 3 GEN
## 8 1976 ALABAMA AL 1 63 41 US HOUSE 3 GEN
## 9 1976 ALABAMA AL 1 63 41 US HOUSE 3 GEN
## 10 1976 ALABAMA AL 1 63 41 US HOUSE 4 GEN
## # ℹ 32,442 more rows
## # ℹ 12 more variables: runoff <lgl>, special <lgl>, candidate <chr>,
## # party <chr>, writein <lgl>, mode <chr>, candidatevotes <dbl>,
## # totalvotes <dbl>, unofficial <lgl>, version <dbl>, fusion_ticket <lgl>,
## # candidatevotes_thousands <dbl>
Summarize by groups
summarise(data, votes_avg = mean(totalvotes, na.rm = TRUE))
## # A tibble: 1 × 1
## votes_avg
## <dbl>
## 1 215165.
data %>%
# Group by Airlines
group_by(year) %>%
# Calculate Average Departure Delay
summarise(votes_avg = mean(totalvotes, na.rm = TRUE))
## # A tibble: 24 × 2
## year votes_avg
## <dbl> <dbl>
## 1 1976 173045.
## 2 1978 129435.
## 3 1980 180327.
## 4 1982 149305.
## 5 1984 196323.
## 6 1986 140428.
## 7 1988 193617.
## 8 1990 146097.
## 9 1992 229095.
## 10 1994 168899.
## # ℹ 14 more rows