Part 1

# Shared theme added to every plot in this lab: theme_minimal() with the
# Optima font family and a 12 pt base size.
lab11_theme <- theme_minimal(base_size = 12, base_family = "Optima")

# Load the temp_carbon dataset into the workspace.
data(temp_carbon)

# Reshape to long format: one row per Year/Region pair, where Region is one of
# Global, Land or Ocean and Anomaly is the matching temperature anomaly.
# Rows with a missing Anomaly are dropped.
temp_anomaly <- temp_carbon %>%
  select(
    Year = year,
    Global = temp_anomaly,
    Land = land_anomaly,
    Ocean = ocean_anomaly
  ) %>%
  pivot_longer(
    cols = Global:Ocean,
    names_to = "Region",
    values_to = "Anomaly"
  ) %>%
  drop_na(Anomaly)


# Animated line chart of the temperature anomaly, one coloured line per Region,
# revealed progressively along Year.
anim_1 <- ggplot(
  data = temp_anomaly,
  mapping = aes(x = Year, y = Anomaly, colour = Region)
) +
  geom_line(size = 1) +
  # Dashed reference line at an anomaly of zero, with a text label just below.
  geom_hline(aes(yintercept = 0), linetype = 2) +
  annotate(
    "text",
    x = 2005, y = -0.08,
    label = "20th century mean", size = 3
  ) +
  scale_colour_viridis_d(option = "C", end = 0.75) +
  labs(
    title = "Temperature anomaly relative to 20th century mean",
    x = "Year",
    y = "Temperature anomaly (degrees C)"
  ) +
  lab11_theme +
  transition_reveal(Year)

# Render the animation with 15 frames.
animate(anim_1, nframes = 15)

# Same animated line chart as anim_1, but each line is labelled directly at the
# right-hand edge of the plot instead of through a colour legend.
anim_2 <- temp_anomaly %>%
  ggplot(aes(x = Year, y = Anomaly, colour = Region)) +
  geom_line(size = 1) +

  # Dashed horizontal connector from the current point of each line to its
  # label. The start point (x, y) is inherited from the ggplot() mapping; only
  # the end point is set here.
  geom_segment(aes(xend = 2030, yend = Anomaly), linetype = 2) +

  # Label each line with its Region. x is fixed at 2030 so the labels stay at
  # the right side of the plot while the lines are revealed.
  geom_text(aes(x = 2030, label = Region), hjust = 0) +

  # Dashed reference line at an anomaly of zero, with a text label just below.
  geom_hline(aes(yintercept = 0), linetype = 2) +
  annotate(
    "text",
    x = 2005, y = -0.08,
    label = "20th century mean", size = 3
  ) +
  scale_colour_viridis_d(option = "C", end = 0.75) +
  labs(
    title = "Temperature anomaly relative to 20th century mean",
    x = "Year",
    y = "Temperature anomaly (degrees C)"
  ) +
  # Extend the x axis past the data so there is room for the labels at 2030.
  xlim(1880, 2040) +
  # Allow drawing outside the panel area (including the margins) so the labels
  # are not clipped.
  coord_cartesian(clip = "off") +
  lab11_theme +
  # Hide the colour legend; the direct labels replace it. "none" is used
  # instead of FALSE because the logical form is deprecated in ggplot2.
  guides(colour = "none") +
  transition_reveal(Year)

# Render the animation with 15 frames.
animate(anim_2, nframes = 15)

Part 2

# Read the COVID data from covid_data.csv (relative path); column types are
# guessed by readr.
covid_data <- read_csv(file = "covid_data.csv")

## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   signal = col_character(),
##   geo_value = col_character(),
##   time_value = col_date(format = ""),
##   value = col_double(),
##   stderr = col_double(),
##   sample_size = col_double()
## )

# Widen the data: one row per geo_value/time_value pair, with the `value` of
# each signal spread into its own column named after the signal.
covid_wide <- pivot_wider(
  covid_data,
  id_cols = c(geo_value, time_value),
  names_from = signal,
  values_from = value
)

# Animated scatter plot of restaurant visits against mask wearing, with
# smoothed_cli mapped to both point colour and point size. Frames advance
# through time_value.
anim_3 <- ggplot(
  data = covid_wide,
  mapping = aes(
    x = smoothed_wearing_mask,
    y = smoothed_restaurant_1d,
    colour = smoothed_cli,
    size = smoothed_cli
  )
) +
  geom_point(alpha = 0.5) +
  scale_colour_viridis_c(option = "C", end = 0.75) +
  lab11_theme +
  theme(legend.position = "bottom") +
  transition_time(time_value)

# Render the animation with 10 frames.
animate(anim_3, nframes = 10)

# Polished version of anim_3: adds titles and axis labels, draws the colour
# scale as a legend guide, and leaves faint marks behind for earlier frames.
anim_4 <- ggplot(
  data = covid_wide,
  mapping = aes(
    x = smoothed_wearing_mask,
    y = smoothed_restaurant_1d,
    size = smoothed_cli,
    colour = smoothed_cli
  )
) +
  geom_point(alpha = 0.5) +
  scale_colour_viridis_c(option = "C", end = 0.75, guide = "legend") +
  lab11_theme +
  theme(legend.position = "bottom") +
  transition_time(time_value) +
  # Keep the points from earlier frames on screen, small and nearly transparent.
  shadow_mark(size = 0.1, alpha = 0.1) +
  labs(
    title = "Restaurant visits, mask-wearing, and COVID-like illness over time",
    # {frame_time} is filled in for each frame, so the subtitle shows its date.
    subtitle = "Date: {frame_time}",
    x = "% Wearing Mask in Public",
    y = "% Visited Restaurant",
    # Colour and size share one title.
    colour = "% with COVID-like Illness",
    size = "% with COVID-like Illness",
    caption = "Source: CMU Delphi Symptom Survey"
  )

# Render the animation with 10 frames.
animate(anim_4, nframes = 10)

Part 3: Your turn!

(For more information on saving an animation to a file, run `?anim_save`.)

# Read the Oakland policing data from oakland_policing.csv (relative path).
#
# When left to guess, readr types `beat`, `use_of_force_description` and
# `raw_subject_resultofsearch` as logical and then reports parsing failures for
# later text values such as "14X", "handcuffed" or "Other Weapons,", which
# become NA. Pinning these three columns to character keeps those values.
# All other columns are still guessed.
# NOTE(review): other columns guessed as logical (e.g. subject_age) may have the
# same problem; check problems(oakland_data) after reading.
oakland_data <- read_csv(
  "oakland_policing.csv",
  col_types = cols(
    beat = col_character(),
    use_of_force_description = col_character(),
    raw_subject_resultofsearch = col_character()
  )
)

## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_character(),
##   date = col_date(format = ""),
##   time = col_time(format = ""),
##   lat = col_double(),
##   lng = col_double(),
##   beat = col_logical(),
##   subject_age = col_logical(),
##   officer_assignment = col_logical(),
##   arrest_made = col_logical(),
##   citation_issued = col_logical(),
##   warning_issued = col_logical(),
##   contraband_found = col_logical(),
##   contraband_drugs = col_logical(),
##   contraband_weapons = col_logical(),
##   search_conducted = col_logical(),
##   use_of_force_description = col_logical(),
##   raw_subject_resultofsearch = col_logical()
## )
## ℹ Use `spec()` for the full column specifications.

## Warning: 142089 parsing failures.
##   row                        col           expected         actual                   file
## 72425 beat                       1/0/T/F/TRUE/FALSE 14X            'oakland_policing.csv'
## 72425 raw_subject_resultofsearch 1/0/T/F/TRUE/FALSE None,          'oakland_policing.csv'
## 72426 beat                       1/0/T/F/TRUE/FALSE 30X            'oakland_policing.csv'
## 72426 use_of_force_description   1/0/T/F/TRUE/FALSE handcuffed     'oakland_policing.csv'
## 72426 raw_subject_resultofsearch 1/0/T/F/TRUE/FALSE Other Weapons, 'oakland_policing.csv'
## ..... .......................... .................. .............. ......................
## See problems(...) for more details.

# Discard rows that have no date.
oakland_data <- drop_na(oakland_data, date)

# Attach per-day counts to every row: the number of rows on that date for each
# subject_race category, plus the total across all six categories.
oakland_data <- oakland_data %>%
  group_by(date) %>%
  mutate(
    # na.rm = TRUE: a row with a missing subject_race is not counted in any
    # category, rather than turning that day's counts into NA.
    black_daily = sum(subject_race == "black", na.rm = TRUE),
    white_daily = sum(subject_race == "white", na.rm = TRUE),
    hispanic_daily = sum(subject_race == "hispanic", na.rm = TRUE),
    asian_daily = sum(subject_race == "asian/pacific islander", na.rm = TRUE),
    other_daily = sum(subject_race == "other", na.rm = TRUE),
    unknown_daily = sum(subject_race == "unknown", na.rm = TRUE),
    # Total over all six categories. The last two terms must be added with `+`;
    # separating them with commas leaves them out of the total.
    daily_cases = black_daily + white_daily + hispanic_daily + asian_daily +
      other_daily + unknown_daily
  ) %>%
  # Remove the grouping so later operations are not silently done per date.
  ungroup()

# Preview the first six rows, including the new daily count columns.
head(oakland_data, n = 6)

## # A tibble: 6 x 35
## # Groups:   date [1]
##   raw_row_number date       time  location   lat   lng beat  subject_age
##   <chr>          <date>     <tim> <chr>    <dbl> <dbl> <lgl> <lgl>      
## 1 1              2013-04-01 00:00 31st St…  37.8 -122. NA    NA         
## 2 2              2013-04-01 00:01 3000 Bl…  37.8 -122. NA    NA         
## 3 19             2013-04-01 10:41 12th St…  37.8 -122. NA    NA         
## 4 20             2013-04-01 14:18 2100 WE…  37.8 -122. NA    NA         
## 5 4              2013-04-01 00:15 2200 Bl…  37.8 -122. NA    NA         
## 6 21             2013-04-01 15:32 300 Blk…  37.7 -122. NA    NA         
## # … with 27 more variables: subject_race <chr>, subject_sex <chr>,
## #   officer_assignment <lgl>, type <chr>, arrest_made <lgl>,
## #   citation_issued <lgl>, warning_issued <lgl>, outcome <chr>,
## #   contraband_found <lgl>, contraband_drugs <lgl>, contraband_weapons <lgl>,
## #   search_conducted <lgl>, search_basis <chr>, reason_for_stop <chr>,
## #   use_of_force_description <lgl>, raw_subject_sdrace <chr>,
## #   raw_subject_resultofencounter <chr>, raw_subject_searchconducted <chr>,
## #   raw_subject_typeofsearch <chr>, raw_subject_resultofsearch <lgl>,
## #   black_daily <int>, white_daily <int>, hispanic_daily <int>,
## #   asian_daily <int>, other_daily <int>, unknown_daily <int>,
## #   daily_cases <int>

# Preview the first six rows of the temp_carbon dataset.
head(temp_carbon, n = 6)

##   year temp_anomaly land_anomaly ocean_anomaly carbon_emissions
## 1 1880        -0.11        -0.48         -0.01              236
## 2 1881        -0.08        -0.40          0.01              243
## 3 1882        -0.10        -0.48          0.00              256
## 4 1883        -0.18        -0.66         -0.04              272
## 5 1884        -0.26        -0.69         -0.14              275
## 6 1885        -0.25        -0.56         -0.17              277

# Preview the first six rows of the long-format COVID data.
head(covid_data, n = 6)

## # A tibble: 6 x 6
##   signal       geo_value time_value value stderr sample_size
##   <chr>        <chr>     <date>     <dbl>  <dbl>       <dbl>
## 1 smoothed_cli ak        2020-10-01 1.59  0.429         760 
## 2 smoothed_cli al        2020-10-01 0.983 0.141        3742.
## 3 smoothed_cli ar        2020-10-01 1.06  0.174        2587.
## 4 smoothed_cli az        2020-10-01 0.597 0.0905       5682.
## 5 smoothed_cli ca        2020-10-01 0.450 0.0399      21930.
## 6 smoothed_cli co        2020-10-01 0.561 0.0917       5137.

# Animated area chart of police stops over time, filled by subject_race and
# revealed progressively along date.
anim_oakland <- oakland_data %>%
  ggplot(aes(x = date, fill = subject_race)) +
  # Count rows per bin of the date axis (binwidth = 3) and draw the counts as
  # areas.
  geom_area(stat = "bin", binwidth = 3) +
  # The plot maps `fill`, so the palette has to go on the fill scale; on a
  # colour scale it has no effect. The legend title comes from labs(fill = ...)
  # below.
  scale_fill_viridis_d(end = 0.75, option = "C") +
  # Only show April 2016 to January 2018; rows outside these limits are dropped
  # with a warning.
  scale_x_date(
    date_breaks = "3 months",
    labels = date_format("%b-%Y"),
    limits = as.Date(c("2016-04-01", "2018-01-01"))
  ) +
  transition_reveal(date) +
  labs(
    title = "Police Stops by Race, Oakland, CA",
    subtitle = "2016 - 2018",
    x = "Date",
    y = "Number of Cases",
    fill = "Race",
    caption = "Source: The Stanford Open Policing Project"
  )

# Render the animation with 90 frames.
animate(anim_oakland, nframes = 90)

## Warning: Removed 82711 rows containing non-finite values (stat_bin).

Lab11 Write-Up

EGOR CHERNIUK

Part 1

Part 2

Part 3: Your turn!