Load Data

# Load the built-in `cars` dataset and report the median of its speed column.
data("cars")
speed_values <- cars[["speed"]]
median(speed_values)
## [1] 15

Question 2

library(jsonlite)
# Query the CryptoCompare daily-history endpoint for BTC priced in USD
# (parameters are in the query string) and report the highest closing price
# among the returned rows.
url <- "https://min-api.cryptocompare.com/data/v2/histoday?fsym=BTC&tsym=USD&limit=100"
resp <- fromJSON(url)

# The per-day rows are nested under Data$Data in the parsed response.
df <- resp$Data$Data

# Fail loudly when the payload does not have the expected shape (e.g. an API
# error or rate-limit reply); otherwise max(NULL, na.rm = TRUE) would return
# -Inf with only a warning.
if (!is.data.frame(df) || !"close" %in% names(df)) {
  stop(
    "Unexpected API response: no `close` column found under Data$Data.",
    call. = FALSE
  )
}

max_close <- max(df$close, na.rm = TRUE)

max_close
## [1] 96945.09
# Confirm the report file is present in the working directory before it is
# read below.
file.exists("record_highs_2023.txt")
## [1] TRUE
library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stringr)


# Read the report, dropping its first line.
lines <- readLines("record_highs_2023.txt")[-1]

# Keep only lines that begin with a digit (the data rows shown in the output
# start with a numeric station ID), then collapse runs of whitespace so that
# fields are separated by exactly one space.
lines <- lines[grepl("^\\d", lines)]
lines <- str_squish(lines)

# Every data row ends with these 14 fields: the period of record, the twelve
# monthly values and the annual value.
value_cols <- c("period", "JAN", "FEB", "MAR", "APR", "MAY", "JUN",
                "JUL", "AUG", "SEP", "OCT", "NOV", "DEC", "ANN")
n_values <- length(value_cols)

tokens <- strsplit(lines, " ", fixed = TRUE)
n_fields <- lengths(tokens)

# Station names may themselves contain spaces, so a row can have MORE than 15
# tokens. Requiring exactly 15 would silently drop every multi-word station.
# Instead, anchor on the right-hand side: the last 14 tokens are the values
# and everything before them is the station name.
tokens <- tokens[n_fields > n_values]

# Re-join the leading name tokens into one field; returns 15 strings per row.
split_row <- function(tok) {
  n_name <- length(tok) - n_values
  c(paste(tok[seq_len(n_name)], collapse = " "), tok[-seq_len(n_name)])
}

fields <- matrix(
  as.character(unlist(lapply(tokens, split_row))),
  ncol = n_values + 1L,
  byrow = TRUE,
  dimnames = list(NULL, c("station_city", value_cols))
)

# Guard against misaligned rows (e.g. a row with missing values, or a stray
# non-data line starting with a digit): a well-formed row has a period of the
# form YYYYMM-YYYYMM in the expected position.
# NOTE(review): period format inferred from the visible rows - confirm it
# holds for the whole file.
is_data_row <- grepl("^[0-9]{6}-[0-9]{6}$", fields[, "period"])
fields <- fields[is_data_row, , drop = FALSE]

raw <- as_tibble(fields) %>%
  mutate(across(JAN:ANN, as.numeric))


# Preview the first six parsed rows.
raw %>%
  head(n = 6L)
## # A tibble: 6 × 15
##   station_city      period   JAN   FEB   MAR   APR   MAY   JUN   JUL   AUG   SEP
##   <chr>             <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 13876BIRMINGHAM,… 19300…    81    83    89    92    99   106   107   105   102
## 2 03856HUNTSVILLE,… 19580…    79    83    88    92    96   106   105   105   101
## 3 13894MOBILE,AL    19480…    84    85    89    94   100   103   104   106   100
## 4 13895MONTGOMERY,… 19480…    83    86    90    94    98   105   105   106   103
## 5 26451ANCHORAGE,AK 19520…    50    49    53    69    77    85    90    82    73
## 6 25308ANNETTE,AK   19410…    61    65    65    82    88    93    90    90    82
## # ℹ 4 more variables: OCT <dbl>, NOV <dbl>, DEC <dbl>, ANN <dbl>

Research Questions

Which cities in the dataset have the highest annual record temperatures?

# Rank stations by their annual record high (descending) and keep the first
# ten rows of that ranking.
annual_records <- select(raw, station_city, ANN)
ranked_records <- arrange(annual_records, desc(ANN))
top_cities <- slice_head(ranked_records, n = 10)

top_cities
## # A tibble: 10 × 2
##    station_city             ANN
##    <chr>                  <dbl>
##  1 23183PHOENIX,AZ          122
##  2 24257REDDING,CA          118
##  3 23160TUCSON,AZ           117
##  4 24155PENDLETON,OR        117
##  5 24232SALEM,OR            117
##  6 13984CONCORDIA,KS        116
##  7 24229PORTLAND,OR         116
##  8 23023MIDLAND-ODESSA,TX   116
##  9 23155BAKERSFIELD,CA      115
## 10 23232SACRAMENTO,CA       115

Which month tends to have the highest record temperatures across U.S. cities?

# Average each month's record high over all stations, ignoring missing values.
mean_ignoring_na <- function(values) {
  mean(values, na.rm = TRUE)
}

monthly_avg <- summarise(raw, across(JAN:DEC, mean_ignoring_na))

monthly_avg
## # A tibble: 1 × 12
##     JAN   FEB   MAR   APR   MAY   JUN   JUL   AUG   SEP   OCT   NOV   DEC
##   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1  72.1  76.1  83.7  90.7  96.3  103.  104.  102.  99.4  91.5  80.4  73.7

How do record high temperatures vary between colder regions (e.g., Alaska) and warmer regions (e.g., southern states)?

# Label each station as "Alaska" (station_city contains ",AK") or "Other",
# then summarise the annual record high within each label.
is_alaska <- grepl(",AK", raw$station_city, fixed = TRUE)

labelled <- raw %>%
  mutate(region = if_else(is_alaska, "Alaska", "Other"))

region_compare <- labelled %>%
  group_by(region) %>%
  summarise(
    avg_record_high = mean(ANN, na.rm = TRUE),
    max_record_high = max(ANN, na.rm = TRUE),
    min_record_high = min(ANN, na.rm = TRUE)
  )

region_compare
## # A tibble: 2 × 4
##   region avg_record_high max_record_high min_record_high
##   <chr>            <dbl>           <dbl>           <dbl>
## 1 Alaska            89.6              97              79
## 2 Other            106.              122              91

What is the overall distribution of annual record high temperatures across all cities in the dataset?

# Show the column types and the leading values of every column.
raw %>%
  glimpse()
## Rows: 192
## Columns: 15
## $ station_city <chr> "13876BIRMINGHAM,AL", "03856HUNTSVILLE,AL", "13894MOBILE,…
## $ period       <chr> "193001-202312", "195809-202312", "194801-202312", "19480…
## $ JAN          <dbl> 81, 79, 84, 83, 50, 61, 36, 49, 42, 52, 52, 57, 60, 54, 4…
## $ FEB          <dbl> 83, 83, 85, 86, 49, 65, 36, 51, 40, 50, 46, 53, 57, 56, 4…
## $ MAR          <dbl> 89, 88, 89, 90, 53, 65, 34, 53, 49, 56, 54, 54, 61, 57, 4…
## $ APR          <dbl> 92, 92, 94, 94, 69, 82, 42, 63, 66, 76, 70, 65, 74, 69, 4…
## $ MAY          <dbl> 99, 96, 100, 98, 77, 88, 47, 80, 86, 90, 85, 72, 80, 80, …
## $ JUN          <dbl> 106, 106, 103, 105, 85, 93, 73, 90, 92, 96, 90, 80, 86, 8…
## $ JUL          <dbl> 107, 105, 104, 105, 90, 90, 79, 89, 93, 94, 97, 81, 90, 8…
## $ AUG          <dbl> 105, 105, 106, 106, 82, 90, 76, 87, 88, 93, 88, 78, 84, 8…
## $ SEP          <dbl> 102, 101, 100, 103, 73, 82, 62, 76, 79, 84, 76, 69, 78, 7…
## $ OCT          <dbl> 101, 100, 98, 102, 64, 71, 44, 65, 59, 72, 69, 64, 63, 62…
## $ NOV          <dbl> 88, 88, 88, 91, 54, 67, 39, 60, 45, 54, 48, 58, 56, 59, 4…
## $ DEC          <dbl> 80, 79, 85, 85, 51, 62, 40, 49, 38, 58, 49, 52, 54, 65, 3…
## $ ANN          <dbl> 107, 106, 106, 106, 90, 93, 79, 90, 93, 96, 97, 81, 90, 8…
# Five-number summary plus mean of the annual record highs.
summary(raw[["ANN"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    79.0   101.0   105.0   104.8   109.0   122.0