# Load the built-in `cars` dataset and print the median of its `speed` column.
data("cars")
median(cars[["speed"]])
## [1] 15
library(jsonlite)
# Request daily BTC/USD history (limit = 100) from the CryptoCompare API and
# print the highest closing price found in the returned records.
url <- "https://min-api.cryptocompare.com/data/v2/histoday?fsym=BTC&tsym=USD&limit=100"
resp <- fromJSON(url)
# The per-day records are read from the nested Data$Data element.
df <- resp$Data$Data
# Fail loudly when the payload carries no usable close prices; otherwise
# max(..., na.rm = TRUE) would only warn and return -Inf.
if (length(df$close) == 0L || all(is.na(df$close))) {
  stop("CryptoCompare response contains no close prices", call. = FALSE)
}
max_close <- max(df$close, na.rm = TRUE)
max_close
## [1] 96945.09
# Check that the record-highs input file is present (path is relative to the
# working directory); the result is printed, not acted on.
file.exists("record_highs_2023.txt")
## [1] TRUE
library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stringr)
# Read the record-highs file and discard its first line.
lines <- readLines("record_highs_2023.txt")[-1]
# Keep only lines that start with a digit, then collapse runs of whitespace
# so every field is separated by exactly one space.
lines <- lines[grepl("^\\d", lines)]
lines <- str_squish(lines)
# Count space-separated fields per line. lengths() always returns an integer
# vector, whereas sapply(..., length) returns list() on empty input.
n_fields <- lengths(strsplit(lines, " "))
# Keep lines with exactly 15 fields (the number of column names used when
# parsing).
# NOTE(review): a station name containing a space splits into extra fields,
# so such rows are dropped here -- confirm this is intended.
lines <- lines[n_fields == 15]
# Parse the cleaned lines as a whitespace-delimited table with explicit
# column names: station/city, period, twelve months, and the annual value.
raw <- read_table(
  file = I(lines),
  col_names = c(
    "station_city", "period",
    "JAN", "FEB", "MAR", "APR", "MAY", "JUN",
    "JUL", "AUG", "SEP", "OCT", "NOV", "DEC",
    "ANN"
  )
)
# Coerce every monthly column and the annual column to numeric.
raw <- mutate(raw, across(JAN:ANN, \(x) as.numeric(x)))
head(raw)
## # A tibble: 6 × 15
## station_city period JAN FEB MAR APR MAY JUN JUL AUG SEP
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 13876BIRMINGHAM,… 19300… 81 83 89 92 99 106 107 105 102
## 2 03856HUNTSVILLE,… 19580… 79 83 88 92 96 106 105 105 101
## 3 13894MOBILE,AL 19480… 84 85 89 94 100 103 104 106 100
## 4 13895MONTGOMERY,… 19480… 83 86 90 94 98 105 105 106 103
## 5 26451ANCHORAGE,AK 19520… 50 49 53 69 77 85 90 82 73
## 6 25308ANNETTE,AK 19410… 61 65 65 82 88 93 90 90 82
## # ℹ 4 more variables: OCT <dbl>, NOV <dbl>, DEC <dbl>, ANN <dbl>
# Ten stations with the highest annual (ANN) value, largest first.
top_cities <- raw %>%
  select(station_city, ANN) %>%
  arrange(desc(ANN)) %>%
  head(n = 10)
top_cities
## # A tibble: 10 × 2
## station_city ANN
## <chr> <dbl>
## 1 23183PHOENIX,AZ 122
## 2 24257REDDING,CA 118
## 3 23160TUCSON,AZ 117
## 4 24155PENDLETON,OR 117
## 5 24232SALEM,OR 117
## 6 13984CONCORDIA,KS 116
## 7 24229PORTLAND,OR 116
## 8 23023MIDLAND-ODESSA,TX 116
## 9 23155BAKERSFIELD,CA 115
## 10 23232SACRAMENTO,CA 115
# Mean of each monthly column (JAN through DEC) across all stations,
# ignoring missing values.
monthly_avg <- summarise(
  raw,
  across(JAN:DEC, function(month_values) mean(month_values, na.rm = TRUE))
)
monthly_avg
## # A tibble: 1 × 12
## JAN FEB MAR APR MAY JUN JUL AUG SEP OCT NOV DEC
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 72.1 76.1 83.7 90.7 96.3 103. 104. 102. 99.4 91.5 80.4 73.7
# Label each station "Alaska" when its station_city contains ",AK" and
# "Other" otherwise, then summarise the annual (ANN) values per label.
region_compare <- raw %>%
  group_by(
    region = if_else(grepl(",AK", station_city), "Alaska", "Other")
  ) %>%
  summarise(
    avg_record_high = mean(ANN, na.rm = TRUE),
    max_record_high = max(ANN, na.rm = TRUE),
    min_record_high = min(ANN, na.rm = TRUE)
  )
region_compare
## # A tibble: 2 × 4
## region avg_record_high max_record_high min_record_high
## <chr> <dbl> <dbl> <dbl>
## 1 Alaska 89.6 97 79
## 2 Other 106. 122 91
# Print a column-wise overview of the parsed table.
dplyr::glimpse(raw)
## Rows: 192
## Columns: 15
## $ station_city <chr> "13876BIRMINGHAM,AL", "03856HUNTSVILLE,AL", "13894MOBILE,…
## $ period <chr> "193001-202312", "195809-202312", "194801-202312", "19480…
## $ JAN <dbl> 81, 79, 84, 83, 50, 61, 36, 49, 42, 52, 52, 57, 60, 54, 4…
## $ FEB <dbl> 83, 83, 85, 86, 49, 65, 36, 51, 40, 50, 46, 53, 57, 56, 4…
## $ MAR <dbl> 89, 88, 89, 90, 53, 65, 34, 53, 49, 56, 54, 54, 61, 57, 4…
## $ APR <dbl> 92, 92, 94, 94, 69, 82, 42, 63, 66, 76, 70, 65, 74, 69, 4…
## $ MAY <dbl> 99, 96, 100, 98, 77, 88, 47, 80, 86, 90, 85, 72, 80, 80, …
## $ JUN <dbl> 106, 106, 103, 105, 85, 93, 73, 90, 92, 96, 90, 80, 86, 8…
## $ JUL <dbl> 107, 105, 104, 105, 90, 90, 79, 89, 93, 94, 97, 81, 90, 8…
## $ AUG <dbl> 105, 105, 106, 106, 82, 90, 76, 87, 88, 93, 88, 78, 84, 8…
## $ SEP <dbl> 102, 101, 100, 103, 73, 82, 62, 76, 79, 84, 76, 69, 78, 7…
## $ OCT <dbl> 101, 100, 98, 102, 64, 71, 44, 65, 59, 72, 69, 64, 63, 62…
## $ NOV <dbl> 88, 88, 88, 91, 54, 67, 39, 60, 45, 54, 48, 58, 56, 59, 4…
## $ DEC <dbl> 80, 79, 85, 85, 51, 62, 40, 49, 38, 58, 49, 52, 54, 65, 3…
## $ ANN <dbl> 107, 106, 106, 106, 90, 93, 79, 90, 93, 96, 97, 81, 90, 8…
# Print summary statistics for the annual (ANN) column.
summary(raw[["ANN"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 79.0 101.0 105.0 104.8 109.0 122.0