Setup
#install.packages(c("odbc", "DBI", "RSQLite"))
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(odbc)
## Warning: package 'odbc' was built under R version 4.5.2
library(DBI)
library(RSQLite)
## Warning: package 'RSQLite' was built under R version 4.5.2
Load data
athlete_events <- read_csv("athlete_events.csv")
## Rows: 271116 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): Name, Sex, Team, NOC, Games, Season, City, Sport, Event, Medal
## dbl (5): ID, Age, Height, Weight, Year
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
host_city_locations <- read_csv("host_city_locations.csv")
## Rows: 52 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Season, City
## dbl (3): Year, lon, lat
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Creating dataset connection
con <- dbConnect(RSQLite::SQLite(), "olympics.db")
copy_to(con, athlete_events)
copy_to(con, host_city_locations)
- How many records are in athlete_events?
dbGetQuery(con, '
SELECT COUNT(*)
FROM athlete_events')
## COUNT(*)
## 1 271116
- How many different athletes competed in Olympics held in
London?
dbGetQuery(con, '
SELECT COUNT(DISTINCT Name)
FROM athlete_events
WHERE City = "London"')
## COUNT(DISTINCT Name)
## 1 16924
- Which Olympics host city is furthest north?
dbGetQuery(con, '
SELECT City
FROM host_city_locations
ORDER BY CAST(Lat AS REAL) DESC
LIMIT 1
')
## City
## 1 Lillehammer
- What is the list of host cities (with no duplicates)?
host_cities <- dbGetQuery(con, '
SELECT DISTINCT City
FROM host_city_locations')
host_cities
## City
## 1 Athina
## 2 Paris
## 3 St. Louis
## 4 London
## 5 Stockholm
## 6 Antwerpen
## 7 Chamonix
## 8 Amsterdam
## 9 Sankt Moritz
## 10 Los Angeles
## 11 Lake Placid
## 12 Berlin
## 13 Garmisch-Partenkirchen
## 14 Helsinki
## 15 Oslo
## 16 Cortina d'Ampezzo
## 17 Melbourne
## 18 Squaw Valley
## 19 Roma
## 20 Innsbruck
## 21 Tokyo
## 22 Mexico City
## 23 Grenoble
## 24 Munich
## 25 Sapporo
## 26 Montreal
## 27 Moskva
## 28 Sarajevo
## 29 Calgary
## 30 Seoul
## 31 Barcelona
## 32 Albertville
## 33 Lillehammer
## 34 Atlanta
## 35 Nagano
## 36 Sydney
## 37 Salt Lake City
## 38 Torino
## 39 Beijing
## 40 Vancouver
## 41 Sochi
## 42 Rio de Janeiro
write.csv(host_cities, "host_cities_YourName.csv", row.names = FALSE)
dbDisconnect(con)