Setup

#install.packages(c("odbc", "DBI", "RSQLite"))
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(odbc)
## Warning: package 'odbc' was built under R version 4.5.2
library(DBI)
library(RSQLite)
## Warning: package 'RSQLite' was built under R version 4.5.2

Load data

athlete_events <- read_csv("athlete_events.csv")
## Rows: 271116 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): Name, Sex, Team, NOC, Games, Season, City, Sport, Event, Medal
## dbl  (5): ID, Age, Height, Weight, Year
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
host_city_locations <- read_csv("host_city_locations.csv")
## Rows: 52 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Season, City
## dbl (3): Year, lon, lat
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Creating dataset connection

con <- dbConnect(RSQLite::SQLite(), "olympics.db")
copy_to(con, athlete_events)
copy_to(con, host_city_locations)
  1. How many records are in athlete_events?
dbGetQuery(con, '
SELECT COUNT(*) 
FROM athlete_events')
##   COUNT(*)
## 1   271116
  1. How many different athletes competed in Olympics held in London?
dbGetQuery(con, '
SELECT COUNT(DISTINCT Name)
FROM athlete_events
WHERE City = "London"')
##   COUNT(DISTINCT Name)
## 1                16924
  1. Which Olympics host city is furthest north?
dbGetQuery(con, '
SELECT City
FROM host_city_locations
ORDER BY CAST(Lat AS REAL) DESC
LIMIT 1
')
##          City
## 1 Lillehammer
  1. What is the list of host cities (with no duplicates)?
host_cities <- dbGetQuery(con, '
SELECT DISTINCT City
FROM host_city_locations')

host_cities
##                      City
## 1                  Athina
## 2                   Paris
## 3               St. Louis
## 4                  London
## 5               Stockholm
## 6               Antwerpen
## 7                Chamonix
## 8               Amsterdam
## 9            Sankt Moritz
## 10            Los Angeles
## 11            Lake Placid
## 12                 Berlin
## 13 Garmisch-Partenkirchen
## 14               Helsinki
## 15                   Oslo
## 16      Cortina d'Ampezzo
## 17              Melbourne
## 18           Squaw Valley
## 19                   Roma
## 20              Innsbruck
## 21                  Tokyo
## 22            Mexico City
## 23               Grenoble
## 24                 Munich
## 25                Sapporo
## 26               Montreal
## 27                 Moskva
## 28               Sarajevo
## 29                Calgary
## 30                  Seoul
## 31              Barcelona
## 32            Albertville
## 33            Lillehammer
## 34                Atlanta
## 35                 Nagano
## 36                 Sydney
## 37         Salt Lake City
## 38                 Torino
## 39                Beijing
## 40              Vancouver
## 41                  Sochi
## 42         Rio de Janeiro
write.csv(host_cities, "host_cities_YourName.csv", row.names = FALSE)
dbDisconnect(con)