# Load libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(odbc)
## Warning: package 'odbc' was built under R version 4.4.3
library(DBI)
## Warning: package 'DBI' was built under R version 4.4.3
library(RSQLite)
## Warning: package 'RSQLite' was built under R version 4.4.3
host_city_locations <- readr::read_csv('https://raw.githubusercontent.com/rgriff23/Olympic_history/refs/heads/master/data/host_city_locations.csv')
## Rows: 52 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Season, City
## dbl (3): Year, lon, lat
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
athlete_events <- readr::read_csv('https://raw.githubusercontent.com/rgriff23/Olympic_history/refs/heads/master/data/athlete_events.csv')
## Rows: 271116 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): Name, Sex, Team, NOC, Games, Season, City, Sport, Event, Medal
## dbl (5): ID, Age, Height, Weight, Year
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
olympics.db <- dbConnect(RSQLite::SQLite(), ":memory:")
copy_to(olympics.db, athlete_events)
copy_to(olympics.db, host_city_locations)
- How many records are in athlete events?
SELECT COUNT(*) FROM athlete_events;
There are 271,116 records in athlete events.
- How many different athletes competed in Olympics held in
London?
SELECT COUNT(DISTINCT name) FROM athlete_events WHERE City = 'London';
16,924 different athletes have competed in Olympics held in
London.
- Which Olympics host city is furthest north?
SELECT CAST("lat" AS NUMERIC) FROM host_city_locations
Displaying records 1 - 10
| 37.98381 |
| 48.85661 |
| 38.62700 |
| 37.98381 |
| 51.50735 |
| 59.32932 |
| 51.21945 |
| 48.85661 |
| 45.92370 |
| 52.37022 |
SELECT MAX(lat) as latitude, city FROM host_city_locations
1 records
| 61.11527 |
Lillehammer |
The Northern most Olympics host city is Lillehammer.
- What is the list of host cities (with no duplicates)?
SELECT DISTINCT city FROM host_city_locations
Displaying records 1 - 10
| Athina |
| Paris |
| St. Louis |
| London |
| Stockholm |
| Antwerpen |
| Chamonix |
| Amsterdam |
| Sankt Moritz |
| Los Angeles |
host_cities_distinct <- dbGetQuery(olympics.db, "SELECT DISTINCT city FROM host_city_locations")
write.csv(host_cities_distinct, file="host_cities_Sampson.csv")
dbDisconnect(olympics.db)