#load libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(odbc)
## Warning: package 'odbc' was built under R version 4.5.2
library(DBI)
## Warning: package 'DBI' was built under R version 4.5.2
library(RSQLite)
## Warning: package 'RSQLite' was built under R version 4.5.2
#getting datasets from github
athletes_events <- readr::read_csv('https://raw.githubusercontent.com/rgriff23/Olympic_history/refs/heads/master/data/athlete_events.csv')
## Rows: 271116 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): Name, Sex, Team, NOC, Games, Season, City, Sport, Event, Medal
## dbl (5): ID, Age, Height, Weight, Year
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
host_city_locations <- readr::read_csv('https://raw.githubusercontent.com/rgriff23/Olympic_history/refs/heads/master/data/host_city_locations.csv')
## Rows: 52 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Season, City
## dbl (3): Year, lon, lat
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#store datasets to make a database
olympics.db <- dbConnect(RSQLite::SQLite(), ":memory:")
copy_to(olympics.db, athletes_events)
copy_to(olympics.db, host_city_locations)
#query the number of entries in athletes_events
dbGetQuery(olympics.db, '
SELECT COUNT(*)
FROM athletes_events
')
## COUNT(*)
## 1 271116
#query the number of unique athletes from london olympics
dbGetQuery(olympics.db, '
SELECT COUNT(DISTINCT Name)
FROM athletes_events
WHERE City = "London"
')
## COUNT(DISTINCT Name)
## 1 16924
# query the furthest north olympic venue
dbGetQuery(olympics.db, '
SELECT City
FROM host_city_locations
ORDER BY CAST(lat AS NUMERIC) DESC
LIMIT 1
')
## City
## 1 Lillehammer
#query list of host cities
dbGetQuery(olympics.db, '
SELECT DISTINCT City
FROM host_city_locations
')
## City
## 1 Athina
## 2 Paris
## 3 St. Louis
## 4 London
## 5 Stockholm
## 6 Antwerpen
## 7 Chamonix
## 8 Amsterdam
## 9 Sankt Moritz
## 10 Los Angeles
## 11 Lake Placid
## 12 Berlin
## 13 Garmisch-Partenkirchen
## 14 Helsinki
## 15 Oslo
## 16 Cortina d'Ampezzo
## 17 Melbourne
## 18 Squaw Valley
## 19 Roma
## 20 Innsbruck
## 21 Tokyo
## 22 Mexico City
## 23 Grenoble
## 24 Munich
## 25 Sapporo
## 26 Montreal
## 27 Moskva
## 28 Sarajevo
## 29 Calgary
## 30 Seoul
## 31 Barcelona
## 32 Albertville
## 33 Lillehammer
## 34 Atlanta
## 35 Nagano
## 36 Sydney
## 37 Salt Lake City
## 38 Torino
## 39 Beijing
## 40 Vancouver
## 41 Sochi
## 42 Rio de Janeiro
#disconect
dbDisconnect(olympics.db)