#load libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(odbc)
## Warning: package 'odbc' was built under R version 4.5.2
library(DBI)
## Warning: package 'DBI' was built under R version 4.5.2
library(RSQLite)
## Warning: package 'RSQLite' was built under R version 4.5.2
#getting datasets from github
athletes_events <- readr::read_csv('https://raw.githubusercontent.com/rgriff23/Olympic_history/refs/heads/master/data/athlete_events.csv')
## Rows: 271116 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): Name, Sex, Team, NOC, Games, Season, City, Sport, Event, Medal
## dbl  (5): ID, Age, Height, Weight, Year
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
host_city_locations <- readr::read_csv('https://raw.githubusercontent.com/rgriff23/Olympic_history/refs/heads/master/data/host_city_locations.csv')
## Rows: 52 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Season, City
## dbl (3): Year, lon, lat
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#store datasets to make a database
olympics.db <- dbConnect(RSQLite::SQLite(), ":memory:")
copy_to(olympics.db, athletes_events)
copy_to(olympics.db, host_city_locations)
#query the number of entries in athletes_events
dbGetQuery(olympics.db, '
           SELECT COUNT(*)
           FROM athletes_events
           ')
##   COUNT(*)
## 1   271116
#query the number of unique athletes from london olympics
dbGetQuery(olympics.db, '
           SELECT COUNT(DISTINCT Name)
           FROM athletes_events
           WHERE City = "London"
           ')
##   COUNT(DISTINCT Name)
## 1                16924
# query the furthest north olympic venue
dbGetQuery(olympics.db, '
           SELECT City
           FROM host_city_locations
           ORDER BY CAST(lat AS NUMERIC) DESC
           LIMIT 1
           ')
##          City
## 1 Lillehammer
#query list of host cities
dbGetQuery(olympics.db, '
           SELECT DISTINCT City
           FROM host_city_locations
           ')
##                      City
## 1                  Athina
## 2                   Paris
## 3               St. Louis
## 4                  London
## 5               Stockholm
## 6               Antwerpen
## 7                Chamonix
## 8               Amsterdam
## 9            Sankt Moritz
## 10            Los Angeles
## 11            Lake Placid
## 12                 Berlin
## 13 Garmisch-Partenkirchen
## 14               Helsinki
## 15                   Oslo
## 16      Cortina d'Ampezzo
## 17              Melbourne
## 18           Squaw Valley
## 19                   Roma
## 20              Innsbruck
## 21                  Tokyo
## 22            Mexico City
## 23               Grenoble
## 24                 Munich
## 25                Sapporo
## 26               Montreal
## 27                 Moskva
## 28               Sarajevo
## 29                Calgary
## 30                  Seoul
## 31              Barcelona
## 32            Albertville
## 33            Lillehammer
## 34                Atlanta
## 35                 Nagano
## 36                 Sydney
## 37         Salt Lake City
## 38                 Torino
## 39                Beijing
## 40              Vancouver
## 41                  Sochi
## 42         Rio de Janeiro
#disconect
dbDisconnect(olympics.db)