library(DBI)
library(RSQLite)
library(readr)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ purrr 1.2.1
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(odbc)
athlete_events <- read_csv("~/Library/CloudStorage/OneDrive-montgomerycollege.edu/DATA 205 - Mais Alraee/HW 6/athlete_events.csv")
## Rows: 271116 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): Name, Sex, Team, NOC, Games, Season, City, Sport, Event, Medal
## dbl (5): ID, Age, Height, Weight, Year
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
host_city_locations <- read_csv("~/Library/CloudStorage/OneDrive-montgomerycollege.edu/DATA 205 - Mais Alraee/HW 6/host_city_locations.csv")
## Rows: 52 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Season, City
## dbl (3): Year, lon, lat
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
con <- dbConnect(RSQLite::SQLite(), "olympics.db")
copy_to(con, athlete_events, overwrite = TRUE)
copy_to(con, host_city_locations, overwrite = TRUE)
dbListTables(con)
## [1] "athlete_events" "host_city_locations" "sqlite_stat1"
## [4] "sqlite_stat4"
Question 1: How many records are in athlete_events?
SELECT COUNT(*) AS total_records_athlete_events
FROM athlete_events;
| total_records_athlete_events |
|---|
| 271116 |
Question 2: How many different athletes competed in Olympics held in London?
SELECT COUNT(DISTINCT ID) AS athletes
FROM athlete_events
WHERE City = 'London';
| athletes |
|---|
| 16941 |
Question 3: Which Olympics host city is furthest north?
SELECT city, lat
FROM host_city_locations
WHERE CAST(lat AS REAL) = (SELECT MAX(CAST(lat AS REAL))
FROM host_city_locations)
| City | lat |
|---|---|
| Lillehammer | 61.11527 |
Question 4: What is the list of host cities (with no duplicates)?
SELECT DISTINCT city
FROM host_city_locations
ORDER BY city;
| City |
|---|
| Albertville |
| Amsterdam |
| Antwerpen |
| Athina |
| Atlanta |
| Barcelona |
| Beijing |
| Berlin |
| Calgary |
| Chamonix |
host_cities <- dbGetQuery(con, "
SELECT DISTINCT City
FROM host_city_locations
ORDER BY City;
")
write_csv(host_cities, "host_cities_JacquelineMcCarty.csv")
dbDisconnect(con)