setwd("~/Downloads")
host_city <- read_csv("host_city_locations.csv")
## Rows: 52 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Season, City
## dbl (3): Year, lon, lat
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
athlete_events <- read_csv("athlete_events.csv")
## Rows: 271116 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): Name, Sex, Team, NOC, Games, Season, City, Sport, Event, Medal
## dbl (5): ID, Age, Height, Weight, Year
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
olympic_con <- dbConnect(RSQLite::SQLite(), "olympics.db")
dbWriteTable(olympic_con, "athlete_events", athlete_events, overwrite = TRUE)
dbWriteTable(olympic_con, "host_city_locations", host_city, overwrite = TRUE)
QUESTION 1: How many records are in athlete_events?
SELECT COUNT (*) AS records_num
FROM athlete_events;
| records_num |
|---|
| 271116 |
QUESTION 2: How many different athletes competed in Olympics held in London?
SELECT COUNT(DISTINCT Name) AS athletes_london FROM athlete_events
WHERE City = "London";
| athletes_london |
|---|
| 16924 |
QUESTION 3: Which Olympics host city is furthest north?
SELECT City FROM host_city_locations
ORDER BY CAST(lat AS REAL) DESC
LIMIT 1;
| City |
|---|
| Lillehammer |
QUESTION 4: List of host cities (no duplicates)
SELECT DISTINCT City FROM host_city_locations
| City |
|---|
| Athina |
| Paris |
| St. Louis |
| London |
| Stockholm |
| Antwerpen |
| Chamonix |
| Amsterdam |
| Sankt Moritz |
| Los Angeles |
host_cities <- dbGetQuery(olympic_con, "
SELECT DISTINCT City FROM host_city_locations")
write.csv(host_cities, "host_cities_JuliaMC.csv")
dbDisconnect(olympic_con)