This homework creates a SQLite database called olympics.db using two CSV files, then uses SQL queries to answer the assigned questions.

library(DBI)
## Warning: package 'DBI' was built under R version 4.3.3
library(RSQLite)
library(readr)
## Warning: package 'readr' was built under R version 4.3.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.3.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Download the datasets and set working directory

athlete_events <- read_csv("athlete_events.csv")
## Rows: 271116 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): Name, Sex, Team, NOC, Games, Season, City, Sport, Event, Medal
## dbl  (5): ID, Age, Height, Weight, Year
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
host_city_locations <- read_csv("host_city_locations.csv")
## Rows: 52 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Season, City
## dbl (3): Year, lon, lat
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
con <- dbConnect(SQLite(), "olympics.db")

Write both tables into the database:

#dbWriteTable(con, "athlete_events", athlete_events, overwrite = TRUE)
#dbWriteTable(con, "host_city_locations", host_city_locations, overwrite = TRUE)

Or copy_to as in class activity

copy_to(con, athlete_events, "athlete_events", overwrite = TRUE)
copy_to(con, host_city_locations, "host_city_locations", overwrite = TRUE)

Question 1: How many records are in athlete_events?

SELECT COUNT(*) AS total_records
FROM athlete_events;
1 records
total_records
271116

Question 2: How many different athletes competed in Olympics held in London?


SELECT COUNT(DISTINCT ID) AS different_athletes
FROM athlete_events
WHERE City = 'London';
1 records
different_athletes
16941

Question 3:Which Olympics host city is furthest north?

Method 1: Query and sub-query:

SELECT City, lat
FROM host_city_locations
WHERE CAST(lat AS REAL) = (
  SELECT MAX(CAST(lat AS REAL))
  FROM host_city_locations
);
1 records
City lat
Lillehammer 61.11527

Method 2: two separate queries

SELECT MAX(CAST(lat AS REAL)) AS max_latitude
FROM host_city_locations;
1 records
max_latitude
61.11527
SELECT City, lat
FROM host_city_locations
ORDER BY CAST(lat AS REAL) DESC
LIMIT 1;
1 records
City lat
Lillehammer 61.11527

Question 4: What is the list of host cities with no duplicates?

SELECT DISTINCT City
FROM host_city_locations
ORDER BY City;
Displaying records 1 - 10
City
Albertville
Amsterdam
Antwerpen
Athina
Atlanta
Barcelona
Beijing
Berlin
Calgary
Chamonix
q4 <- "
SELECT DISTINCT City
FROM host_city_locations
ORDER BY City;
"

answer4 <- dbGetQuery(con, q4)

write.csv(answer4, "host_cities_MA_in_R.csv", row.names = FALSE)