DATA205_Spring2026_Homework6_JUrquilla

Author

Jhonathan Urquilla

library(tidyverse)  # For read_csv() and write_csv()
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.2     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.4     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Step 1: Set the working directory

# Directory that holds the input CSV files. The original path only exists on
# one machine, so allow an override through the DATA205_DATA_DIR environment
# variable and fall back to the original location when it is not set.
data_dir <- Sys.getenv("DATA205_DATA_DIR", unset = "C:/Users/ubjho/Downloads")
setwd(data_dir)

Step 2: Read the CSV files from the selected path using read_csv()

# Load the athlete results table; the relative path is resolved against the
# current working directory.
athlete_events <- read_csv(file = "athlete_events.csv")
Rows: 271116 Columns: 15
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (10): Name, Sex, Team, NOC, Games, Season, City, Sport, Event, Medal
dbl  (5): ID, Age, Height, Weight, Year

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the host-city coordinates table; the relative path is resolved against
# the current working directory.
host_city_locations <- read_csv(file = "host_city_locations.csv")
Rows: 52 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): Season, City
dbl (3): Year, lon, lat

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first six rows to sanity-check the import.
head(athlete_events, n = 6L)
# A tibble: 6 × 15
     ID Name      Sex     Age Height Weight Team  NOC   Games  Year Season City 
  <dbl> <chr>     <chr> <dbl>  <dbl>  <dbl> <chr> <chr> <chr> <dbl> <chr>  <chr>
1     1 A Dijiang M        24    180     80 China CHN   1992…  1992 Summer Barc…
2     2 A Lamusi  M        23    170     60 China CHN   2012…  2012 Summer Lond…
3     3 Gunnar N… M        24     NA     NA Denm… DEN   1920…  1920 Summer Antw…
4     4 Edgar Li… M        34     NA     NA Denm… DEN   1900…  1900 Summer Paris
5     5 Christin… F        21    185     82 Neth… NED   1988…  1988 Winter Calg…
6     5 Christin… F        21    185     82 Neth… NED   1988…  1988 Winter Calg…
# ℹ 3 more variables: Sport <chr>, Event <chr>, Medal <chr>
# Preview the first six rows to sanity-check the import.
head(host_city_locations, n = 6L)
# A tibble: 6 × 5
   Year Season City          lon   lat
  <dbl> <chr>  <chr>       <dbl> <dbl>
1  1896 Summer Athina     23.7    38.0
2  1900 Summer Paris       2.35   48.9
3  1904 Summer St. Louis -90.2    38.6
4  1906 Summer Athina     23.7    38.0
5  1908 Summer London     -0.128  51.5
6  1912 Summer Stockholm  18.1    59.3

Step 3: Answer the questions using base R

# Question 1: How many records are in athlete_events?
# Every row of the table is one record, so the first dimension (the row count)
# is the answer.
total_records <- dim(athlete_events)[1L]
print("Answer to Question 1:")
[1] "Answer to Question 1:"
# Print explicitly so the value is also shown when the script is source()d,
# where bare expressions are not auto-printed.
print(total_records)
[1] 271116
# Question 2: How many different athletes competed in Olympics held in London?
# Count distinct values of the ID column rather than Name: two different
# athletes can share a name and would otherwise be counted once. (ID is
# presumably one identifier per athlete -- confirm against the data dictionary.)
# `%in%` never yields NA, unlike `==`, so rows with a missing City cannot add
# a spurious NA element to the subset.
# NOTE(review): the result may differ from the earlier Name-based count;
# re-render the document to refresh the printed answer.
in_london <- athlete_events$City %in% "London"
athletes <- length(unique(athlete_events$ID[in_london]))
print("Answer to Question 2:")
[1] "Answer to Question 2:"
# Print explicitly so the value is also shown when the script is source()d,
# where bare expressions are not auto-printed.
print(athletes)
[1] 16924
# Question 3: Which Olympics host city is furthest north?
# Latitude increases towards the north, so the row with the largest `lat` is
# the answer. read_csv() already parsed `lat` as a double, so no conversion
# (and no overwrite of the input column) is needed. which.max() returns the
# first matching row if several rows share the maximum.
northmost_row <- which.max(host_city_locations$lat)
furthest_city <- host_city_locations[northmost_row, "City"]
print("Answer to Question 3:")
[1] "Answer to Question 3:"
# Display the one-row result; print() states the intent directly, whereas
# head() is meant for truncating longer tables.
print(furthest_city)
# A tibble: 1 × 1
  City       
  <chr>      
1 Lillehammer
# Question 4: What is the list of host cities (with no duplicates)?
# distinct() with only `City` named keeps just that column and drops repeated
# cities in a single step. The earlier version de-duplicated and then
# re-selected from the original table, which discarded the de-duplication.
# NOTE(review): the earlier render listed all 52 rows, repeats included;
# re-render the document to refresh the printed table.
host_city <- distinct(host_city_locations, City)
print("Answer to Question 4:")
[1] "Answer to Question 4:"
# Print explicitly so the table is also shown when the script is source()d,
# where bare expressions are not auto-printed.
print(host_city)
# A tibble: 52 × 1
   City     
   <chr>    
 1 Athina   
 2 Paris    
 3 St. Louis
 4 Athina   
 5 London   
 6 Stockholm
 7 Antwerpen
 8 Paris    
 9 Chamonix 
10 Amsterdam
# ℹ 42 more rows
# Step 4: Export the list of unique host cities to a CSV file
# De-duplicate at the point of export so the file never contains repeated
# cities, whatever `host_city` currently holds. `host_city` is already a
# tibble, so no data.frame() wrapper is needed before writing.
unique_host_cities <- distinct(host_city, City)
write_csv(unique_host_cities, "host_cities_JUrquilla.csv")