# Load Required Libraries
library(tidyverse)  # Includes dplyr and tidyr
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readr)      # For reading CSV files

# Import the raw weather table from the CSV in the working directory
weather_data <- readr::read_csv("Weather_Data.csv")
## Rows: 3 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): City, Temp_Jan, Temp_Feb, Temp_Mar, Humid_Jan, Humid_Feb, Humid_Mar
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print the imported tibble to inspect the raw values before cleaning
print(weather_data)
## # A tibble: 3 × 7
##   City        Temp_Jan Temp_Feb Temp_Mar Humid_Jan Humid_Feb Humid_Mar
##   <chr>       <chr>    <chr>    <chr>    <chr>     <chr>     <chr>    
## 1 New York    32°F     35°F     42°F     75%       72%       68%      
## 2 Los Angeles 58°F     60°F     65°F     65%       63%       60%      
## 3 Chicago     28°F     30°F     40°F     80%       78%       75%
# Reshape to long format: every column except City is stacked, and its name
# (e.g. "Temp_Jan") is split at the underscore into Measure and Month
weather_tidy <- weather_data %>%
  pivot_longer(
    cols = !City,
    names_to = c("Measure", "Month"),
    names_sep = "_",
    values_to = "Value"
  ) %>%
  mutate(
    # Fix the month order explicitly so it is not sorted alphabetically
    Month = factor(Month, levels = c("Jan", "Feb", "Mar")),
    # Drop the unit suffix, then store the reading as a number
    Value = Value %>% str_remove("°F|%") %>% as.numeric()
  )

# Preview the first rows of the tidy table
head(weather_tidy)
## # A tibble: 6 × 4
##   City     Measure Month Value
##   <chr>    <chr>   <fct> <dbl>
## 1 New York Temp    Jan      32
## 2 New York Temp    Feb      35
## 3 New York Temp    Mar      42
## 4 New York Humid   Jan      75
## 5 New York Humid   Feb      72
## 6 New York Humid   Mar      68
# Print the tidy table in full, beyond the head() preview above
print(weather_tidy)
## # A tibble: 18 × 4
##    City        Measure Month Value
##    <chr>       <chr>   <fct> <dbl>
##  1 New York    Temp    Jan      32
##  2 New York    Temp    Feb      35
##  3 New York    Temp    Mar      42
##  4 New York    Humid   Jan      75
##  5 New York    Humid   Feb      72
##  6 New York    Humid   Mar      68
##  7 Los Angeles Temp    Jan      58
##  8 Los Angeles Temp    Feb      60
##  9 Los Angeles Temp    Mar      65
## 10 Los Angeles Humid   Jan      65
## 11 Los Angeles Humid   Feb      63
## 12 Los Angeles Humid   Mar      60
## 13 Chicago     Temp    Jan      28
## 14 Chicago     Temp    Feb      30
## 15 Chicago     Temp    Mar      40
## 16 Chicago     Humid   Jan      80
## 17 Chicago     Humid   Feb      78
## 18 Chicago     Humid   Mar      75
# Spread Measure back into columns so that each City/Month row carries
# its own Temp and Humid value
weather_clean <- pivot_wider(
  weather_tidy,
  names_from = Measure,
  values_from = Value
)

# Preview the first rows of the widened table
head(weather_clean)
## # A tibble: 6 × 4
##   City        Month  Temp Humid
##   <chr>       <fct> <dbl> <dbl>
## 1 New York    Jan      32    75
## 2 New York    Feb      35    72
## 3 New York    Mar      42    68
## 4 Los Angeles Jan      58    65
## 5 Los Angeles Feb      60    63
## 6 Los Angeles Mar      65    60
# Temperature trends: one coloured line per city across the ordered months.
# Line thickness is set with `linewidth`; using `size` for lines has been
# deprecated since ggplot2 3.4.0 and raised a lifecycle warning here.
# `size` remains the correct argument for the point markers.
ggplot(weather_clean, aes(x = Month, y = Temp, group = City, color = City)) +
  geom_line(linewidth = 1) +
  geom_point(size = 3) +
  labs(title = "Temperature Trends (Jan-Mar)", y = "Temperature (°F)") +
  theme_minimal()

# Humidity trends: same layout as the temperature plot, with Humid on the
# y-axis (again using `linewidth` rather than the deprecated `size` for lines)
ggplot(weather_clean, aes(x = Month, y = Humid, group = City, color = City)) +
  geom_line(linewidth = 1) +
  geom_point(size = 3) +
  labs(title = "Humidity Trends (Jan-Mar)", y = "Humidity (%)") +
  theme_minimal()

# Average each measure over the three months, giving one row per city
city_summary <- weather_clean %>%
  group_by(City) %>%
  summarise(
    across(
      c(Temp, Humid),
      function(x) mean(x, na.rm = TRUE),
      .names = "Avg_{.col}"
    )
  )

# Show the per-city averages
print(city_summary)
## # A tibble: 3 × 3
##   City        Avg_Temp Avg_Humid
##   <chr>          <dbl>     <dbl>
## 1 Chicago         32.7      77.7
## 2 Los Angeles     61        62.7
## 3 New York        36.3      71.7
# As we can see, Los Angeles is much warmer than New York or Chicago.
# Interestingly, Chicago is the most humid city in the Jan-Mar months,
# followed by New York and then Los Angeles.