# Load Required Libraries
library(tidyverse) # Includes dplyr and tidyr
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readr) # For reading CSV files
# Import the raw weather table from the CSV in the working directory
weather_data <- read_csv(file = "Weather_Data.csv")
## Rows: 3 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): City, Temp_Jan, Temp_Feb, Temp_Mar, Humid_Jan, Humid_Feb, Humid_Mar
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Show the imported table as read (every column is still character here)
print(x = weather_data)
## # A tibble: 3 × 7
## City Temp_Jan Temp_Feb Temp_Mar Humid_Jan Humid_Feb Humid_Mar
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 New York 32°F 35°F 42°F 75% 72% 68%
## 2 Los Angeles 58°F 60°F 65°F 65% 63% 60%
## 3 Chicago 28°F 30°F 40°F 80% 78% 75%
# Reshape to long format: one row per City / Measure / Month combination.
# Column names such as "Temp_Jan" are split at the underscore into a
# Measure part ("Temp", "Humid") and a Month part ("Jan", "Feb", "Mar").
weather_tidy <- weather_data %>%
  pivot_longer(
    cols = -City,
    names_to = c("Measure", "Month"),
    names_sep = "_",
    values_to = "Value"
  ) %>%
  mutate(
    # parse_number() extracts the numeric part of strings like "32°F" or
    # "75%", dropping any non-numeric characters around the number
    Value = parse_number(Value),
    # Fix the level order so months sort chronologically, not alphabetically
    Month = factor(Month, levels = c("Jan", "Feb", "Mar"))
  )
# Preview the first rows of the tidy table
head(weather_tidy)
## # A tibble: 6 × 4
## City Measure Month Value
## <chr> <chr> <fct> <dbl>
## 1 New York Temp Jan 32
## 2 New York Temp Feb 35
## 3 New York Temp Mar 42
## 4 New York Humid Jan 75
## 5 New York Humid Feb 72
## 6 New York Humid Mar 68
# Show the complete long-format table
print(x = weather_tidy)
## # A tibble: 18 × 4
## City Measure Month Value
## <chr> <chr> <fct> <dbl>
## 1 New York Temp Jan 32
## 2 New York Temp Feb 35
## 3 New York Temp Mar 42
## 4 New York Humid Jan 75
## 5 New York Humid Feb 72
## 6 New York Humid Mar 68
## 7 Los Angeles Temp Jan 58
## 8 Los Angeles Temp Feb 60
## 9 Los Angeles Temp Mar 65
## 10 Los Angeles Humid Jan 65
## 11 Los Angeles Humid Feb 63
## 12 Los Angeles Humid Mar 60
## 13 Chicago Temp Jan 28
## 14 Chicago Temp Feb 30
## 15 Chicago Temp Mar 40
## 16 Chicago Humid Jan 80
## 17 Chicago Humid Feb 78
## 18 Chicago Humid Mar 75
# Spread the Measure column back out so that each City / Month row carries
# its own Temp and Humid columns
weather_clean <- pivot_wider(
  weather_tidy,
  names_from = Measure,
  values_from = Value
)
# Preview the reshaped table
head(weather_clean)
## # A tibble: 6 × 4
## City Month Temp Humid
## <chr> <fct> <dbl> <dbl>
## 1 New York Jan 32 75
## 2 New York Feb 35 72
## 3 New York Mar 42 68
## 4 Los Angeles Jan 58 65
## 5 Los Angeles Feb 60 63
## 6 Los Angeles Mar 65 60
# Temperature Trends
# One line per city across the three months. Line thickness is set with
# `linewidth` because the `size` aesthetic for lines was deprecated in
# ggplot2 3.4.0; `size` still controls the point markers.
ggplot(weather_clean, aes(x = Month, y = Temp, group = City, color = City)) +
  geom_line(linewidth = 1) +
  geom_point(size = 3) +
  labs(title = "Temperature Trends (Jan-Mar)", y = "Temperature (°F)") +
  theme_minimal()

# Humidity Trends
# One line per city across the three months. Line thickness is set with
# `linewidth` because the `size` aesthetic for lines was deprecated in
# ggplot2 3.4.0; `size` still controls the point markers.
ggplot(weather_clean, aes(x = Month, y = Humid, group = City, color = City)) +
  geom_line(linewidth = 1) +
  geom_point(size = 3) +
  labs(title = "Humidity Trends (Jan-Mar)", y = "Humidity (%)") +
  theme_minimal()

# Per-city averages of temperature and humidity over the observed months
city_summary <- weather_clean %>%
  group_by(City) %>%
  summarise(
    # Apply the same NA-tolerant mean to both measures; the .names pattern
    # yields the columns Avg_Temp and Avg_Humid
    across(
      c(Temp, Humid),
      function(x) mean(x, na.rm = TRUE),
      .names = "Avg_{.col}"
    )
  )
# Show the summary table
print(city_summary)
## # A tibble: 3 × 3
## City Avg_Temp Avg_Humid
## <chr> <dbl> <dbl>
## 1 Chicago 32.7 77.7
## 2 Los Angeles 61 62.7
## 3 New York 36.3 71.7
# As the summary shows, Los Angeles is much warmer than New York or Chicago. Interestingly, Chicago is the most humid of the three cities in the Jan-Mar months, followed by New York and then Los Angeles.