data

load data

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Read the raw weather observations from the CSV in the working directory,
# then print a compact column-by-column overview of what was loaded.
weather_data <- read.csv(file = "weather_data.csv")
weather_data %>% glimpse()
## Rows: 1,000,000
## Columns: 6
## $ Location         <chr> "San Diego", "San Diego", "San Diego", "Philadelphia"…
## $ Date_Time        <chr> "2024-01-14 21:12:46", "2024-05-17 15:22:10", "2024-0…
## $ Temperature_C    <dbl> 10.6830011, 8.7341398, 11.6324363, -8.6289759, 39.808…
## $ Humidity_pct     <dbl> 41.19575, 58.31911, 38.82018, 54.07447, 72.89991, 49.…
## $ Precipitation_mm <dbl> 4.0201187, 9.1116234, 4.6075114, 3.1837197, 9.5982821…
## $ Wind_Speed_kmh   <dbl> 8.233540, 27.715161, 28.732951, 26.367303, 29.898622,…

Convert to US customary (American) units

# Add US customary versions of the metric measurements, keeping the originals:
#   Temp_f           - Temperature_C converted to degrees Fahrenheit
#   Wind_Speed_mph   - Wind_Speed_kmh converted to miles per hour
#   Precipitation_in - Precipitation_mm converted to inches
# One mile is exactly 1.609344 km and one inch is exactly 25.4 mm; the exact
# mile factor is used instead of the rounded 1.609 so wind speeds are not
# overstated by about 0.02%.
weather_data2 <- weather_data %>%
  mutate(
    Temp_f = (Temperature_C * 9 / 5) + 32,
    Wind_Speed_mph = Wind_Speed_kmh / 1.609344,
    Precipitation_in = Precipitation_mm / 25.4
  )

Drop the original metric columns

# Keep only the converted measurements by dropping the three metric columns.
weather_data3 <- select(
  weather_data2,
  -Temperature_C,
  -Wind_Speed_kmh,
  -Precipitation_mm
)

reorder columns

# Put the columns in reporting order: identifiers first, then the measurements.
weather_data4 <- weather_data3 %>%
  select(
    Location, Date_Time, Temp_f,
    Precipitation_in, Wind_Speed_mph, Humidity_pct
  )

investigative questions

# Does higher humidity increase the likelihood or amount of precipitation?
# How does wind speed affect temperature?
# Do cooler temperatures correlate with less precipitation?
# Do temperature drops occur more often before or after precipitation events?
# Which variable (temperature, humidity, or wind speed) best predicts rainfall events?