# Load data
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Read the weather observations from "weather_data.csv" (a relative path, so it
# is resolved against the current working directory) into a data frame.
weather_data <- read.csv(file = "weather_data.csv")

# Print a compact overview of the result: one line per column with its type
# and the first few values.
glimpse(x = weather_data)
## Rows: 1,000,000
## Columns: 6
## $ Location <chr> "San Diego", "San Diego", "San Diego", "Philadelphia"…
## $ Date_Time <chr> "2024-01-14 21:12:46", "2024-05-17 15:22:10", "2024-0…
## $ Temperature_C <dbl> 10.6830011, 8.7341398, 11.6324363, -8.6289759, 39.808…
## $ Humidity_pct <dbl> 41.19575, 58.31911, 38.82018, 54.07447, 72.89991, 49.…
## $ Precipitation_mm <dbl> 4.0201187, 9.1116234, 4.6075114, 3.1837197, 9.5982821…
## $ Wind_Speed_kmh <dbl> 8.233540, 27.715161, 28.732951, 26.367303, 29.898622,…
# Reorder columns
# Build weather_data4 from weather_data3 by picking the columns named below,
# in this order; any column not named here is left out of the result.
# NOTE(review): weather_data3 is created outside this section -- confirm it
# contains all six of these column names.
weather_data4 <- weather_data3[
  c(
    "Location", "Date_Time", "Temp_f",
    "Precipitation_in", "Wind_Speed_mph", "Humidity_pct"
  )
]