W2
We focused on weather data from the Olympia Airport in our analysis. I would like you to do a small analysis of data from the San Diego International Airport (KSAN). Get the data from NOAA and use it to recreate any five graphs, your choice, from our analysis. Don’t forget to clean the data. Submit your analysis as a link to an RPubs document.
## Setup
library(tidyverse)
## Warning: replacing previous import 'lifecycle::last_warnings' by
## 'rlang::last_warnings' when loading 'pillar'
## Warning: replacing previous import 'lifecycle::last_warnings' by
## 'rlang::last_warnings' when loading 'tibble'
## Warning: replacing previous import 'lifecycle::last_warnings' by
## 'rlang::last_warnings' when loading 'hms'
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.4 v dplyr 1.0.7
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 2.0.2 v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.1.2
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(readr)
library(ggplot2)
## Import into R
Use readr to read the file, setting the type of the DATE column to “character”.
**Note:** A custom file was received from NOAA because the original download didn’t include PRCP.
# Load the NOAA export, keeping DATE as plain text for now, and print a
# compact column-by-column overview of what was read.
KSAN_airport <- read_csv(
  file = "2941025.csv",
  col_types = cols(DATE = col_character())
)
KSAN_airport %>% glimpse()
## Rows: 29,488
## Columns: 9
## $ STATION <chr> "USW00023188", "USW00023188", "USW00023188", "USW00023188", "U~
## $ NAME <chr> "SAN DIEGO INTERNATIONAL AIRPORT, CA US", "SAN DIEGO INTERNATI~
## $ DATE <chr> "1941-05-13", "1941-05-14", "1941-05-15", "1941-05-16", "1941-~
## $ PRCP <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ SNOW <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ SNWD <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ TAVG <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA~
## $ TMAX <dbl> 74, 73, 74, 74, 70, 68, 81, 84, 85, 75, 75, 74, 86, 80, 75, 73~
## $ TMIN <dbl> 59, 60, 58, 61, 60, 59, 54, 56, 58, 60, 63, 61, 60, 60, 58, 62~
# Turn the character DATE column into a real Date, then re-check the types.
KSAN_airport[["DATE"]] <- as.Date(KSAN_airport[["DATE"]])
KSAN_airport %>% glimpse()
## Rows: 29,488
## Columns: 9
## $ STATION <chr> "USW00023188", "USW00023188", "USW00023188", "USW00023188", "U~
## $ NAME <chr> "SAN DIEGO INTERNATIONAL AIRPORT, CA US", "SAN DIEGO INTERNATI~
## $ DATE <date> 1941-05-13, 1941-05-14, 1941-05-15, 1941-05-16, 1941-05-17, 1~
## $ PRCP <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ SNOW <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ SNWD <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ TAVG <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA~
## $ TMAX <dbl> 74, 73, 74, 74, 70, 68, 81, 84, 85, 75, 75, 74, 86, 80, 75, 73~
## $ TMIN <dbl> 59, 60, 58, 61, 60, 59, 54, 56, 58, 60, 63, 61, 60, 60, 58, 62~
# Per-column summary of the raw data; the NA counts guide the cleaning step.
KSAN_airport %>% summary()
## STATION NAME DATE PRCP
## Length:29488 Length:29488 Min. :1941-05-13 Min. :0.00000
## Class :character Class :character 1st Qu.:1961-07-21 1st Qu.:0.00000
## Mode :character Mode :character Median :1981-09-26 Median :0.00000
## Mean :1981-09-26 Mean :0.02638
## 3rd Qu.:2001-12-02 3rd Qu.:0.00000
## Max. :2022-02-07 Max. :2.70000
##
## SNOW SNWD TAVG TMAX TMIN
## Min. :0 Min. :0 Min. : 0.00 Min. : 46.0 Min. :29.00
## 1st Qu.:0 1st Qu.:0 1st Qu.:60.00 1st Qu.: 66.0 1st Qu.:52.00
## Median :0 Median :0 Median :64.00 Median : 70.0 Median :58.00
## Mean :0 Mean :0 Mean :64.56 Mean : 70.7 Mean :57.28
## 3rd Qu.:0 3rd Qu.:0 3rd Qu.:69.00 3rd Qu.: 75.0 3rd Qu.:63.00
## Max. :0 Max. :0 Max. :87.00 Max. :111.0 Max. :78.00
## NA's :8288 NA's :8349 NA's :23586 NA's :1 NA's :1
# List the rows where at least one of TMAX, TMIN or PRCP is missing.
KSAN_airport %>%
  filter(!complete.cases(TMAX, TMIN, PRCP))

# Remove incomplete rows and summarise what is left.
# NOTE(review): drop_na() with no column arguments drops a row when ANY
# column is NA, not only TMAX/TMIN/PRCP. TAVG is missing for 23,586 of the
# 29,488 rows, so only 790 rows (1998-04-01 to 2000-05-31) remain. If the
# intent was to clean just the plotted variables, drop_na(TMAX, TMIN, PRCP)
# is probably what was wanted -- confirm before changing, since every later
# result depends on this step.
KSAN_airport <- drop_na(KSAN_airport)
summary(KSAN_airport)
## STATION NAME DATE PRCP
## Length:790 Length:790 Min. :1998-04-01 Min. :0.00000
## Class :character Class :character 1st Qu.:1998-10-15 1st Qu.:0.00000
## Mode :character Mode :character Median :1999-05-01 Median :0.00000
## Mean :1999-05-01 Mean :0.01791
## 3rd Qu.:1999-11-15 3rd Qu.:0.00000
## Max. :2000-05-31 Max. :1.59000
## SNOW SNWD TAVG TMAX TMIN
## Min. :0 Min. :0 Min. :50.00 Min. :55.00 Min. :39.00
## 1st Qu.:0 1st Qu.:0 1st Qu.:59.00 1st Qu.:64.00 1st Qu.:52.00
## Median :0 Median :0 Median :62.00 Median :68.00 Median :57.00
## Mean :0 Mean :0 Mean :62.93 Mean :68.51 Mean :56.85
## 3rd Qu.:0 3rd Qu.:0 3rd Qu.:67.00 3rd Qu.:72.00 3rd Qu.:62.00
## Max. :0 Max. :0 Max. :82.00 Max. :92.00 Max. :72.00
## Graphics
Density plots with rug marks for TMAX, TMIN, and PRCP.
# Density curve with a rug of the individual observations, one plot per
# variable: daily maximum temperature, daily minimum temperature, and
# daily precipitation.
ggplot(KSAN_airport, aes(x = TMAX)) +
  geom_density() +
  geom_rug() +
  labs(title = "TMAX")

ggplot(KSAN_airport, aes(x = TMIN)) +
  geom_density() +
  geom_rug() +
  labs(title = "TMIN")

ggplot(KSAN_airport, aes(x = PRCP)) +
  geom_density() +
  geom_rug() +
  labs(title = "PRCP")
Time series line plots of TMAX, TMIN, and PRCP over DATE.
# Time series line plots: one plot per variable with the date on the
# horizontal axis and the measurement on the vertical axis.
# geom_line() joins observations in order of the x variable, so DATE must be
# mapped to x for the line to follow the days in sequence. With the measure
# on x (as before) the points were joined in order of temperature or
# precipitation, which does not produce a time series.
KSAN_airport %>%
  ggplot(aes(x = DATE, y = TMAX)) +
  geom_line() +
  ggtitle("TMAX")

KSAN_airport %>%
  ggplot(aes(x = DATE, y = TMIN)) +
  geom_line() +
  ggtitle("TMIN")

KSAN_airport %>%
  ggplot(aes(x = DATE, y = PRCP)) +
  geom_line() +
  ggtitle("PRCP")
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# Split DATE into year, month and day-of-month columns, each stored as a
# factor, then confirm the three new columns are present.
KSAN_airport <- mutate(
  KSAN_airport,
  yr = factor(year(DATE)),
  mo = factor(month(DATE)),
  dy = factor(day(DATE))
)
KSAN_airport %>% glimpse()
## Rows: 790
## Columns: 12
## $ STATION <chr> "USW00023188", "USW00023188", "USW00023188", "USW00023188", "U~
## $ NAME <chr> "SAN DIEGO INTERNATIONAL AIRPORT, CA US", "SAN DIEGO INTERNATI~
## $ DATE <date> 1998-04-01, 1998-04-02, 1998-04-03, 1998-04-04, 1998-04-05, 1~
## $ PRCP <dbl> 0.39, 0.00, 0.00, 0.00, 0.00, 0.01, 0.04, 0.00, 0.00, 0.00, 0.~
## $ SNOW <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ SNWD <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ TAVG <dbl> 53, 57, 58, 57, 57, 59, 59, 57, 59, 59, 60, 58, 58, 57, 56, 57~
## $ TMAX <dbl> 58, 63, 64, 63, 64, 63, 63, 64, 66, 65, 63, 62, 62, 61, 61, 64~
## $ TMIN <dbl> 48, 51, 52, 51, 50, 55, 54, 50, 52, 53, 56, 54, 53, 53, 51, 49~
## $ yr <fct> 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 19~
## $ mo <fct> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,~
## $ dy <fct> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,~
Summary
# Summary of the cleaned data, now including the yr / mo / dy factor counts.
KSAN_airport %>% summary()
## STATION NAME DATE PRCP
## Length:790 Length:790 Min. :1998-04-01 Min. :0.00000
## Class :character Class :character 1st Qu.:1998-10-15 1st Qu.:0.00000
## Mode :character Mode :character Median :1999-05-01 Median :0.00000
## Mean :1999-05-01 Mean :0.01791
## 3rd Qu.:1999-11-15 3rd Qu.:0.00000
## Max. :2000-05-31 Max. :1.59000
##
## SNOW SNWD TAVG TMAX TMIN
## Min. :0 Min. :0 Min. :50.00 Min. :55.00 Min. :39.00
## 1st Qu.:0 1st Qu.:0 1st Qu.:59.00 1st Qu.:64.00 1st Qu.:52.00
## Median :0 Median :0 Median :62.00 Median :68.00 Median :57.00
## Mean :0 Mean :0 Mean :62.93 Mean :68.51 Mean :56.85
## 3rd Qu.:0 3rd Qu.:0 3rd Qu.:67.00 3rd Qu.:72.00 3rd Qu.:62.00
## Max. :0 Max. :0 Max. :82.00 Max. :92.00 Max. :72.00
##
## yr mo dy
## 1998:275 5 : 93 1 : 26
## 1999:363 4 : 90 2 : 26
## 2000:152 1 : 62 3 : 26
## 3 : 62 6 : 26
## 7 : 62 7 : 26
## 8 : 62 8 : 26
## (Other):359 (Other):634
Save the file. The cleaned data can then be loaded from KSAN_airport.Rdata without rerunning the steps above.
# Write the cleaned data frame to disk under its own name so that a later
# load("KSAN_airport.Rdata") restores the object `KSAN_airport`.
save(list = "KSAN_airport", file = "KSAN_airport.Rdata")