W2

We focused on weather data from the Olympia Airport in our analysis. I would like you to do a small analysis of data from the San Diego International Airport. Get the data from NOAA and use it to recreate any five graphs, your choice, from our analysis. Don’t forget to clean the data. Submit your analysis as a link to an RPubs document.

## Setup

library(tidyverse)
## Warning: replacing previous import 'lifecycle::last_warnings' by
## 'rlang::last_warnings' when loading 'pillar'
## Warning: replacing previous import 'lifecycle::last_warnings' by
## 'rlang::last_warnings' when loading 'tibble'
## Warning: replacing previous import 'lifecycle::last_warnings' by
## 'rlang::last_warnings' when loading 'hms'
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.4     v dplyr   1.0.7
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   2.0.2     v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.1.2
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(readr)
library(ggplot2)

## Import into R

Use readr and set the type of the DATE column to “character” when reading the file.

**Note:** A custom file was received from NOAA since the original didn’t have PRCP.

# Read the station CSV, keeping DATE as text at import time, then inspect
# the column names and types.
KSAN_airport <- read_csv(
  file = "2941025.csv",
  col_types = cols(DATE = col_character())
)
glimpse(KSAN_airport)
## Rows: 29,488
## Columns: 9
## $ STATION <chr> "USW00023188", "USW00023188", "USW00023188", "USW00023188", "U~
## $ NAME    <chr> "SAN DIEGO INTERNATIONAL AIRPORT, CA US", "SAN DIEGO INTERNATI~
## $ DATE    <chr> "1941-05-13", "1941-05-14", "1941-05-15", "1941-05-16", "1941-~
## $ PRCP    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ SNOW    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ SNWD    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ TAVG    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA~
## $ TMAX    <dbl> 74, 73, 74, 74, 70, 68, 81, 84, 85, 75, 75, 74, 86, 80, 75, 73~
## $ TMIN    <dbl> 59, 60, 58, 61, 60, 59, 54, 56, 58, 60, 63, 61, 60, 60, 58, 62~
# Convert DATE from character to Date, then confirm the new column type.
KSAN_airport <- KSAN_airport %>%
  mutate(DATE = as.Date(DATE))
glimpse(KSAN_airport)
## Rows: 29,488
## Columns: 9
## $ STATION <chr> "USW00023188", "USW00023188", "USW00023188", "USW00023188", "U~
## $ NAME    <chr> "SAN DIEGO INTERNATIONAL AIRPORT, CA US", "SAN DIEGO INTERNATI~
## $ DATE    <date> 1941-05-13, 1941-05-14, 1941-05-15, 1941-05-16, 1941-05-17, 1~
## $ PRCP    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ SNOW    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ SNWD    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ TAVG    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA~
## $ TMAX    <dbl> 74, 73, 74, 74, 70, 68, 81, 84, 85, 75, 75, 74, 86, 80, 75, 73~
## $ TMIN    <dbl> 59, 60, 58, 61, 60, 59, 54, 56, 58, 60, 63, 61, 60, 60, 58, 62~
# Column-by-column summary to check value ranges and count missing values.
KSAN_airport %>% summary()
##    STATION              NAME                DATE                 PRCP        
##  Length:29488       Length:29488       Min.   :1941-05-13   Min.   :0.00000  
##  Class :character   Class :character   1st Qu.:1961-07-21   1st Qu.:0.00000  
##  Mode  :character   Mode  :character   Median :1981-09-26   Median :0.00000  
##                                        Mean   :1981-09-26   Mean   :0.02638  
##                                        3rd Qu.:2001-12-02   3rd Qu.:0.00000  
##                                        Max.   :2022-02-07   Max.   :2.70000  
##                                                                              
##       SNOW           SNWD           TAVG            TMAX            TMIN      
##  Min.   :0      Min.   :0      Min.   : 0.00   Min.   : 46.0   Min.   :29.00  
##  1st Qu.:0      1st Qu.:0      1st Qu.:60.00   1st Qu.: 66.0   1st Qu.:52.00  
##  Median :0      Median :0      Median :64.00   Median : 70.0   Median :58.00  
##  Mean   :0      Mean   :0      Mean   :64.56   Mean   : 70.7   Mean   :57.28  
##  3rd Qu.:0      3rd Qu.:0      3rd Qu.:69.00   3rd Qu.: 75.0   3rd Qu.:63.00  
##  Max.   :0      Max.   :0      Max.   :87.00   Max.   :111.0   Max.   :78.00  
##  NA's   :8288   NA's   :8349   NA's   :23586   NA's   :1       NA's   :1
# Show the rows that are missing any of the three variables used in the plots.
KSAN_airport %>%
  filter(is.na(TMAX) |
           is.na(TMIN) |
           is.na(PRCP))

# Drop only the rows missing TMAX, TMIN, or PRCP. A bare drop_na() also
# discards every row with a missing TAVG, SNOW, or SNWD value, which cuts the
# record from 29,488 daily rows (1941-2022) down to 790 rows (1998-2000).
# NOTE: the printed output in this document was produced before this change;
# re-knit to refresh it.
KSAN_airport <- KSAN_airport %>%
  drop_na(TMAX, TMIN, PRCP)
summary(KSAN_airport)
##    STATION              NAME                DATE                 PRCP        
##  Length:790         Length:790         Min.   :1998-04-01   Min.   :0.00000  
##  Class :character   Class :character   1st Qu.:1998-10-15   1st Qu.:0.00000  
##  Mode  :character   Mode  :character   Median :1999-05-01   Median :0.00000  
##                                        Mean   :1999-05-01   Mean   :0.01791  
##                                        3rd Qu.:1999-11-15   3rd Qu.:0.00000  
##                                        Max.   :2000-05-31   Max.   :1.59000  
##       SNOW        SNWD        TAVG            TMAX            TMIN      
##  Min.   :0   Min.   :0   Min.   :50.00   Min.   :55.00   Min.   :39.00  
##  1st Qu.:0   1st Qu.:0   1st Qu.:59.00   1st Qu.:64.00   1st Qu.:52.00  
##  Median :0   Median :0   Median :62.00   Median :68.00   Median :57.00  
##  Mean   :0   Mean   :0   Mean   :62.93   Mean   :68.51   Mean   :56.85  
##  3rd Qu.:0   3rd Qu.:0   3rd Qu.:67.00   3rd Qu.:72.00   3rd Qu.:62.00  
##  Max.   :0   Max.   :0   Max.   :82.00   Max.   :92.00   Max.   :72.00

## Graphics

Get density plots with rug plots for TMAX, TMIN, and PRCP.

# Density curve of TMAX with a rug marking the individual observations.
ggplot(data = KSAN_airport, mapping = aes(x = TMAX)) +
  geom_density() +
  geom_rug() +
  labs(title = "TMAX")

# Same view for TMIN.
ggplot(data = KSAN_airport, mapping = aes(x = TMIN)) +
  geom_density() +
  geom_rug() +
  labs(title = "TMIN")

# Same view for PRCP.
ggplot(data = KSAN_airport, mapping = aes(x = PRCP)) +
  geom_density() +
  geom_rug() +
  labs(title = "PRCP")

TIME SERIES LINE

# Time series of TMAX. DATE goes on the x-axis and the measurement on the
# y-axis: geom_line() connects observations in order of x, so mapping the
# measurement to x would not draw the series over time.
KSAN_airport %>%
  ggplot(aes(x = DATE, y = TMAX)) +
  geom_line() +
  ggtitle("TMAX")

# Time series of TMIN.
KSAN_airport %>%
  ggplot(aes(x = DATE, y = TMIN)) +
  geom_line() +
  ggtitle("TMIN")

# Time series of PRCP.
KSAN_airport %>%
  ggplot(aes(x = DATE, y = PRCP)) +
  geom_line() +
  ggtitle("PRCP")

library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
# Derive year, month, and day-of-month from DATE and store each as a factor
# so they can be used as grouping variables.
KSAN_airport <- mutate(
  KSAN_airport,
  yr = factor(year(DATE)),
  mo = factor(month(DATE)),
  dy = factor(day(DATE))
)

# Confirm the three new factor columns were added.
glimpse(KSAN_airport)
## Rows: 790
## Columns: 12
## $ STATION <chr> "USW00023188", "USW00023188", "USW00023188", "USW00023188", "U~
## $ NAME    <chr> "SAN DIEGO INTERNATIONAL AIRPORT, CA US", "SAN DIEGO INTERNATI~
## $ DATE    <date> 1998-04-01, 1998-04-02, 1998-04-03, 1998-04-04, 1998-04-05, 1~
## $ PRCP    <dbl> 0.39, 0.00, 0.00, 0.00, 0.00, 0.01, 0.04, 0.00, 0.00, 0.00, 0.~
## $ SNOW    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ SNWD    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ TAVG    <dbl> 53, 57, 58, 57, 57, 59, 59, 57, 59, 59, 60, 58, 58, 57, 56, 57~
## $ TMAX    <dbl> 58, 63, 64, 63, 64, 63, 63, 64, 66, 65, 63, 62, 62, 61, 61, 64~
## $ TMIN    <dbl> 48, 51, 52, 51, 50, 55, 54, 50, 52, 53, 56, 54, 53, 53, 51, 49~
## $ yr      <fct> 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 1998, 19~
## $ mo      <fct> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,~
## $ dy      <fct> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,~

Summary

# Summarise again, now including the yr, mo, and dy factor columns.
KSAN_airport %>% summary()
##    STATION              NAME                DATE                 PRCP        
##  Length:790         Length:790         Min.   :1998-04-01   Min.   :0.00000  
##  Class :character   Class :character   1st Qu.:1998-10-15   1st Qu.:0.00000  
##  Mode  :character   Mode  :character   Median :1999-05-01   Median :0.00000  
##                                        Mean   :1999-05-01   Mean   :0.01791  
##                                        3rd Qu.:1999-11-15   3rd Qu.:0.00000  
##                                        Max.   :2000-05-31   Max.   :1.59000  
##                                                                              
##       SNOW        SNWD        TAVG            TMAX            TMIN      
##  Min.   :0   Min.   :0   Min.   :50.00   Min.   :55.00   Min.   :39.00  
##  1st Qu.:0   1st Qu.:0   1st Qu.:59.00   1st Qu.:64.00   1st Qu.:52.00  
##  Median :0   Median :0   Median :62.00   Median :68.00   Median :57.00  
##  Mean   :0   Mean   :0   Mean   :62.93   Mean   :68.51   Mean   :56.85  
##  3rd Qu.:0   3rd Qu.:0   3rd Qu.:67.00   3rd Qu.:72.00   3rd Qu.:62.00  
##  Max.   :0   Max.   :0   Max.   :82.00   Max.   :92.00   Max.   :72.00  
##                                                                         
##     yr            mo            dy     
##  1998:275   5      : 93   1      : 26  
##  1999:363   4      : 90   2      : 26  
##  2000:152   1      : 62   3      : 26  
##             3      : 62   6      : 26  
##             7      : 62   7      : 26  
##             8      : 62   8      : 26  
##             (Other):359   (Other):634

Save the file. You will then be able to load the cleaned data later without rerunning this analysis.

# Write the data frame to disk so a later session can load() it directly.
save(list = "KSAN_airport", file = "KSAN_airport.Rdata")