stat450_hw9_allisongalon

library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(ggplot2)
library(maps)
library(mapproj)
library(viridis) # Optional
Loading required package: viridisLite

Attaching package: 'viridis'
The following object is masked from 'package:maps':

    unemp
library(leaflet)
data("storms", package = "dplyr")
janitor::get_dupes
function (dat, ...) 
{
    expr <- rlang::expr(c(...))
    pos <- tidyselect::eval_select(expr, data = dat)
    is_grouped <- dplyr::is_grouped_df(dat)
    if (is_grouped) {
        dat_groups <- dplyr::group_vars(dat)
        dat <- dat %>% dplyr::ungroup()
        if (getOption("get_dupes.grouped_warning", TRUE) & interactive()) {
            message(paste0("Data is grouped by [", paste(dat_groups, 
                collapse = "|"), "]. Note that get_dupes() is not group aware and does not limit duplicate detection to within-groups, but rather checks over the entire data frame. However grouping structure is preserved.\nThis message is shown once per session and may be disabled by setting options(\"get_dupes.grouped_warning\" = FALSE)."))
            options(get_dupes.grouped_warning = FALSE)
        }
    }
    if (rlang::dots_n(...) == 0) {
        var_names <- names(dat)
        nms <- rlang::syms(var_names)
        message("No variable names specified - using all columns.\n")
    }
    else {
        var_names <- names(pos)
        nms <- rlang::syms(var_names)
    }
    dupe_count <- NULL
    dupes <- dat %>% dplyr::add_count(!!!nms, name = "dupe_count") %>% 
        dplyr::filter(dupe_count > 1) %>% dplyr::select(!!!nms, 
        dupe_count, dplyr::everything()) %>% dplyr::arrange(dplyr::desc(dupe_count), 
        !!!nms)
    if (length(var_names) > 10) {
        var_names <- c(var_names[1:9], paste("... and", length(var_names) - 
            9, "other variables"))
    }
    if (nrow(dupes) == 0) {
        message(paste0("No duplicate combinations found of: ", 
            paste(var_names, collapse = ", ")))
    }
    if (is_grouped) 
        dupes <- dupes %>% dplyr::group_by(!!!rlang::syms(dat_groups))
    return(dupes)
}
<bytecode: 0x11e012f98>
<environment: namespace:janitor>
#?get_dupes

Question 1

# Keep one copy of every row of `storms`; rows that are identical across all
# columns are collapsed into a single row.
distinct_storms <- distinct(storms)

distinct_storms
# A tibble: 19,535 × 13
   name   year month   day  hour   lat  long status      category  wind pressure
   <chr> <dbl> <dbl> <int> <dbl> <dbl> <dbl> <fct>          <dbl> <int>    <int>
 1 Amy    1975     6    27     0  27.5 -79   tropical d…       NA    25     1013
 2 Amy    1975     6    27     6  28.5 -79   tropical d…       NA    25     1013
 3 Amy    1975     6    27    12  29.5 -79   tropical d…       NA    25     1013
 4 Amy    1975     6    27    18  30.5 -79   tropical d…       NA    25     1013
 5 Amy    1975     6    28     0  31.5 -78.8 tropical d…       NA    25     1012
 6 Amy    1975     6    28     6  32.4 -78.7 tropical d…       NA    25     1012
 7 Amy    1975     6    28    12  33.3 -78   tropical d…       NA    25     1011
 8 Amy    1975     6    28    18  34   -77   tropical d…       NA    30     1006
 9 Amy    1975     6    29     0  34.4 -75.8 tropical s…       NA    35     1004
10 Amy    1975     6    29     6  34   -74.8 tropical s…       NA    40     1002
# ℹ 19,525 more rows
# ℹ 2 more variables: tropicalstorm_force_diameter <int>,
#   hurricane_force_diameter <int>
storms
# A tibble: 19,537 × 13
   name   year month   day  hour   lat  long status      category  wind pressure
   <chr> <dbl> <dbl> <int> <dbl> <dbl> <dbl> <fct>          <dbl> <int>    <int>
 1 Amy    1975     6    27     0  27.5 -79   tropical d…       NA    25     1013
 2 Amy    1975     6    27     6  28.5 -79   tropical d…       NA    25     1013
 3 Amy    1975     6    27    12  29.5 -79   tropical d…       NA    25     1013
 4 Amy    1975     6    27    18  30.5 -79   tropical d…       NA    25     1013
 5 Amy    1975     6    28     0  31.5 -78.8 tropical d…       NA    25     1012
 6 Amy    1975     6    28     6  32.4 -78.7 tropical d…       NA    25     1012
 7 Amy    1975     6    28    12  33.3 -78   tropical d…       NA    25     1011
 8 Amy    1975     6    28    18  34   -77   tropical d…       NA    30     1006
 9 Amy    1975     6    29     0  34.4 -75.8 tropical s…       NA    35     1004
10 Amy    1975     6    29     6  34   -74.8 tropical s…       NA    40     1002
# ℹ 19,527 more rows
# ℹ 2 more variables: tropicalstorm_force_diameter <int>,
#   hurricane_force_diameter <int>
# Number of duplicate rows dropped by distinct().
rows_removed <- nrow(storms) - nrow(distinct_storms)
cat(rows_removed, "rows removed.")
2 rows removed.

Question 2

# Tally the de-duplicated rows per year. Each row of `distinct_storms` is one
# track record, so `count` is the number of records in that year.
distinct_storms_year <- count(distinct_storms, year, name = "count")

distinct_storms_year
# A tibble: 48 × 2
    year count
   <dbl> <int>
 1  1975   238
 2  1976   126
 3  1977    92
 4  1978   152
 5  1979   324
 6  1980   335
 7  1981   311
 8  1982   111
 9  1983    88
10  1984   342
# ℹ 38 more rows

Question 3

# Bar chart of the yearly counts: one bar per year, bar height = `count`.
distinct_storms_year |>
  ggplot(mapping = aes(x = year, y = count)) +
  geom_col() +
  theme_bw() +
  labs(
    x = "Year",
    y = "Frequency",
    title = "Counts of Storms by Year"
  )

Question 4

# Mean wind speed per year (missing wind values are ignored).
yearly_mean_wind <- summarise(
  group_by(distinct_storms, year),
  mean_wind = mean(wind, na.rm = TRUE),
  .groups = "drop"
)

# Line chart of the yearly means, stored so it can be printed below.
average_windstorms_byyear <- ggplot(
  yearly_mean_wind,
  aes(x = year, y = mean_wind)
) +
  geom_line() +
  labs(
    title = "Averaged Wind Speeds (of storm) by Years",
    x = "Years",
    y = "Average Wind Speeds"
  )

average_windstorms_byyear

Question 5

# Polygon outlines of the US states, used as the base layer of the map.
states <- map_data("state")

# Keep only the records for the storm named Katrina in 2005, then sort them
# chronologically. arrange() orders the rows (it does not group them), so
# geom_path() connects the positions in the order they were recorded.
katrina <- storms |>
  filter(name == "Katrina" & year == 2005) |>
  arrange(year, month, day, hour)

# State shapes: x/y give each outline point and `group` keeps every polygon
# separate. Pink fill with a white border.
state_layer <- geom_polygon(
  data = states,
  mapping = aes(x = long, y = lat, group = group),
  fill = "pink", colour = "white"
)

# Storm track: a line through the recorded positions, coloured by wind speed.
track_layer <- geom_path(
  data = katrina,
  mapping = aes(x = long, y = lat, colour = wind)
)

ggplot() +
  state_layer +
  track_layer +
  # Draw longitude/latitude with a map projection instead of plain x/y axes.
  coord_map() +
  # Continuous viridis palette (option "D") for the wind-speed colour scale.
  scale_color_viridis_c(option = "D")

Question 6

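We did not need to join two datasets before plotting the storm path because everything we needed was already in a single dataset: `storms` contains the geographic location (latitude and longitude), the wind speed, and the name of each storm. The state outlines are drawn as a separate layer on the same plot, so they did not have to be joined to the storm data either.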

Question 7

# Plot the track of one storm on a map of the US states.
#
# Arguments:
#   storm_name  Storm name as a string, e.g. "Katrina".
#   storm_year  Year of the storm, e.g. 2005.
#   variable    Unquoted column of `storms` used to colour the track,
#               e.g. wind or pressure.
#
# Returns a ggplot object. Stops with an error when no record in `storms`
# matches the requested name and year.
storm_track <- function(storm_name, storm_year, variable) {
  states <- map_data("state")

  # Records for the requested storm, in chronological order so the path is
  # drawn in the order the positions were recorded.
  track <- storms |>
    filter(name == storm_name, year == storm_year) |>
    arrange(year, month, day, hour)

  if (nrow(track) == 0) {
    stop(
      "No records found for ", storm_name, " in ", storm_year, ".",
      call. = FALSE
    )
  }

  ggplot() +
    geom_polygon(
      data = states,
      aes(x = long, y = lat, group = group),
      fill = "pink", col = "white"
    ) +
    # {{ variable }} forwards the column the caller named; previously `wind`
    # was hard-coded here and the `variable` argument had no effect.
    geom_path(
      data = track,
      aes(x = long, y = lat, col = {{ variable }})
    ) +
    coord_map() +
    scale_color_viridis_c(option = "D")
}

storm_track("Katrina", 2005, wind)

Question 8

# Report the peak intensity of a single storm.
#
# Looks up one storm by name and year and prints either its maximum wind
# speed or its minimum pressure.
#
# Arguments:
#   storm_name  Storm name as a string, e.g. "Katrina".
#   storm_year  Year of the storm as a number, e.g. 2005.
#   variable    Column to summarise, given as a quoted string:
#               "wind" or "pressure".
#   data        Storm records to search; defaults to `distinct_storms`.
#
# Returns (invisibly) the value that was printed. Stops with an error when
# `variable` is not "wind" or "pressure", when no record matches the storm
# name and year, or when every matching value is missing.
storms_report <- function(storm_name, storm_year, variable,
                          data = distinct_storms) {
  # `variable` has to be a quoted string. A bare name such as wind makes R
  # look for an object called `wind`, which fails with "object not found".
  if (!is.character(variable) || length(variable) != 1L ||
    !variable %in% c("wind", "pressure")) {
    stop("Please choose 'wind' or 'pressure'", call. = FALSE)
  }

  # which() drops comparisons that evaluate to NA instead of returning
  # all-NA rows.
  keep <- which(data$name == storm_name & data$year == storm_year)
  storm_part <- data[keep, , drop = FALSE]
  if (nrow(storm_part) == 0) {
    stop(
      "No records found for ", storm_name, " in ", storm_year, ".",
      call. = FALSE
    )
  }

  values <- storm_part[[variable]]
  if (all(is.na(values))) {
    stop(
      "All ", variable, " values are missing for ", storm_name, " in ",
      storm_year, ".",
      call. = FALSE
    )
  }

  # cat() joins the pieces of the sentence; `+` cannot concatenate strings
  # in R.
  if (variable == "wind") {
    result <- max(values, na.rm = TRUE)
    cat(
      "Maximum wind speed: ", result, " knots (", storm_name, ", ",
      storm_year, ")\n",
      sep = ""
    )
  } else {
    # NOTE(review): the earlier drafts left the pressure branch empty. The
    # minimum pressure (in millibars, per the dplyr `storms` documentation)
    # is reported here -- confirm against the assignment.
    result <- min(values, na.rm = TRUE)
    cat(
      "Minimum pressure: ", result, " millibars (", storm_name, ", ",
      storm_year, ")\n",
      sep = ""
    )
  }

  invisible(result)
}

# Example calls:
# storms_report("Katrina", 2005, "wind")
# storms_report("Katrina", 2005, "pressure")

Question 9

I was unable to answer this question because I could not finish Question 8. I kept getting errors about whether the objects passed as inputs exist, even though the code ran all the way through without errors when I ran the body of the function by itself.

Question 10

# One row per named storm in 2005: `n` is the number of track records for
# that storm, and (`lon`, `lat`) is the mean position of those records.
# Rows with a missing coordinate are removed first, so the means need no
# na.rm handling.
leaflet_stormk_counts <- distinct_storms |>
  filter(year == 2005, !is.na(long), !is.na(lat)) |>
  group_by(name) |>
  summarise(
    n = n(),
    lon = mean(long),
    lat = mean(lat),
    .groups = "drop"
  )

leaflet(leaflet_stormk_counts) |>
  addTiles() |>
  # NOTE(review): this line joins the mean positions of different storms in
  # row order, so it is not an actual storm path -- confirm it is wanted.
  addPolylines(lng = ~lon, lat = ~lat, weight = 3, color = "#444444") |>
  addCircleMarkers(
    lng = ~lon, lat = ~lat,
    # Marker radius grows with the number of records, rescaled to 3-10.
    radius = ~ scales::rescale(n, to = c(3, 10)),
    # `n` counts track records for one storm, not storms, so label it so.
    popup = ~ paste0(name, ": ", n, " observations")
  )

Reflection

  1. I found Question 8 difficult because I kept running into syntax errors. I understand the logic behind writing that function and how to use the if/error statements, but I kept getting errors saying that an object does or doesn’t exist, errors about the RHS (right-hand side) of the pipe, and so on. The code would also work sometimes and stop working at other times. I couldn’t figure out exactly what was triggering the errors, so I commented out the whole block of code so that the submission would render. I was able to do parts of Question 8, but not all of it. I also need to tweak my map in Question 10 because it needs a lot of improvement. Those are the things I need to work on more.
  2. I give myself an R rating.