Project 2 - Wind turbines

Author

Leah Marshall

From Amarillo Economic Development: https://amarilloedc.com/blog/wind-industry-helps-amarillo-soar/

Intro Essay

# Load all libraries 
library(readr)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(ggplot2)
Warning: package 'ggplot2' was built under R version 4.5.1
library(leaflet)
Warning: package 'leaflet' was built under R version 4.5.1
# Read the turbine data directly from its folder rather than calling setwd(),
# which would change the working directory for everything that runs afterwards.
data_dir <- "C:/Users/rsaidi/Downloads"
wind <- read_csv(file.path(data_dir, "wind_turbines.csv"))
Rows: 63961 Columns: 12
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (2): Site.State, Site.County
dbl (10): Year, Turbine.Capacity, Turbine.Hub_Height, Turbine.Rotor_Diameter...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first six rows to confirm the import worked
head(wind)
# A tibble: 6 × 12
  Site.State Site.County    Year Turbine.Capacity Turbine.Hub_Height
  <chr>      <chr>         <dbl>            <dbl>              <dbl>
1 IA         Story County   2017             3000               87.5
2 IA         Hardin County  2017             3000               87.5
3 IA         Story County   2017             3000               87.5
4 IA         Story County   2017             3000               87.5
5 IA         Story County   2017             3000               87.5
6 IA         Story County   2017             3000               87.5
# ℹ 7 more variables: Turbine.Rotor_Diameter <dbl>, Turbine.Swept_Area <dbl>,
#   Turbine.Total_Height <dbl>, Project.Capacity <dbl>,
#   Project.Number_Turbines <dbl>, Site.Latitude <dbl>, Site.Longitude <dbl>
# Count the NA values in the capacity, hub-height, and coordinate columns
n_missing <- \(column) sum(is.na(column))
wind |>
  summarise(
    missing_capacity = n_missing(Turbine.Capacity),
    missing_height = n_missing(Turbine.Hub_Height),
    missing_lat = n_missing(Site.Latitude),
    missing_long = n_missing(Site.Longitude)
  )
# A tibble: 1 × 4
  missing_capacity missing_height missing_lat missing_long
             <int>          <int>       <int>        <int>
1                0              0           0            0
# Keep only the location, capacity, and hub-height columns
kept_columns <- c(
  "Site.State", "Site.County",
  "Turbine.Capacity", "Turbine.Hub_Height",
  "Site.Latitude", "Site.Longitude"
)
wind_clean <- select(wind, all_of(kept_columns))

# Overall mean capacity and hub height (any NA values are ignored)
summarise(
  wind_clean,
  avg_capacity = mean(Turbine.Capacity, na.rm = TRUE),
  avg_height = mean(Turbine.Hub_Height, na.rm = TRUE)
)
# A tibble: 1 × 2
  avg_capacity avg_height
         <dbl>      <dbl>
1        1945.       80.3
# Graph 1: histogram of rated capacity per turbine.
# Turbine.Capacity is recorded in kilowatts (values such as 3000, with a mean
# near 1945, are kW rather than MW), so the title and axis are labelled in kW.
ggplot(wind_clean, aes(x = Turbine.Capacity)) +
  geom_histogram(bins = 30, fill = "skyblue", color = "black") +
  labs(
    title = "Distribution of Turbine Capacity (kilowatts)",
    x = "Capacity (kW)",
    y = "Number of Turbines"
  )

# Count turbines per state, keeping only states with more than 10 turbines
# and dropping Puerto Rico ("PR")
wind_nonzero <- local({
  state_counts <- table(wind_clean$Site.State)
  keep <- as.vector(state_counts) > 10 & names(state_counts) != "PR"
  data.frame(
    Site.State = names(state_counts)[keep],
    Num_Turbines = as.numeric(state_counts[keep])
  )
})
# Removed: CT (Connecticut), DE (Delaware), GU (Guam), NJ (New Jersey), VA (Virginia), and PR (Puerto Rico)

# Bar chart of turbine counts for the states kept in wind_nonzero.
# wind_nonzero only holds states with more than 10 turbines, so the title
# states that threshold instead of ">0".
ggplot(wind_nonzero, aes(x = Site.State, y = Num_Turbines)) +
  geom_col(fill = "steelblue") +
  labs(
    title = "Number of Turbines by State (States with >10 turbines)",
    x = "State",
    y = "Count"
  ) +
  # Tilt the state labels so they do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# The eight states with the most turbines, largest first
top_states <- wind_clean |>
  count(Site.State, sort = TRUE) |>
  slice_head(n = 8) |>
  pull(Site.State)

# Restrict the turbine records to those eight states
wind_top_states <- filter(wind_clean, Site.State %in% top_states)
# Graph 2: hub height vs. rated capacity, coloured by state
ggplot(
  wind_top_states,
  aes(x = Turbine.Hub_Height, y = Turbine.Capacity, color = Site.State)
) +
  geom_point(size = 4, alpha = 0.6) +
  labs(
    title = "Relationship Between Turbine Height and Capacity",
    x = "Hub Height (meters)",
    # Turbine.Capacity is recorded in kilowatts, so the axis is labelled in kW
    y = "Capacity (kW)",
    caption = "Data Source: wind_turbines.csv"
  ) +
  # Pin the takeaway text at hub height 60, level with the largest capacity
  annotate(
    "text",
    x = 60,
    y = max(wind_top_states$Turbine.Capacity, na.rm = TRUE),
    label = "Taller turbines often generate more power.",
    color = "black"
  ) +
  theme_minimal() +
  scale_color_brewer(palette = "Set3")

# Leaflet map: one circle per turbine, with radius scaled from its capacity.
leaflet(wind_clean) |>
  setView(lng = -93.51808, lat = 42.01363, zoom = 5) |>
  addProviderTiles("Esri.WorldStreetMap") |>
  addCircles(
    # lng must receive longitude and lat must receive latitude; the two were
    # previously swapped, which placed the circles at the wrong coordinates.
    lng = ~Site.Longitude,
    lat = ~Site.Latitude,
    radius = ~Turbine.Capacity * 3,
    color = "darkcyan",
    fillOpacity = 0.6,
    stroke = FALSE,
    # Turbine.Capacity is recorded in kilowatts, so popups and labels say kW
    popup = ~paste0(
      "<b>State:</b> ", Site.State, "<br>",
      "<b>County:</b> ", Site.County, "<br>",
      "<b>Capacity:</b> ", Turbine.Capacity, " kW<br>",
      "<b>Hub Height:</b> ", Turbine.Hub_Height, " m"
    ),
    label = ~paste0(Site.State, ": ", Turbine.Capacity, " kW"),
    labelOptions = labelOptions(
      direction = "auto",
      textsize = "13px",
      opacity = 0.8
    )
  )