Loading Libraries

The packages required for this analysis are loaded below.

library(plm)
library(knitr)
library(stringr)
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plm':
## 
##     between, lag, lead
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

— Data —

# Restore the saved workspace. The code below expects it to provide
# `combinedAir.final`. Note that load() returns only the NAMES of the restored
# objects, so `df` is a character vector, not the data itself.
df <- load("Ozone_Drought_Final.RData")

# Keep observations from months 5 through 9 (inclusive) and make the merge key
# numeric. Going through as.character() first means a factor column is
# converted by its label rather than by its internal level code.
# between() is namespace-qualified because plm exports a function of the same
# name.
df2 <- combinedAir.final %>%
  mutate(
    month = as.numeric(as.character(month)),
    State.Code = as.numeric(as.character(State.Code))
  ) %>%
  filter(dplyr::between(month, 5, 9))

# Lookup table joined on State.Code; the later steps use its `noaa_region`
# column.
df3 <- read.csv("region_code.csv")

# merge() performs an inner join: rows without a matching State.Code in the
# lookup table are dropped.
df4 <- df2 %>%
  merge(df3, by = "State.Code")

— Calculating the Statistics (Total Monitor, Mean, SD) —

# Per NOAA region and drought category: row count, mean and standard deviation
# of Max.Ozone.
# NOTE(review): `total_monitor` is n(), i.e. the number of rows of df4 in each
# group -- presumably observations rather than distinct monitors; confirm.
# `.groups = "drop_last"` states the grouping of the result explicitly (still
# grouped by noaa_region), which also silences the summarise() message.
df5 <- df4 %>%
  group_by(noaa_region, USDM.categorical) %>%
  summarise(
    total_monitor = n(),
    ozone_max = mean(Max.Ozone),
    sd = sd(Max.Ozone),
    .groups = "drop_last"
  )
# Share of each region's rows that falls into each drought category.
df6 <- df5 %>%
  group_by(noaa_region) %>%
  mutate(percentage = (total_monitor / sum(total_monitor)) * 100)

# The same statistics over all regions pooled together, labelled "Overall".
df8 <- df4 %>%
  group_by(USDM.categorical) %>%
  summarise(
    total_monitor = n(),
    ozone_max = mean(Max.Ozone),
    sd = sd(Max.Ozone)
  ) %>%
  ungroup() %>%
  mutate(
    percentage = (total_monitor / sum(total_monitor)) * 100,
    noaa_region = "Overall"
  )

# Stack the regional and overall rows. bind_rows() matches columns by name,
# and the grouping is dropped first so that nothing downstream runs per group.
df9 <- bind_rows(ungroup(df6), df8)

# One row per region and one column per statistic x drought category, with
# every numeric column rounded to 2 decimals. across() replaces the deprecated
# mutate_if(..., funs(...)) idiom.
df7 <- df9 %>%
  pivot_wider(
    names_from = USDM.categorical,
    values_from = c(total_monitor, ozone_max, percentage, sd)
  ) %>%
  mutate(noaa_region = str_to_sentence(noaa_region)) %>%
  mutate(across(where(is.numeric), ~ round(.x, 2)))

— Statistics Table —

# Row-count columns of df7, selected by name rather than by position so the
# table does not depend on the column order produced by pivot_wider().
df10 <- df7 %>%
  select(noaa_region, starts_with("total_monitor_"))

# Move No Drought column to first column
df11 <- df10 %>%
  select(
    noaa_region,
    total_monitor_NoDrought,
    total_monitor_ModerateDrought,
    total_monitor_SevereDrought
  )

# Move the Overall row to the first row
ovr <- df11[df11$noaa_region == "Overall", ]
df12 <- bind_rows(ovr, df11[df11$noaa_region != "Overall", ])

kable(df12, caption = "Total Monitors", align = c("l", "c", "c", "c"))
Total Monitors
noaa_region total_monitor_NoDrought total_monitor_ModerateDrought total_monitor_SevereDrought
Overall 2396798 714055 319251
Northeast 467861 49527 2371
Northern_rockies 66717 25237 9242
Northwest 54917 21232 2903
Ohio_valley 546239 69599 12344
South 274257 96036 43374
Southeast 384492 99951 45652
Southwest 130724 114636 79949
Upper_midwest 232698 30410 2777
West 238893 207427 120639
# Mean Max.Ozone columns of df7, selected by name rather than by position so
# the table does not depend on the column order produced by pivot_wider().
df13 <- df7 %>%
  select(noaa_region, starts_with("ozone_max_"))

# Move No Drought column to first column
df14 <- df13 %>%
  select(
    noaa_region,
    ozone_max_NoDrought,
    ozone_max_ModerateDrought,
    ozone_max_SevereDrought
  )

# Move the Overall row to the first row
ovr <- df14[df14$noaa_region == "Overall", ]
df15 <- bind_rows(ovr, df14[df14$noaa_region != "Overall", ])

kable(df15, caption = "Ozone Max", align = c("l", "c", "c", "c"))
Ozone Max
noaa_region ozone_max_NoDrought ozone_max_ModerateDrought ozone_max_SevereDrought
Overall 45.55 50.05 51.86
Northeast 44.94 47.11 45.93
Northern_rockies 44.26 46.32 48.41
Northwest 37.20 40.32 43.53
Ohio_valley 47.37 51.66 54.08
South 43.47 46.43 46.72
Southeast 42.17 48.11 50.84
Southwest 53.05 53.79 55.13
Upper_midwest 43.52 43.70 44.30
West 50.54 53.12 52.44
# Percentage columns of df7, selected by name rather than by position so the
# table does not depend on the column order produced by pivot_wider().
df16 <- df7 %>%
  select(noaa_region, starts_with("percentage_"))

# Move No Drought column to first column
df17 <- df16 %>%
  select(
    noaa_region,
    percentage_NoDrought,
    percentage_ModerateDrought,
    percentage_SevereDrought
  )

# Move the Overall row to the first row
ovr <- df17[df17$noaa_region == "Overall", ]
df18 <- bind_rows(ovr, df17[df17$noaa_region != "Overall", ])

kable(df18, caption = "Percentage Monitor", align = c("l", "c", "c", "c"))
Percentage Monitor
noaa_region percentage_NoDrought percentage_ModerateDrought percentage_SevereDrought
Overall 69.88 20.82 9.31
Northeast 90.01 9.53 0.46
Northern_rockies 65.93 24.94 9.13
Northwest 69.47 26.86 3.67
Ohio_valley 86.96 11.08 1.97
South 66.30 23.22 10.49
Southeast 72.53 18.86 8.61
Southwest 40.18 35.24 24.58
Upper_midwest 87.52 11.44 1.04
West 42.14 36.59 21.28
# Standard-deviation columns of df7, selected by name rather than by position.
# The intermediate gets its own name (`sd_cols`) instead of reusing `df18`,
# which already holds the percentage table and would otherwise be overwritten.
sd_cols <- df7 %>%
  select(noaa_region, starts_with("sd_"))

# Move No Drought column to first column
df19 <- sd_cols %>%
  select(
    noaa_region,
    sd_NoDrought,
    sd_ModerateDrought,
    sd_SevereDrought
  )

# Move the Overall row to the first row
ovr <- df19[df19$noaa_region == "Overall", ]
df20 <- bind_rows(ovr, df19[df19$noaa_region != "Overall", ])
kable(df20, caption = "Standard Deviation", align = c("l", "c", "c", "c"))
Standard Deviation
noaa_region sd_NoDrought sd_ModerateDrought sd_SevereDrought
Overall 14.51 15.51 14.94
Northeast 14.79 17.02 20.12
Northern_rockies 9.87 10.75 9.80
Northwest 12.15 12.80 11.79
Ohio_valley 13.42 15.06 14.90
South 15.40 16.05 16.67
Southeast 14.22 15.39 17.24
Southwest 9.89 10.14 10.22
Upper_midwest 13.09 14.21 14.47
West 16.87 16.93 15.49