Loading Libraries

The packages required for this analysis are loaded below.

library(plm)
library(knitr)
library(stringr)
library(stargazer)

## 
## Please cite as:

##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.

##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer

library(tidyr)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:plm':
## 
##     between, lag, lead

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(lubridate)

## 
## Attaching package: 'lubridate'

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

— Data —

# load() restores the objects saved in the .RData file into the workspace and
# returns their names, so `df` is a character vector of object names rather
# than a data frame.
# NOTE(review): `combinedAir.final` is presumably one of the restored
# objects -- confirm.
df <- load("Ozone_Drought_Final.RData")

# Copy of the air-quality data with the month column coerced to numeric.
df2 <- mutate(combinedAir.final, month = as.numeric(month))

# Second table read from CSV; merged with df2 on State.Code below.
df3 <- read.csv("region_code.csv")

# Coerce the merge key to numeric before merging.
df2$State.Code <- as.numeric(df2$State.Code)
df4 <- merge(df2, df3, by = "State.Code")

— Calculating the Statistics (Total Monitor, Mean, SD) —

# For every (noaa_region, USDM.categorical) pair: the row count (stored as
# total_monitor) plus the mean and standard deviation of Max.Ozone.
df5 <- summarise(
    group_by(df4, noaa_region, USDM.categorical),
    total_monitor = n(),
    ozone_max = mean(Max.Ozone),
    sd = sd(Max.Ozone)
)

## `summarise()` has grouped output by 'noaa_region'. You can override using the
## `.groups` argument.

# Within each region, express every drought category's row count as a
# percentage of that region's total.
df6 <- mutate(
    group_by(df5, noaa_region),
    percentage = (total_monitor / sum(total_monitor)) * 100
)

# Same statistics computed per drought category across all regions at once;
# the rows are labelled "Overall" in place of a region name.
df8 <- df4 %>%
    group_by(USDM.categorical) %>%
    summarise(
        total_monitor = n(),
        ozone_max = mean(Max.Ozone),
        sd = sd(Max.Ozone)
    ) %>%
    ungroup() %>%
    mutate(
        percentage = (total_monitor / sum(total_monitor)) * 100,
        noaa_region = "Overall"
    )

# Stack the "Overall" rows beneath the per-region rows.
df9 <- rbind(df6, df8)

# Reshape to one row per region with one column per (statistic, drought
# category) pair, convert the region labels to sentence case, and round every
# numeric column to two decimals.
#
# ungroup() drops the noaa_region grouping carried over from df9 so the
# mutate() calls act on the whole table. across(where(is.numeric), ...)
# replaces the deprecated mutate_if(is.numeric, funs(...)) form and yields the
# same rounded values.
df7 <- df9 %>%
    pivot_wider(
        names_from = USDM.categorical,
        values_from = c(total_monitor, ozone_max, percentage, sd)
    ) %>%
    ungroup() %>%
    mutate(noaa_region = str_to_sentence(noaa_region)) %>%
    mutate(across(where(is.numeric), ~ round(.x, 2)))

## `mutate_if()` ignored the following grouping variables:
## • Column `noaa_region`

## Warning: `funs()` was deprecated in dplyr 0.8.0.
## ℹ Please use a list of either functions or lambdas:
## 
## # Simple named list: list(mean = mean, median = median)
## 
## # Auto named with `tibble::lst()`: tibble::lst(mean, median)
## 
## # Using lambdas list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

— Statistics Table —

# Total Monitors table.
# Columns are picked by name rather than by position so the table does not
# silently change if the column order produced by pivot_wider() changes.
df10 <- df7 %>%
    select(noaa_region, starts_with("total_monitor_"))

# Move No Drought column to first column (after the region label)
df11 <- df10 %>%
    select(
        noaa_region,
        total_monitor_NoDrought,
        total_monitor_ModerateDrought,
        total_monitor_SevereDrought
    )

# Move the Overall row to the first row
ovr <- df11[df11$noaa_region == "Overall", ]
df12 <- rbind(ovr, df11[df11$noaa_region != "Overall", ])

kable(df12, caption = "Total Monitors", align = c("l", "c", "c", "c"))

Total Monitors
noaa_region	total_monitor_NoDrought	total_monitor_ModerateDrought	total_monitor_SevereDrought
Overall	4488807	1489317	611303
Northeast	860506	91902	7817
Northern_rockies	148140	59525	19241
Northwest	74991	26302	4045
Ohio_valley	875564	112532	18384
South	640348	224583	86555
Southeast	711162	195009	76220
Southwest	273350	280657	129523
Upper_midwest	329053	49459	3843
West	575693	449348	265675

# Ozone Max table.
# Columns are picked by name rather than by position so the table does not
# silently change if the column order produced by pivot_wider() changes.
df13 <- df7 %>%
    select(noaa_region, starts_with("ozone_max_"))

# Move No Drought column to first column (after the region label)
df14 <- df13 %>%
    select(
        noaa_region,
        ozone_max_NoDrought,
        ozone_max_ModerateDrought,
        ozone_max_SevereDrought
    )

# Move the Overall row to the first row
ovr <- df14[df14$noaa_region == "Overall", ]
df15 <- rbind(ovr, df14[df14$noaa_region != "Overall", ])

kable(df15, caption = "Ozone Max", align = c("l", "c", "c", "c"))

Ozone Max
noaa_region	ozone_max_NoDrought	ozone_max_ModerateDrought	ozone_max_SevereDrought
Overall	41.53	44.03	45.99
Northeast	40.27	40.69	38.84
Northern_rockies	40.45	41.71	43.53
Northwest	35.59	39.12	41.76
Ohio_valley	43.45	45.42	47.38
South	39.51	40.97	42.37
Southeast	41.12	45.10	46.90
Southwest	46.40	46.33	50.41
Upper_midwest	41.72	40.97	40.68
West	41.86	44.91	45.18

# Percentage Monitor table.
# Columns are picked by name rather than by position so the table does not
# silently change if the column order produced by pivot_wider() changes.
df16 <- df7 %>%
    select(noaa_region, starts_with("percentage_"))

# Move No Drought column to first column (after the region label)
df17 <- df16 %>%
    select(
        noaa_region,
        percentage_NoDrought,
        percentage_ModerateDrought,
        percentage_SevereDrought
    )

# Move the Overall row to the first row
ovr <- df17[df17$noaa_region == "Overall", ]
df18 <- rbind(ovr, df17[df17$noaa_region != "Overall", ])

kable(df18, caption = "Percentage Monitor", align = c("l", "c", "c", "c"))

Percentage Monitor
noaa_region	percentage_NoDrought	percentage_ModerateDrought	percentage_SevereDrought
Overall	68.12	22.60	9.28
Northeast	89.62	9.57	0.81
Northern_rockies	65.29	26.23	8.48
Northwest	71.19	24.97	3.84
Ohio_valley	86.99	11.18	1.83
South	67.30	23.60	9.10
Southeast	72.39	19.85	7.76
Southwest	39.99	41.06	18.95
Upper_midwest	86.06	12.94	1.01
West	44.60	34.81	20.58

# Standard Deviation table.
# The sd columns are selected by name, already in display order (No Drought
# first), and assigned straight to df19. This avoids reusing the name df18,
# which holds the Percentage Monitor table, and avoids depending on the
# column positions produced by pivot_wider().
df19 <- df7 %>%
    select(
        noaa_region,
        sd_NoDrought,
        sd_ModerateDrought,
        sd_SevereDrought
    )

# Move the Overall row to the first row
ovr <- df19[df19$noaa_region == "Overall", ]
df20 <- rbind(ovr, df19[df19$noaa_region != "Overall", ])
kable(df20, caption = "Standard Deviation", align = c("l", "c", "c", "c"))

Standard Deviation
noaa_region	sd_NoDrought	sd_ModerateDrought	sd_SevereDrought
Overall	14.21	15.06	15.02
Northeast	14.45	16.38	15.66
Northern_rockies	10.26	10.99	10.53
Northwest	12.33	12.84	12.05
Ohio_valley	14.20	16.49	17.68
South	14.21	14.58	15.11
Southeast	13.24	14.21	16.20
Southwest	12.56	12.23	12.37
Upper_midwest	12.93	13.76	14.57
West	16.38	16.73	15.33

Summary Statistics (All Data)

Loading Libraries

— Data —

— Calculating the Statistics (Total Monitor, Mean, SD) —

— Statistics Table —