Loading Libraries
The packages required for this analysis are loaded below.
library(plm)
library(knitr)
library(stringr)
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plm':
##
## between, lag, lead
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
— Data —
# Restore the objects saved in the .RData file. load() returns the names of
# the objects it restored (a character vector), so that is what `df` holds --
# not the data itself.
df <- load("Ozone_Drought_Final.RData")

# Keep only the rows whose numeric month lies in 5..9 (inclusive).
# NOTE(review): combinedAir.final is not created in this script; presumably it
# is one of the objects restored by load() above -- confirm.
# NOTE(review): if `month` is stored as a factor, as.numeric() yields level
# codes rather than the printed values -- confirm its type.
df2 <- filter(
  mutate(combinedAir.final, month = as.numeric(month)),
  between(month, 5, 9)
)

# Lookup table read from CSV; it is joined to df2 on State.Code below.
df3 <- read.csv(file = "region_code.csv")

# Coerce the join key to numeric before merging.
df2$State.Code <- as.numeric(df2$State.Code)

# merge() with the default arguments keeps only State.Code values present in
# both tables.
df4 <- merge(df2, df3, by = "State.Code")
— Calculating the Statistics (Total Monitor, Mean, SD) —
# Summary of df4 for every (noaa_region, USDM.categorical) combination:
#   total_monitor - number of df4 rows in the group (n())
#   ozone_max     - mean of Max.Ozone
#   sd            - standard deviation of Max.Ozone
# `.groups = "drop"` states the output grouping explicitly instead of relying
# on summarise()'s default (which kept the result grouped by noaa_region and
# printed an informational message). The percentage step that follows applies
# its own group_by(noaa_region), so it is unaffected.
df5 <- df4 %>%
  group_by(noaa_region, USDM.categorical) %>%
  summarize(
    total_monitor = n(),
    ozone_max = mean(Max.Ozone),
    sd = sd(Max.Ozone),
    .groups = "drop"
  )
# Within each noaa_region, express every row's total_monitor as a percentage
# of that region's summed total_monitor. The result stays grouped by
# noaa_region.
df6 <- mutate(
  group_by(df5, noaa_region),
  percentage = prop.table(total_monitor) * 100
)
# Pooled statistics over all of df4, one row per USDM.categorical value:
# row count, mean and standard deviation of Max.Ozone, plus each row's share
# of the total row count. The rows are labelled "Overall" in noaa_region so
# they can be stacked with the per-region rows.
df8 <- df4 %>%
  group_by(USDM.categorical) %>%
  summarise(
    total_monitor = n(),
    ozone_max = mean(Max.Ozone),
    sd = sd(Max.Ozone)
  ) %>%
  ungroup() %>%
  mutate(percentage = (total_monitor / sum(total_monitor)) * 100) %>%
  mutate(noaa_region = "Overall")
# Stack the per-region rows (df6) on top of the "Overall" rows (df8).
df9 <- rbind(df6, df8)
# Reshape to one row per noaa_region with one column per
# (statistic, USDM.categorical) pair, then tidy the result for display.
#
# - ungroup(): df9 arrives grouped by noaa_region; dropping the grouping first
#   avoids the "ignored the following grouping variables" message and leaves
#   df7 as a plain tibble.
# - across(where(is.numeric), ...): replaces the deprecated funs() helper and
#   the superseded mutate_if(); every numeric column is rounded in place to
#   two decimals, exactly as before.
# pivot_wider() emits the value columns in the order given to `values_from`
# (total_monitor, ozone_max, percentage, sd); the table code further down
# selects columns by position and relies on that order.
df7 <- df9 %>%
  ungroup() %>%
  pivot_wider(
    names_from = USDM.categorical,
    values_from = c(total_monitor, ozone_max, percentage, sd)
  ) %>%
  mutate(noaa_region = str_to_sentence(noaa_region)) %>%
  mutate(across(where(is.numeric), ~ round(.x, 2)))
— Statistics Table —
# "Total Monitors" table: noaa_region plus columns 2-4 of df7.
df10 <- df7[, seq_len(4)]
# Swap the second and third columns so the No Drought column comes first
# among the value columns.
# NOTE(review): this is positional; it presumes No Drought is the third
# column of df10 -- confirm against names(df7).
df11 <- df10[, c(1, 3, 2, 4)]
# Put the "Overall" row ahead of the regional rows.
ovr <- df11[df11$noaa_region == "Overall", ]
df12 <- rbind(ovr, df11[!(df11$noaa_region == "Overall"), ])
kable(
  x = df12,
  align = c("l", rep("c", 3)),
  caption = "Total Monitors"
)
Total Monitors
Overall |
2396798 |
714055 |
319251 |
Northeast |
467861 |
49527 |
2371 |
Northern_rockies |
66717 |
25237 |
9242 |
Northwest |
54917 |
21232 |
2903 |
Ohio_valley |
546239 |
69599 |
12344 |
South |
274257 |
96036 |
43374 |
Southeast |
384492 |
99951 |
45652 |
Southwest |
130724 |
114636 |
79949 |
Upper_midwest |
232698 |
30410 |
2777 |
West |
238893 |
207427 |
120639 |
# "Ozone Max" table: noaa_region plus columns 5-7 of df7.
df13 <- df7[, c(1, 5, 6, 7)]
# Swap the second and third columns so the No Drought column comes first
# among the value columns.
# NOTE(review): this is positional; it presumes No Drought is the third
# column of df13 -- confirm against names(df7).
df14 <- df13[, c(1, 3, 2, 4)]
# Put the "Overall" row ahead of the regional rows.
ovr <- df14[df14$noaa_region == "Overall", ]
df15 <- rbind(ovr, df14[!(df14$noaa_region == "Overall"), ])
kable(
  x = df15,
  align = c("l", rep("c", 3)),
  caption = "Ozone Max"
)
Ozone Max
Overall |
45.55 |
50.05 |
51.86 |
Northeast |
44.94 |
47.11 |
45.93 |
Northern_rockies |
44.26 |
46.32 |
48.41 |
Northwest |
37.20 |
40.32 |
43.53 |
Ohio_valley |
47.37 |
51.66 |
54.08 |
South |
43.47 |
46.43 |
46.72 |
Southeast |
42.17 |
48.11 |
50.84 |
Southwest |
53.05 |
53.79 |
55.13 |
Upper_midwest |
43.52 |
43.70 |
44.30 |
West |
50.54 |
53.12 |
52.44 |
# "Percentage Monitor" table: noaa_region plus columns 8-10 of df7.
df16 <- df7[, c(1, 8, 9, 10)]
# Swap the second and third columns so the No Drought column comes first
# among the value columns.
# NOTE(review): this is positional; it presumes No Drought is the third
# column of df16 -- confirm against names(df7).
df17 <- df16[, c(1, 3, 2, 4)]
# Put the "Overall" row ahead of the regional rows.
ovr <- df17[df17$noaa_region == "Overall", ]
df18 <- rbind(ovr, df17[!(df17$noaa_region == "Overall"), ])
kable(
  x = df18,
  align = c("l", rep("c", 3)),
  caption = "Percentage Monitor"
)
Percentage Monitor
Overall |
69.88 |
20.82 |
9.31 |
Northeast |
90.01 |
9.53 |
0.46 |
Northern_rockies |
65.93 |
24.94 |
9.13 |
Northwest |
69.47 |
26.86 |
3.67 |
Ohio_valley |
86.96 |
11.08 |
1.97 |
South |
66.30 |
23.22 |
10.49 |
Southeast |
72.53 |
18.86 |
8.61 |
Southwest |
40.18 |
35.24 |
24.58 |
Upper_midwest |
87.52 |
11.44 |
1.04 |
West |
42.14 |
36.59 |
21.28 |
# "Standard Deviation" table: noaa_region plus columns 11-13 of df7.
# The column subset is held in its own variable instead of being assigned to
# `df18`: that name already holds the finished "Percentage Monitor" table, and
# reusing it here silently overwrote that result.
sd_cols <- df7[, c(1, 11:13)]
# Swap the second and third columns so the No Drought column comes first
# among the value columns.
# NOTE(review): this is positional; it presumes No Drought is the third
# column of the subset -- confirm against names(df7).
df19 <- sd_cols[, c(1, 3, 2, 4)]
# Move the Overall row to the first row
ovr <- df19[df19$noaa_region == "Overall", ]
df20 <- rbind(ovr, df19[df19$noaa_region != "Overall", ])
kable(df20, caption = "Standard Deviation", align = c("l", "c", "c", "c"))
Standard Deviation
Overall |
14.51 |
15.51 |
14.94 |
Northeast |
14.79 |
17.02 |
20.12 |
Northern_rockies |
9.87 |
10.75 |
9.80 |
Northwest |
12.15 |
12.80 |
11.79 |
Ohio_valley |
13.42 |
15.06 |
14.90 |
South |
15.40 |
16.05 |
16.67 |
Southeast |
14.22 |
15.39 |
17.24 |
Southwest |
9.89 |
10.14 |
10.22 |
Upper_midwest |
13.09 |
14.21 |
14.47 |
West |
16.87 |
16.93 |
15.49 |