# Import the CSV file
dat <- read_csv("~/Documents/WEAct/weact/weact/camp_q2_data.csv")
## Rows: 45626 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): sensor_name
## dbl (8): humidity_a, temperature_a, pressure_a, pm2.5_alt_a, pm2.5_alt_b, s...
## dttm (1): time_stamp
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Define the expected time format pattern: "YYYY-MM-DD HH:MM:SS"
pattern <- "^\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}$"
# Note: Update the column name if your CSV uses "time" instead of "time_stamp_parsed"
bad_time_rows <- dat %>%
filter(!str_detect(.data$time_stamp, pattern))
# Count and print the number of rows with non-exact time format
bad_time_count <- nrow(bad_time_rows)
cat("Number of rows with non-exact time format:", bad_time_count, "\n")
## Number of rows with non-exact time format: 950
print(bad_time_rows)
## # A tibble: 950 × 10
## time_stamp humidity_a temperature_a pressure_a pm2.5_alt_a
## <dttm> <dbl> <dbl> <dbl> <dbl>
## 1 2024-01-01 00:00:00 44 47.8 1012. 12.8
## 2 2024-01-02 00:00:00 49.3 40 1014. 3.8
## 3 2024-01-03 00:00:00 42.8 42.9 1012. 6.7
## 4 2024-01-04 00:00:00 41.2 47 1011. 10
## 5 2024-01-05 00:00:00 37.7 33.9 1019. 1
## 6 2024-01-06 00:00:00 41.9 39 1019. 10.3
## 7 2024-01-07 00:00:00 64.8 40.9 1001. 5
## 8 2024-01-08 00:00:00 51 41.3 1012. 4.6
## 9 2024-01-09 00:00:00 46.1 45 1026. 8.8
## 10 2024-01-10 00:00:00 73.1 60 985. 3.1
## # ℹ 940 more rows
## # ℹ 5 more variables: pm2.5_alt_b <dbl>, sensor_index <dbl>, sensor_name <chr>,
## # pm2.5_alt <dbl>, pm2.5_aqi <dbl>
# 1. Load the datasets
camp_q2_data <- read_csv("camp_q2_data.csv")
## Rows: 45626 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): sensor_name
## dbl (8): humidity_a, temperature_a, pressure_a, pm2.5_alt_a, pm2.5_alt_b, s...
## dttm (1): time_stamp
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
sensor_info <- read_csv("sensorindex_name.csv")
## Rows: 11 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): name_on_map
## dbl (3): sensor_index, height (ft), altitude (ft)
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# 2. Inspect the datasets
glimpse(camp_q2_data)
## Rows: 45,626
## Columns: 10
## $ time_stamp <dttm> 2023-12-31 19:00:00, 2023-12-31 19:30:00, 2023-12-31 20…
## $ humidity_a <dbl> 37.000, 38.467, 39.000, 39.867, 40.867, 41.933, 42.000, …
## $ temperature_a <dbl> 49.733, 48.933, 48.400, 48.333, 48.067, 48.000, 48.067, …
## $ pressure_a <dbl> 1011.904, 1011.888, 1011.887, 1011.731, 1011.724, 1011.7…
## $ pm2.5_alt_a <dbl> 9.4, 8.0, 8.3, 8.8, 9.0, 9.1, 9.2, 9.7, 11.1, 12.0, 12.8…
## $ pm2.5_alt_b <dbl> 13.5, 11.2, 11.2, 10.8, 12.6, 8.2, 9.9, 12.0, 15.7, 15.4…
## $ sensor_index <dbl> 178411, 178411, 178411, 178411, 178411, 178411, 178411, …
## $ sensor_name <chr> "Office", "Office", "Office", "Office", "Office", "Offic…
## $ pm2.5_alt <dbl> 11.45, 9.60, 9.75, 9.80, 10.80, 8.65, 9.55, 10.85, 13.40…
## $ pm2.5_aqi <dbl> 48, 40, 41, 41, 46, 37, 40, 46, 54, 55, 55, 63, 61, 57, …
glimpse(sensor_info)
## Rows: 11
## Columns: 4
## $ sensor_index <dbl> 178411, 188669, 188667, 188659, 188661, 188665, 188651…
## $ name_on_map <chr> "WE ACT for Environmental Justice Office", "WE ACT Com…
## $ `height (ft)` <dbl> 12, 34, 34, 19, 12, 26, 12, 19, 8, 41, 5
## $ `altitude (ft)` <dbl> 134, 32, 62, 22, 18, 178, 89, 33, 106, 203, 34
# 3. Clean camp_q2_data
camp_q2_data_clean <- camp_q2_data %>%
# Convert the time_stamp column to a proper datetime object
mutate(time_stamp = ymd_hms(time_stamp),
# Ensure sensor_index is treated as an integer
sensor_index = as.integer(sensor_index),
# Rename columns to remove special characters and make names easier to work with
pm25_alt_a = `pm2.5_alt_a`,
pm25_alt_b = `pm2.5_alt_b`,
pm25_alt = `pm2.5_alt`,
pm25_aqi = `pm2.5_aqi`) %>%
# Optionally, drop the original columns if they are now redundant
select(time_stamp, humidity_a, temperature_a, pressure_a,
pm25_alt_a, pm25_alt_b, sensor_index, sensor_name,
pm25_alt, pm25_aqi) %>%
# Remove rows with missing values (or consider imputation if preferred)
drop_na()
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `time_stamp = ymd_hms(time_stamp)`.
## Caused by warning:
## ! 950 failed to parse.
# 4. Clean sensor_info data
sensor_info_clean <- sensor_info %>%
mutate(sensor_index = as.integer(sensor_index),
# Rename columns to simplify names (remove spaces and special characters)
height_ft = `height (ft)`,
altitude_ft = `altitude (ft)`) %>%
select(sensor_index, name_on_map, height_ft, altitude_ft)
# 5. Merge the two datasets on sensor_index
combined_data <- left_join(camp_q2_data_clean, sensor_info_clean, by = "sensor_index")
# 6. Inspect the merged data
glimpse(combined_data)
## Rows: 44,676
## Columns: 13
## $ time_stamp <dttm> 2023-12-31 19:00:00, 2023-12-31 19:30:00, 2023-12-31 20…
## $ humidity_a <dbl> 37.000, 38.467, 39.000, 39.867, 40.867, 41.933, 42.000, …
## $ temperature_a <dbl> 49.733, 48.933, 48.400, 48.333, 48.067, 48.000, 48.067, …
## $ pressure_a <dbl> 1011.904, 1011.888, 1011.887, 1011.731, 1011.724, 1011.7…
## $ pm25_alt_a <dbl> 9.4, 8.0, 8.3, 8.8, 9.0, 9.1, 9.2, 9.7, 11.1, 12.0, 16.1…
## $ pm25_alt_b <dbl> 13.5, 11.2, 11.2, 10.8, 12.6, 8.2, 9.9, 12.0, 15.7, 15.4…
## $ sensor_index <int> 178411, 178411, 178411, 178411, 178411, 178411, 178411, …
## $ sensor_name <chr> "Office", "Office", "Office", "Office", "Office", "Offic…
## $ pm25_alt <dbl> 11.45, 9.60, 9.75, 9.80, 10.80, 8.65, 9.55, 10.85, 13.40…
## $ pm25_aqi <dbl> 48, 40, 41, 41, 46, 37, 40, 46, 54, 55, 63, 61, 57, 56, …
## $ name_on_map <chr> "WE ACT for Environmental Justice Office", "WE ACT for E…
## $ height_ft <dbl> 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, …
## $ altitude_ft <dbl> 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 1…
# 7. Optionally, write the cleaned and merged data to a new CSV file
write_csv(combined_data, "combined_cleaned_data.csv")
# ---------------------------
# Global Data Loading
# ---------------------------
# Load merged data from individual CSV files
merged_data <- {
camp_q2_data <- read_csv("camp_q2_data.csv") %>%
mutate(
time_stamp = ymd_hms(time_stamp),
sensor_index = as.integer(sensor_index),
pm25_alt = `pm2.5_alt`,
pm25_aqi = `pm2.5_aqi`
)
sensor_info <- read_csv("sensorindex_name_latlong.csv") %>%
mutate(
sensor_index = as.integer(sensor_index),
height_ft = `height (ft)`,
altitude_ft = `altitude (ft)`
) %>%
select(sensor_index, name_on_map, height_ft, altitude_ft, Latitude, Longitude)
left_join(camp_q2_data, sensor_info, by = "sensor_index")
}
## Rows: 45626 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): sensor_name
## dbl (8): humidity_a, temperature_a, pressure_a, pm2.5_alt_a, pm2.5_alt_b, s...
## dttm (1): time_stamp
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `time_stamp = ymd_hms(time_stamp)`.
## Caused by warning:
## ! 950 failed to parse.
## Rows: 11 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): name_on_map
## dbl (5): sensor_index, height (ft), altitude (ft), Latitude, Longitude
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the new combined, cleaned dataset
cleaned_data <- read_csv("combined_cleaned_data.csv") %>%
mutate(
time_stamp = ymd_hms(time_stamp),
sensor_index = as.integer(sensor_index)
# If necessary, add additional mutations here to ensure the format matches
)
## Rows: 44676 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): sensor_name, name_on_map
## dbl (10): humidity_a, temperature_a, pressure_a, pm25_alt_a, pm25_alt_b, se...
## dttm (1): time_stamp
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# ---------------------------
# UI
# ---------------------------
ui <- fluidPage(
titlePanel("Sensor PM2.5 and AQI Dashboard"),
sidebarLayout(
sidebarPanel(
# Data source selector: choose between merged and cleaned datasets
selectInput(
inputId = "data_source",
label = "Select Data Source:",
choices = c("Merged Data", "Cleaned Data"),
selected = "Merged Data"
),
# Sensor selection will update based on chosen data source
selectInput(
inputId = "sensor",
label = "Select Sensor:",
choices = NULL, # Will be updated in the server
selected = "All"
),
dateRangeInput(
inputId = "date_range",
label = "Select Date Range:",
start = NULL, # Will be updated in the server
end = NULL
)
),
mainPanel(
tabsetPanel(
tabPanel("PM2.5 Distribution Map", leafletOutput("pm25_map")),
tabPanel("AQI Time Series", plotOutput("aqi_plot"))
)
)
)
)
# ---------------------------
# Server
# ---------------------------
server <- function(input, output, session) {
# Reactive: Return the selected dataset
selected_data <- reactive({
if (input$data_source == "Merged Data") {
merged_data
} else {
cleaned_data
}
})
# Update UI inputs (sensor list and date range) when the data source changes
observe({
data <- selected_data()
sensor_choices <- c("All", sort(unique(data$name_on_map)))
updateSelectInput(session, "sensor", choices = sensor_choices, selected = "All")
updateDateRangeInput(session, "date_range",
start = min(as.Date(data$time_stamp), na.rm = TRUE),
end = max(as.Date(data$time_stamp), na.rm = TRUE))
})
# Reactive: Filter data based on sensor selection and date range
filtered_data <- reactive({
req(input$date_range)
data <- selected_data() %>%
filter(as.Date(time_stamp) >= input$date_range[1],
as.Date(time_stamp) <= input$date_range[2])
if (input$sensor != "All") {
data <- data %>% filter(name_on_map == input$sensor)
}
data
})
# Reactive: Summarize data per sensor for mapping
sensor_summary <- reactive({
filtered_data() %>%
group_by(sensor_index, name_on_map, Latitude, Longitude) %>%
summarize(
avg_pm25 = mean(pm25_alt, na.rm = TRUE),
avg_aqi = mean(pm25_aqi, na.rm = TRUE),
.groups = "drop"
)
})
# Leaflet map for PM2.5 distribution
output$pm25_map <- renderLeaflet({
summary_df <- sensor_summary()
leaflet(summary_df) %>%
addTiles() %>%
addCircleMarkers(
lng = ~Longitude,
lat = ~Latitude,
radius = ~avg_pm25, # Marker size proportional to average PM2.5
color = "blue",
fillOpacity = 0.7,
popup = ~paste0("<strong>", name_on_map, "</strong><br>",
"Avg PM2.5: ", round(avg_pm25, 2), "<br>",
"Avg AQI: ", round(avg_aqi, 2))
)
})
# Time series plot for AQI changes over time
output$aqi_plot <- renderPlot({
filtered_data() %>%
ggplot(aes(x = time_stamp, y = pm25_aqi, color = name_on_map)) +
geom_line() +
labs(
title = "AQI Changes Over Time",
x = "Time",
y = "AQI",
color = "Sensor"
) +
theme_minimal()
})
}
# ---------------------------
# Run the Shiny App if in interactive mode
# ---------------------------
if (interactive()) {
shinyApp(ui = ui, server = server)
}