result section

# Import the CSV file
dat <- read_csv("~/Documents/WEAct/weact/weact/camp_q2_data.csv")

## Rows: 45626 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): sensor_name
## dbl  (8): humidity_a, temperature_a, pressure_a, pm2.5_alt_a, pm2.5_alt_b, s...
## dttm (1): time_stamp
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Define the expected time format pattern: "YYYY-MM-DD HH:MM:SS"
pattern <- "^\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}$"

# Note: Update the column name if your CSV uses "time" instead of "time_stamp_parsed"
bad_time_rows <- dat %>%
  filter(!str_detect(.data$time_stamp, pattern))

# Count and print the number of rows with non-exact time format
bad_time_count <- nrow(bad_time_rows)
cat("Number of rows with non-exact time format:", bad_time_count, "\n")

## Number of rows with non-exact time format: 950

print(bad_time_rows)

## # A tibble: 950 × 10
##    time_stamp          humidity_a temperature_a pressure_a pm2.5_alt_a
##    <dttm>                   <dbl>         <dbl>      <dbl>       <dbl>
##  1 2024-01-01 00:00:00       44            47.8      1012.        12.8
##  2 2024-01-02 00:00:00       49.3          40        1014.         3.8
##  3 2024-01-03 00:00:00       42.8          42.9      1012.         6.7
##  4 2024-01-04 00:00:00       41.2          47        1011.        10  
##  5 2024-01-05 00:00:00       37.7          33.9      1019.         1  
##  6 2024-01-06 00:00:00       41.9          39        1019.        10.3
##  7 2024-01-07 00:00:00       64.8          40.9      1001.         5  
##  8 2024-01-08 00:00:00       51            41.3      1012.         4.6
##  9 2024-01-09 00:00:00       46.1          45        1026.         8.8
## 10 2024-01-10 00:00:00       73.1          60         985.         3.1
## # ℹ 940 more rows
## # ℹ 5 more variables: pm2.5_alt_b <dbl>, sensor_index <dbl>, sensor_name <chr>,
## #   pm2.5_alt <dbl>, pm2.5_aqi <dbl>

# 1. Load the datasets
camp_q2_data <- read_csv("camp_q2_data.csv")

## Rows: 45626 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): sensor_name
## dbl  (8): humidity_a, temperature_a, pressure_a, pm2.5_alt_a, pm2.5_alt_b, s...
## dttm (1): time_stamp
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

sensor_info <- read_csv("sensorindex_name.csv")

## Rows: 11 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): name_on_map
## dbl (3): sensor_index, height (ft), altitude (ft)
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# 2. Inspect the datasets
glimpse(camp_q2_data)

## Rows: 45,626
## Columns: 10
## $ time_stamp    <dttm> 2023-12-31 19:00:00, 2023-12-31 19:30:00, 2023-12-31 20…
## $ humidity_a    <dbl> 37.000, 38.467, 39.000, 39.867, 40.867, 41.933, 42.000, …
## $ temperature_a <dbl> 49.733, 48.933, 48.400, 48.333, 48.067, 48.000, 48.067, …
## $ pressure_a    <dbl> 1011.904, 1011.888, 1011.887, 1011.731, 1011.724, 1011.7…
## $ pm2.5_alt_a   <dbl> 9.4, 8.0, 8.3, 8.8, 9.0, 9.1, 9.2, 9.7, 11.1, 12.0, 12.8…
## $ pm2.5_alt_b   <dbl> 13.5, 11.2, 11.2, 10.8, 12.6, 8.2, 9.9, 12.0, 15.7, 15.4…
## $ sensor_index  <dbl> 178411, 178411, 178411, 178411, 178411, 178411, 178411, …
## $ sensor_name   <chr> "Office", "Office", "Office", "Office", "Office", "Offic…
## $ pm2.5_alt     <dbl> 11.45, 9.60, 9.75, 9.80, 10.80, 8.65, 9.55, 10.85, 13.40…
## $ pm2.5_aqi     <dbl> 48, 40, 41, 41, 46, 37, 40, 46, 54, 55, 55, 63, 61, 57, …

glimpse(sensor_info)

## Rows: 11
## Columns: 4
## $ sensor_index    <dbl> 178411, 188669, 188667, 188659, 188661, 188665, 188651…
## $ name_on_map     <chr> "WE ACT for Environmental Justice Office", "WE ACT Com…
## $ `height (ft)`   <dbl> 12, 34, 34, 19, 12, 26, 12, 19, 8, 41, 5
## $ `altitude (ft)` <dbl> 134, 32, 62, 22, 18, 178, 89, 33, 106, 203, 34

# 3. Clean camp_q2_data
camp_q2_data_clean <- camp_q2_data %>%
  # Convert the time_stamp column to a proper datetime object
  mutate(time_stamp = ymd_hms(time_stamp),
         # Ensure sensor_index is treated as an integer
         sensor_index = as.integer(sensor_index),
         # Rename columns to remove special characters and make names easier to work with
         pm25_alt_a = `pm2.5_alt_a`,
         pm25_alt_b = `pm2.5_alt_b`,
         pm25_alt   = `pm2.5_alt`,
         pm25_aqi   = `pm2.5_aqi`) %>%
  # Optionally, drop the original columns if they are now redundant
  select(time_stamp, humidity_a, temperature_a, pressure_a, 
         pm25_alt_a, pm25_alt_b, sensor_index, sensor_name, 
         pm25_alt, pm25_aqi) %>%
  # Remove rows with missing values (or consider imputation if preferred)
  drop_na()

## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `time_stamp = ymd_hms(time_stamp)`.
## Caused by warning:
## !  950 failed to parse.

# 4. Clean sensor_info data
sensor_info_clean <- sensor_info %>%
  mutate(sensor_index = as.integer(sensor_index),
         # Rename columns to simplify names (remove spaces and special characters)
         height_ft   = `height (ft)`,
         altitude_ft = `altitude (ft)`) %>%
  select(sensor_index, name_on_map, height_ft, altitude_ft)

# 5. Merge the two datasets on sensor_index
combined_data <- left_join(camp_q2_data_clean, sensor_info_clean, by = "sensor_index")

# 6. Inspect the merged data
glimpse(combined_data)

## Rows: 44,676
## Columns: 13
## $ time_stamp    <dttm> 2023-12-31 19:00:00, 2023-12-31 19:30:00, 2023-12-31 20…
## $ humidity_a    <dbl> 37.000, 38.467, 39.000, 39.867, 40.867, 41.933, 42.000, …
## $ temperature_a <dbl> 49.733, 48.933, 48.400, 48.333, 48.067, 48.000, 48.067, …
## $ pressure_a    <dbl> 1011.904, 1011.888, 1011.887, 1011.731, 1011.724, 1011.7…
## $ pm25_alt_a    <dbl> 9.4, 8.0, 8.3, 8.8, 9.0, 9.1, 9.2, 9.7, 11.1, 12.0, 16.1…
## $ pm25_alt_b    <dbl> 13.5, 11.2, 11.2, 10.8, 12.6, 8.2, 9.9, 12.0, 15.7, 15.4…
## $ sensor_index  <int> 178411, 178411, 178411, 178411, 178411, 178411, 178411, …
## $ sensor_name   <chr> "Office", "Office", "Office", "Office", "Office", "Offic…
## $ pm25_alt      <dbl> 11.45, 9.60, 9.75, 9.80, 10.80, 8.65, 9.55, 10.85, 13.40…
## $ pm25_aqi      <dbl> 48, 40, 41, 41, 46, 37, 40, 46, 54, 55, 63, 61, 57, 56, …
## $ name_on_map   <chr> "WE ACT for Environmental Justice Office", "WE ACT for E…
## $ height_ft     <dbl> 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, …
## $ altitude_ft   <dbl> 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 134, 1…

# 7. Optionally, write the cleaned and merged data to a new CSV file
write_csv(combined_data, "combined_cleaned_data.csv")

# ---------------------------
# Global Data Loading
# ---------------------------
# Load merged data from individual CSV files
merged_data <- {
  camp_q2_data <- read_csv("camp_q2_data.csv") %>%
    mutate(
      time_stamp   = ymd_hms(time_stamp),
      sensor_index = as.integer(sensor_index),
      pm25_alt     = `pm2.5_alt`,
      pm25_aqi     = `pm2.5_aqi`
    )
  sensor_info <- read_csv("sensorindex_name_latlong.csv") %>%
    mutate(
      sensor_index = as.integer(sensor_index),
      height_ft    = `height (ft)`,
      altitude_ft  = `altitude (ft)`
    ) %>%
    select(sensor_index, name_on_map, height_ft, altitude_ft, Latitude, Longitude)
  
  left_join(camp_q2_data, sensor_info, by = "sensor_index")
}

## Rows: 45626 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): sensor_name
## dbl  (8): humidity_a, temperature_a, pressure_a, pm2.5_alt_a, pm2.5_alt_b, s...
## dttm (1): time_stamp
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `time_stamp = ymd_hms(time_stamp)`.
## Caused by warning:
## !  950 failed to parse.

## Rows: 11 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): name_on_map
## dbl (5): sensor_index, height (ft), altitude (ft), Latitude, Longitude
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Load the new combined, cleaned dataset
cleaned_data <- read_csv("combined_cleaned_data.csv") %>%
  mutate(
    time_stamp   = ymd_hms(time_stamp),
    sensor_index = as.integer(sensor_index)
    # If necessary, add additional mutations here to ensure the format matches
  )

## Rows: 44676 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr   (2): sensor_name, name_on_map
## dbl  (10): humidity_a, temperature_a, pressure_a, pm25_alt_a, pm25_alt_b, se...
## dttm  (1): time_stamp
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# ---------------------------
# UI
# ---------------------------
ui <- fluidPage(
  titlePanel("Sensor PM2.5 and AQI Dashboard"),
  sidebarLayout(
    sidebarPanel(
      # Data source selector: choose between merged and cleaned datasets
      selectInput(
        inputId = "data_source",
        label   = "Select Data Source:",
        choices = c("Merged Data", "Cleaned Data"),
        selected = "Merged Data"
      ),
      # Sensor selection will update based on chosen data source
      selectInput(
        inputId = "sensor",
        label   = "Select Sensor:",
        choices = NULL,  # Will be updated in the server
        selected = "All"
      ),
      dateRangeInput(
        inputId = "date_range",
        label   = "Select Date Range:",
        start   = NULL,  # Will be updated in the server
        end     = NULL
      )
    ),
    mainPanel(
      tabsetPanel(
        tabPanel("PM2.5 Distribution Map", leafletOutput("pm25_map")),
        tabPanel("AQI Time Series", plotOutput("aqi_plot"))
      )
    )
  )
)

# ---------------------------
# Server
# ---------------------------
server <- function(input, output, session) {
  
  # Reactive: Return the selected dataset
  selected_data <- reactive({
    if (input$data_source == "Merged Data") {
      merged_data
    } else {
      cleaned_data
    }
  })
  
  # Update UI inputs (sensor list and date range) when the data source changes
  observe({
    data <- selected_data()
    sensor_choices <- c("All", sort(unique(data$name_on_map)))
    updateSelectInput(session, "sensor", choices = sensor_choices, selected = "All")
    
    updateDateRangeInput(session, "date_range",
                         start = min(as.Date(data$time_stamp), na.rm = TRUE),
                         end   = max(as.Date(data$time_stamp), na.rm = TRUE))
  })
  
  # Reactive: Filter data based on sensor selection and date range
  filtered_data <- reactive({
    req(input$date_range)
    
    data <- selected_data() %>%
      filter(as.Date(time_stamp) >= input$date_range[1],
             as.Date(time_stamp) <= input$date_range[2])
    
    if (input$sensor != "All") {
      data <- data %>% filter(name_on_map == input$sensor)
    }
    data
  })
  
  # Reactive: Summarize data per sensor for mapping
  sensor_summary <- reactive({
    filtered_data() %>%
      group_by(sensor_index, name_on_map, Latitude, Longitude) %>%
      summarize(
        avg_pm25 = mean(pm25_alt, na.rm = TRUE),
        avg_aqi  = mean(pm25_aqi, na.rm = TRUE),
        .groups  = "drop"
      )
  })
  
  # Leaflet map for PM2.5 distribution
  output$pm25_map <- renderLeaflet({
    summary_df <- sensor_summary()
    
    leaflet(summary_df) %>%
      addTiles() %>%
      addCircleMarkers(
        lng         = ~Longitude,
        lat         = ~Latitude,
        radius      = ~avg_pm25,  # Marker size proportional to average PM2.5
        color       = "blue",
        fillOpacity = 0.7,
        popup       = ~paste0("<strong>", name_on_map, "</strong><br>",
                              "Avg PM2.5: ", round(avg_pm25, 2), "<br>",
                              "Avg AQI: ", round(avg_aqi, 2))
      )
  })
  
  # Time series plot for AQI changes over time
  output$aqi_plot <- renderPlot({
    filtered_data() %>%
      ggplot(aes(x = time_stamp, y = pm25_aqi, color = name_on_map)) +
      geom_line() +
      labs(
        title  = "AQI Changes Over Time",
        x      = "Time",
        y      = "AQI",
        color  = "Sensor"
      ) +
      theme_minimal()
  })
}

# ---------------------------
# Run the Shiny App if in interactive mode
# ---------------------------
if (interactive()) {
  shinyApp(ui = ui, server = server)
}

result section

Mingye

2025-04-04