Code Set-Up


#Example: 'person' is simulated data, conv stores some basic known values we want to use for quantification conversions, rest is done with mutate.

person <- read_xlsx(here("GO_Conversions.xlsx"), sheet=3)
conv <- read_xlsx(here("GO_Conversions.xlsx"), sheet=2)

quant <- coalesce_join(person, conv, by = 'type')

#Conversion Mutations

quant %>% mutate(standby_hours_dy = 8 - active_hours_dy,
                 kwh_active_dy = ((active_watts*active_hours_dy)/1000),
                 kwh_standby_dy = ((standby_watts*standby_hours_dy)/1000),
                 kwh_dy = kwh_active_dy+kwh_standby_dy,
                 kwh_wk = ifelse(type == c("incandescent", "halogen", "cfl", "led", "computer", "monitor", "personal_printer","other_elec"), kwh_dy*5, 
                          ifelse(type == "glass",
                      ((`weight (lbs)`*value_wk)/2000)*kwh_per_ton,
                          ifelse(type == "aluminum",
                      ((`weight (lbs)`*value_wk)/2000)*kwh_per_ton, #this can be cleaned with an '%in%'
                          ifelse(type == "plastic",
                      ((`weight (lbs)`*value_wk)/2000)*kwh_per_ton,
                          ifelse(type == "cardboard",
                      ((`weight (lbs)`*value_wk)/2000)*kwh_per_ton,
                          ifelse(type == "paper",
                      ((`weight (lbs)`*value_wk)/2000)*kwh_per_ton, NA))))))) -> quant

quant %>% mutate(gallons_oil_wk = kwh_wk/43.9,
                 kg_CO2_wk = (((gallons_oil_wk/42)*0.43)*1000)) -> quant
#Psuedo-real example. Using goquestidy, which is aggregated data, to get net impact in a hypothetical year if all participants took survey at same time. Need to use goques with a loop over month/year of entry to get over-time calculations.

#Relevant Questions:
#Count of lightbulbs and kind of lightbulbs
#"All of my task lighting (including floor and desk lamps) uses either LED or CFL, but no incandescent light bulbs."
#-> If Yes, Will just assume they are all using 2 LEDs. If Action Needed, will just assume using 2 incandescents.

#Wattage of computer and monitor and personal printer and “other electronics”
#"My computer monitor is set to sleep after 10 minutes or less"
#-> If Yes, do (8 - assumed 6 active_hours_dy) + .166 (ten minutes) = active_hours_dy and standby_hours_dy = 0

#"I shut off my personal printer when not in use"
#-> If Yes, standby_hours_dy = 0

#Amount of recycling for each material
#"I recycle all bottles and cans that I come across in the office."
#-> If Yes, will just assume 1 plastic bottle /wk

#"I recycle all cardboard that I come across in the office."
#-> If Yes, will just assume 1 cardboard box / wk

#Amount of paper used
#"I recycle all office paper."
#-> If Yes, value_wk for type == "paper" stays same, ifelse, value_wk for type == "paper" is "0".

#Note that I still need to develop columns that captures usage independent of sustainable action. Number of pieces of paper used, bottles drunk, etc. Because of the large amount of assumptions I'm using here, I'm sticking with quantities that match the sustainability questions. For example, "I recycle all paper" the value_wk is pieces of paper recycled. Not making a claim on paper used by those who don't do this.
goques %>% filter(question == "All of my task lighting (including floor and desk lamps) uses either LED or CFL, but no incandescent light bulbs." |
                      question == "My computer monitor is set to sleep after 10 minutes or less." |
                      question == "I shut off my personal printer when not in use." |
                      question == "I recycle all bottles and cans that I come across in the office." |
                      question ==  "I recycle all cardboard that I come across in the office." |
                      question == "I recycle all office paper.") %>%
  select(`NU ID`, question, section, answer_1) -> goquant

#Adding columns from 'person' simulated data
goquant[c("type", "value", "value_wk", "active_watts", "standby_watts", "active_hours_dy", "standby_hours_dy")] <- NA

#lightbulbs
goquant %>% mutate(type = ifelse(question == "All of my task lighting (including floor and desk lamps) uses either LED or CFL, but no incandescent light bulbs." & answer_1 == "Yes", "led", 
            ifelse(question == "All of my task lighting (including floor and desk lamps) uses either LED or CFL, but no incandescent light bulbs." & answer_1 == "Action Needed", "incandescent", NA))) -> goquant

goquant %>% mutate(value = ifelse(type == "led", 2, 
                           ifelse(type == "incandescent", 2, type))) -> goquant

goquant %>% mutate(active_watts = ifelse(type == "led", 9, 
                                  ifelse(type == "incandescent", 60, active_watts))) -> goquant
goquant %>% mutate(standby_watts = ifelse(type == "led", 0, 
                                  ifelse(type == "incandescent", 0, standby_watts))) -> goquant

goquant %>% mutate(active_hours_dy = ifelse(question == "All of my task lighting (including floor and desk lamps) uses either LED or CFL, but no incandescent light bulbs.", 8, active_hours_dy)) -> goquant
goquant %>% mutate(standby_hours_dy = ifelse(question == "All of my task lighting (including floor and desk lamps) uses either LED or CFL, but no incandescent light bulbs.", 8- active_hours_dy, standby_hours_dy)) -> goquant

#computer monitor
goquant %>% mutate(type = ifelse(question == "My computer monitor is set to sleep after 10 minutes or less.","monitor", type)) -> goquant

goquant %>% mutate(active_watts = ifelse(type == "monitor", 45, active_watts)) -> goquant
goquant %>% mutate(standby_watts = ifelse(type == "monitor", 4, standby_watts)) -> goquant
goquant %>% mutate(active_hours_dy = ifelse(type == "monitor", 6, active_hours_dy)) -> goquant

goquant %>% mutate(standby_hours_dy = ifelse(type == "monitor" & answer_1 == "Yes", 0, 
                            ifelse(type == "monitor" & answer_1 == "Action Needed", 8-active_hours_dy, standby_hours_dy))) -> goquant

#personal printer (here need to account for whether they have one or not)
goquant %>% mutate(type = ifelse(question == "I shut off my personal printer when not in use.", "printer", type)) -> goquant

goquant %>% mutate(active_watts = ifelse(type == "printer" & answer_1 != "N/A", 40, 
                                  ifelse(type == "printer" & answer_1 == "N/A", 0, active_watts))) -> goquant
                   
goquant %>% mutate(standby_watts = ifelse(type == "printer" & answer_1 != "N/A", 4, 
                                  ifelse(type == "printer" & answer_1 == "N/A", 0, standby_watts))) -> goquant                   
goquant %>% mutate(active_hours_dy = ifelse(type == "printer" & answer_1 != "N/A", 1,
                                     ifelse(type == "printer" & answer_1 == "N/A", 0, active_hours_dy))) -> goquant

goquant %>% mutate(standby_hours_dy = ifelse(type == "printer" & answer_1 == "Yes",
                                             0, 
                   ifelse(type == "printer" & answer_1 == "Action Needed", 
                          8-active_hours_dy, 
                   ifelse(type == "printer" & answer_1 == "N/A", 0, standby_hours_dy)))) -> goquant

#Bottles and Cans Recycling
goquant %>% mutate(type = ifelse(question == "I recycle all bottles and cans that I come across in the office.", "plastic", type)) -> goquant

goquant %>% mutate(value_wk = ifelse(type == "plastic", 1, value_wk)) -> goquant

#Cardboard Recycling
goquant %>% mutate(type = ifelse(question == "I recycle all cardboard that I come across in the office.", "cardboard", type)) -> goquant

goquant %>% mutate(value_wk = ifelse(type == "cardboard", 1, value_wk)) -> goquant

#Paper Recycling
goquant %>% mutate(type = ifelse(question == "I recycle all office paper.", "paper", type)) -> goquant

goquant %>% mutate(value_wk = ifelse(type == "paper" & answer_1 == "Yes" , 25, value_wk)) -> goquant
quant <- coalesce_join(goquant, conv, by = 'type')

quant %>% mutate(kwh_active_dy = ((active_watts*active_hours_dy)/1000),
                 kwh_standby_dy = ((standby_watts*standby_hours_dy)/1000),
                 kwh_dy = kwh_active_dy+kwh_standby_dy,
                 kwh_wk = ifelse(type == c("incandescent", "led", "monitor","printer"), kwh_dy*5, NA)) -> quant

quant %>% mutate(kwh_wk = ifelse(type == "paper",
                      ((`weight (lbs)`*value_wk)/2000)*kwh_per_ton,
                          ifelse(type == "plastic",
                      ((`weight (lbs)`*value_wk)/2000)*kwh_per_ton, #this can be cleaned with an '%in%'
                          ifelse(type == "cardboard", 
                      ((`weight (lbs)`*value_wk)/2000)*kwh_per_ton, kwh_dy*5)))) -> quant

quant %>% mutate(gallons_oil_wk = kwh_wk/43.9,
                 kg_CO2_wk = (((gallons_oil_wk/42)*0.43)*1000)) -> quant

Quantifiying Environmental Impact of the Green Office Challenge.

The following quantifies the (theoretical) environmental impact of six questions from the Green Office Challenge. It is a combination of reasonable assumptions (e.g. watts for an average computer monitor) with Green Office survey data (e.g. number of people who say “Yes” to having their monitor set to sleep after 10 minutes). A number of conversions were then calculated to estimate the impact on Kilowatt Hours of Electricity, Gallons of Oil, and Kilograms of CO2 for a 48-week year

The survey data contains 483 participants and is subset to just their first survey. The resultant calculations are, in effect, the theoretical environmental impact over one year if all 483 participants took the survey at the same time.


Dataframes


Below is the dataframe (Table 1) for which all following visualizations are referenced. To dive deeper into how it was created, select “Show All Code” underneath the “Code” button in the top-right of this document.

Each row corresponds to a unique question and answer_1 combination. The other columns illustrate the various assumptions used in this project. For example, in the question, “All of my task lighting (including floor and desk lamps) uses either LED or CFL, but no incandescent light bulbs.”, when the column “answer_1” is “Yes” the column “type” has a value of “led” and when the column “answer_1” is “Action Needed”, the column “type” has a value of “incandescent”. Furthermore, the “value” column for those lightbulb types are both “2”, indicating that all other calculations for that question assumes a participant has either 2 incandescent or led lightbulbs in their workstation.

Several of the other questions also use similar simplyifing assumptions, given that the actual data is unknown. For example, in the question “I recycle all bottles and cans that I come across in the office.”, I assume the “type” of recycled object to be a plastic bottle and that participants who answered “Yes” recycle just 2 of these a week.

Gathering the actual information from all previous Green Office participants will allow much more precise and in-depth calculations for the questions in this project and several others from the larger Green Office Survey.

All other data assumptions (e.g. weight (lbs) per type of recyclable material) or conversions (e.g. kwh_wk to gallons_oil_wk) come from authoritative sources such as the EPA.

Table 1:


quant %>% select(-`NU ID`) -> quant_lite
quant_lite[!duplicated(quant_lite),]
NA
NA

Supplementing Table 1, this dataframe (Table 1) shows the counts of different answers for each of the six questions examined in this project. These counts were used to calculate the environmental impact of those who do take the reference action (i.e. “Yes”) and those who do not (i.e. “Action Needed”).

Table 2:


goquestidy %>% select(question, `Action Needed`, `N/A`, Yes) %>% ungroup() %>%
semi_join(., quant %>% select(question)) %>% filter(surveyorder==1) %>% select(-surveyorder)
NA


Technological Adjustment Questions and Visualizations


Three Technology-related questions from the Green Office Survey lend themselves to quantification:

  1. “All of my task lighting (including floor and desk lamps) uses either LED or CFL, but no incandescent light bulbs.”
  2. “I shut off my personal printer when not in use.”
  3. “My computer monitor is set to sleep after 10 minutes or less.”

The key assumptions for these questions are the Watts of electricity used when a device is active and in standby as well as the number of hours for each. This allows for the calculation of Kilowatt hours per day and by extension Kilowatt hours per week and per year. The exact assumptions used can be seen in Table 1, depending on the type of device and the answer given to the question.

Below is the visualized impact of each of these “Technological Adjustment” questions from the assumptions in Table 1 relative to the count of answer type from Table 2:


Recycling Questions and Visualizations


Three Recycling-related questions from the Green Office Survey lend themselves to quantification:

  1. “I recycle all bottles and cans that I come across in the office.”
  2. “I recycle all cardboard that I come across in the office.”
  3. “I recycle all office paper.”

The key assumptions for these questions are the type of recycled material and the quantity recycled in a week. The exact assumptions used can be seen in Table 1. Where type is “plastic”, “cardboard”, or "paper, weight (lbs) is based on EPA data for a 20 fl oz bottle, a 2’ by 2’ by 2’ cardboard box, 3mm, and a standard sheet of printer paper, respectively.

Below is the visualized impact of each of these “Recycling” questions from the assumptions in Table 1 relative to the count of answer type from Table 2:


Time Series


Below is a time series visualization of the environmental impact Green Office Participants made each week since the program began.

goques2 %>% filter(question == "All of my task lighting (including floor and desk lamps) uses either LED or CFL, but no incandescent light bulbs." |
                      question == "My computer monitor is set to sleep after 10 minutes or less." |
                      question == "I shut off my personal printer when not in use." |
                      question == "I recycle all bottles and cans that I come across in the office." |
                      question ==  "I recycle all cardboard that I come across in the office." |
                      question == "I recycle all office paper.") %>%
  select(`NU ID`, question, section, answer_1, date, month, year) -> goquant

#Adding columns from 'person' simulated data
goquant[c("type", "value", "value_wk", "active_watts", "standby_watts", "active_hours_dy", "standby_hours_dy")] <- NA

#lightbulbs
goquant %>% mutate(type = ifelse(question == "All of my task lighting (including floor and desk lamps) uses either LED or CFL, but no incandescent light bulbs." & answer_1 == "Yes", "led", 
            ifelse(question == "All of my task lighting (including floor and desk lamps) uses either LED or CFL, but no incandescent light bulbs." & answer_1 == "Action Needed", "incandescent", NA))) -> goquant

goquant %>% mutate(value = ifelse(type == "led", 2, 
                           ifelse(type == "incandescent", 2, type))) -> goquant

goquant %>% mutate(active_watts = ifelse(type == "led", 9, 
                                  ifelse(type == "incandescent", 60, active_watts))) -> goquant
goquant %>% mutate(standby_watts = ifelse(type == "led", 0, 
                                  ifelse(type == "incandescent", 0, standby_watts))) -> goquant

goquant %>% mutate(active_hours_dy = ifelse(question == "All of my task lighting (including floor and desk lamps) uses either LED or CFL, but no incandescent light bulbs.", 8, active_hours_dy)) -> goquant
goquant %>% mutate(standby_hours_dy = ifelse(question == "All of my task lighting (including floor and desk lamps) uses either LED or CFL, but no incandescent light bulbs.", 8- active_hours_dy, standby_hours_dy)) -> goquant

#computer monitor
goquant %>% mutate(type = ifelse(question == "My computer monitor is set to sleep after 10 minutes or less.","monitor", type)) -> goquant

goquant %>% mutate(active_watts = ifelse(type == "monitor", 45, active_watts)) -> goquant
goquant %>% mutate(standby_watts = ifelse(type == "monitor", 4, standby_watts)) -> goquant
goquant %>% mutate(active_hours_dy = ifelse(type == "monitor", 6, active_hours_dy)) -> goquant

goquant %>% mutate(standby_hours_dy = ifelse(type == "monitor" & answer_1 == "Yes", 0, 
                            ifelse(type == "monitor" & answer_1 == "Action Needed", 8-active_hours_dy, standby_hours_dy))) -> goquant

#personal printer (here need to account for whether they have one or not)
goquant %>% mutate(type = ifelse(question == "I shut off my personal printer when not in use.", "printer", type)) -> goquant

goquant %>% mutate(active_watts = ifelse(type == "printer" & answer_1 != "N/A", 40, 
                                  ifelse(type == "printer" & answer_1 == "N/A", 0, active_watts))) -> goquant
                   
goquant %>% mutate(standby_watts = ifelse(type == "printer" & answer_1 != "N/A", 4, 
                                  ifelse(type == "printer" & answer_1 == "N/A", 0, standby_watts))) -> goquant                   
goquant %>% mutate(active_hours_dy = ifelse(type == "printer" & answer_1 != "N/A", 1,
                                     ifelse(type == "printer" & answer_1 == "N/A", 0, active_hours_dy))) -> goquant

goquant %>% mutate(standby_hours_dy = ifelse(type == "printer" & answer_1 == "Yes",
                                             0, 
                   ifelse(type == "printer" & answer_1 == "Action Needed", 
                          8-active_hours_dy, 
                   ifelse(type == "printer" & answer_1 == "N/A", 0, standby_hours_dy)))) -> goquant

#Bottles and Cans Recycling
goquant %>% mutate(type = ifelse(question == "I recycle all bottles and cans that I come across in the office.", "plastic", type)) -> goquant

goquant %>% mutate(value_wk = ifelse(type == "plastic", 1, value_wk)) -> goquant

#Cardboard Recycling
goquant %>% mutate(type = ifelse(question == "I recycle all cardboard that I come across in the office.", "cardboard", type)) -> goquant

goquant %>% mutate(value_wk = ifelse(type == "cardboard", 1, value_wk)) -> goquant

#Paper Recycling
goquant %>% mutate(type = ifelse(question == "I recycle all office paper.", "paper", type)) -> goquant

goquant %>% mutate(value_wk = ifelse(type == "paper" & answer_1 == "Yes" , 25, value_wk)) -> goquant
quant <- coalesce_join(goquant, conv, by = 'type')

quant %>% mutate(kwh_active_dy = ((active_watts*active_hours_dy)/1000),
                 kwh_standby_dy = ((standby_watts*standby_hours_dy)/1000),
                 kwh_dy = kwh_active_dy+kwh_standby_dy,
                 kwh_wk = ifelse(type == c("incandescent", "led", "monitor","printer"), kwh_dy*5, NA)) -> quant

quant %>% mutate(kwh_wk = ifelse(type == "paper",
                      ((`weight (lbs)`*value_wk)/2000)*kwh_per_ton,
                          ifelse(type == "plastic",
                      ((`weight (lbs)`*value_wk)/2000)*kwh_per_ton, #this can be cleaned with an '%in%'
                          ifelse(type == "cardboard", 
                      ((`weight (lbs)`*value_wk)/2000)*kwh_per_ton, kwh_dy*5)))) -> quant

quant %>% mutate(gallons_oil_wk = kwh_wk/43.9,
                 kg_CO2_wk = (((gallons_oil_wk/42)*0.43)*1000)) -> quant

Conclusion

This project emphasizes the potential for the Green Office Challenge to be a major facet of Northeastern University’s Sustainability and Climate goals. The environmental impact of these six questions is significant even under conservative assumptions. By gathering more accurate data in place of these assumptions, we can demonstrate the impact of this program at the individual, department, and campus level across a much broader variety of environmental impacts. This information can play a central role in helping participants of the Green Office Challenge understand the impact of their efforts and commit to taking further action.

---
title: "Green Office Quantification Demo- Sage Gibbons"
output: 
  html_notebook: 
    toc: yes
    code_folding: hide
    fig_width: 1
    fig_height: 2
  html_document: 
    toc: yes
  pdf_document:
    toc: yes
    toc_depth: '3'
---

<style type="text/css">

body{ /* Normal  */
      font-size: 14px;
  }
td {  /* Table  */
  font-size: 12px;
}
h1.title {
  font-size: 18px;
}
h1 { /* Header 1 */
  font-size: 18px;
  color: DarkBlue;
}
h2 { /* Header 2 */
    font-size: 16px;
  color: DarkBlue;
}
}
h3 { /* Header 3 */
    font-size: 14px;
  color: DarkBlue;
}
code.r{ /* Code block */
    font-size: 12px;
}
pre { /* Code block - determines code spacing between lines */
    font-size: 14px;
}
</style>

# Code Set-Up


```{r Libraries and Data, echo=FALSE, warning=FALSE, message=FALSE, results='hide', comment=""}
library(tidyverse)
library(here)
library(dplyr)
library(readxl)
library(ggplot2)
library(lubridate)
library(lemon)
library(ggthemes)
library(tidytext)
library(stringr)

go <- read_xlsx(here("greenoffice_2016_fall2019.xlsx"), sheet=1) #Aggregated Survey Score Data

surv <- read_xlsx(here("GreenOfficeSurveys.xlsx")) #Individual Survey Response Data
```


```{r gosurv, echo=FALSE, message=FALSE, warning=FALSE, comment="", results='hide'}
#gosurv (master dataframe. Each row corresponds to a unique Survey ID)
####Cleaning, Joining, Cleaning####

#NAs, removing rows
surv <- surv[which(!is.na(surv$`I turn off office lights when rooms are unoccupied.`)),]

##Date Columns
#Convert character to POSIXct
surv$`Date Created` <- as.POSIXct(surv$`Date Created`, format= "%m/%d/%y")

#Creating date column (no time)
go <- go %>%mutate(date = format(`Date Created`, "%m/%d/%y"))
surv <- surv %>%mutate(date = format(`Date Created`, "%m/%d/%y"))

#Removing original Date column so go and surv can join properly
go <- go %>% select(-`Date Created`)
surv <- surv %>% select(-`Date Created`)

#setting certification levels
go$`Certification Level` <- factor(go$`Certification Level`, levels = c("BRONZE", "SILVER", "GOLD"))

#Joining
gosurv <- left_join(go, surv)

#There are 189 rows in surv not in go. Need to work with those to integrate.
surv_stragg <- anti_join(surv, go) #straggling data

##Adjusting Date Columns after Join
#converting back into POSIXct for easier manipulation later
gosurv$date <- as.POSIXct(gosurv$date, format= "%m/%d/%y")

#adding month and year columns
gosurv <- gosurv %>%
mutate(month = format(date, "%m"), year = format(date, "%Y"))

#Adding the "surveyorder" column and relocating. Values are 1-4 for the order a survey is for a given user by date.
gosurv %>% 
  group_by(`NU ID`) %>%
  arrange(`NU ID`, date) %>%
  mutate(surveyorder = row_number()) %>% 
  relocate(., date, .after =`Survey ID`) %>%
  relocate(., surveyorder, .after=`Survey ID`) %>%
  ungroup() -> gosurv

#renaming
gosurv <- gosurv %>% rename(Department = `Department Name`)
gosurv <- gosurv %>% rename(., c(Cert = `Certification Level`, Perc = Percentage))
gosurv <- gosurv %>% rename(., survey_ID = `Survey ID`)

#relocating
gosurv <- relocate(gosurv, month, .after= date)
gosurv <- relocate(gosurv, year, .after= month)

gosurv %>% 
  relocate(., Perc, .after = surveyorder) %>%
  relocate(., Cert, .after = Perc) %>%
  relocate(., `NU ID`, .before = Requestor) %>%
  select(-Certification) -> gosurv

#Creating answer counts per Survey ID
#note that I leaving out the eco-ambassador question
gosurv %>% mutate(cnt_yes = rowSums(gosurv[, c(16:57)] == 'Yes', na.rm = T),
                  cnt_actionneeded = rowSums(gosurv[, c(16:57)] == 'Action Needed', na.rm = T),
                  cnt_na = rowSums(gosurv[, c(16:57)] == 'N/A', na.rm = T),
                  cnt_no = rowSums(gosurv[, c(16:57)] == 'No', na.rm = T)) -> gosurv

gosurv$surveyorder <- as.character(gosurv$surveyorder)

```


```{r gotidy, echo=FALSE, warning=FALSE, message=FALSE, results='hide', comment=""}
#gotidy (Derived from gosurv. Each row corresponds to a unique NU ID (person). Useful for analyzing position and department changes in relation to answer counts and survey results.)

####Creating "gotidy" where every row corresponds to a unique `NU ID`####

#Widening Perc, Cert, and cnt_X columns columns to capture scores across recertifications.
gosurv %>%
  pivot_wider(., 
           id_cols=c(`NU ID`, surveyorder, Perc, 
                     Cert, cnt_yes, cnt_actionneeded, cnt_na, cnt_no), 
                     names_from=surveyorder,
                     values_from=c(Perc, Cert, cnt_yes, cnt_actionneeded, cnt_na, cnt_no)) -> gotidy

#Should be true, indicating we have a unique NU ID per row in gotidy
gosurv %>% filter(!duplicated(`NU ID`)) %>% nrow() == nrow(gotidy)

#Widening Positions and Departments to capture changes across recertifications
gosurv %>% #tracking positions and department changes over certs
  pivot_wider(., 
           id_cols=c(`NU ID`, surveyorder, Position, Department),
                       names_from=surveyorder,
                       values_from=c(Position, Department)) %>% 
  ungroup() %>% #critical
  #creating temporary TRUE/FALSE columns of positions and departments uniquely
  mutate(pos_change12 = ifelse(Position_1 != Position_2, "TRUE", "FALSE")) %>%
  mutate(dep_change12 = ifelse(Department_1 != Department_2, "TRUE", "FALSE")) %>%
  mutate(pos_change23 = ifelse(Position_2 != Position_3, "TRUE", "FALSE")) %>%
  mutate(dep_change23 = ifelse(Department_2 != Department_3, "TRUE", "FALSE")) %>%
  #combining position and department changes into four values for 1st to 2nd survey (posdep1_2) and 2nd to 3rd survey (posdep2_3). Survey 3 to 4 is left out because only one individual took it 4 times and did not change.
  mutate(posdep1_2 = ifelse(pos_change12 == "TRUE" & dep_change12 == "TRUE", "both",
                    ifelse(pos_change12 == "TRUE" & dep_change12 == "FALSE", "position_change",
                    ifelse(pos_change12 == "FALSE" & dep_change12 == "TRUE", "department_change",
                    ifelse(pos_change12 == "FALSE" & dep_change12 == "FALSE", "no_change", NA))))) %>%
  mutate(posdep2_3 = ifelse(pos_change23 == "TRUE" & dep_change23 == "TRUE", "both",
                    ifelse(pos_change23 == "TRUE" & dep_change23 == "FALSE", "position_change",
                    ifelse(pos_change23 == "FALSE" & dep_change23 == "TRUE", "department_change",
                    ifelse(pos_change23 == "FALSE" & dep_change23 == "FALSE", "no_change", NA))))) %>%
  #removing temporary TRUE/FALSE columms and relocating
  select(-c(pos_change12, dep_change12, pos_change23, dep_change23)) %>%
  relocate(., c(posdep1_2, posdep2_3), .before=Position_1) -> posdepchange

left_join(gotidy, posdepchange) -> gotidy


```


```{r goques, echo=FALSE, warning=FALSE, message=FALSE, results='hide', comment="" }
#goques (Derived from gosurv. Large, non-tidy dataframe useful as a starting place for dataframes that analyze questions, answers, certifications, across survey waves)
gosurv %>% pivot_longer(c(16:58), names_to="question", values_to="answer") %>%
  pivot_wider(., 
           id_cols=c(`NU ID`, surveyorder, question, answer), 
                       names_from=surveyorder,
                       values_from=c(answer),
                       names_prefix="answer_") -> goques

left_join(goques, gotidy) -> goques

#removing unnecessary columns (only one person took the survey four times and did not change department or position)
goques <- goques %>% select(-c(Perc_4, Cert_4, answer_4, Position_4, Department_4))

#Creating columns "actiontaken1_2" and "actiontaken3_4" which capture how answers to questions changed or stayed stable across recertifications 
goques %>%
  mutate(actiontaken1_2 = 
  ifelse(answer_1 == "Action Needed" & answer_2 == "Yes", "improved",
  ifelse(answer_1 == "Yes" & answer_2 == "Action Needed", "regressed",
  ifelse(answer_1 == "Yes" & answer_2 == "Yes", "yes_stable",
  ifelse(answer_1 == "Action Needed" & answer_2 == "Action Needed", "action_still_needed", 
  ifelse(answer_1 == "N/A" & answer_2 == "N/A", "not_applicable", NA)))))) %>%
  mutate(actiontaken2_3 =
  ifelse(answer_2 == "Action Needed" & answer_3 == "Yes", "improved",
  ifelse(answer_2 == "Yes" & answer_3 == "Action Needed", "regressed",
  ifelse(answer_2 == "Yes" & answer_3 == "Yes", "yes_stable",
  ifelse(answer_2 == "Action Needed" & answer_3 == "Action Needed", "action_still_needed", 
  ifelse(answer_2 == "N/A" & answer_3 == "N/A", "not_applicable", NA)))))) %>%
  relocate(., c(actiontaken1_2, actiontaken2_3), .after=answer_3) -> goques

#Adding question categories
goques %>% ungroup() %>% select(question) %>% filter(!duplicated(question)) %>%
  mutate(row = row_number()) %>%
  mutate(section = ifelse(between(row, 1, 11), "energy", 
                   ifelse(row == 12, "misc",
                   ifelse(between(row, 13,25), "waste",
                   ifelse(between(row, 26, 32), "recycling",
                   ifelse(between(row, 33, 35), "transportation",
                   ifelse(between(row, 36, 42), "purchasing",
                   ifelse(row == 43, "misc",NA)))))))) -> questions

questions %>% select(-row) %>% left_join(goques, .) %>% relocate(., section, .after=question) -> goques
```


```{r goquestidy, echo=FALSE, warning=FALSE, message=FALSE, results='hide', comment=""}
#goquestidy (Derived from goques. Tidy dataframe that organizes question-answer combos across surveywaves.

goques %>% group_by(question, answer_1) %>% count() %>% 
  pivot_wider(., 
          id_cols=c(question, answer_1),
          names_from=(answer_1),
          values_from=c(n)) %>% 
  mutate(surveyorder = "1") -> goques1

goques %>% group_by(question, answer_2) %>% filter(!is.na(answer_2)) %>% count() %>% 
  pivot_wider(., 
          id_cols=c(question, answer_2),
          names_from=(answer_2),
          values_from=c(n)) %>% 
  mutate(surveyorder = "2")-> goques2

goques %>% group_by(question, answer_3) %>% filter(!is.na(answer_3)) %>% count() %>% 
  pivot_wider(., 
          id_cols=c(question, answer_3),
          names_from=(answer_3),
          values_from=c(n)) %>% 
  mutate(surveyorder = "3")-> goques3

goquestidy <- full_join(goques1, goques2)

goquestidy <- full_join(goquestidy, goques3)

#Calculating question-answer combos as a proportion of people who took the survey in a given wave. Within a row, proportions are equivalent to counts, but across survey waves, they are normalizing in comparison to raw counts.

#Getting counts of people in each surveyorder wave.
gosurv %>% group_by(surveyorder) %>% count() -> surveyorderpeople
surveyorderpeople$surveyorder <- as.character(surveyorderpeople$surveyorder)

goquestidy %>% group_by(surveyorder) %>% 
  left_join(., surveyorderpeople) %>%
  mutate(an_prop = (round(((`Action Needed`/n)*100),2)),
         na_prop = (round(((`N/A`/n)*100),2)),
         yes_prop = (round(((Yes/n)*100),2)),
         no_prop = (round(((No/n)*100),2))) %>%
  select(-n) -> goquestidy

#Adding question sections
questions %>% select(-row) %>% left_join(goquestidy, .) %>% relocate(., section, .after=question) -> goquestidy

```


```{r goact, echo=FALSE, warning=FALSE, message=FALSE, results='hide', comment=""}
#goact (Derived from goques. An aggregation of goquestidy to create the latent construct "action_taken").

goques %>% filter(!is.na(actiontaken1_2)) %>% 
           group_by(question, actiontaken1_2) %>% 
           count() %>% 
  pivot_wider(., 
          id_cols=c(question, actiontaken1_2),
          names_from=(actiontaken1_2),
          values_from=c(n)) %>% 
  mutate(surveyorder = "1 to 2") -> goact1_2

goques %>% filter(!is.na(actiontaken2_3)) %>% 
           group_by(question, actiontaken2_3) %>% 
           count() %>% 
  pivot_wider(., 
          id_cols=c(question, actiontaken2_3),
          names_from=(actiontaken2_3),
          values_from=c(n)) %>% 
  mutate(surveyorder = "2 to 3") -> goact2_3

goact <- full_join(goact1_2,  goact2_3)

#Calculating question-answer combos as a proportion of people who took the survey across waves 1 to 2 and 2 to 3. Within a row, proportions are equivalent to counts, but across survey waves, they are normalizing in comparison to raw counts.

#Getting counts of people in each surveyorder wave and relabeling surveyorder values to better indicate it is a combination of 1 and 2 or 2 and 3.
gosurv %>% filter(surveyorder == 2 | surveyorder == 3) %>% 
  group_by(surveyorder) %>% 
  count() %>%
  mutate_at(vars(surveyorder), 
    list(~case_when(
      . == "2" ~ "1 to 2",
      . == "3"  ~ "2 to 3",
      TRUE ~ "NA"))) -> surveyorderpeople2

goact %>% group_by(surveyorder) %>% 
  left_join(., surveyorderpeople2) %>%
  mutate(asn_prop = (round(((action_still_needed/n)*100),2)),
         na_prop = (round(((not_applicable/n)*100),2)),
         reg_prop = (round(((regressed/n)*100),2)),
         yes_prop = (round(((yes_stable/n)*100),2))) %>%
  select(-n) -> goact

#Adding question sections
questions %>% select(-row) %>% left_join(goact, .) %>% relocate(., section, .after=question) -> goact

```


```{r goquescert, echo=FALSE, warning=FALSE, message=FALSE, results='hide', comment=""}
#goquescert (Derived from goques. A version of goquestidy that disaggregates answer counts by certification for comparison within and across survey waves. Note that there are only 4 "bronze" certification levels in surveyorder 2).

#Count of Cert_1 levels Keeping ques column for later join.
goques %>% group_by(question, Cert_1) %>% count() %>% rename(cert_cnt = n) -> surv1prop
 
#Count of answers & Cert_1 levels for each question, divided by total certifications for proportion.
goques %>% group_by(question, answer_1, Cert_1) %>% count() %>% rename(answercert_cnt = n) %>%
  left_join(surv1prop) %>% mutate(certprop = (round(((answercert_cnt/cert_cnt)*100),2))) %>% 
  rename(answer = answer_1, Cert = Cert_1) %>% mutate(surveyorder = "1") -> ques1

#Count of Cert_2 levels Keeping ques column for later join.
goques %>% group_by(question, Cert_2) %>% count() %>% rename(cert_cnt = n) -> surv2prop
 
#Count of answers & Cert_2 levels for each question, divided by total certifications for proportion.
goques %>% group_by(question, answer_2, Cert_2) %>% count() %>% rename(answercert_cnt = n) %>%
  left_join(surv2prop) %>% mutate(certprop = (round(((answercert_cnt/cert_cnt)*100),2))) %>% 
  rename(answer = answer_2, Cert = Cert_2) %>% mutate(surveyorder = "2") %>% filter(!is.na(Cert)) -> ques2


#Ranking ques1 and ques2 question-answer proportions by cert level
####ques1 ranking####
ques1 %>% 
  ungroup() %>% filter(Cert == "BRONZE") %>%
    group_by(answer) %>%
    mutate(qarank = order(order(answer, certprop, decreasing=TRUE))) %>%
    arrange(answer, qarank) -> bronzerank1

ques1 %>%
  ungroup() %>% filter(Cert == "SILVER") %>%
    group_by(answer) %>%
    mutate(qarank = order(order(answer, certprop, decreasing=TRUE))) %>%
    arrange(answer, qarank) -> silverrank1

ques1 %>%
  ungroup() %>% filter(Cert == "GOLD") %>%
    group_by(answer) %>%
    mutate(qarank = order(order(answer, certprop, decreasing=TRUE))) %>%
    arrange(answer, qarank)-> goldrank1

full_join(bronzerank1, silverrank1) %>% full_join(., goldrank1) -> ques1

####ques2 ranking####
ques2 %>% 
  ungroup() %>% filter(Cert == "BRONZE") %>%
    group_by(answer) %>%
    mutate(qarank = order(order(answer, certprop, decreasing=TRUE))) %>%
    arrange(answer, qarank) -> bronzerank2

ques2 %>%
  ungroup() %>% filter(Cert == "SILVER") %>%
    group_by(answer) %>%
    mutate(qarank = order(order(answer, certprop, decreasing=TRUE))) %>%
    arrange(answer, qarank) -> silverrank2

ques2 %>%
  ungroup() %>% filter(Cert == "GOLD") %>%
    group_by(answer) %>%
    mutate(qarank = order(order(answer, certprop, decreasing=TRUE))) %>%
    arrange(answer, qarank)-> goldrank2

full_join(bronzerank2, silverrank2) %>% full_join(., goldrank2) -> ques2

####Joining####
full_join(ques1, ques2) -> ques1_2

#Adding question sections
questions %>% select(-row) %>% left_join(ques1_2, .) %>% relocate(., section, .after=question) -> goquescert

```



```{r coalesce_join, error=FALSE, echo=FALSE, warning=FALSE, message=FALSE,  comment=""}
coalesce_join <- function(x, y, 
                          by = NULL, suffix = c(".x", ".y"), 
                          join = dplyr::full_join, ...) {
    joined <- join(x, y, by = by, suffix = suffix, ...)
    # names of desired output
    cols <- union(names(x), names(y))
    
    to_coalesce <- names(joined)[!names(joined) %in% cols]
    suffix_used <- suffix[ifelse(endsWith(to_coalesce, suffix[1]), 1, 2)]
    # remove suffixes and deduplicate
    to_coalesce <- unique(substr(
        to_coalesce, 
        1, 
        nchar(to_coalesce) - nchar(suffix_used)
    ))
    
    coalesced <- purrr::map_dfc(to_coalesce, ~dplyr::coalesce(
        joined[[paste0(.x, suffix[1])]], 
        joined[[paste0(.x, suffix[2])]]
    ))
    names(coalesced) <- to_coalesce
    
    dplyr::bind_cols(joined, coalesced)[cols]
}
```

```{r Pseudo-example, echo=TRUE, warning=FALSE, message=FALSE,  comment=""}

#Example: 'person' is simulated data, conv stores some basic known values we want to use for quantification conversions, rest is done with mutate.

person <- read_xlsx(here("GO_Conversions.xlsx"), sheet=3)
conv <- read_xlsx(here("GO_Conversions.xlsx"), sheet=2)

quant <- coalesce_join(person, conv, by = 'type')

#Conversion Mutations

quant %>% mutate(standby_hours_dy = 8 - active_hours_dy,
                 kwh_active_dy = ((active_watts*active_hours_dy)/1000),
                 kwh_standby_dy = ((standby_watts*standby_hours_dy)/1000),
                 kwh_dy = kwh_active_dy+kwh_standby_dy,
                 kwh_wk = ifelse(type == c("incandescent", "halogen", "cfl", "led", "computer", "monitor", "personal_printer","other_elec"), kwh_dy*5, 
                          ifelse(type == "glass",
                      ((`weight (lbs)`*value_wk)/2000)*kwh_per_ton,
                          ifelse(type == "aluminum",
                      ((`weight (lbs)`*value_wk)/2000)*kwh_per_ton, #this can be cleaned with an '%in%'
                          ifelse(type == "plastic",
                      ((`weight (lbs)`*value_wk)/2000)*kwh_per_ton,
                          ifelse(type == "cardboard",
                      ((`weight (lbs)`*value_wk)/2000)*kwh_per_ton,
                          ifelse(type == "paper",
                      ((`weight (lbs)`*value_wk)/2000)*kwh_per_ton, NA))))))) -> quant

quant %>% mutate(gallons_oil_wk = kwh_wk/43.9,
                 kg_CO2_wk = (((gallons_oil_wk/42)*0.43)*1000)) -> quant



```

```{r Assumption Notes, echo=TRUE, warning=FALSE, message=FALSE,  comment=""}
#Psuedo-real example. Using goquestidy, which is aggregated data, to get net impact in a hypothetical year if all participants took survey at same time. Need to use goques with a loop over month/year of entry to get over-time calculations.

#Relevant Questions:
#Count of lightbulbs and kind of lightbulbs
#"All of my task lighting (including floor and desk lamps) uses either LED or CFL, but no incandescent light bulbs."
#-> If Yes, Will just assume they are all using 2 LEDs. If Action Needed, will just assume using 2 incandescents.

#Wattage of computer and monitor and personal printer and “other electronics”
#"My computer monitor is set to sleep after 10 minutes or less"
#-> If Yes, do (8 - assumed 6 active_hours_dy) + .166 (ten minutes) = active_hours_dy and standby_hours_dy = 0

#"I shut off my personal printer when not in use"
#-> If Yes, standby_hours_dy = 0

#Amount of recycling for each material
#"I recycle all bottles and cans that I come across in the office."
#-> If Yes, will just assume 1 plastic bottle /wk

#"I recycle all cardboard that I come across in the office."
#-> If Yes, will just assume 1 cardboard box / wk

#Amount of paper used
#"I recycle all office paper."
#-> If Yes, value_wk for type == "paper" stays same, ifelse, value_wk for type == "paper" is "0".

#Note that I still need to develop columns that captures usage independent of sustainable action. Number of pieces of paper used, bottles drunk, etc. Because of the large amount of assumptions I'm using here, I'm sticking with quantities that match the sustainability questions. For example, "I recycle all paper" the value_wk is pieces of paper recycled. Not making a claim on paper used by those who don't do this.
```

```{r Wrangling, echo=TRUE, warning=FALSE, message=FALSE,  comment=""}
goques %>% filter(question == "All of my task lighting (including floor and desk lamps) uses either LED or CFL, but no incandescent light bulbs." |
                      question == "My computer monitor is set to sleep after 10 minutes or less." |
                      question == "I shut off my personal printer when not in use." |
                      question == "I recycle all bottles and cans that I come across in the office." |
                      question ==  "I recycle all cardboard that I come across in the office." |
                      question == "I recycle all office paper.") %>%
  select(`NU ID`, question, section, answer_1) -> goquant

#Adding columns from 'person' simulated data
goquant[c("type", "value", "value_wk", "active_watts", "standby_watts", "active_hours_dy", "standby_hours_dy")] <- NA

#lightbulbs
goquant %>% mutate(type = ifelse(question == "All of my task lighting (including floor and desk lamps) uses either LED or CFL, but no incandescent light bulbs." & answer_1 == "Yes", "led", 
            ifelse(question == "All of my task lighting (including floor and desk lamps) uses either LED or CFL, but no incandescent light bulbs." & answer_1 == "Action Needed", "incandescent", NA))) -> goquant

goquant %>% mutate(value = ifelse(type == "led", 2, 
                           ifelse(type == "incandescent", 2, type))) -> goquant

goquant %>% mutate(active_watts = ifelse(type == "led", 9, 
                                  ifelse(type == "incandescent", 60, active_watts))) -> goquant
goquant %>% mutate(standby_watts = ifelse(type == "led", 0, 
                                  ifelse(type == "incandescent", 0, standby_watts))) -> goquant

goquant %>% mutate(active_hours_dy = ifelse(question == "All of my task lighting (including floor and desk lamps) uses either LED or CFL, but no incandescent light bulbs.", 8, active_hours_dy)) -> goquant
goquant %>% mutate(standby_hours_dy = ifelse(question == "All of my task lighting (including floor and desk lamps) uses either LED or CFL, but no incandescent light bulbs.", 8- active_hours_dy, standby_hours_dy)) -> goquant

#computer monitor
goquant %>% mutate(type = ifelse(question == "My computer monitor is set to sleep after 10 minutes or less.","monitor", type)) -> goquant

goquant %>% mutate(active_watts = ifelse(type == "monitor", 45, active_watts)) -> goquant
goquant %>% mutate(standby_watts = ifelse(type == "monitor", 4, standby_watts)) -> goquant
goquant %>% mutate(active_hours_dy = ifelse(type == "monitor", 6, active_hours_dy)) -> goquant

goquant %>% mutate(standby_hours_dy = ifelse(type == "monitor" & answer_1 == "Yes", 0, 
                            ifelse(type == "monitor" & answer_1 == "Action Needed", 8-active_hours_dy, standby_hours_dy))) -> goquant

#personal printer (here need to account for whether they have one or not)
goquant %>% mutate(type = ifelse(question == "I shut off my personal printer when not in use.", "printer", type)) -> goquant

goquant %>% mutate(active_watts = ifelse(type == "printer" & answer_1 != "N/A", 40, 
                                  ifelse(type == "printer" & answer_1 == "N/A", 0, active_watts))) -> goquant
                   
goquant %>% mutate(standby_watts = ifelse(type == "printer" & answer_1 != "N/A", 4, 
                                  ifelse(type == "printer" & answer_1 == "N/A", 0, standby_watts))) -> goquant                   
goquant %>% mutate(active_hours_dy = ifelse(type == "printer" & answer_1 != "N/A", 1,
                                     ifelse(type == "printer" & answer_1 == "N/A", 0, active_hours_dy))) -> goquant

goquant %>% mutate(standby_hours_dy = ifelse(type == "printer" & answer_1 == "Yes",
                                             0, 
                   ifelse(type == "printer" & answer_1 == "Action Needed", 
                          8-active_hours_dy, 
                   ifelse(type == "printer" & answer_1 == "N/A", 0, standby_hours_dy)))) -> goquant

#Bottles and Cans Recycling
goquant %>% mutate(type = ifelse(question == "I recycle all bottles and cans that I come across in the office.", "plastic", type)) -> goquant

goquant %>% mutate(value_wk = ifelse(type == "plastic", 1, value_wk)) -> goquant

#Cardboard Recycling
goquant %>% mutate(type = ifelse(question == "I recycle all cardboard that I come across in the office.", "cardboard", type)) -> goquant

goquant %>% mutate(value_wk = ifelse(type == "cardboard", 1, value_wk)) -> goquant

#Paper Recycling
goquant %>% mutate(type = ifelse(question == "I recycle all office paper.", "paper", type)) -> goquant

goquant %>% mutate(value_wk = ifelse(type == "paper" & answer_1 == "Yes" , 25, value_wk)) -> goquant

```

```{r Conversions, echo=TRUE, warning=FALSE, message=FALSE,  comment=""}
quant <- coalesce_join(goquant, conv, by = 'type')

quant %>% mutate(kwh_active_dy = ((active_watts*active_hours_dy)/1000),
                 kwh_standby_dy = ((standby_watts*standby_hours_dy)/1000),
                 kwh_dy = kwh_active_dy+kwh_standby_dy,
                 kwh_wk = ifelse(type == c("incandescent", "led", "monitor","printer"), kwh_dy*5, NA)) -> quant

quant %>% mutate(kwh_wk = ifelse(type == "paper",
                      ((`weight (lbs)`*value_wk)/2000)*kwh_per_ton,
                          ifelse(type == "plastic",
                      ((`weight (lbs)`*value_wk)/2000)*kwh_per_ton, #this can be cleaned with an '%in%'
                          ifelse(type == "cardboard", 
                      ((`weight (lbs)`*value_wk)/2000)*kwh_per_ton, kwh_dy*5)))) -> quant

quant %>% mutate(gallons_oil_wk = kwh_wk/43.9,
                 kg_CO2_wk = (((gallons_oil_wk/42)*0.43)*1000)) -> quant
```

```{r #Aggregation, echo=FALSE, fig.show='hide', warning=FALSE, message=FALSE,  comment=""}
#Aggregation
quant %>% group_by(question, answer_1) %>%
  summarize(kwh_yr_sum = round((sum(kwh_wk)*48),2),
            kwh_yr_mn = mean(kwh_wk)*48,
            gallons_oil_yr_sum = round((sum(gallons_oil_wk)*48),2),
            gallons_oil_yr_mn = round((mean(gallons_oil_wk)*48),2),
            kg_CO2_yr_sum = round((sum(kg_CO2_wk)*48),2),
            kg_CO2_yr_mn = round((mean(kg_CO2_wk)*48),2)) -> quantagg

quantagg %>% pivot_longer(c(3:8), names_to="metric", values_to="value") -> quantagg
```

# Quantifiying Environmental Impact of the Green Office Challenge.

The following quantifies the (theoretical) environmental impact of six questions from the Green Office Challenge. It is a combination of reasonable assumptions (e.g. watts for an average computer monitor) with Green Office survey data (e.g. number of people who say "Yes" to having their monitor set to sleep after 10 minutes). A number of conversions were then calculated to estimate the impact on Kilowatt Hours of Electricity, Gallons of Oil, and Kilograms of CO2 for a 48-week year

The survey data contains 483 participants and is subset to just their first survey. The resultant calculations are, in effect, the theoretical environmental impact over one year if all 483 participants took the survey at the same time.

\


## Dataframes
\

Below is the dataframe (Table 1) for which all following visualizations are referenced. To dive deeper into how it was created, select "Show All Code" underneath the "Code" button in the top-right of this document. 

Each row corresponds to a unique question and answer_1 combination. The other columns illustrate the various assumptions used in this project. For example, in the question, "All of my task lighting (including floor and desk lamps) uses either LED or CFL, but no incandescent light bulbs.", when the column "answer_1" is "Yes" the column "type" has a value of "led" and when the column "answer_1" is "Action Needed", the column "type" has a value of "incandescent". Furthermore, the "value" column for those lightbulb types are both "2", indicating that all other calculations for that question assumes a participant has either 2 incandescent or led lightbulbs in their workstation.

Several of the other questions also use similar simplyifing assumptions, given that the actual data is unknown. For example, in the question "I recycle all bottles and cans that I come across in the office.", I assume the "type" of recycled object to be a plastic bottle and that participants who answered "Yes" recycle just 2 of these a week.

Gathering the actual information from all previous Green Office participants will allow much more precise and in-depth calculations for the questions in this project and several others from the larger Green Office Survey. 

All other data assumptions (e.g. weight (lbs) per type of recyclable material) or conversions (e.g. kwh_wk to gallons_oil_wk) come from authoritative sources such as the EPA.

Table 1:
```{r quant, warning=FALSE, message=FALSE,}

quant %>% select(-`NU ID`) -> quant_lite
quant_lite[!duplicated(quant_lite),]


```

Supplementing Table 1, this dataframe (Table 1) shows the counts of different answers for each of the six questions examined in this project. These counts were used to calculate the environmental impact of those who do take the reference action (i.e. "Yes") and those who do not (i.e. "Action Needed"). 

Table 2:
```{r quant answer count, warning=FALSE, message=FALSE}

goquestidy %>% select(question, `Action Needed`, `N/A`, Yes) %>% ungroup() %>%
semi_join(., quant %>% select(question)) %>% filter(surveyorder==1) %>% select(-surveyorder)

```

\


# Technological Adjustment Questions and Visualizations
\

Three Technology-related questions from the Green Office Survey lend themselves to quantification:

1. "All of my task lighting (including floor and desk lamps) uses either LED or CFL, but no incandescent light bulbs."
2. "I shut off my personal printer when not in use."
3. "My computer monitor is set to sleep after 10 minutes or less."

The key assumptions for these questions are the Watts of electricity used when a device is active and in standby as well as the number of hours for each. This allows for the calculation of Kilowatt hours per day and by extension Kilowatt hours per week and per year. The exact assumptions used can be seen in Table 1, depending on the type of device and the answer given to the question.

Below is the visualized impact of each of these "Technological Adjustment" questions from the assumptions in Table 1 relative to the count of answer type from Table 2:


```{r Tech vix exp, echo=FALSE, error=FALSE, message=FALSE, warning=FALSE, fig.height= 5, fig.width=10, dpi=96, eval=FALSE}
#Faceted
quantagg %>% ungroup() %>% mutate(ques = str_trunc(quantagg$question, 80, "right", ellipsis = "-")) %>% filter(question == "All of my task lighting (including floor and desk lamps) uses either LED or CFL, but no incandescent light bulbs." |
                      question == "My computer monitor is set to sleep after 10 minutes or less." |
                      question == "I shut off my personal printer when not in use.") %>%
  filter(answer_1!="N/A") %>%
  filter(metric == "kwh_yr_mn" |metric ==  "gallons_oil_yr_mn" |metric ==  "kg_CO2_yr_mn") %>% 
  ggplot(., aes(value, ques, fill=answer_1)) + geom_col(position="dodge")+
  facet_rep_wrap(~metric,scales="free_x",nrow=2) +
geom_text(
    aes(label = value), 
    hjust = 1, size = 8,
    position = position_dodge(width = 1),
    inherit.aes = TRUE)+ 
  labs(title = "Mean Kilowatt Hours/Year by Tech. Adjustment")+
       xlab("Mean Kilowatt Hours/Year")+
       ylab("")+
theme(axis.title.x = element_text(size = rel(2.8), angle = 00)) +
theme(axis.title.y = element_text(size = rel(2.8), angle = 00)) +
theme(plot.title = element_text(lineheight=3, face="bold", color="black", size=25)) +
theme(axis.text.y = element_text(angle = 00, hjust = 1, size= rel(2.8))) +
theme(axis.text.x = element_text(angle = 00, hjust = 1, size= rel(2.8)))


```

```{r Tech, echo=FALSE, error=FALSE, message=FALSE, warning=FALSE, fig.height= 5, fig.width=10, dpi=96}
quantagg %>% ungroup() %>% mutate(ques = str_trunc(quantagg$question, 80, "right", ellipsis = "-")) %>% filter(question == "All of my task lighting (including floor and desk lamps) uses either LED or CFL, but no incandescent light bulbs." |
                      question == "My computer monitor is set to sleep after 10 minutes or less." |
                      question == "I shut off my personal printer when not in use.") %>%
  filter(answer_1!="N/A") %>%
  filter(metric == "kwh_yr_mn") %>% ggplot(., aes(value, ques, fill=answer_1)) + geom_col(position="dodge")+
geom_text(
    aes(label = value), 
    hjust = 1, size = 8,
    position = position_dodge(width = 1),
    inherit.aes = TRUE)+ 
  labs(title = "Mean Kilowatt Hours/Year by Tech. Adjustment")+
       xlab("Mean Kilowatt Hours/Year")+
       ylab("")+
theme(axis.title.x = element_text(size = rel(2.8), angle = 00)) +
theme(axis.title.y = element_text(size = rel(2.8), angle = 00)) +
theme(plot.title = element_text(lineheight=3, face="bold", color="black", size=25)) +
theme(axis.text.y = element_text(angle = 00, hjust = 1, size= rel(2.8))) +
theme(axis.text.x = element_text(angle = 00, hjust = 1, size= rel(2.8)))

quantagg %>% ungroup() %>% mutate(ques = str_trunc(quantagg$question, 80, "right", ellipsis = "-")) %>% filter(question == "All of my task lighting (including floor and desk lamps) uses either LED or CFL, but no incandescent light bulbs." |
                      question == "My computer monitor is set to sleep after 10 minutes or less." |
                      question == "I shut off my personal printer when not in use.") %>%
  filter(answer_1!="N/A") %>%
  filter(metric == "gallons_oil_yr_mn") %>% ggplot(., aes(value, ques, fill=answer_1)) + geom_col(position="dodge")+
geom_text(
    aes(label = value), 
    hjust = 1, size = 8,
    position = position_dodge(width = 1),
    inherit.aes = TRUE)+ 
  labs(title = "Mean Gallons of Oil/Year by Tech. Adjustment")+
       xlab("Mean Gallons of Oil/Year")+
       ylab("")+
theme(axis.title.x = element_text(size = rel(2.8), angle = 00)) +
theme(axis.title.y = element_text(size = rel(2.8), angle = 00)) +
theme(plot.title = element_text(lineheight=3, face="bold", color="black", size=25)) +
theme(axis.text.y = element_text(angle = 00, hjust = 1, size= rel(2.8))) +
theme(axis.text.x = element_text(angle = 00, hjust = 1, size= rel(2.8)))

quantagg %>% ungroup() %>% mutate(ques = str_trunc(quantagg$question, 80, "right", ellipsis = "-")) %>% filter(question == "All of my task lighting (including floor and desk lamps) uses either LED or CFL, but no incandescent light bulbs." |
                      question == "My computer monitor is set to sleep after 10 minutes or less." |
                      question == "I shut off my personal printer when not in use.") %>%
  filter(answer_1!="N/A") %>%
  filter(metric == "kg_CO2_yr_mn") %>% ggplot(., aes(value, ques, fill=answer_1)) + geom_col(position="dodge")+
geom_text(
    aes(label = value), 
    hjust = 1, size = 8,
    position = position_dodge(width = 1),
    inherit.aes = TRUE)+ 
  labs(title = "Mean Kilograms of CO2/Year by Tech. Adjustment")+
       xlab("Mean Kilograms of CO2/Year")+
       ylab("")+
theme(axis.title.x = element_text(size = rel(2.8), angle = 00)) +
theme(axis.title.y = element_text(size = rel(2.8), angle = 00)) +
theme(plot.title = element_text(lineheight=3, face="bold", color="black", size=25)) +
theme(axis.text.y = element_text(angle = 00, hjust = 1, size= rel(2.8))) +
theme(axis.text.x = element_text(angle = 00, hjust = 1, size= rel(2.8)))
```

\


# Recycling Questions and Visualizations
\

Three Recycling-related questions from the Green Office Survey lend themselves to quantification:

1. "I recycle all bottles and cans that I come across in the office."
2. "I recycle all cardboard that I come across in the office."
3. "I recycle all office paper."

The key assumptions for these questions are the type of recycled material and the quantity recycled in a week. The exact assumptions used can be seen in Table 1. Where type is "plastic", "cardboard", or "paper, weight (lbs) is based on EPA data for a 20 fl oz bottle, a 2' by 2' by 2' cardboard box, 3mm, and a standard sheet of printer paper, respectively.

Below is the visualized impact of each of these "Recycling" questions from the assumptions in Table 1 relative to the count of answer type from Table 2:

```{r Recycling, echo=FALSE, error=FALSE, message=FALSE, warning=FALSE, fig.height= 5, fig.width=10, dpi=96}
#Recycling
quantagg %>% ungroup() %>% mutate(ques = str_trunc(quantagg$question, 80, "right", ellipsis = "-")) %>% filter(question == "I recycle all bottles and cans that I come across in the office." |
                      question ==  "I recycle all cardboard that I come across in the office." |
                      question == "I recycle all office paper.") %>%
  filter(answer_1=="Yes") %>%
  filter(metric == "kwh_yr_sum") %>% ggplot(., aes(value, ques, fill=answer_1)) + geom_col(position="dodge")+
geom_text(
    aes(label = value), 
    hjust = 1, size = 7,
    position = position_dodge(width = 1),
    inherit.aes = TRUE)+ 
  labs(title = "Sum Kilowatt Hours/Year Saved by Recycling")+
       xlab("Sum Kilowatt Hours/Year")+
       ylab("")+
theme(axis.title.x = element_text(size = rel(2.8), angle = 00)) +
theme(axis.title.y = element_text(size = rel(2.8), angle = 00)) +
theme(plot.title = element_text(lineheight=3, face="bold", color="black", size=25)) +
theme(axis.text.y = element_text(angle = 00, hjust = 1, size= rel(2.8))) +
theme(axis.text.x = element_text(angle = 00, hjust = 1, size= rel(2.8)))

quantagg %>% ungroup() %>% mutate(ques = str_trunc(quantagg$question, 80, "right", ellipsis = "-")) %>% filter(question == "I recycle all bottles and cans that I come across in the office." |
                      question ==  "I recycle all cardboard that I come across in the office." |
                      question == "I recycle all office paper.") %>%
  filter(answer_1=="Yes") %>%
  filter(metric == "gallons_oil_yr_sum") %>% ggplot(., aes(value, ques, fill=answer_1)) + geom_col(position="dodge")+
geom_text(
    aes(label = value), 
    hjust = 1, size = 7,
    position = position_dodge(width = 1),
    inherit.aes = TRUE)+ 
  labs(title = "Sum Gallons of Oil/Year Saved by Recycling")+
       xlab("Sum Gallons of Oil/Year")+
       ylab("")+
theme(axis.title.x = element_text(size = rel(2.8), angle = 00)) +
theme(axis.title.y = element_text(size = rel(2.8), angle = 00)) +
theme(plot.title = element_text(lineheight=3, face="bold", color="black", size=25)) +
theme(axis.text.y = element_text(angle = 00, hjust = 1, size= rel(2.8))) +
theme(axis.text.x = element_text(angle = 00, hjust = 1, size= rel(2.8)))

quantagg %>% ungroup() %>% mutate(ques = str_trunc(quantagg$question, 80, "right", ellipsis = "-")) %>% filter(question == "I recycle all bottles and cans that I come across in the office." |
                      question ==  "I recycle all cardboard that I come across in the office." |
                      question == "I recycle all office paper.") %>%
  filter(answer_1=="Yes") %>%
  filter(metric == "kg_CO2_yr_sum") %>% ggplot(., aes(value, ques, fill=answer_1)) + geom_col(position="dodge")+
geom_text(
    aes(label = value), 
    hjust = 1, size = 7,
    position = position_dodge(width = 1),
    inherit.aes = TRUE)+ 
  labs(title = "Sum Kilograms of CO2/Year Saved by Recycling")+
       xlab("Sum Kilograms of CO2/Year")+
       ylab("")+
theme(axis.title.x = element_text(size = rel(2.8), angle = 00)) +
theme(axis.title.y = element_text(size = rel(2.8), angle = 00)) +
theme(plot.title = element_text(lineheight=3, face="bold", color="black", size=25)) +
theme(axis.text.y = element_text(angle = 00, hjust = 1, size= rel(2.8))) +
theme(axis.text.x = element_text(angle = 00, hjust = 1, size= rel(2.8)))
```

\


# Time Series
\

Below is a time series visualization of the environmental impact Green Office Participants made each week since the program began.

```{r, echo=FALSE, warning=FALSE, message=FALSE, results='hide', comment=""}
#Creating an experimental goques2 with date variables
gosurv %>% pivot_longer(c(16:58), names_to="question", values_to="answer") %>%
  pivot_wider(., 
           id_cols=c(`NU ID`, surveyorder, question, answer, date, month, year), 
                       names_from=surveyorder,
                       values_from=c(answer),
                       names_prefix="answer_") -> goques2

left_join(goques2, gotidy) -> goques2

goques2 <- goques2 %>% select(-c(Perc_4, Cert_4, answer_4, Position_4, Department_4))

#Creating columns "actiontaken1_2" and "actiontaken3_4" which capture how answers to questions changed or stayed stable across recertifications 
goques2 %>%
  mutate(actiontaken1_2 = 
  ifelse(answer_1 == "Action Needed" & answer_2 == "Yes", "improved",
  ifelse(answer_1 == "Yes" & answer_2 == "Action Needed", "regressed",
  ifelse(answer_1 == "Yes" & answer_2 == "Yes", "yes_stable",
  ifelse(answer_1 == "Action Needed" & answer_2 == "Action Needed", "action_still_needed", 
  ifelse(answer_1 == "N/A" & answer_2 == "N/A", "not_applicable", NA)))))) %>%
  mutate(actiontaken2_3 =
  ifelse(answer_2 == "Action Needed" & answer_3 == "Yes", "improved",
  ifelse(answer_2 == "Yes" & answer_3 == "Action Needed", "regressed",
  ifelse(answer_2 == "Yes" & answer_3 == "Yes", "yes_stable",
  ifelse(answer_2 == "Action Needed" & answer_3 == "Action Needed", "action_still_needed", 
  ifelse(answer_2 == "N/A" & answer_3 == "N/A", "not_applicable", NA)))))) %>%
  relocate(., c(actiontaken1_2, actiontaken2_3), .after=answer_3) -> goques2

#Adding question categories
goques2 %>% ungroup() %>% select(question) %>% filter(!duplicated(question)) %>%
  mutate(row = row_number()) %>%
  mutate(section = ifelse(between(row, 1, 11), "energy", 
                   ifelse(row == 12, "misc",
                   ifelse(between(row, 13,25), "waste",
                   ifelse(between(row, 26, 32), "recycling",
                   ifelse(between(row, 33, 35), "transportation",
                   ifelse(between(row, 36, 42), "purchasing",
                   ifelse(row == 43, "misc",NA)))))))) -> questions

questions %>% select(-row) %>% left_join(goques2, .) %>% relocate(., section, .after=question) -> goques2


```

```{r Wrangling2, echo=TRUE, warning=FALSE, message=FALSE,  comment=""}
goques2 %>% filter(question == "All of my task lighting (including floor and desk lamps) uses either LED or CFL, but no incandescent light bulbs." |
                      question == "My computer monitor is set to sleep after 10 minutes or less." |
                      question == "I shut off my personal printer when not in use." |
                      question == "I recycle all bottles and cans that I come across in the office." |
                      question ==  "I recycle all cardboard that I come across in the office." |
                      question == "I recycle all office paper.") %>%
  select(`NU ID`, question, section, answer_1, date, month, year) -> goquant

#Adding columns from 'person' simulated data
goquant[c("type", "value", "value_wk", "active_watts", "standby_watts", "active_hours_dy", "standby_hours_dy")] <- NA

#lightbulbs
goquant %>% mutate(type = ifelse(question == "All of my task lighting (including floor and desk lamps) uses either LED or CFL, but no incandescent light bulbs." & answer_1 == "Yes", "led", 
            ifelse(question == "All of my task lighting (including floor and desk lamps) uses either LED or CFL, but no incandescent light bulbs." & answer_1 == "Action Needed", "incandescent", NA))) -> goquant

goquant %>% mutate(value = ifelse(type == "led", 2, 
                           ifelse(type == "incandescent", 2, type))) -> goquant

goquant %>% mutate(active_watts = ifelse(type == "led", 9, 
                                  ifelse(type == "incandescent", 60, active_watts))) -> goquant
goquant %>% mutate(standby_watts = ifelse(type == "led", 0, 
                                  ifelse(type == "incandescent", 0, standby_watts))) -> goquant

goquant %>% mutate(active_hours_dy = ifelse(question == "All of my task lighting (including floor and desk lamps) uses either LED or CFL, but no incandescent light bulbs.", 8, active_hours_dy)) -> goquant
goquant %>% mutate(standby_hours_dy = ifelse(question == "All of my task lighting (including floor and desk lamps) uses either LED or CFL, but no incandescent light bulbs.", 8- active_hours_dy, standby_hours_dy)) -> goquant

#computer monitor
goquant %>% mutate(type = ifelse(question == "My computer monitor is set to sleep after 10 minutes or less.","monitor", type)) -> goquant

goquant %>% mutate(active_watts = ifelse(type == "monitor", 45, active_watts)) -> goquant
goquant %>% mutate(standby_watts = ifelse(type == "monitor", 4, standby_watts)) -> goquant
goquant %>% mutate(active_hours_dy = ifelse(type == "monitor", 6, active_hours_dy)) -> goquant

goquant %>% mutate(standby_hours_dy = ifelse(type == "monitor" & answer_1 == "Yes", 0, 
                            ifelse(type == "monitor" & answer_1 == "Action Needed", 8-active_hours_dy, standby_hours_dy))) -> goquant

#personal printer (here need to account for whether they have one or not)
goquant %>% mutate(type = ifelse(question == "I shut off my personal printer when not in use.", "printer", type)) -> goquant

goquant %>% mutate(active_watts = ifelse(type == "printer" & answer_1 != "N/A", 40, 
                                  ifelse(type == "printer" & answer_1 == "N/A", 0, active_watts))) -> goquant
                   
goquant %>% mutate(standby_watts = ifelse(type == "printer" & answer_1 != "N/A", 4, 
                                  ifelse(type == "printer" & answer_1 == "N/A", 0, standby_watts))) -> goquant                   
goquant %>% mutate(active_hours_dy = ifelse(type == "printer" & answer_1 != "N/A", 1,
                                     ifelse(type == "printer" & answer_1 == "N/A", 0, active_hours_dy))) -> goquant

goquant %>% mutate(standby_hours_dy = ifelse(type == "printer" & answer_1 == "Yes",
                                             0, 
                   ifelse(type == "printer" & answer_1 == "Action Needed", 
                          8-active_hours_dy, 
                   ifelse(type == "printer" & answer_1 == "N/A", 0, standby_hours_dy)))) -> goquant

#Bottles and Cans Recycling
goquant %>% mutate(type = ifelse(question == "I recycle all bottles and cans that I come across in the office.", "plastic", type)) -> goquant

goquant %>% mutate(value_wk = ifelse(type == "plastic", 1, value_wk)) -> goquant

#Cardboard Recycling
goquant %>% mutate(type = ifelse(question == "I recycle all cardboard that I come across in the office.", "cardboard", type)) -> goquant

goquant %>% mutate(value_wk = ifelse(type == "cardboard", 1, value_wk)) -> goquant

#Paper Recycling
goquant %>% mutate(type = ifelse(question == "I recycle all office paper.", "paper", type)) -> goquant

goquant %>% mutate(value_wk = ifelse(type == "paper" & answer_1 == "Yes" , 25, value_wk)) -> goquant

```

```{r Conversions2, echo=TRUE, warning=FALSE, message=FALSE,  comment=""}
quant <- coalesce_join(goquant, conv, by = 'type')

quant %>% mutate(kwh_active_dy = ((active_watts*active_hours_dy)/1000),
                 kwh_standby_dy = ((standby_watts*standby_hours_dy)/1000),
                 kwh_dy = kwh_active_dy+kwh_standby_dy,
                 kwh_wk = ifelse(type == c("incandescent", "led", "monitor","printer"), kwh_dy*5, NA)) -> quant

quant %>% mutate(kwh_wk = ifelse(type == "paper",
                      ((`weight (lbs)`*value_wk)/2000)*kwh_per_ton,
                          ifelse(type == "plastic",
                      ((`weight (lbs)`*value_wk)/2000)*kwh_per_ton, #this can be cleaned with an '%in%'
                          ifelse(type == "cardboard", 
                      ((`weight (lbs)`*value_wk)/2000)*kwh_per_ton, kwh_dy*5)))) -> quant

quant %>% mutate(gallons_oil_wk = kwh_wk/43.9,
                 kg_CO2_wk = (((gallons_oil_wk/42)*0.43)*1000)) -> quant
```

```{r #Aggregation2, echo=FALSE, fig.show='hide', warning=FALSE, message=FALSE,  comment=""}
#Aggregation
quant %>% mutate(kwh_mth = kwh_wk*4,
            gallons_oil_mth = gallons_oil_wk*4,
            kg_CO2_mth = kg_CO2_wk*4) -> quantagg2


#is a character for some reason...
quantagg2$Month_Yr <- format(as.Date(quantagg2$date), "%Y-%m")
quantagg2$Week_Yr <- format(as.Date(quantagg2$date), "%Y-%W")
quantagg2$Week <- format(as.Date(quantagg2$date), "%W") #creating "week" column for aggregation

quantagg2 %>% group_by(question, answer_1, Week, year) %>%
  summarise(tot_kwh_wk = sum(kwh_wk),
            tot_gallons_oil_wk = sum( gallons_oil_wk),
            tot_kg_CO2_wk = sum(kg_CO2_wk)) -> quantagg2 #aggregating

quantagg2 %>% ungroup() %>% mutate(date = as.Date(paste("1", quantagg2$Week, quantagg2$year, sep = "-"), format = "%w-%W-%Y")) -> quantagg2 #recreating made-up date starting on Monday based on Week and Year columns.

#filtering NAs
quantagg2 <- quantagg2 %>% filter(!is.na(tot_kwh_wk))
#arranging in date order
quantagg2 <- quantagg2 %>% arrange(date)

recycle_yes <- quantagg2 %>% filter(answer_1 == "Yes") %>% filter(question == "I recycle all cardboard that I come across in the office." | question == "I recycle all bottles and cans that I come across in the office." | question == "I recycle all office paper.") %>%
  select(-answer_1) %>% group_by(question) %>%
  mutate(cumsum_kwh = cumsum(tot_kwh_wk),
         cumsum_oil = cumsum(tot_gallons_oil_wk),
         cumsum_carbon = cumsum(tot_kg_CO2_wk))
         
g1 <- ggplot(recycle_yes, aes(x=date, y=cumsum_kwh, color=question)) +
  geom_line() + scale_x_date(date_breaks = "3 month", date_labels = "%b-%y'")+ 
  theme(legend.position = "bottom",
          legend.direction = "vertical")+
  labs(title = "Kilowatt Hours of Electricity Saved Each Week by Recycling") + 
       xlab("Date")+ 
       ylab("Kilowatt Hours Saved Each Week")+ 
  scale_y_continuous(breaks = seq(from = 0, to = 250, by = 25))

g2 <- ggplot(recycle_yes, aes(x=date, y=cumsum_oil, color=question)) +
  geom_line() + scale_x_date(date_breaks = "3 month", date_labels = "%b-%y'")+ 
  theme(legend.position = "bottom",
          legend.direction = "vertical")+
  labs(title = "Gallons of Oil Saved Each Week by Recycling") + 
       xlab("Date")+ 
       ylab("Gallons of Oil Saved Each Week")+ 
  scale_y_continuous(breaks = seq(from = 0, to = 6, by = 1))

g3 <- ggplot(recycle_yes, aes(x=date, y=cumsum_carbon, color=question)) +
  geom_line() + scale_x_date(date_breaks = "3 month", date_labels = "%b-%y'")+ 
  theme(legend.position = "bottom",
          legend.direction = "vertical")+
  labs(title = "Kilograms of CO2 Saved Each Week by Recycling") + 
       xlab("Date")+ 
       ylab("Kg of CO2 Saved Each Week")+ 
  scale_y_continuous(breaks = seq(from = 0, to = 60, by = 10))

```


```{r #Aggregation3, eval= FALSE, echo=FALSE, fig.show='hide', warning=FALSE, message=FALSE,  comment=""}
#First need to find the difference between Yes and Action Needed for the Tech. Adjustment questions in order to find "energy savings"

#This is hard.. Fundamental analysis problem is that I'm deriving difference based on the answer groups, rather than the assumptions of energy usage in quant. What I'd really want is take week over week, the number of folks who said "yes", sum to get tot_kwh_wk and subtract that from what it WOULD'VE been if they had said "Action Needed", that way it is a symmetrical dataset. Absent that, I'm not sure how to visualize. Because underneath the time series is changing rates of people, so the difference between the answers is minimized because the amount of people are skewed. There's almost 4x as many people who said "yes" to lightbulb question, so it's hard to interpret.

#Problems... see above^
quantagg2 %>% filter(question == "All of my task lighting (including floor and desk lamps) uses either LED or CFL, but no incandescent light bulbs." | question == "I shut off my personal printer when not in use." | question =="My computer monitor is set to sleep after 10 minutes or less.") %>% 
  filter(answer_1 != "N/A") %>%
  select(-c(Week, year)) %>% ungroup() %>%
pivot_wider(., 
           id_cols=c(question, answer_1, date), 
                     names_from=answer_1,
                     values_from=c(tot_kwh_wk, tot_gallons_oil_wk, tot_kg_CO2_wk)) %>% 
  mutate(tot_kwh_wk = (`tot_kwh_wk_Action Needed` - tot_kwh_wk_Yes),
         tot_gallons_oil_wk = (`tot_gallons_oil_wk_Action Needed` - tot_gallons_oil_wk_Yes),
         tot_kg_CO2_wk = (`tot_kg_CO2_wk_Action Needed` - tot_kg_CO2_wk_Yes)) %>% 
   filter(!is.na(tot_kwh_wk)) %>% arrange(date) %>%
  mutate(cumsum_kwh = cumsum(tot_kwh_wk)) %>% filter(question == "All of my task lighting (including floor and desk lamps) uses either LED or CFL, but no incandescent light bulbs.") %>% 
 ggplot(., aes(x=date, y=cumsum_kwh)) +
  geom_line() + scale_x_date(date_breaks = "3 month", date_labels = "%b-%y'")+ 
  theme(legend.position = "bottom",
          legend.direction = "vertical")



#Let's just do differences instead
tech_diff <- quantagg2 %>% filter(question == "All of my task lighting (including floor and desk lamps) uses either LED or CFL, but no incandescent light bulbs." | question == "I shut off my personal printer when not in use." | question =="My computer monitor is set to sleep after 10 minutes or less.") %>% 
  filter(answer_1 != "N/A") %>%
  group_by(question, answer_1) %>%
  mutate(cumsum_kwh = cumsum(tot_kwh_wk),
         cumsum_oil = cumsum(tot_gallons_oil_wk),
         cumsum_carbon = cumsum(tot_kg_CO2_wk))

quant %>% group_by(question, answer_1) %>% filter(!is.na(answer_1)) %>% count()
guantagg2
tech_diff %>% group_by(question, answer_1) %>% summarize(sum_kw = sum(cumsum_kwh))
tech_diff %>% group_by(question, answer_1) %>% summarize(sum_kw = sum(tot_kwh_wk))

#g4 is good, just need to do the rest. 

g4 <- tech_diff %>% filter(question == "All of my task lighting (including floor and desk lamps) uses either LED or CFL, but no incandescent light bulbs.") %>%
 ggplot(., aes(x=date, y=cumsum_kwh, color=answer_1)) +
  geom_line() + scale_x_date(date_breaks = "3 month", date_labels = "%b-%y'")+ 
  theme(legend.position = "bottom",
          legend.direction = "vertical")+
  labs(title = "Kilowatt Hours of Electricity Used Each Week by Lightbulb Type") + 
       xlab("Date")+ 
       ylab("Kilowatt Hours Used Each Week")+ 
  scale_color_manual(name="Lightbulb Type",
                    values= c("#FC6B49","#5CA9FD"),
                    labels= c("Incandescent", "LED"))
  scale_y_continuous(breaks = seq(from = 0, to = 160, by = 20))

g5 <- ggplot(recycle_yes, aes(x=date, y=cumsum_oil, color=question)) +
  geom_line() + scale_x_date(date_breaks = "3 month", date_labels = "%m-%Y")+ 
  theme(legend.position = "bottom",
          legend.direction = "vertical")+
  labs(title = "Gallons of Oil Saved Each Week by Recycling") + 
       xlab("Date")+ 
       ylab("Gallons of Oil Saved Each Week")+ 
  scale_y_continuous(breaks = seq(from = 0, to = 6, by = 1))

g6 <- ggplot(recycle_yes, aes(x=date, y=cumsum_carbon, color=question)) +
  geom_line() + scale_x_date(date_breaks = "3 month", date_labels = "%m-%Y")+ 
  theme(legend.position = "bottom",
          legend.direction = "vertical")+
  labs(title = "Kilograms of CO2 Saved Each Week by Recycling") + 
       xlab("Date")+ 
       ylab("Kg of CO2 Saved Each Week")+ 
  scale_y_continuous(breaks = seq(from = 0, to = 60, by = 10))



```

```{r Time Series, echo=FALSE, error=FALSE, message=FALSE, warning=FALSE, fig.height= 5, fig.width=7, dpi=96}

g1
g2
g3

```



# Conclusion

This project emphasizes the potential for the Green Office Challenge to be a major facet of Northeastern University's Sustainability and Climate goals. The environmental impact of these six questions is significant even under conservative assumptions. By gathering more accurate data in place of these assumptions, we can demonstrate the impact of this program at the individual, department, and campus level across a much broader variety of environmental impacts. This information can play a central role in helping participants of the Green Office Challenge understand the impact of their efforts and commit to taking further action.



