The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
Show the code
library(ggplot2)## Reference: Turn off warning method from https://stackoverflow.com/questions/16194212/how-to-suppress-warnings-globally-in-an-r-scriptoptions(warn=-1) ## Turn off the warning when generate graphsdata_dir <-file.path(getwd(), 'chloride_data')
Step 2: Read Chloride file
Show the code
## The csv file has been manually exported from separated sheet.## Read in the cl concentration & cl limitcl_df <-read.csv(file.path(data_dir, 'Data-Table 1.csv'))## Adjust the date format, X is the actual date column name, we replaced with a meaningful namecolnames(cl_df)[1] <-"Chloride_measured_dates"
Step 3: Make a function for Plotting Production Wells
Show the code
## Reference: paste function comes from https://www.r-bloggers.com/2022/08/r-program-to-concatenate-two-strings/plot_production_well_func <-function(well_number, cl_df, data_dir, year_range, graph_type){ chloride_limit_col =paste("Well." , well_number, sep ="") chloride_concentration_col =paste("Well.", well_number, ".CL", sep ="") chloride_loss_file_name =paste("Cl Well ", well_number, "-Table 1.csv", sep ="")## Read the data in production well data from cl_df well_df <- cl_df %>%select(all_of(chloride_limit_col), all_of(chloride_concentration_col), Chloride_measured_dates)## Read in Chloride losses data well_losses_df <-read.csv(file.path(data_dir, chloride_loss_file_name))colnames(well_losses_df)[1] <-"Chloride_measured_dates"## Adjust as.date formatwell_df$Chloride_measured_dates <-as.Date(well_df$Chloride_measured_dates, format ="%m/%d/%y") well_df <- well_df %>%filter(!is.na(Chloride_measured_dates))well_losses_df$Chloride_measured_dates <-as.Date(well_losses_df$Chloride_measured_dates, format ="%m/%d/%y") well_losses_df <- well_losses_df %>%filter(!is.na(Chloride_measured_dates))## Merge columns of cl concentration, cl limit and cl loss into well_df well_df <-merge(well_df, well_losses_df, by =c("Chloride_measured_dates", chloride_concentration_col), all =TRUE)# Clean NAs from the file# Reference: Get the method of filtering by passing a string column name from https://stackoverflow.com/questions/71875366/how-do-you-filter-by-passing-a-string-as-a-column-name-in-a-user-defined-functio well_df <- well_df %>%filter(!is.na(.[[chloride_limit_col]]) &!is.na(.[[chloride_concentration_col]]))## Convert the values to numeric to fit the 'scale_y_continuous' function well_df[[chloride_concentration_col]] <-as.numeric(well_df[[chloride_concentration_col]]) well_df[[chloride_limit_col]] <-as.numeric(well_df[[chloride_limit_col]])## Plot the data## Reference: Use scale_x/y_continous from https://medium.com/@bao.character/how-to-use-scale-y-continuous-in-ggplot2-library-in-r-ca233b06fe82#:~:text=The%20scale_y_continuous()%20function%20in,breaks%2C%20labels%2C%20and%20transformations.## Reference: Use method = "loess" from https://www.datanovia.com/en/blog/how-to-plot-a-smooth-line-using-ggplot2/## Reference: Use aes_string from https://stackoverflow.com/questions/19826352/pass-character-strings-to-ggplot2-within-a-function## Reference: Use .data[[]] to retrieve data from df with string variable from https://stackoverflow.com/questions/28897577/what-is-the-difference-between-aes-and-aes-string-ggplot2-in-r## Adjust different settings and graph title for period of record AND five-year periodif (graph_type =='record'){ span_number <-0.034 graph_title <-'Period of Record: Production Well ' year_break <-"5 years" limit_plot_func <-function(chloride_limit_col) {geom_point(aes(y = .data[[chloride_limit_col]],color ="CL Limit"),size =1 ) } }else { span_number <-0.310 graph_title <-'Five-Year Period - Production Well ' year_break <-"year" limit_plot_func <-function(chloride_limit_col) {geom_line(aes(y = .data[[chloride_limit_col]], color ="CL Limit") ) } }## Reference: pmin function to get the minimun values among two columns in a df from:https://stackoverflow.com/questions/21977353/get-the-min-of-two-columns## Reference: scale_color_manual method and its usage from https://stackoverflow.com/questions/74392543/how-to-add-a-legend-to-ggplot-in-r y_min <-max(0, pmin(well_df[[chloride_concentration_col]], well_df[[chloride_limit_col]]) -100) y_max <-pmax(well_df[[chloride_concentration_col]], well_df[[chloride_limit_col]]) +100 y_range <-c(y_min, y_max)ggplot(well_df, aes(x = Chloride_measured_dates)) +limit_plot_func(chloride_limit_col) +geom_point(aes(y = .data[[chloride_concentration_col]],color ='Chloride Concentration'),shape =23,fill ='lightblue' ) +geom_smooth(aes(y = LOESS.2023, color ="LOESS" ),method ="loess",span = span_number,se = F ) +scale_y_continuous(limits = y_range) +# set y rangescale_x_date(limits =as.Date(year_range),breaks = year_break,date_labels ="%b-%Y" ) +# set x rangelabs(x ="Date", y ="Chloride Concentration (mg/L)", title =paste (graph_title, well_number, sep =""),color ="symbol" ) +scale_color_manual(values =c("Chloride Concentration"="blue","LOESS"="purple","CL Limit"="red"))}
Step 3.1: Plot Production Well & Five-Year period for Product Well 2
Step 4.1: Table Kendall’s tau b for Production Well 2
Show the code
## Read mw files for creating the table chartMW_df <- cl_df %>%select(MW.CHL, CHL, CHL.1, CHL.2, CHL.3)MW_df$MW.CHL <-as.Date(MW_df$MW.CHL, format ="%m/%d/%y") MW_df <- MW_df %>%filter(!is.na(MW.CHL))colnames(MW_df) <-c('Date', 'mw4', 'mw20','mw21','mw25')MW_df$Date <-as.numeric(MW_df$Date)MW_df$mw4 <-as.numeric(MW_df$mw4)MW_df$mw20 <-as.numeric(MW_df$mw20)MW_df$mw21 <-as.numeric(MW_df$mw21)MW_df$mw25 <-as.numeric(MW_df$mw25)## Do the cor test.## Reference: correlation coefficient https://stackoverflow.com/questions/69594298/r-calculate-the-correlation-coefficient## Reference: cor.test comes from https://www.geeksforgeeks.org/kendall-correlation-testing-in-r-programming/### Reference: na.omit to count the non NAs in column from https://stackoverflow.com/questions/15704665/simple-method-of-counting-non-nas-in-column-of-data-stringtest_func <-function(x, y){ result <-cor.test(x, y, method ='kendall')return (c(round(result$estimate, 3), round(result$p.value,3), length(na.omit(y))))}## Apply the functiointable_mw <-sapply(MW_df, function(y){test_func(MW_df$Date, y)})## Didable scientific notation## Reference: https://stackoverflow.com/questions/25946047/how-to-prevent-scientific-notation-in-roptions(scipen =999)rownames(table_mw) <-c("Correlation-coffecient", "Sig (1 sided)", "N")print(table_mw)
Date mw4 mw20 mw21 mw25
Correlation-coffecient 1 0.165 0.69 -0.301 0.487
Sig (1 sided) 0 0.000 0.00 0.000 0.000
N 360 322.000 322.00 321.000 347.000
Source Code
---title: "R Final Project - Chloride Report"author: "Zirui Zheng"date: "2/12/2025"format: html: toc: true toc-location: left code-fold: true code-summary: "Show the code" code-tools: true---## R Spatial final project## Step 1: Import libraries and data path```{r}library(dplyr)library(ggplot2)## Reference: Turn off warning method from https://stackoverflow.com/questions/16194212/how-to-suppress-warnings-globally-in-an-r-scriptoptions(warn=-1) ## Turn off the warning when generate graphsdata_dir <-file.path(getwd(), 'chloride_data')```## Step 2: Read Chloride file```{r}## The csv file has been manually exported from separated sheet.## Read in the cl concentration & cl limitcl_df <-read.csv(file.path(data_dir, 'Data-Table 1.csv'))## Adjust the date format, X is the actual date column name, we replaced with a meaningful namecolnames(cl_df)[1] <-"Chloride_measured_dates"```## Step 3: Make a function for Plotting Production Wells```{r}## Reference: paste function comes from https://www.r-bloggers.com/2022/08/r-program-to-concatenate-two-strings/plot_production_well_func <-function(well_number, cl_df, data_dir, year_range, graph_type){ chloride_limit_col =paste("Well." , well_number, sep ="") chloride_concentration_col =paste("Well.", well_number, ".CL", sep ="") chloride_loss_file_name =paste("Cl Well ", well_number, "-Table 1.csv", sep ="")## Read the data in production well data from cl_df well_df <- cl_df %>%select(all_of(chloride_limit_col), all_of(chloride_concentration_col), Chloride_measured_dates)## Read in Chloride losses data well_losses_df <-read.csv(file.path(data_dir, chloride_loss_file_name))colnames(well_losses_df)[1] <-"Chloride_measured_dates"## Adjust as.date formatwell_df$Chloride_measured_dates <-as.Date(well_df$Chloride_measured_dates, format ="%m/%d/%y") well_df <- well_df %>%filter(!is.na(Chloride_measured_dates))well_losses_df$Chloride_measured_dates <-as.Date(well_losses_df$Chloride_measured_dates, format ="%m/%d/%y") well_losses_df <- well_losses_df %>%filter(!is.na(Chloride_measured_dates))## Merge columns of cl concentration, cl limit and cl loss into well_df well_df <-merge(well_df, well_losses_df, by =c("Chloride_measured_dates", chloride_concentration_col), all =TRUE)# Clean NAs from the file# Reference: Get the method of filtering by passing a string column name from https://stackoverflow.com/questions/71875366/how-do-you-filter-by-passing-a-string-as-a-column-name-in-a-user-defined-functio well_df <- well_df %>%filter(!is.na(.[[chloride_limit_col]]) &!is.na(.[[chloride_concentration_col]]))## Convert the values to numeric to fit the 'scale_y_continuous' function well_df[[chloride_concentration_col]] <-as.numeric(well_df[[chloride_concentration_col]]) well_df[[chloride_limit_col]] <-as.numeric(well_df[[chloride_limit_col]])## Plot the data## Reference: Use scale_x/y_continous from https://medium.com/@bao.character/how-to-use-scale-y-continuous-in-ggplot2-library-in-r-ca233b06fe82#:~:text=The%20scale_y_continuous()%20function%20in,breaks%2C%20labels%2C%20and%20transformations.## Reference: Use method = "loess" from https://www.datanovia.com/en/blog/how-to-plot-a-smooth-line-using-ggplot2/## Reference: Use aes_string from https://stackoverflow.com/questions/19826352/pass-character-strings-to-ggplot2-within-a-function## Reference: Use .data[[]] to retrieve data from df with string variable from https://stackoverflow.com/questions/28897577/what-is-the-difference-between-aes-and-aes-string-ggplot2-in-r## Adjust different settings and graph title for period of record AND five-year periodif (graph_type =='record'){ span_number <-0.034 graph_title <-'Period of Record: Production Well ' year_break <-"5 years" limit_plot_func <-function(chloride_limit_col) {geom_point(aes(y = .data[[chloride_limit_col]],color ="CL Limit"),size =1 ) } }else { span_number <-0.310 graph_title <-'Five-Year Period - Production Well ' year_break <-"year" limit_plot_func <-function(chloride_limit_col) {geom_line(aes(y = .data[[chloride_limit_col]], color ="CL Limit") ) } }## Reference: pmin function to get the minimun values among two columns in a df from:https://stackoverflow.com/questions/21977353/get-the-min-of-two-columns## Reference: scale_color_manual method and its usage from https://stackoverflow.com/questions/74392543/how-to-add-a-legend-to-ggplot-in-r y_min <-max(0, pmin(well_df[[chloride_concentration_col]], well_df[[chloride_limit_col]]) -100) y_max <-pmax(well_df[[chloride_concentration_col]], well_df[[chloride_limit_col]]) +100 y_range <-c(y_min, y_max)ggplot(well_df, aes(x = Chloride_measured_dates)) +limit_plot_func(chloride_limit_col) +geom_point(aes(y = .data[[chloride_concentration_col]],color ='Chloride Concentration'),shape =23,fill ='lightblue' ) +geom_smooth(aes(y = LOESS.2023, color ="LOESS" ),method ="loess",span = span_number,se = F ) +scale_y_continuous(limits = y_range) +# set y rangescale_x_date(limits =as.Date(year_range),breaks = year_break,date_labels ="%b-%Y" ) +# set x rangelabs(x ="Date", y ="Chloride Concentration (mg/L)", title =paste (graph_title, well_number, sep =""),color ="symbol" ) +scale_color_manual(values =c("Chloride Concentration"="blue","LOESS"="purple","CL Limit"="red"))}```## Step 3.1: Plot Production Well & Five-Year period for Product Well 2```{r}plot_production_well_func(2, cl_df, data_dir, c("1978-01-01", "2023-01-01"), "record")plot_production_well_func(2, cl_df, data_dir, c("2018-01-01", "2023-01-01"), "five_year")```## Step 3.2: Plot Production Well & Five-Year period for Product Well 3```{r}plot_production_well_func(3, cl_df, data_dir, c("1977-11-01", "2022-12-31"), "record")plot_production_well_func(3, cl_df, data_dir, c("2017-11-01", "2022-12-31"), "five_year")```## Step 3.3: Plot Production Well & Five-Year period for Product Well 6```{r}plot_production_well_func(6, cl_df, data_dir, c("1977-10-01", "2023-12-31"), "record")plot_production_well_func(6, cl_df, data_dir, c("2016-10-01", "2023-12-31"), "five_year")```## Step 3.4: Plot Production Well & Five-Year period for Product Well 7```{r}plot_production_well_func(7, cl_df, data_dir, c("2000-03-01", "2023-12-31"), "record")plot_production_well_func(7, cl_df, data_dir, c("2016-09-01", "2023-12-31"), "five_year")```## Step 4.1: Table Kendall’s tau b for Production Well 2```{r}## Read mw files for creating the table chartMW_df <- cl_df %>%select(MW.CHL, CHL, CHL.1, CHL.2, CHL.3)MW_df$MW.CHL <-as.Date(MW_df$MW.CHL, format ="%m/%d/%y") MW_df <- MW_df %>%filter(!is.na(MW.CHL))colnames(MW_df) <-c('Date', 'mw4', 'mw20','mw21','mw25')MW_df$Date <-as.numeric(MW_df$Date)MW_df$mw4 <-as.numeric(MW_df$mw4)MW_df$mw20 <-as.numeric(MW_df$mw20)MW_df$mw21 <-as.numeric(MW_df$mw21)MW_df$mw25 <-as.numeric(MW_df$mw25)## Do the cor test.## Reference: correlation coefficient https://stackoverflow.com/questions/69594298/r-calculate-the-correlation-coefficient## Reference: cor.test comes from https://www.geeksforgeeks.org/kendall-correlation-testing-in-r-programming/### Reference: na.omit to count the non NAs in column from https://stackoverflow.com/questions/15704665/simple-method-of-counting-non-nas-in-column-of-data-stringtest_func <-function(x, y){ result <-cor.test(x, y, method ='kendall')return (c(round(result$estimate, 3), round(result$p.value,3), length(na.omit(y))))}## Apply the functiointable_mw <-sapply(MW_df, function(y){test_func(MW_df$Date, y)})## Didable scientific notation## Reference: https://stackoverflow.com/questions/25946047/how-to-prevent-scientific-notation-in-roptions(scipen =999)rownames(table_mw) <-c("Correlation-coffecient", "Sig (1 sided)", "N")print(table_mw)```