Brian

Note to self: after class, ask how to get the strain to also show up on my bar plots, given how I found the mean.

library(tidyverse)
# Make the course data folder the working directory so the bare file names
# below resolve.
# NOTE(review): this absolute path is specific to one machine; anyone else
# running the script has to edit it first.
setwd("C:/Users/bkslu/Downloads/MATH 217")
#setwd("C:/Users/rsaidi/Downloads")
# Load every input table with readr. The numeric suffix tracks the source
# file: 1 = the "mar122015.lrbaz1201" files, 2 = the "may052015.ka32" files.
# Only harvest1 and harvest2 are used by the productivity and crash sections
# below. NOTE(review): confirm whether the other tables are needed further on.
primary <- read_csv("atp3ufs6composition.csv")
harvest1 <- read_csv("harvestdata.mar122015.lrbaz1201.csv")
harvest2 <- read_csv("harvestdata.may052015.ka32.csv")
instrum1 <- read_csv("instrumentation.daily.mar122015.lrbaz1201.csv")
instrum2 <- read_csv("instrumentation.daily.may052015.ka32.csv")
summ1 <- read_csv("summary.combined.mar122015.lrbaz1201.csv")
summ2 <- read_csv("summary.combined.may052015.ka32.csv")
weath1 <- read_csv("weather.daily.mar122015.lrbaz1201.csv")
weath2 <- read_csv("weather.daily.may052015.ka32.csv")

# List the column names of the second harvest file; the renames further down
# key off these exact names.
names(harvest2)

 [1] "SiteID"          "ExperimentID"    "StrainID"        "SourceID"       
 [5] "BatchID"         "Date"            "PondID"          "TreatmentID"    
 [9] "Harvest."        "time..d."        "Harvest.Vol..L." "AFDW..g.L."     
[13] "AFDW..g."        "crash"

Productivity

# Combine the two harvest files into one table. No `by` is given, so dplyr
# joins on every column the two tables have in common.
harvest <- harvest2 |>
  full_join(harvest1)

Joining with `by = join_by(SiteID, ExperimentID, StrainID, SourceID, BatchID,
Date, PondID, TreatmentID, Harvest., time..d., Harvest.Vol..L., AFDW..g.L.,
AFDW..g., crash)`

# Swap three terse column names for descriptive ones. The new names are not
# syntactic R names, so they have to be wrapped in backticks wherever they
# are used. any_of() skips a column that is not present instead of failing.
harvest <- harvest |>
  rename(any_of(c(
    "Harvest_density_g/L" = "AFDW..g.L.",
    "Ash-free_dry_mass_of_algal_harvest_g" = "AFDW..g.",
    "experimental_duration" = "time..d."
  )))

# Total ash-free dry mass for each PondID/SourceID group, repeated on every
# row of the group. The result stays grouped, so later verbs applied to
# `current` (and to tables derived from it) keep working per group until
# ungroup() is called.
current <- mutate(
  group_by(harvest, PondID, SourceID),
  `Ash-free_dry_mass_sum` = sum(`Ash-free_dry_mass_of_algal_harvest_g`)
)

# Same total with harvest number 1 (the initial grow-out) left out.
currentf <- mutate(
  filter(current, Harvest. != '1'),
  `Ash-free_dry_mass_sum_f` = sum(`Ash-free_dry_mass_of_algal_harvest_g`)
)

# Same total again, this time also leaving out each group's highest-numbered
# remaining harvest, so both the initial grow-out and the final harvest are
# excluded.
currentfl <- mutate(
  filter(currentf, Harvest. != max(Harvest.)),
  `Ash-free_dry_mass_sum_fl` = sum(`Ash-free_dry_mass_of_algal_harvest_g`)
)

# Pond surface area used to turn a harvested mass into mass per unit area.
# The productivity columns built from these values are labelled g/m^2/day,
# so the area is taken to be in square metres.
pond_area_m2 <- 4.2

# Mass per unit pond area for the unadjusted total ...
current <- current |>
  mutate(
    `Ash-free_dry_mass_div_pond_area` = `Ash-free_dry_mass_sum` / pond_area_m2
  )

# ... for the total without the initial grow-out ...
currentf <- currentf |>
  mutate(
    `Ash-free_dry_mass_div_pond_area_f` =
      `Ash-free_dry_mass_sum_f` / pond_area_m2
  )

# ... and for the total without the initial grow-out and the final harvest.
currentfl <- currentfl |>
  mutate(
    `Ash-free_dry_mass_div_pond_area_fl` =
      `Ash-free_dry_mass_sum_fl` / pond_area_m2
  )

# Record each group's highest and lowest harvest number on every row. The
# highest marks the row that holds the total experimental duration; the
# lowest marks the row whose duration is subtracted to get the adjusted
# experimental duration.
current <- mutate(
  current,
  maxharvest = max(Harvest.),
  minharvest = min(Harvest.)
)

# Row of each group's final harvest. Its duration is the length of the whole
# experiment, so the column is renamed to say so before it is joined back on.
# filter() is used rather than logical indexing with `[` so that a missing
# harvest number drops the row instead of producing an all-NA row.
current2 <- current |>
  filter(Harvest. == maxharvest) |>
  rename(tot_experimental_duration = experimental_duration)

# Row of each group's first harvest. Its duration is the time taken to reach
# the first harvest, which is later subtracted from the total to get the
# adjusted experimental duration.
current3 <- current |>
  filter(Harvest. == minharvest) |>
  rename(first_experimental_duration = experimental_duration)

# Row of each group's second-to-last harvest. Its duration is the length of
# the experiment when the final harvest is left out.
#
# arrange() sorts the whole table by descending harvest number; because
# `current` is still grouped, row_number() then restarts at 1 within every
# PondID/SourceID group, so n == 2 picks the second-highest harvest of each
# group. row_number() replaces 1:n(), which misbehaves when n() is 0.
# The helper column `n` is kept in the result, as before.
current4 <- current |>
  arrange(desc(Harvest.)) |>
  mutate(n = row_number()) |>
  ungroup() |>
  filter(n == 2) |>
  rename(`2nd_last_experimental_duration` = experimental_duration)

# Start the combined table: attach the columns computed on `current` (mass
# total, mass per pond area, min/max harvest number) to the harvest rows.
joined <- harvest |>
  left_join(current)

Joining with `by = join_by(SiteID, ExperimentID, StrainID, SourceID, BatchID,
Date, PondID, TreatmentID, Harvest., experimental_duration, Harvest.Vol..L.,
`Harvest_density_g/L`, `Ash-free_dry_mass_of_algal_harvest_g`, crash)`

# Add the totals that leave out the initial grow-out. Rows for harvest 1 are
# not in `currentf`, so they get NA in the new columns.
joined <- joined |>
  left_join(currentf)

Joining with `by = join_by(SiteID, ExperimentID, StrainID, SourceID, BatchID,
Date, PondID, TreatmentID, Harvest., experimental_duration, Harvest.Vol..L.,
`Harvest_density_g/L`, `Ash-free_dry_mass_of_algal_harvest_g`, crash,
`Ash-free_dry_mass_sum`)`

# Add the totals that leave out both the initial grow-out and the final
# harvest. Rows that were filtered out of `currentfl` get NA.
joined <- joined |>
  left_join(currentfl)

Joining with `by = join_by(SiteID, ExperimentID, StrainID, SourceID, BatchID,
Date, PondID, TreatmentID, Harvest., experimental_duration, Harvest.Vol..L.,
`Harvest_density_g/L`, `Ash-free_dry_mass_of_algal_harvest_g`, crash,
`Ash-free_dry_mass_sum`, `Ash-free_dry_mass_sum_f`)`

# Add `tot_experimental_duration`. Only the final-harvest rows exist in
# `current2`, so every other row gets NA here.
joined <- joined |>
  left_join(current2)

Joining with `by = join_by(SiteID, ExperimentID, StrainID, SourceID, BatchID,
Date, PondID, TreatmentID, Harvest., Harvest.Vol..L., `Harvest_density_g/L`,
`Ash-free_dry_mass_of_algal_harvest_g`, crash, `Ash-free_dry_mass_sum`,
`Ash-free_dry_mass_div_pond_area`, maxharvest, minharvest)`

# Add `first_experimental_duration`. Only the first-harvest rows exist in
# `current3`, so every other row gets NA here.
joined <- joined |>
  left_join(current3)

Joining with `by = join_by(SiteID, ExperimentID, StrainID, SourceID, BatchID,
Date, PondID, TreatmentID, Harvest., Harvest.Vol..L., `Harvest_density_g/L`,
`Ash-free_dry_mass_of_algal_harvest_g`, crash, `Ash-free_dry_mass_sum`,
`Ash-free_dry_mass_div_pond_area`, maxharvest, minharvest)`

# Add `2nd_last_experimental_duration` (and the helper column `n`). Only the
# second-to-last harvest rows exist in `current4`; all other rows get NA.
joined <- joined |>
  left_join(current4)

Joining with `by = join_by(SiteID, ExperimentID, StrainID, SourceID, BatchID,
Date, PondID, TreatmentID, Harvest., Harvest.Vol..L., `Harvest_density_g/L`,
`Ash-free_dry_mass_of_algal_harvest_g`, crash, `Ash-free_dry_mass_sum`,
`Ash-free_dry_mass_div_pond_area`, maxharvest, minharvest)`

# Spread the total experimental duration from the one row that has it to
# every row of the same PondID/SourceID group (fill down first, then up).
joined1 <- ungroup(
  tidyr::fill(
    group_by(joined, PondID, SourceID),
    tot_experimental_duration,
    .direction = "downup"
  )
)

# Do the same for the duration of the first harvest.
joined2 <- ungroup(
  tidyr::fill(
    group_by(joined1, PondID, SourceID),
    first_experimental_duration,
    .direction = "downup"
  )
)

#joined3 <- joined2
    #group_by(PondID, SourceID) |> 
    #tidyr::fill(`2nd_last_experimental_duration`, .direction = "downup") |>
    #ungroup()

# replacing na's with the second to last experimental duration for its experiment. 

# Rather than filling the second-to-last duration across each group, keep
# only the rows that already carry it. That leaves the second-to-last
# harvest row of each experiment, which is what gets averaged below.
joined3 <- joined2 |>
  drop_na(`2nd_last_experimental_duration`)

# Whole-experiment productivity: mass per unit pond area divided by the
# total experimental duration.
joined3$`Ash-free_dry_mass_div_time_g_m2_day` <-
  joined3$`Ash-free_dry_mass_div_pond_area` / joined3$tot_experimental_duration

# The result is productivity in the units...

\(g/m^2/d\)

# Productivity without the initial grow-out: the adjusted mass per unit pond
# area divided by the time that elapsed after the first harvest.
joined3$`Ash-free_dry_mass_div_time_g_m2_day_f` <-
  joined3$`Ash-free_dry_mass_div_pond_area_f` /
  (joined3$tot_experimental_duration - joined3$first_experimental_duration)

# The result is productivity without the initial grow-out, in the same units...

\(g/m^2/d\)

# Productivity without the initial grow-out and the final harvest: the twice
# adjusted mass per unit pond area divided by the time between the first
# harvest and the second-to-last harvest.
joined3$`Ash-free_dry_mass_div_time_g_m2_day_fl` <-
  joined3$`Ash-free_dry_mass_div_pond_area_fl` /
  (joined3$`2nd_last_experimental_duration` - joined3$first_experimental_duration)

# The result is in the same units as the other two measures, g/m^2/d.

Productivity by treatment

# Keep only rows that have a grow-out-adjusted mass total. All three
# treatment averages below are computed from this same table.
joined4 <- joined3[!is.na(joined3$`Ash-free_dry_mass_sum_f`), ]

# Mean whole-experiment productivity for each treatment. aggregate() returns
# a data frame with the treatment in `Group.1` and the mean in `x`.
treatment_mean <- with(
  joined4,
  aggregate(
    x = `Ash-free_dry_mass_div_time_g_m2_day`,
    by = list(TreatmentID),
    FUN = mean
  )
)
print(treatment_mean)

           Group.1        x
1 1201 Progression 3.728955
2         3x, 0.11 4.584166
3        3x, 0.214 5.662126
4              NH4 8.220360
5              NO3 9.266052

# Bar chart of mean whole-experiment productivity (g/m^2/day), one bar per
# treatment.
library(RColorBrewer)
coul <- brewer.pal(5, "Set2")  # five colours, one per treatment bar
barplot(
  height = treatment_mean$x,
  # Spelled out in full: `names` only reached this argument through partial
  # argument matching.
  names.arg = treatment_mean$Group.1,
  col = coul,
  xlab = "Treatment",
  ylab = "Algae Ash-free Dry Mass g/m^2/day",
  main = "Algae Productivity by Treatment",
  ylim = c(0, 25)  # same axis range as the other two treatment charts
)

# Mean productivity per treatment with the initial grow-out excluded.
# The result has the treatment in `Group.1` and the mean in `x`.
treatment_mean_f <- with(
  joined4,
  aggregate(
    x = `Ash-free_dry_mass_div_time_g_m2_day_f`,
    by = list(TreatmentID),
    FUN = mean
  )
)
print(treatment_mean_f)

           Group.1         x
1 1201 Progression 21.354573
2         3x, 0.11  7.356291
3        3x, 0.214  7.618427
4              NH4 10.237375
5              NO3 11.038429

# Bar chart of mean productivity (g/m^2/day) per treatment, with the initial
# grow-out left out of the calculation.
library(RColorBrewer)
coul <- brewer.pal(5, "Set2")  # five colours, one per treatment bar
barplot(
  height = treatment_mean_f$x,
  # Spelled out in full: `names` only reached this argument through partial
  # argument matching.
  names.arg = treatment_mean_f$Group.1,
  col = coul,
  xlab = "Treatment",
  ylab = "Algae Ash-free Dry Mass g/m^2/day",
  main = "Algae Productivity by Treatment w/o Initial Grow Out",
  ylim = c(0, 25)  # same axis range as the other two treatment charts
)

# Mean productivity per treatment with both the initial grow-out and the
# final harvest excluded. The result has the treatment in `Group.1` and the
# mean in `x`.
treatment_mean_fl <- with(
  joined4,
  aggregate(
    x = `Ash-free_dry_mass_div_time_g_m2_day_fl`,
    by = list(TreatmentID),
    FUN = mean
  )
)
print(treatment_mean_fl)

           Group.1         x
1 1201 Progression 15.422454
2         3x, 0.11  7.605908
3        3x, 0.214  6.493216
4              NH4 10.080297
5              NO3 10.997605

# Bar chart of mean productivity (g/m^2/day) per treatment, with both the
# initial grow-out and the final harvest left out of the calculation.
library(RColorBrewer)
coul <- brewer.pal(5, "Set2")  # five colours, one per treatment bar
barplot(
  height = treatment_mean_fl$x,
  # Spelled out in full: `names` only reached this argument through partial
  # argument matching.
  names.arg = treatment_mean_fl$Group.1,
  col = coul,
  xlab = "Treatment",
  ylab = "Algae Ash-free Dry Mass g/m^2/day",
  main = "Algae Productivity by Treatment w/o Initial Grow Out and Final Harvest",
  ylim = c(0, 25)  # same axis range as the other two treatment charts
)

Crashes

# Give the first harvest file the same descriptive column names used in the
# productivity section. The new names are not syntactic R names, so they
# need backticks when used. any_of() skips a column that is not present.
harvest1 <- harvest1 |>
  rename(any_of(c(
    "Harvest_density_g/L" = "AFDW..g.L.",
    "Ash-free_dry_mass_of_algal_harvest_g" = "AFDW..g.",
    "experimental_duration" = "time..d."
  )))

# Pond surface area used to turn a harvested mass into mass per unit area.
# The productivity column built below is labelled g/m^2/day, so the area is
# taken to be in square metres.
pond_area_m2 <- 4.2

# For each PondID/SourceID group: total ash-free dry mass, that total per
# unit pond area, and the highest harvest number. All three are repeated on
# every row of the group, and the result stays grouped.
harvest0 <- harvest1 |>
  group_by(PondID, SourceID) |>
  mutate(
    `Ash-free_dry_mass_sum` = sum(`Ash-free_dry_mass_of_algal_harvest_g`),
    `Ash-free_dry_mass_div_pond_area` = `Ash-free_dry_mass_sum` / pond_area_m2,
    maxharvest = max(Harvest.)
  )

# Keep the final-harvest row of each group; its duration is the length of
# the whole experiment. Dividing mass per area by that duration gives the
# productivity. filter() is used rather than logical indexing with `[` so a
# missing harvest number drops the row instead of producing an all-NA row.
harvest01 <- harvest0 |>
  filter(Harvest. == maxharvest) |>
  rename(tot_experimental_duration = experimental_duration) |>
  mutate(
    `Ash-free_dry_mass_div_time_g_m2_day` =
      `Ash-free_dry_mass_div_pond_area` / tot_experimental_duration
  )

# Treat a missing crash flag as 'no' so every row falls into one of the
# groups being compared.
harvest01[["crashn"]] <- replace_na(harvest01[["crash"]], 'no')

# Mean productivity for each value of the crash flag. The result has the
# flag in `Group.1` and the mean in `x`.
crashes_mean <- with(
  harvest01,
  aggregate(
    x = `Ash-free_dry_mass_div_time_g_m2_day`,
    by = list(crashn),
    FUN = mean
  )
)
print(crashes_mean)

  Group.1        x
1      no 1.622001
2     yes 2.415194

# Bar chart of mean productivity (g/m^2/day), one bar for each value of the
# crash flag.
library(RColorBrewer)
coul <- brewer.pal(5, "Set2")  # five colours requested; only the first bars' worth are used
barplot(
  height = crashes_mean$x,
  # Spelled out in full: `names` only reached this argument through partial
  # argument matching.
  names.arg = crashes_mean$Group.1,
  col = coul,
  xlab = "Pond pH Crash",
  ylab = "Algae Ash-free Dry Mass g/m^2/day",
  main = "Algae Productivity by Pond Crash During Harvest",
  ylim = c(0, 2.5),
  sub = "A pond pH crash is when a pond experiences a sudden and dramatic change in pH levels."
)