install packages
library(knitr)
library(seqinr)
library(reshape2)
library(ggplot2)
library(vegan)
library(stringr)
library(plyr)
#detach(package:dplyr)
library(readxl)
library(ggpubr)
library(segmented)
set.seed(100)
upload files
path<-"/Users/kylielanglois/OneDrive - SCCWRP/algae/FHABs/FHAB_geneTox/"
d<-read.csv(file.path(path, "2023-2025_WB_Toxins_qPCR Data_110725.csv"))
ceden<-read_excel(file.path("/Users/kylielanglois/OneDrive - SCCWRP/algae/mASCI",
"CEDEN_stationLU.xlsx"), sheet = 1)
ceden<-ceden[!duplicated(ceden$StationCode), ]
edit stationcodes, where possible, from CEDEN/SWAMP StationDescription
d$Location<-gsub("554SJC556", "544SJC556", d$Location)
d$Location<-gsub("633RLCK5", "633RLCK05", d$Location)
d$Location<-gsub("637ELGB01", "637EL6B", d$Location)
d$Location<-gsub("Cache Creek Camp Haswell", "560XCCHWC", d$Location)
d$Location<-gsub("American River @ Watt Ave.", "514SAC011", d$Location)
d$Location<-gsub("General Creek", "634S18359", d$Location)
d$Location<-gsub("Gold Run", "637CE0655", d$Location)
d$Location<-gsub("Laguna Lake at Dock", "310LAGDOC", d$Location)
d$Location<-gsub("Lake Clementine", "514PLA071", d$Location)
d$Location<-gsub("Lake Evans E Shore", "801PEL155", d$Location)
d$Location<-gsub("Lake Evans NE Shore", "801PEL155", d$Location)
d$Location<-gsub("Lake Evans SE Shore", "801PEL155", d$Location)
d$Location<-gsub("Lake Gregory", "628PGL147", d$Location)
d$Location<-gsub("Lake Hemet N. Shore", "802LHN004", d$Location)
d$Location<-gsub("Lake Nacimiento Day Use Area", "309NACIDU", d$Location)
d$Location<-gsub("Lake Nacimiento Marina Launch", "309NACIMA", d$Location)
d$Location<-gsub("Lake Nacimiento\\b", "309NACIMA", d$Location)
d$Location<-gsub("Lake Pillsbury", "111LKPL01", d$Location)
d$Location<-gsub("Lake San Antonio N. Shore", "309SANTMC", d$Location)
d$Location<-gsub("Lopez Lake Cottonwood Cove", "310PLL106", d$Location)
d$Location<-gsub("Lopez Lake Kayak Dock", "310PLL106", d$Location)
d$Location<-gsub("Lost Lake Unnamed Pond", "801LL0001", d$Location)
d$Location<-gsub("Lost Lakes", "801LL0001", d$Location)
d$Location<-gsub("Oso Flaco Creek", "312OFL" , d$Location)
d$Location<-gsub("Prosser Creek Reservoir", "635PS0294", d$Location)
d$Location<-gsub("San Joaquin River", "BG30", d$Location)
d$Location<-gsub("Ventura River", "402VNTEST", d$Location)
d$Location<-gsub("Upper Truckee River", "3-404", d$Location)
d$Location<-gsub("Tallac Creek", "SLT-TALL-00", d$Location)
d$Location<-gsub("Spring Pond", "909SPNGPD", d$Location)
d$Location<-gsub("Spring Lake", "114PSP009", d$Location)
d$Location<-gsub("Smith Creek", "204R00189", d$Location)
d$Location<-gsub("Santa Margarita lake", "309SMLMAR", d$Location)
organize into supertypes
d.loc<-merge(ceden, d, by.x="StationCode", by.y="Location", all.y = T)
#merge with CEDEN data
#supertypes from Jayme Smith
d.loc$Sample.SuperType<-
ifelse(grepl("AlgalMat", d.loc$Sample.Type), "AlgalMat",
ifelse(grepl("Algal Mat", d.loc$Sample.Type), "AlgalMat",
ifelse(grepl("Scum", d.loc$Sample.Type), "Scum",
ifelse(grepl("Water", d.loc$Sample.Type), "Water", ""))))
#supertypes from Jayme Smith
d.loc$System.Supertype<-
ifelse(grepl("Lake|Pond|Marina|Beach|Reservoir|pond|Cove|Levee",
d.loc$StationCode), "nonflowing",
ifelse(grepl("River|Creek|Eel|Channel|Run|Canal",
d.loc$StationCode), "flowing", NA))
#supertypes from Jayme Smith
d.loc$System.Supertype<-
ifelse(!is.na(d.loc$System.Supertype), d.loc$System.Supertype,
ifelse(grepl("Falls|Trib|River|Creek|Channel|Klamath",
d.loc$StationName), "flowing",
ifelse(grepl("Lake|Cove|Reservoir|Bay|Pond|Slough|pond|Lagoon|",
d.loc$StationName), "nonflowing", NA)))
#estimate regions
d.loc$Region<-sub("_.*", "", d.loc$Project.Name)
d.loc$Region<-gsub("RWB", "", d.loc$Region)
d.loc$Region<-gsub("RWQCB", "", d.loc$Region)
d.loc$Region<-gsub("SWAMP", "6", d.loc$Region)
library(dplyr)
#summarize supertypes
d.loc.summ <- d.loc %>%
group_by(System.Supertype, Sample.SuperType) %>%
summarise(samples=length(Sample.SuperType))
kable(d.loc.summ)
| System.Supertype | Sample.SuperType | samples |
|---|---|---|
| flowing | AlgalMat | 92 |
| flowing | Scum | 3 |
| flowing | Water | 45 |
| nonflowing | AlgalMat | 10 |
| nonflowing | Scum | 11 |
| nonflowing | Water | 130 |
| NA | AlgalMat | 29 |
| NA | Scum | 3 |
| NA | Water | 25 |
d.loc1<-d.loc
d.loc1[,20:27]<-lapply(d.loc1[,20:27], gsub, pattern="([0-9]+).*$", replacement=1)
#if value, it was tested
d.loc1[,20:27]<-lapply(d.loc1[,20:27], gsub, pattern="ND", replacement=1)
#if non-detect, it was tested
d.loc1[,20:27]<-lapply(d.loc1[,20:27], gsub, pattern="^$", replacement=NA)
#if NA, it was NOT tested
d.loc1[,20:27]<-sapply(d.loc1[,20:27], as.numeric)
d.loc1$tox.present<-rowSums(d.loc1[, grepl("ug.L", colnames(d.loc1))], na.rm = T)
#count toxin tests
d.loc1$gene.present<-rowSums(d.loc1[, grepl("mL", colnames(d.loc1))], na.rm = T)
#count qPCR tests
#count paired toxin-qPCR tests for each group
d.loc1$ANA<-rowSums(d.loc1[,
grepl("anaC|Anatox", colnames(d.loc1))], na.rm = T)
d.loc1$MYC<-rowSums(d.loc1[,
grepl("mcy|Microcys", colnames(d.loc1))], na.rm = T)
d.loc1$CYL<-rowSums(d.loc1[,
grepl("cyr|Cylind", colnames(d.loc1))], na.rm = T)
d.loc1$SXT<-rowSums(d.loc1[,
grepl("sxt|Sax", colnames(d.loc1))], na.rm = T)
#reshape for plotting
d.tox1.m<-melt(d.loc1[, 20:29])
d.tox1.m$group<-ifelse(grepl("anaC|Anatox", d.tox1.m$variable), "ANA",
ifelse(grepl("mcy|Microcys", d.tox1.m$variable), "MCY",
ifelse(grepl("cyr|Cylind", d.tox1.m$variable), "CYL",
ifelse(grepl("sxt|Sax", d.tox1.m$variable), "SXT", NA))))
d.tox1.m$assay<-ifelse(grepl("ug.L", d.tox1.m$variable), "toxin", "gene")
gt2<-ggplot(data = d.tox1.m)+
geom_bar(aes(x=group, y=value, fill=assay),
stat = "identity", position = position_stack())+
scale_fill_manual(values=c("#669900", "#ab75bd"), name="",
labels=c("gene", "toxin"))+
facet_grid(Sample.SuperType~System.Supertype, scales = "free_y")+
labs(x="assays", y="# samples")+
theme_bw()+
theme(legend.position = "bottom")
gt2
paired sample prep
d.loc2<-d.loc1[!is.na(d.loc1$System.Supertype), ] #only those with systems
d.ana<-d.loc2$LIMS.ID[d.loc2$ANA==2] #paired gene-toxin ONLY
df.ana<-d.loc[d.loc$LIMS.ID %in% d.ana, ] #get all info of paired
df.ana.m<-melt(df.ana[, grepl("LIMS|Super|anaC|Anat", colnames(df.ana))],
id.vars = c("LIMS.ID", "Sample.SuperType", "System.Supertype"))
#only need some columns
#repeat for all genes/toxins
d.myc<-d.loc2$LIMS.ID[d.loc2$MYC==2] #paired gene-toxin ONLY
df.myc<-d.loc[d.loc$LIMS.ID %in% d.myc, ] #get all info of paired
df.myc.m<-melt(df.myc[, grepl("LIMS|Super|mcyE|Micro", colnames(df.myc))],
id.vars = c("LIMS.ID", "Sample.SuperType", "System.Supertype"))
d.cyl<-d.loc2$LIMS.ID[d.loc2$CYL==2] #paired gene-toxin ONLY
df.cyl<-d.loc[d.loc$LIMS.ID %in% d.cyl, ] #get all info of paired
df.cyl.m<-melt(df.cyl[, grepl("LIMS|Super|cyr|Cyl", colnames(df.cyl))],
id.vars = c("LIMS.ID", "Sample.SuperType", "System.Supertype"))
d.sxt<-d.loc2$LIMS.ID[d.loc2$SXT==2] #paired gene-toxin ONLY
df.sxt<-d.loc[d.loc$LIMS.ID %in% d.sxt, ] #get all info of paired
df.sxt.m<-melt(df.sxt[, grepl("LIMS|Super|sxt|Saxi", colnames(df.sxt))],
id.vars = c("LIMS.ID", "Sample.SuperType", "System.Supertype"))
df.all<-rbind(df.ana.m, df.myc.m, df.cyl.m, df.sxt.m)
#combine columns of interest
#change numbers with commas into numeric
df.all$value<-gsub(",", "", df.all$value)
df.all$value<-gsub("ND", "0", df.all$value) #CHANGE ND TO 0 (for plotting)
df.all$value<-as.numeric(df.all$value)
#for plotting
df.all$assay<-ifelse(grepl("qPCR", df.all$variable),
"gene (copies per mL)", "toxin (ug per L)")
df.all$group<-ifelse(grepl("anaC|Anatox", df.all$variable), "ANA",
ifelse(grepl("mcy|Microcys", df.all$variable), "MCY",
ifelse(grepl("cyr|Cylind", df.all$variable), "CYL",
ifelse(grepl("sxt|Sax", df.all$variable), "SXT", NA))))
df.all$supertype<-paste(df.all$System.Supertype, df.all$Sample.SuperType)
df.all$supertype<-gsub("nonflowing", "NF", df.all$supertype)
df.all$supertype<-gsub("flowing", "F", df.all$supertype)
#get region
df.all$uniqueID<-paste(df.all$LIMS.ID, df.all$supertype, df.all$group)
df.all<-merge(df.all, d.loc[,grepl("LIMS|Region", colnames(d.loc))],
by="LIMS.ID", all.x = T)
just look at presence/absence
#df.all.3<-df.all[!is.na(df.all$value), ]
apg<-ggplot(data = df.all)+
geom_tile(aes(x=supertype, y=assay, fill=log10(value)))+
scale_fill_gradient2(low = "black", mid = "#558000", high = "white",
midpoint = 1, na.value = "grey")+
facet_grid(.~group, scales = "free_x", space = "free_x")+
theme_bw()+
theme(axis.text.x =element_text(angle = 45, hjust = 1),
axis.text.y =element_text(angle = 45, hjust = 1),
text = element_text(size=14),
axis.title = element_blank(),
legend.position = "top",
legend.direction = "horizontal")
apg
look at actual concentrations
#summarized paired samples
df.all.sum <- df.all %>%
group_by(variable) %>%
summarise(assays=length(value)-sum(is.na(value)))
kable(df.all.sum)
| variable | assays |
|---|---|
| qPCR.anaC..copies.mL. | 120 |
| Anatoxin.a..ug.L. | 120 |
| qPCR.mcyE..copies.mL. | 149 |
| Microcystin.Nod…ug.L. | 149 |
| qPCR.cyrA..copies.mL. | 26 |
| Cylindrospermopsin..ug.L. | 26 |
| qPCR.sxtA..copies.mL. | 24 |
| Saxitoxin..ug.L. | 24 |
df.all.1<-df.all[grepl("qPCR", df.all$variable), ] #genes
df.all.2<-df.all[!grepl("qPCR", df.all$variable), ] #toxins
#summary stats of paired genes
df.harm.gene.summ <- df.all.1[df.all.1$value>0, ] %>%
group_by(System.Supertype, Sample.SuperType, variable) %>%
summarise(min=min(value, na.rm = T),
max=max(value, na.rm = T),
mean=mean(value, na.rm = T),
count=length(variable))
#summary stats of paired toxin
df.harm.tox.summ <- df.all.2[df.all.2$value>0, ] %>%
group_by(System.Supertype, Sample.SuperType, variable) %>%
summarise(min=min(value, na.rm = T),
max=max(value, na.rm = T),
mean=mean(value, na.rm = T),
count=length(variable))
#put together to plot
df.harm.summ<-rbind(df.harm.gene.summ, df.harm.tox.summ)
hg<-ggplot(data = df.all)+
geom_boxplot(aes(x=supertype, y=log10(value), color=System.Supertype))+
geom_point(aes(x=supertype, y=log10(value),
color=System.Supertype, shape=Sample.SuperType),
alpha=0.5, size=3)+
facet_grid(assay~group, scales = "free_y")+
labs(x="", y="log10 transformed",
title =
paste("FHABs 2023-2025 harmonized samples\nwith paired gene-toxin data",
Sys.Date()))+
scale_shape_manual(values=c(1,2,6))+
scale_color_manual(values=c("#2985a3", "#14b814"))+
theme_bw()+
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "bottom")+
guides(color=guide_legend(direction = "vertical"),
shape=guide_legend(direction = "vertical"))
hg
### linear regression of all paired
df.all.4<-merge(df.all.1,
df.all.2[, grepl("value|unique", colnames(df.all.2))],
by="uniqueID")
colnames(df.all.4)<-gsub("value.x", "gene", colnames(df.all.4))
colnames(df.all.4)<-gsub("value.y", "toxin", colnames(df.all.4))
lnp<-ggplot(data = df.all.4)+
geom_point(aes(x=log10(gene), y=log10(toxin),
color=group, shape=group), size=3, alpha=0.75)+
facet_grid(System.Supertype~Sample.SuperType)+
scale_shape_manual(values=c(15, 3, 16, 4))+
scale_color_manual(values=c("#5ab4ac", "#8c510a", "#1b7837", "#762a83"))+
theme_bw()+
theme(text = element_text(size=14))
lnp
colnames(df.all.4)
## [1] "uniqueID" "LIMS.ID" "Sample.SuperType" "System.Supertype"
## [5] "variable" "gene" "assay" "group"
## [9] "supertype" "Region" "toxin"
df.all.4$gene[df.all.4$gene==0]<-NA
df.all.4$toxin[df.all.4$toxin==0]<-NA
df.all.g<-df.all.4[, match(c("Sample.SuperType", "System.Supertype",
"gene", "toxin", "group"),
colnames(df.all.4))]
#MCY and ANA only
df.g<-df.all.g[grepl("MCY|ANA", df.all.g$group), ]
df.g$supertype<-paste(df.g$System.Supertype, df.g$Sample.SuperType)
df.g$supertype<-gsub("nonflowing", "NF", df.g$supertype)
df.g$supertype<-gsub("flowing", "F", df.g$supertype)
ng<-ggplot(data = df.g)+
geom_abline(slope = 1, intercept = 1.25, color="grey", alpha=0.75)+
geom_abline(slope = 1, intercept = 5, color="grey", alpha=0.75)+
geom_vline(xintercept=0, lty=2, color="red")+
geom_vline(xintercept=1, lty=2, color="red")+
geom_qq(aes(sample=log10(gene)), shape=16)+
geom_qq(aes(sample=log10(toxin)), shape=8)+
facet_grid(supertype~group)+
ggtitle("qq plot, .log10(gene), *log10(toxin)")+
theme_bw()
ng
dg<-ggplot(data = df.g)+
geom_density(aes(x=log10(gene)))+
geom_density(aes(x=log10(toxin)), color="red")+
facet_grid(supertype~group)+
ggtitle("density plot, black:log10(gene), red: log10(toxin)")+
theme_bw()
dg
df.g.a<-df.all.g[grepl("ANA", df.all.g$group), ]
df.g.a<-df.g.a[, !grepl("group", colnames(df.g.a))]
df.g.a$toxin.pa<-ifelse(df.g.a$toxin>0, 1, 0)
df.g.a<-subset(df.g.a, !is.na(df.g.a$gene))
df.g.a<-subset(df.g.a, !is.na(df.g.a$toxin))
df.g.a$gene.log10<-log10(df.g.a$gene)
df.g.a$toxin.log10<-log10(df.g.a$toxin)
df.g.a$toxin.log10<-ifelse(df.g.a$toxin.log10<0, 0, df.g.a$toxin.log10)
df.g.a1<-df.g.a[!grepl("nonflowing", df.g.a$System.Supertype), ]
df.g.a1<-df.g.a1[grepl("AlgalMat", df.g.a1$Sample.SuperType), ]
df.g.a1<-df.g.a1[, !grepl("SuperType", colnames(df.g.a1), ignore.case = T)]
glm.1 <- glm(toxin~gene, data=df.g.a1[],
family = Gamma(link = "inverse"))
#Gamma(link = "inverse")
#binomial(link="logit")
summary(glm.1)
##
## Call:
## glm(formula = toxin ~ gene, family = Gamma(link = "inverse"),
## data = df.g.a1[])
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.966 -3.471 -2.582 -1.935 8.433
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.825e-04 2.555e-04 1.106 0.275
## gene -3.170e-13 2.002e-12 -0.158 0.875
##
## (Dispersion parameter for Gamma family taken to be 35.1535)
##
## Null deviance: 417.75 on 46 degrees of freedom
## Residual deviance: 417.08 on 45 degrees of freedom
## AIC: 641.21
##
## Number of Fisher Scoring iterations: 13
prep
df.all.4.mcy<-df.all.4[grepl("MCY", df.all.4$group), ] #just MCY
df.all.4.mcy<-df.all.4.mcy[!is.na(df.all.4.mcy$toxin),] #remove NAs
df.all.4.mcy<-df.all.4.mcy[!is.na(df.all.4.mcy$gene),] #remove NAs
#for linear model
df.all.4.mcy1<-df.all.4.mcy
df.all.4.mcy1$zero<-df.all.4.mcy1$gene+df.all.4.mcy1$toxin
df.all.4.mcy1<-subset(df.all.4.mcy1, df.all.4.mcy1$zero>0)
#remove both toxin and gene are zero
model, plot
#linear models of each supertype
lm1<-lm(log10(toxin) ~ log10(gene),
data = df.all.4.mcy1[df.all.4.mcy1$supertype=="NF Scum",])
summary(lm1)
##
## Call:
## lm(formula = log10(toxin) ~ log10(gene), data = df.all.4.mcy1[df.all.4.mcy1$supertype ==
## "NF Scum", ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.7245 -0.5016 -0.3602 0.6260 1.1594
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.7744 1.8177 -3.177 0.01915 *
## log10(gene) 1.1877 0.2842 4.180 0.00582 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7688 on 6 degrees of freedom
## Multiple R-squared: 0.7444, Adjusted R-squared: 0.7017
## F-statistic: 17.47 on 1 and 6 DF, p-value: 0.005815
lm2<-lm(log10(toxin) ~ log10(gene),
data = df.all.4.mcy1[df.all.4.mcy1$supertype=="NF Water",])
summary(lm2)
##
## Call:
## lm(formula = log10(toxin) ~ log10(gene), data = df.all.4.mcy1[df.all.4.mcy1$supertype ==
## "NF Water", ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.0262 -0.3765 -0.1162 0.3240 1.5742
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.29961 0.43197 -7.639 1.71e-07 ***
## log10(gene) 0.77769 0.08234 9.445 5.22e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6173 on 21 degrees of freedom
## Multiple R-squared: 0.8094, Adjusted R-squared: 0.8004
## F-statistic: 89.2 on 1 and 21 DF, p-value: 5.218e-09
lm3<-lm(log10(toxin) ~ log10(gene),
data = df.all.4.mcy1[df.all.4.mcy1$supertype=="F Water",])
summary(lm3)
##
## Call:
## lm(formula = log10(toxin) ~ log10(gene), data = df.all.4.mcy1[df.all.4.mcy1$supertype ==
## "F Water", ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.9127 -0.1326 0.1651 0.1823 0.4004
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.0857 0.7781 -2.680 0.03152 *
## log10(gene) 0.6017 0.1472 4.087 0.00465 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4176 on 7 degrees of freedom
## Multiple R-squared: 0.7047, Adjusted R-squared: 0.6625
## F-statistic: 16.7 on 1 and 7 DF, p-value: 0.004648
lm.mcy.combo<-df.all.4.mcy1[grepl("NF Scum|NF Water|F Water", df.all.4.mcy1$supertype), ]
#plot all supertypes with linear smoothing, separate by facet
lnp2<-ggplot(data = lm.mcy.combo)+
geom_point(aes(x=log10(gene), y=log10(toxin), shape=Region),
size=3, alpha=0.75, color="#1b7837")+
geom_smooth(aes(x=log10(gene), y=log10(toxin)),
method = "lm")+
scale_shape_manual(values=c(15, 16, 17, 18, 8, 4))+
facet_grid(~supertype, scales = "free_x")+
theme_bw()+
theme(text = element_text(size=14),
legend.position = "bottom",
legend.direction = "horizontal")+
guides(shape=guide_legend(ncol = 6))
lnp2
prep
df.all.4.ana<-df.all.4[grepl("ANA", df.all.4$group), ] #get just ana
df.all.4.ana<-df.all.4.ana[!is.na(df.all.4.ana$toxin),] #remove NAs
df.all.4.ana<-df.all.4.ana[!is.na(df.all.4.ana$gene),] #remove NAs
model, plot
df.all.4.ana.a<-df.all.4.ana[df.all.4.ana$supertype=="F AlgalMat",]
#just sample supertype with most data points
#remove outliers
df.all.4.ana.a1<-subset(df.all.4.ana.a, df.all.4.ana.a$toxin<5000)
df.all.4.ana.a1<-subset(df.all.4.ana.a1, df.all.4.ana.a1$gene<12e7)
df.all.ana.select<-d.loc[d.loc$LIMS.ID %in% df.all.4.ana.a1$LIMS.ID, ]
#linear model
lm.an1<-lm((toxin) ~ (gene), data = df.all.4.ana.a1)
summary(lm.an1)
##
## Call:
## lm(formula = (toxin) ~ (gene), data = df.all.4.ana.a1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1070.38 -38.17 -32.63 31.41 969.69
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.455e+01 5.018e+01 0.689 0.495
## gene 3.809e-05 2.220e-06 17.157 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 310.7 on 41 degrees of freedom
## Multiple R-squared: 0.8777, Adjusted R-squared: 0.8748
## F-statistic: 294.4 on 1 and 41 DF, p-value: < 2.2e-16
#plot linear model, with linear smoothing
lnp3<-ggplot(data = df.all.4.ana.a1,
aes(x=(gene), y=(toxin)))+
geom_point(aes(shape=Region),
size=3, alpha=0.75, color="#1b7837")+
geom_smooth(method = "lm")+
scale_shape_manual(values=c(15, 16, 17, 18, 8, 4))+
facet_grid(~supertype, scales = "free_x")+
theme_bw()+
theme(text = element_text(size=14),
legend.position = "bottom",
legend.direction = "horizontal")+
guides(shape=guide_legend(ncol = 6))
lnp3