library(dplyr)
library(ggplot2)
library(rstatix)
library(tidyverse)
data = read_delim("Project_1_integration_data.csv", delim = ',')
Rows: 148 Columns: 7── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (1): Analyte
dbl (6): Concentration, Peak, Retention Time, Area, Height, Width
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
cereal_data = read_delim("cereal_samples.csv", delim = ',')
Rows: 18 Columns: 6── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (1): Sample
dbl (5): Peak, Ret_time, Area, Height, Width
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#Separate peaks by variables
fructose = data %>%
filter(Analyte == "Fructose")
glucose = data %>%
filter(Analyte == "Glucose")
sucrose = data %>%
filter(Analyte == "Sucrose")
xylose = data %>%
filter(Analyte == "Xylose")
dsmo = data %>%
filter(Peak == 1)
fructose_peaks = fructose %>%
filter(Peak == 2) %>%
select(-Analyte) %>%
select(-Peak)
glucose_peaks = glucose %>%
filter(Peak == 2) %>%
select(-Analyte) %>%
select(-Peak)
sucrose_peaks = sucrose %>%
filter(Peak == 2) %>%
select(-Analyte) %>%
select(-Peak)
sucrose_peaks = sucrose_peaks[-c(3),]
xylose_peaks = xylose %>%
filter(`Retention Time`>10)
ggplot(data=fructose_peaks, mapping=aes(x=Concentration, y=Area)) +
geom_point(color = "blue", size = 2) +
ggtitle("Fructose")+
theme(plot.title=element_text(face='bold', size = 20, hjust = 0.5), panel.background = element_rect('azure2'))

ggplot(data=glucose_peaks, mapping=aes(x=Concentration, y=Area)) +
geom_point(color = "blue", size = 2) +
ggtitle("Glucose")+
theme(plot.title=element_text(face='bold', size = 20, hjust = 0.5), panel.background = element_rect('azure2'))

ggplot(data=sucrose_peaks, mapping=aes(x=Concentration, y=Area)) +
geom_point(color = "blue", size = 2) +
ggtitle("Sucrose")+
theme(plot.title=element_text(face='bold', size = 20, hjust = 0.5), panel.background = element_rect('azure2')) +
geom_smooth(method = 'lm') +
xlab("Concentration (g/L)")

# Ratio of the largest peak area's deviation from the mean area to the slope
# of a zero-intercept calibration fit.
#
# peaks: data frame or tibble with numeric `Area` and `Concentration` columns.
# Returns one unnamed numeric: |max(Area) - mean(Area)| / slope, where slope is
# the coefficient of lm(Area ~ Concentration + 0).
#
# NOTE(review): the textbook Grubbs statistic divides by the sample standard
# deviation (sd(peaks$Area)), not by a regression slope -- confirm which
# denominator is intended.
Grubbs_max <- function(peaks = tibble()) {
  largest_area <- max(peaks$Area)
  mean_area <- mean(peaks$Area)
  grubbs_reg <- lm(Area ~ Concentration + 0, data = peaks)

  # coef() replaces the misspelled `$coefficents`, which evaluated to NULL and
  # made the whole function return numeric(0).
  slope <- coef(grubbs_reg)[[1]]

  abs(largest_area - mean_area) / slope
}
Sucrose_reg = lm(Area ~ Concentration+0, data= sucrose_peaks)
summary(Sucrose_reg)
Call:
lm(formula = Area ~ Concentration + 0, data = sucrose_peaks)
Residuals:
Min 1Q Median 3Q Max
-15.561 -7.797 1.930 4.166 32.884
Coefficients:
Estimate Std. Error t value Pr(>|t|)
Concentration 18.908 1.002 18.87 7.89e-11 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 11.79 on 13 degrees of freedom
Multiple R-squared: 0.9648, Adjusted R-squared: 0.9621
F-statistic: 356.1 on 1 and 13 DF, p-value: 7.893e-11
sucrose_peaks = sucrose_peaks %>%
mutate(sig_pred = Concentration * Sucrose_reg$coefficients[1]) %>%
mutate(resid = Area - sig_pred)
ggplot(data=sucrose_peaks, mapping=aes(x=Concentration, y=resid)) +
geom_point(color = "blue", size = 2) +
ggtitle("Sucrose Residual")+
theme(plot.title=element_text(face='bold', size = 20, hjust = 0.5), panel.background = element_rect('azure2')) +
geom_hline(yintercept = 0)

suc_res_sd = sd(sucrose_peaks$resid)
LOD = 3 *suc_res_sd / as.numeric(Sucrose_reg$coefficients[1])
LOQ = 10 *suc_res_sd / as.numeric(Sucrose_reg$coefficients[1])
LOD
[1] 1.869589
LOQ
[1] 6.231964
Fructose_reg = lm(Area ~ Concentration, data= fructose_peaks)
summary(Fructose_reg)
Call:
lm(formula = Area ~ Concentration, data = fructose_peaks)
Residuals:
Min 1Q Median 3Q Max
-32.471 -15.573 -14.070 -3.192 174.167
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 46.747 23.400 1.998 0.0671 .
Concentration 5.142 15.388 0.334 0.7436
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 51.61 on 13 degrees of freedom
Multiple R-squared: 0.008517, Adjusted R-squared: -0.06775
F-statistic: 0.1117 on 1 and 13 DF, p-value: 0.7436
Glucose_reg = lm(Area ~ Concentration, data= glucose_peaks)
summary(Glucose_reg)
Call:
lm(formula = Area ~ Concentration, data = glucose_peaks)
Residuals:
Min 1Q Median 3Q Max
-12.4570 -5.6250 -0.5563 2.9662 22.9004
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 25.027 4.228 5.919 5.07e-05 ***
Concentration -5.413 2.780 -1.947 0.0735 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 9.326 on 13 degrees of freedom
Multiple R-squared: 0.2257, Adjusted R-squared: 0.1662
F-statistic: 3.79 on 1 and 13 DF, p-value: 0.0735
R_times = data.frame(Analyte = character(),
avg_r_time = numeric(),
sd_r_time = numeric(),
r_ci_low = numeric(),
r_ci_high = numeric())
R_times = rbind(R_times, data.frame(Analyte = "Sucrose",
avg_r_time = mean(sucrose_peaks$`Retention Time`),
sd_r_time = sd(sucrose_peaks$`Retention Time`),
r_ci_low = mean(sucrose_peaks$`Retention Time`)-2*sd(sucrose_peaks$`Retention Time`),
r_ci_high = mean(sucrose_peaks$`Retention Time`)+2*sd(sucrose_peaks$`Retention Time`)))
R_times = rbind(R_times, data.frame(Analyte = "Fructose",
avg_r_time = mean(fructose_peaks$`Retention Time`),
sd_r_time = sd(fructose_peaks$`Retention Time`),
r_ci_low = mean(fructose_peaks$`Retention Time`)-2*sd(fructose_peaks$`Retention Time`),
r_ci_high = mean(fructose_peaks$`Retention Time`)+2*sd(fructose_peaks$`Retention Time`)))
R_times = rbind(R_times, data.frame(Analyte = "Glucose",
avg_r_time = mean(glucose_peaks$`Retention Time`),
sd_r_time = sd(glucose_peaks$`Retention Time`),
r_ci_low = mean(glucose_peaks$`Retention Time`)-2*sd(glucose_peaks$`Retention Time`),
r_ci_high = mean(glucose_peaks$`Retention Time`)+2*sd(glucose_peaks$`Retention Time`)))
R_times = rbind(R_times, data.frame(Analyte = "Xylose",
avg_r_time = mean(xylose_peaks$`Retention Time`),
sd_r_time = sd(xylose_peaks$`Retention Time`),
r_ci_low = mean(xylose_peaks$`Retention Time`)-2*sd(xylose_peaks$`Retention Time`),
r_ci_high = mean(xylose_peaks$`Retention Time`)+2*sd(xylose_peaks$`Retention Time`)))
R_times = rbind(R_times, data.frame(Analyte = "DSMO",
avg_r_time = mean(dsmo$`Retention Time`),
sd_r_time = sd(dsmo$`Retention Time`),
r_ci_low = mean(dsmo$`Retention Time`)-2*sd(dsmo$`Retention Time`),
r_ci_high = mean(dsmo$`Retention Time`)+2*sd(dsmo$`Retention Time`)))
dfructose_peaks = fructose %>%
filter(Peak == 1) %>%
select(-Analyte)
dglucose_peaks = glucose %>%
filter(Peak == 1) %>%
select(-Analyte)
dsucrose_peaks = sucrose %>%
filter(Peak == 1) %>%
select(-Analyte)
dsucrose_peaks = dsucrose_peaks[-c(3),]
xsucrose_peaks = sucrose %>%
filter(`Retention Time` < 12.80 & `Retention Time` > 10.97) %>%
mutate(Peak = as.character("IS"))
sucrose_x_is = data.frame(Peak = numeric(),
Retention_time = numeric(),
Area = numeric())
sucrose_is = data.frame("Concentration" = sucrose_peaks$Concentration, "Fructose Peak" = sucrose_peaks$Area, "IS Peak" = xsucrose_peaks$Area)
sucrose_is = sucrose_is %>%
mutate(Response_Factor = (sucrose_is$Fructose.Peak/sucrose_is$Concentration)/(sucrose_is$IS.Peak/0.5))
sucrose_peaks = sucrose_peaks %>%
mutate(response_factor = (sucrose_peaks$Area/sucrose_peaks$Concentration)/dsucrose_peaks$Area)
print("Sucrose Peaks F Mean and SD")
[1] "Sucrose Peaks F Mean and SD"
mean(sucrose_peaks$response_factor)
[1] 3.869912
sd(sucrose_peaks$response_factor)
[1] 1.220331
print("DSMO Peaks Mean and SD")
[1] "DSMO Peaks Mean and SD"
mean(dsmo$Area)
[1] 11.10546
sd(dsmo$Area)
[1] 4.655197
print("DSMO F, sd, and CI")
[1] "DSMO F, sd, and CI"
dmso_RFF = mean(sucrose_peaks$response_factor)
dmso_RFF_sd = sd(sucrose_peaks$response_factor)
dmso_RFF
[1] 3.869912
dmso_RFF_sd
[1] 1.220331
dmso_RFF_low = mean(sucrose_peaks$response_factor) - 2 * sd(sucrose_peaks$response_factor)
dmso_RFF_low
[1] 1.42925
dmso_RFF_high = mean(sucrose_peaks$response_factor) + 2 * sd(sucrose_peaks$response_factor)
dmso_RFF_high
[1] 6.310574
print("Xylose F, sd, and CI")
[1] "Xylose F, sd, and CI"
mean(sucrose_is$Response_Factor)
[1] 1.17316
sd(sucrose_is$Response_Factor)
[1] 2.408369
mean(sucrose_is$Response_Factor) - 2 * sd(sucrose_is$Response_Factor)
[1] -3.643578
mean(sucrose_is$Response_Factor) + 2 * sd(sucrose_is$Response_Factor)
[1] 5.989898
cereal_data = cereal_data %>%
mutate(peak_type = case_when(
Ret_time >= 5.676919 & Ret_time <= 7.391209 ~ "DSMO",
Ret_time >= 8.363270 & Ret_time <= 8.595879 ~ "Sucrose",
Ret_time >= 10.119791 & Ret_time < 10.296231 ~ "Fructose",
Ret_time >= 10.296231 & Ret_time <= 10.653432 ~ "Fructose or Glucose",
Ret_time >= 10.653432 & Ret_time <= 10.724288 ~ "Fructose",
Ret_time >= 10.978320 & Ret_time <= 12.796417 ~ "Xylose",
TRUE ~ "Outside of 95% CI"
))
choco = cereal_data %>%
filter(Sample =="Choco") %>%
select(-Sample)
special_k = cereal_data %>%
filter(Sample =="Special_k") %>%
select(-Sample)
corn = cereal_data %>%
filter(Sample =="corn") %>%
select(-Sample)
#sucrose in solution (lowest conc. to test LOQ)
(corn$Area[2] * 5.0 / (corn$Area[1] * dmso_RFF_high))
[1] 8.580379
#g sugar per 100 g if sugar is evenly distributed
((corn$Area[2] * 5.0 / (corn$Area[1] * dmso_RFF_low)) * 0.1 / 11.36) * 100
[1] 33.34945
((corn$Area[2] * 5.0 / (corn$Area[1] * dmso_RFF_high)) * 0.1 / 11.36) * 100
[1] 7.553151
#g sugar per 100 g if sugar is all in there
((corn$Area[2] * 5.0 / (corn$Area[1] * dmso_RFF_low)) * 0.1 / 11.36) * 100 * 56.1042 / 67.2568
[1] 27.81941
((corn$Area[2] * 5.0 / (corn$Area[1] * dmso_RFF_high)) * 0.1 / 11.36) * 100 * 56.1042 / 67.2568
[1] 6.300679
---
title: "Project 1 Data Analysis: Sugars in Cereal"
author: "Jacob Ehrbaker"
output: html_notebook
---

```{r}
library(dplyr)
library(ggplot2)
library(rstatix)
library(tidyverse)
```

```{r}
# Calibration-standard integration results (one row per integrated peak).
data <- read_delim("Project_1_integration_data.csv", delim = ",")
# Integration results for the cereal extracts.
cereal_data <- read_delim("cereal_samples.csv", delim = ",")
```

```{r}
# Split the calibration table into one tibble per analyte.
fructose <- filter(data, Analyte == "Fructose")
glucose <- filter(data, Analyte == "Glucose")
sucrose <- filter(data, Analyte == "Sucrose")
xylose <- filter(data, Analyte == "Xylose")

# Every row labelled Peak 1, regardless of analyte.
# NOTE(review): later chunks treat these rows as the DMSO peak -- confirm that
# Peak 1 is the DMSO peak in every run.
dsmo <- filter(data, Peak == 1)
```

```{r}
# Keep only Peak 2 of each sugar standard and drop the identifier columns.
fructose_peaks <- fructose %>%
  filter(Peak == 2) %>%
  select(-Analyte, -Peak)

glucose_peaks <- glucose %>%
  filter(Peak == 2) %>%
  select(-Analyte, -Peak)

sucrose_peaks <- sucrose %>%
  filter(Peak == 2) %>%
  select(-Analyte, -Peak)

# Drop the third sucrose row.
# NOTE(review): presumably an excluded outlier -- the reason is not recorded
# here; confirm and document it.
sucrose_peaks <- sucrose_peaks[-3, ]

# Xylose rows are selected by retention time (> 10) rather than peak number.
xylose_peaks <- filter(xylose, `Retention Time` > 10)
```

```{r}
# Calibration scatter plots: peak area against concentration, one per sugar.
fructose_peaks %>%
  ggplot(aes(x = Concentration, y = Area)) +
  geom_point(color = "blue", size = 2) +
  labs(title = "Fructose") +
  theme(
    plot.title = element_text(face = "bold", size = 20, hjust = 0.5),
    panel.background = element_rect(fill = "azure2")
  )

glucose_peaks %>%
  ggplot(aes(x = Concentration, y = Area)) +
  geom_point(color = "blue", size = 2) +
  labs(title = "Glucose") +
  theme(
    plot.title = element_text(face = "bold", size = 20, hjust = 0.5),
    panel.background = element_rect(fill = "azure2")
  )

# The sucrose plot additionally overlays a linear fit and labels the x axis.
sucrose_peaks %>%
  ggplot(aes(x = Concentration, y = Area)) +
  geom_point(color = "blue", size = 2) +
  geom_smooth(method = "lm") +
  labs(title = "Sucrose", x = "Concentration (g/L)") +
  theme(
    plot.title = element_text(face = "bold", size = 20, hjust = 0.5),
    panel.background = element_rect(fill = "azure2")
  )


```


```{r}
# Ratio of the largest peak area's deviation from the mean area to the slope
# of a zero-intercept calibration fit.
#
# peaks: data frame or tibble with numeric `Area` and `Concentration` columns.
# Returns one unnamed numeric: |max(Area) - mean(Area)| / slope, where slope is
# the coefficient of lm(Area ~ Concentration + 0).
#
# NOTE(review): the textbook Grubbs statistic divides by the sample standard
# deviation (sd(peaks$Area)), not by a regression slope -- confirm which
# denominator is intended.
Grubbs_max <- function(peaks = tibble()) {
  largest_area <- max(peaks$Area)
  mean_area <- mean(peaks$Area)
  grubbs_reg <- lm(Area ~ Concentration + 0, data = peaks)

  # coef() replaces the misspelled `$coefficents`, which evaluated to NULL and
  # made the whole function return numeric(0).
  slope <- coef(grubbs_reg)[[1]]

  abs(largest_area - mean_area) / slope
}
```

```{r}
# Sucrose calibration line forced through the origin (no intercept term).
# The formula is kept verbatim because summary() echoes the call.
Sucrose_reg <- lm(Area ~ Concentration + 0, data = sucrose_peaks)
summary(Sucrose_reg)
```
```{r}
# Predicted signal from the through-origin fit and the residual of each
# sucrose standard against it.
sucrose_peaks <- mutate(
  sucrose_peaks,
  sig_pred = Concentration * Sucrose_reg$coefficients[1],
  resid = Area - sig_pred
)

# Residuals against concentration, with a reference line at zero.
sucrose_peaks %>%
  ggplot(aes(x = Concentration, y = resid)) +
  geom_point(color = "blue", size = 2) +
  geom_hline(yintercept = 0) +
  labs(title = "Sucrose Residual") +
  theme(
    plot.title = element_text(face = "bold", size = 20, hjust = 0.5),
    panel.background = element_rect(fill = "azure2")
  )
```

```{r}
# Limits of detection and quantitation from the residual scatter:
# 3 * sd / slope and 10 * sd / slope, using the sucrose calibration slope.
suc_res_sd <- sd(sucrose_peaks$resid)
LOD <- 3 * suc_res_sd / unname(Sucrose_reg$coefficients[1])
LOQ <- 10 * suc_res_sd / unname(Sucrose_reg$coefficients[1])
LOD
LOQ
```

```{r}
# Fructose calibration fit; unlike the sucrose fit, this one keeps an intercept.
Fructose_reg <- lm(Area ~ Concentration, data = fructose_peaks)
summary(Fructose_reg)
```

```{r}
# Glucose calibration fit; unlike the sucrose fit, this one keeps an intercept.
Glucose_reg <- lm(Area ~ Concentration, data = glucose_peaks)
summary(Glucose_reg)
```

```{r}
# Retention-time summary per analyte: mean, standard deviation, and a
# mean +/- 2 * sd window (stored as r_ci_low / r_ci_high).
# Built in one pass inside local() so no helper variables leak into the
# notebook environment.
R_times <- local({
  rt_by_analyte <- list(
    Sucrose = sucrose_peaks$`Retention Time`,
    Fructose = fructose_peaks$`Retention Time`,
    Glucose = glucose_peaks$`Retention Time`,
    Xylose = xylose_peaks$`Retention Time`,
    DSMO = dsmo$`Retention Time`
  )

  rt_mean <- vapply(rt_by_analyte, mean, numeric(1), USE.NAMES = FALSE)
  rt_sd <- vapply(rt_by_analyte, sd, numeric(1), USE.NAMES = FALSE)

  data.frame(
    Analyte = names(rt_by_analyte),
    avg_r_time = rt_mean,
    sd_r_time = rt_sd,
    r_ci_low = rt_mean - 2 * rt_sd,
    r_ci_high = rt_mean + 2 * rt_sd
  )
})
```


```{r}
# Peak 1 rows of each sugar standard, with the Analyte column dropped.
dfructose_peaks <- select(filter(fructose, Peak == 1), -Analyte)
dglucose_peaks <- select(filter(glucose, Peak == 1), -Analyte)
dsucrose_peaks <- select(filter(sucrose, Peak == 1), -Analyte)

# Drop the third row, mirroring the row removed from sucrose_peaks so the two
# tables stay aligned row by row.
dsucrose_peaks <- dsucrose_peaks[-3, ]

# Sucrose-run peaks whose retention time lies strictly between 10.97 and
# 12.80, relabelled as the internal standard ("IS").
xsucrose_peaks <- sucrose %>%
  filter(`Retention Time` > 10.97, `Retention Time` < 12.80) %>%
  mutate(Peak = "IS")

# Empty placeholder table; nothing in this notebook fills or reads it.
sucrose_x_is <- data.frame(
  Peak = numeric(),
  Retention_time = numeric(),
  Area = numeric()
)




```

```{r}
# Pair each sucrose standard's area with the internal-standard area from
# xsucrose_peaks (rows are matched by position).
# NOTE(review): the column is called Fructose.Peak but holds sucrose_peaks$Area
# -- the name looks like a leftover; confirm before renaming.
sucrose_is <- data.frame(
  Concentration = sucrose_peaks$Concentration,
  Fructose.Peak = sucrose_peaks$Area,
  IS.Peak = xsucrose_peaks$Area
)

# Response factor: (analyte area / analyte concentration) / (IS area / 0.5).
# NOTE(review): 0.5 is presumably the internal-standard concentration -- confirm.
sucrose_is$Response_Factor <- with(
  sucrose_is,
  (Fructose.Peak / Concentration) / (IS.Peak / 0.5)
)
```

```{r}
# Response factor of each sucrose standard relative to the Peak 1 area of the
# same run; dsucrose_peaks is matched to sucrose_peaks by row position.
sucrose_peaks <- mutate(
  sucrose_peaks,
  response_factor = (Area / Concentration) / dsucrose_peaks$Area
)
```

```{r}
# Mean and standard deviation of the sucrose response factors.
print("Sucrose Peaks F Mean and SD")
with(sucrose_peaks, mean(response_factor))
with(sucrose_peaks, sd(response_factor))

# Mean and standard deviation of the Peak 1 areas across all runs.
print("DSMO Peaks Mean and SD")
with(dsmo, mean(Area))
with(dsmo, sd(Area))
```

```{r}
# Response factor against the Peak 1 reference: mean, sd, and the
# mean +/- 2 * sd bounds used by the cereal calculations.
print("DSMO F, sd, and CI")
dmso_RFF <- mean(sucrose_peaks$response_factor)
dmso_RFF_sd <- sd(sucrose_peaks$response_factor)
dmso_RFF
dmso_RFF_sd
dmso_RFF_low <- dmso_RFF - 2 * dmso_RFF_sd
dmso_RFF_low
dmso_RFF_high <- dmso_RFF + 2 * dmso_RFF_sd
dmso_RFF_high

# The same four statistics for the internal-standard response factor.
print("Xylose F, sd, and CI")
with(sucrose_is, mean(Response_Factor))
with(sucrose_is, sd(Response_Factor))
with(sucrose_is, mean(Response_Factor) - 2 * sd(Response_Factor))
with(sucrose_is, mean(Response_Factor) + 2 * sd(Response_Factor))
```

```{r}
# Label each cereal peak by the retention-time window it falls into.
# case_when() takes the first matching branch, so the order below matters.
# NOTE(review): the bounds look like the mean +/- 2 * sd windows stored in
# R_times, copied in by hand -- confirm they are still in sync.
cereal_data <- mutate(
  cereal_data,
  peak_type = case_when(
    5.676919 <= Ret_time & Ret_time <= 7.391209 ~ "DSMO",
    8.363270 <= Ret_time & Ret_time <= 8.595879 ~ "Sucrose",
    10.119791 <= Ret_time & Ret_time < 10.296231 ~ "Fructose",
    10.296231 <= Ret_time & Ret_time <= 10.653432 ~ "Fructose or Glucose",
    10.653432 <= Ret_time & Ret_time <= 10.724288 ~ "Fructose",
    10.978320 <= Ret_time & Ret_time <= 12.796417 ~ "Xylose",
    TRUE ~ "Outside of 95% CI"
  )
)
```

```{r}
# One table per cereal sample, with the Sample column dropped.
# Sample labels are matched exactly, including their capitalisation.
choco <- select(filter(cereal_data, Sample == "Choco"), -Sample)
special_k <- select(filter(cereal_data, Sample == "Special_k"), -Sample)
corn <- select(filter(cereal_data, Sample == "corn"), -Sample)
```

```{r}
# Sucrose estimates for the corn sample. Rows 1 and 2 of `corn` are used as the
# reference and sucrose peaks respectively.
# NOTE(review): rows are picked by position, not by peak_type -- confirm the
# row order. The constants 5.0, 0.1, 11.36, 56.1042 and 67.2568 are presumably
# a concentration, a volume and sample masses; their units are not recorded here.

# sucrose in solution (lowest conc. to test LOQ)
with(corn, Area[2] * 5.0 / (Area[1] * dmso_RFF_high))

# g sugar per 100 g if sugar is evenly distributed
with(corn, ((Area[2] * 5.0 / (Area[1] * dmso_RFF_low)) * 0.1 / 11.36) * 100)
with(corn, ((Area[2] * 5.0 / (Area[1] * dmso_RFF_high)) * 0.1 / 11.36) * 100)

# g sugar per 100 g if sugar is all in there
with(
  corn,
  ((Area[2] * 5.0 / (Area[1] * dmso_RFF_low)) * 0.1 / 11.36) * 100 * 56.1042 / 67.2568
)
with(
  corn,
  ((Area[2] * 5.0 / (Area[1] * dmso_RFF_high)) * 0.1 / 11.36) * 100 * 56.1042 / 67.2568
)
```

