library(ggplot2)
library(readxl)
library(tidyverse) # you may not have this installed yet. Use install.packages("tidyverse"). You'll get an error message if you try to activate (load) a package you have not installed.
## -- Attaching packages -------------------- tidyverse 1.2.1 --
## v tibble 2.1.3 v purrr 0.3.2
## v tidyr 0.8.3 v dplyr 0.8.3
## v readr 1.3.1 v stringr 1.4.0
## v tibble 2.1.3 v forcats 0.4.0
## -- Conflicts ----------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Potato data: pick the Excel workbook interactively and read it in.
potato.dat <- file.choose() %>%
  read_xlsx()
# Column names of the potato data
colnames(potato.dat)
## [1] "Group_ID" "Potato ID" "Treatment"
## [4] "Initial_length_mm" "Initial_mass_g" "Final_length_mm"
## [7] "Final_mass_g" "Change_mass_g" "Change_length_mm"
# Per-column summary statistics
summary(potato.dat)
## Group_ID Potato ID Treatment Initial_length_mm
## Length:72 Min. : 1.00 Length:72 Min. :50.50
## Class :character 1st Qu.: 3.75 Class :character 1st Qu.:60.00
## Mode :character Median : 6.50 Mode :character Median :60.00
## Mean : 6.50 Mean :58.42
## 3rd Qu.: 9.25 3rd Qu.:60.00
## Max. :12.00 Max. :60.00
##
## Initial_mass_g Final_length_mm Final_mass_g Change_mass_g
## Min. :5.100 Min. :50.30 Min. :4.700 Min. :-1.20000
## 1st Qu.:6.000 1st Qu.:59.00 1st Qu.:6.000 1st Qu.: 0.00000
## Median :6.300 Median :60.50 Median :6.400 Median : 0.15000
## Mean :6.235 Mean :59.31 Mean :6.276 Mean : 0.04167
## 3rd Qu.:6.500 3rd Qu.:62.00 3rd Qu.:6.725 3rd Qu.: 0.40000
## Max. :6.900 Max. :65.00 Max. :7.200 Max. : 1.80000
##
## Change_length_mm
## Min. :-1.0000
## 1st Qu.: 0.0000
## Median : 0.2000
## Mean : 0.9859
## 3rd Qu.: 2.0000
## Max. : 5.0000
## NA's :1
#Plot for length
# Histogram of the final potato length, bars filled by Treatment.
potato.dat %>%
  ggplot(mapping = aes(x = Final_length_mm, fill = Treatment)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#I set fill = Treatment to color the bars by our Treatments
#Plot for mass
# Histogram of the final potato mass, bars filled by Treatment.
potato.dat %>%
  ggplot(mapping = aes(x = Final_mass_g, fill = Treatment)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#Use subtraction to see the difference between initial and final
# Histogram of the per-potato length difference (initial minus final),
# bars filled by Treatment.
# NOTE(review): the plotted value is Initial - Final, so a potato that got
# longer lands on the negative side of zero -- confirm this sign is intended.
ggplot(potato.dat,
       aes(x = Initial_length_mm - Final_length_mm,
           fill = Treatment)) +
  geom_histogram() +
  # The difference is plotted on the x axis (y is the bin count), so the
  # axis title belongs on the x scale.
  scale_x_continuous(name = "Change in length (mm)") +
  # Reference line at zero, i.e. no difference between initial and final
  geom_vline(aes(xintercept = 0),
             size = 1.5, color = "black") +
  scale_fill_manual(values = c("Deepskyblue",
                               "Darkseagreen1",
                               "Darkseagreen2",
                               "Darkseagreen3"))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Here I use scale_x_continuous() with the argument name = "x axis label" to set the x-axis label to "Change in length (mm)" (the y axis of a histogram is the count). Then I used geom_vline() to add a vertical line at x = 0.
# Then I used scale_fill_manual() with the argument values to supply specific color names; the colors are matched to the treatments in order (usually alphabetical). Treatment is a character column, so you can check its values with sort(unique(potato.dat$Treatment)).
# Histogram of the per-potato mass difference (initial minus final),
# bars filled by Treatment.
# NOTE(review): the plotted value is Initial - Final, so a potato that gained
# mass lands on the negative side of zero -- confirm this sign is intended.
ggplot(potato.dat,
       aes(x = Initial_mass_g - Final_mass_g,
           fill = Treatment)) +
  geom_histogram() +
  # The difference is plotted on the x axis (y is the bin count), so the
  # axis title belongs on the x scale.
  scale_x_continuous(name = "Change in mass (g)") +
  # Reference line at zero, i.e. no difference between initial and final
  geom_vline(aes(xintercept = 0),
             size = 1.5, color = "black") +
  # One fill color per treatment, matched in order
  scale_fill_manual(values = c("Deepskyblue",
                               "Darkseagreen1",
                               "Darkseagreen2",
                               "Darkseagreen3"))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Per-treatment summary of the initial-minus-final differences.
# Column names are kept unchanged because the plot further down refers to them:
#   Final.length.mm / Final.mass.g    - mean of the difference
#   Stat.dev.lenght / Stat.dev.mass.g - standard deviation of the difference
# Stat.dev.mass.g used to be computed with mean() on
# Initial_mass_g - Final.mass.g (the summary value created one line earlier),
# which is not a standard deviation; it now uses sd() on the raw columns.
# NOTE: the printed table captured below was produced before this correction,
# so its Stat.dev.mass.g column still shows the old values.
summary.stat.table <- potato.dat %>%
  group_by(Treatment) %>%
  summarise(
    Final.length.mm = mean(Initial_length_mm - Final_length_mm, na.rm = TRUE),
    Stat.dev.lenght = sd(Initial_length_mm - Final_length_mm, na.rm = TRUE),
    Final.mass.g = mean(Initial_mass_g - Final_mass_g, na.rm = TRUE),
    Stat.dev.mass.g = sd(Initial_mass_g - Final_mass_g, na.rm = TRUE)
  )
# Here I used the tidyverse, a package we will go over next class. It introduces an operator known as a pipe, %>%. A pipe lets you chain functions together, passing the result of one function on to the next. In this case I created a new object by piping in the original dataset, then I grouped by Treatment and calculated the mean and standard deviation of the difference between initial and final length and mass.
summary.stat.table
## # A tibble: 4 x 5
## Treatment Final.length.mm Stat.dev.lenght Final.mass.g Stat.dev.mass.g
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Water -1.79 1.68 -0.444 6.52
## 2 Water + 10 ~ -0.0889 1.23 0.278 5.97
## 3 Water + 2 N~ -1.06 1.86 -0.233 6.47
## 4 Water + 6 N~ -0.639 1.28 0.233 6.16
#For length
# Mean length difference per treatment, with error bars spanning one
# standard deviation either side of the mean and a horizontal line at zero.
summary.stat.table %>%
  ggplot(mapping = aes(x = Treatment,
                       y = Final.length.mm,
                       color = Treatment)) +
  geom_point(size = 3) +
  geom_errorbar(
    mapping = aes(ymin = Final.length.mm - Stat.dev.lenght,
                  ymax = Final.length.mm + Stat.dev.lenght),
    position = "dodge"
  ) +
  geom_hline(mapping = aes(yintercept = 0), size = 1.5)
#Here I used geom_errorbar() to create the graphing element of error bars. In the aesthetic I had to set ymin and ymax (the lower and upper bounds of the error bars). I do this by subtracting the standard deviation from the mean (ymin) and adding it to the mean (ymax).
#Here I used geom_hline to create a horizontal line at zero
#Can you reproduce this for mass?
# Dye data: pick the Excel workbook interactively, read it in, and print
# per-column summary statistics.
dye.dat <- file.choose() %>%
  read_xlsx()
summary(dye.dat)
## Activity Group_ID Dye_number_name Concentration_mM
## Min. :1.000 Length:280 Length:280 Min. :0.300
## 1st Qu.:1.000 Class :character Class :character 1st Qu.:1.000
## Median :2.000 Mode :character Mode :character Median :1.000
## Mean :1.857 Mean :1.186
## 3rd Qu.:3.000 3rd Qu.:1.000
## Max. :3.000 Max. :3.000
## Agarose_concentration Time Distance_from_Interface_mm
## Min. :0.150 Min. : 0 Min. : 0.000
## 1st Qu.:0.175 1st Qu.:20 1st Qu.: 1.465
## Median :0.175 Median :45 Median : 3.700
## Mean :0.175 Mean :45 Mean : 8.068
## 3rd Qu.:0.175 3rd Qu.:70 3rd Qu.: 7.715
## Max. :0.200 Max. :90 Max. :147.000
#notice that the Distance from interface seems like it has an outlier
# Distance from the interface against time for every record, colored by dye;
# one facet row per dye and one facet column per activity.
dye.dat %>%
  ggplot(mapping = aes(x = Time,
                       y = Distance_from_Interface_mm,
                       color = Dye_number_name)) +
  geom_point() +
  facet_grid(Dye_number_name ~ Activity)
#This graph is a lot, but we are looking at all the data across all activities. I used the function facet_grid to separate out columns by part 1, part 2 and part 3 and by Dye color in rows. Notice that we have an outlier of 150mm
# Histogram of distance from the interface, filled by dye;
# one facet row per dye and one facet column per activity.
dye.dat %>%
  ggplot(mapping = aes(x = Distance_from_Interface_mm,
                       fill = Dye_number_name)) +
  geom_histogram() +
  facet_grid(Dye_number_name ~ Activity)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#Let's break it out by Activity
# Keep only the Activity 1 records of the dye data.
part1 <- filter(dye.dat, Activity == 1)
# Distance against time for Activity 1, colored by dye, one panel per group.
ggplot(data = part1,
       mapping = aes(x = Time,
                     y = Distance_from_Interface_mm,
                     color = Dye_number_name)) +
  geom_point(size = 2) +
  facet_grid(. ~ Group_ID)
#Note that in the last group the points are kind of wonky and regress over time, which should not happen. In group 2 there is a dramatic pull for Methylene blue towards the end, which might be an error.
# Keep only the Activity 2 records of the dye data.
part2 <- dye.dat %>%
  filter(Activity == 2)
# Control series: Methyl Red at 1 mM taken from Activity 1 only.
# Without the Activity filter, any records from other activities that also
# use Methyl Red at 1 mM would be drawn as "control" as well.
control.dye <- dye.dat %>%
  filter(Activity == 1,
         Concentration_mM == 1,
         Dye_number_name == "Dye_1_Methyl_Red")
# Distance against time for Activity 2, colored by dye concentration (treated
# as a discrete level), with the control points overlaid; one panel per group.
ggplot(part2,
       aes(x = Time,
           y = Distance_from_Interface_mm,
           color = as.factor(Concentration_mM))) +
  geom_point(size = 2) +
  geom_point(data = control.dye) +
  facet_grid(. ~ Group_ID)
#Here I added the control dye using geom_point. Note that there is an error in group 1. In the data they have 1.47, 1.47, 147 in a sequence which probably means that they missed a decimal point
# Keep only the Activity 3 records of the dye data.
part3 <- filter(dye.dat, Activity == 3)
# Distance against time for Activity 3, colored by agarose concentration,
# with the control.dye points overlaid; one panel per group.
ggplot(data = part3,
       mapping = aes(x = Time,
                     y = Distance_from_Interface_mm,
                     color = Agarose_concentration)) +
  geom_point(size = 2) +
  geom_point(data = control.dye) +
  facet_grid(. ~ Group_ID)
# Comments: Note that I share this with you as a learning experience so that you can see what common errors happen and how your work translates into data. I opted to keep the names so that you can see how you did and reflect on what errors potentially influenced your group. This is an exercise in building skills in working carefully and in how to spot potential errors in a dataset. Everyone makes mistakes; the important thing is learning how to work carefully and how to spot mistakes in the dataset.
#dye.dat without error
# Corrected dye data: pick the workbook interactively and read it in.
dye.dat2 <- file.choose() %>%
  read_xlsx()
# Drop the group whose Group_ID is "RRG" (referred to as group 4).
dye.dat3 <- filter(dye.dat2, Group_ID != "RRG")
#Here I use the argument != which means is not equal to create a dataset of all groups not equal to RRG.
#Now I will use tidyverse to average across groups
# Average the measured distance across groups for every combination of dye,
# time point, activity, dye concentration and agarose concentration.
# Only Distance_from_Interface_mm is averaged. Group_ID is a character column,
# and averaging every column with summarize_all(mean) made mean() warn
# "argument is not numeric or logical: returning NA" once per group and left
# an all-NA Group_ID column in the result.
dye.averages <- dye.dat3 %>%
  group_by(Dye_number_name, Time, Activity,
           Concentration_mM, Agarose_concentration) %>%
  summarise(
    Distance_from_Interface_mm = mean(Distance_from_Interface_mm)
  ) %>%
  # Drop the remaining grouping so later steps work on a plain tibble
  ungroup()
# NOTE: the tibble printed below was captured before this change, so it still
# shows the grouping header and the all-NA Group_ID column (7 columns, not 6).
dye.averages
## # A tibble: 70 x 7
## # Groups: Dye_number_name, Time, Activity, Concentration_mM [60]
## Dye_number_name Time Activity Concentration_mM Agarose_concent~
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Dye_1_Methyl_R~ 0 1 1 0.175
## 2 Dye_1_Methyl_R~ 0 2 0.3 0.175
## 3 Dye_1_Methyl_R~ 0 2 3 0.175
## 4 Dye_1_Methyl_R~ 0 3 1 0.15
## 5 Dye_1_Methyl_R~ 0 3 1 0.2
## 6 Dye_1_Methyl_R~ 10 1 1 0.175
## 7 Dye_1_Methyl_R~ 10 2 0.3 0.175
## 8 Dye_1_Methyl_R~ 10 2 3 0.175
## 9 Dye_1_Methyl_R~ 10 3 1 0.15
## 10 Dye_1_Methyl_R~ 10 3 1 0.2
## # ... with 60 more rows, and 2 more variables: Group_ID <dbl>,
## # Distance_from_Interface_mm <dbl>
# Averaged distance against time, colored by dye, one panel per activity.
dye.averages %>%
  ggplot(mapping = aes(x = Time,
                       y = Distance_from_Interface_mm,
                       color = Dye_number_name)) +
  geom_point(size = 2) +
  facet_grid(. ~ Activity)
#Separate by parts
# Keep only the Activity 1 rows of the averaged dye data.
part1 <- filter(dye.averages, Activity == 1)
# Averaged distance against time for Activity 1, colored by dye.
ggplot(data = part1,
       mapping = aes(x = Time,
                     y = Distance_from_Interface_mm,
                     color = Dye_number_name)) +
  geom_point(size = 2)
# Keep only the Activity 2 rows of the averaged dye data.
part2 <- dye.averages %>%
  filter(Activity == 2)
# Control series: averaged Methyl Red at 1 mM taken from Activity 1 only.
# The averaged data also contains Methyl Red at 1 mM under Activity 3 (at
# other agarose concentrations); without the Activity filter those rows were
# joined into the same control line.
control.dye <- dye.averages %>%
  filter(Activity == 1,
         Concentration_mM == 1,
         Dye_number_name == "Dye_1_Methyl_Red")
# Averaged distance against time for Activity 2, colored by dye concentration
# (treated as a discrete level), with the control drawn as a line.
ggplot(part2,
       aes(x = Time,
           y = Distance_from_Interface_mm,
           color = as.factor(Concentration_mM))) +
  geom_point(size = 2) +
  geom_line(data = control.dye)
# Keep only the Activity 3 rows of the averaged dye data.
part3 <- filter(dye.averages, Activity == 3)
# Averaged distance against time for Activity 3, colored by agarose
# concentration, with the control.dye points overlaid.
ggplot(data = part3,
       mapping = aes(x = Time,
                     y = Distance_from_Interface_mm,
                     color = Agarose_concentration)) +
  geom_point(size = 2) +
  geom_point(data = control.dye)