# Read the tab-delimited species table. as.is = TRUE keeps text columns as
# character vectors instead of converting them to factors.
dd <- read.table("~/Documents/EEB Orientation/data.txt",
                 header = TRUE, sep = "\t", as.is = TRUE)
# Simulate one body size per row from a normal distribution with
# mean of 75 and sd of 12. Sizing the draw with nrow(dd) rather than a
# hard-coded 92 keeps the new column aligned with the data if the input
# file gains or loses rows.
# NOTE(review): no seed is set, so the simulated sizes differ between runs;
# call set.seed() beforehand if reproducible values are needed.
bod.size <- rnorm(nrow(dd), mean = 75, sd = 12)
# Store the simulated sizes in a new column
dd$body.size <- bod.size
# Preview the first rows to confirm the body.size column was added
head(dd)
## species mode body.size
## 1 Naso_brevirostris BCF 51.82720
## 2 glass_fish <NA> 83.00697
## 3 Zebrasoma_scopas BCF 100.13663
## 4 Apogon_nigrofasciatus BCF 75.44911
## 5 Cheilodipterus_macrodon BCF 85.01915
## 6 Cheilodipterus_quinquelineatus BCF 83.55614
# Write a comma-separated text file of MPF swimmers:
# keep only the MPF rows and the two columns of interest
dd.out <- dd %>%
  filter(mode == "MPF") %>%
  select(species, body.size)
# Save into a text file. row.names = FALSE omits the row-index column that
# write.table() adds by default; with that column present the header line
# has one field fewer than the data lines, which misaligns the columns in
# most other CSV readers.
write.table(dd.out, "~/Documents/EEB Orientation/data.bodysize.txt",
            sep = ",", row.names = FALSE)
# Read the saved MPF table back in and plot a histogram of body size.
# TRUE is spelled out because T is an ordinary variable that can be
# reassigned.
dd.size <- read.table("~/Documents/EEB Orientation/data.bodysize.txt",
                      header = TRUE, sep = ",", as.is = TRUE)
# head(dd.size)
# Histogram with 4-unit-wide bins, white bars outlined in gray
ggplot(dd.size, aes(x = body.size)) +
  geom_histogram(fill = "white", color = "gray", binwidth = 4) +
  theme_classic(base_size = 14) +
  ggtitle("Histogram of body size") +
  xlab("Body size") +
  ylab("Count")
matched. You will need to use subsetting functions on the data frame and `drop.tip()` on the tree. `setdiff()` will help you identify taxa that are present in one object but missing from the other. Save this tree as a PDF.
# Load the phylogeny
tt <- read.tree("~/Documents/EEB Orientation/tree.tre")
# To inspect the elements of the tree object:
# attributes(tt)
# Step 1: prune the data.
# Species in the data that have no matching tip label in the tree
delete <- setdiff(dd.size$species, tt$tip.label)
# Keep only the rows whose species is not on that list
# (author's note: 9 observations were pruned)
pruned.data <- subset(dd.size, !(species %in% delete))
# Sanity check: prints character(0) when every remaining species is a tip
setdiff(pruned.data$species, tt$tip.label)
## character(0)
# Step 2: prune the tree.
# Tip labels with no matching species in the pruned data. setdiff() already
# returns unique values, so no separate unique() step is needed.
# (author's note: ~7900 elements to trim)
delete.tips <- setdiff(tt$tip.label, pruned.data$species)
# Remove those tips with drop.tip
pruned.tree <- drop.tip(tt, delete.tips)
# Sanity check: prints character(0) when every remaining tip has data
setdiff(pruned.tree$tip.label, pruned.data$species)
## character(0)
# Step 3: write the pruned tree to a PDF (width 6, height 8).
# Open the device, draw the phylogram, then close the device so the file
# is flushed to disk.
pdf("~/Documents/EEB Orientation/pruned_tree.pdf", width = 6, height = 8)
plot(pruned.tree, type = "phylogram", cex = 0.5)
dev.off()
## quartz_off_screen
## 2
Try this if time permits.
Pseudocode:
- start with a function that acts on a vector (vec)
- subtract 1 from every value in the vector and assign the result to a variable
- add 1 to every value in the vector and assign the result to another variable
- sum the values in each of the two new vectors
- take the square root of each sum
- print the results
# Print the square roots of sum(vec + 1) and sum(vec - 1).
#
# Args:
#   vec: a vector of numbers.
#
# Returns:
#   Invisibly, a named vector c(plus = ..., minus = ...) holding the two
#   square roots, so callers can reuse the values as well as read them.
#
# Note: sqrt() yields NaN (with a warning) when a sum is negative, e.g.
# when sum(vec - 1) < 0.
fun.sqrt <- function(vec) {
  # shift every element down and up by one
  minus <- vec - 1
  plus <- vec + 1
  # collapse each shifted vector to a single total
  sum.m <- sum(minus)
  sum.p <- sum(plus)
  # square root of each total
  sqrt.m <- sqrt(sum.m)
  sqrt.p <- sqrt(sum.p)
  # report; both lines end with a newline so later console output does not
  # run onto the end of the second line
  cat('Output of vector + 1 = ', sqrt.p, '\n')
  cat('Output of vector - 1 = ', sqrt.m, '\n')
  # hand the results back without auto-printing them
  invisible(c(plus = sqrt.p, minus = sqrt.m))
}
# Exercise the function on a small vector
vec <- c(2, 3, 6)
fun.sqrt(vec)
## Output of vector + 1 = 3.741657
## Output of vector - 1 = 2.828427
# Exercise it again on a second vector
vec <- c(100, 200, 3)
# Note on sourcing: source() reads R code from a file (or connection), so
# it expects a path to a script, not a function call; that is why typing
# source(fun.sqrt(vec)) produced an error. To "source" this function, save
# its definition in a script file and call source() on that file's path.
fun.sqrt(vec)
## Output of vector + 1 = 17.49286
## Output of vector - 1 = 17.32051