SUMMARY

The goal of this document is to analyze the AFM data for the different molecules measured by Sara & Angela.

List of tasks:

Data input

The original data file is Angela movie data 2016-06-03.xlsx. Columns A to J were extracted from it and saved as tab-separated text in AFM_CBP.data (the files are read below with sep="\t"), columns K to T were saved in AFM_CBPHAT.data, and columns V to AK were saved in AFM_P300.data.

Reading in the data and removing missing values (marked as “-” in the original file).

# Helper function
# Pool all non-missing entries of `data` into a single vector.
#
# Args:
#   data: a data frame (or matrix/vector) of measurements in which NA marks a
#     missing value.
# Returns:
#   A vector holding every non-NA entry of `data`; for a data frame the
#   entries are taken column by column.
combine_AFM_scores <- function(data) {
  # End on the bare expression rather than an assignment so that the result
  # is returned visibly (an assignment as last statement returns invisibly).
  unlist(data[!is.na(data)])
}

# Open an empty plot whose axes are sized for the density curves drawn later.
#
# Args:
#   data: numeric values; the x axis spans 0 to their maximum (NAs ignored).
#   y: upper limit of the y axis.
#   plot_title: main title of the plot.
plot_template <- function(data, y, plot_title) {
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  x_max <- max(data, na.rm = TRUE)
  # type = "n" sets up axes, labels and title without drawing any points.
  plot(
    0,
    type = "n",
    xlim = c(0, x_max),
    ylim = c(0, y),
    xlab = "Distance (nm)",
    ylab = "Density (frequency)",
    main = plot_title
  )
}

# Overlay the kernel density estimate of `data` on the current plot as a
# thick (lwd = 3) orange curve.
#
# Args:
#   data: numeric vector to estimate the density from; a plot must already be
#     open, since lines() only adds to an existing one.
plot_combined_AFM_density <- function(data) {
  pooled_density <- density(data)
  lines(pooled_density, col = "orange", lwd = 3)
}

# Overlay the kernel density estimate of `data` on the current plot using the
# default line settings.
#
# Args:
#   data: numeric vector to estimate the density from; a plot must already be
#     open, since lines() only adds to an existing one.
plot_AFM_density <- function(data) {
  single_density <- density(data)
  lines(single_density)
}

# Data input
# Each file is read as a tab-separated table with a header row. Fields that
# consist solely of "-" (the missing-value marker of the original spreadsheet)
# are read as NA, so such columns stay numeric instead of becoming character.
CBP_AFM_data <- read.csv(
  "AFM_CBP.data",
  sep = "\t", header = TRUE, na.strings = c("NA", "-")
)
CBP6_AFM_data <- read.csv(
  "AFM_CBPHAT.data",
  sep = "\t", header = TRUE, na.strings = c("NA", "-")
)
P300_AFM_data <- read.csv(
  "AFM_P300.data",
  sep = "\t", header = TRUE, na.strings = c("NA", "-")
)

# Pool the non-missing values of every column into one vector per data set
CBP_combined  <- combine_AFM_scores(data = CBP_AFM_data)
CBP6_combined <- combine_AFM_scores(data = CBP6_AFM_data)
P300_combined <- combine_AFM_scores(data = P300_AFM_data)

# CBP
# Empty canvas first, then one density curve per data column (default line
# settings), and finally the pooled density on top in orange.
plot_template(CBP_combined, 0.2, "CBP distance distributions")
# A plain loop: the curves are drawn purely for their side effect.
for (column_name in names(CBP_AFM_data)) {
  column_values <- CBP_AFM_data[[column_name]]
  plot_AFM_density(column_values[!is.na(column_values)])
}
plot_combined_AFM_density(CBP_combined)
# Legend line widths mirror the drawn curves: default width (1) for the
# individual columns, 3 for the pooled curve.
legend(
  "topright",
  c("Individual measurements", "All measurements combined"),
  col = c("black", "orange"),
  lwd = c(1, 3)
)

# CBPHAT
# Empty canvas first, then one density curve per data column (default line
# settings), and finally the pooled density on top in orange.
plot_template(CBP6_combined, 0.37, "HATnCBP distance distributions")
# A plain loop: the curves are drawn purely for their side effect.
for (column_name in names(CBP6_AFM_data)) {
  column_values <- CBP6_AFM_data[[column_name]]
  plot_AFM_density(column_values[!is.na(column_values)])
}
plot_combined_AFM_density(CBP6_combined)
# Legend line widths mirror the drawn curves: default width (1) for the
# individual columns, 3 for the pooled curve.
legend(
  "topright",
  c("Individual measurements", "All measurements combined"),
  col = c("black", "orange"),
  lwd = c(1, 3)
)

# P300
# Empty canvas first, then one density curve per data column (default line
# settings), and finally the pooled density on top in orange.
plot_template(P300_combined, 0.25, "p300 distance distributions")
# A plain loop: the curves are drawn purely for their side effect.
for (column_name in names(P300_AFM_data)) {
  column_values <- P300_AFM_data[[column_name]]
  plot_AFM_density(column_values[!is.na(column_values)])
}
plot_combined_AFM_density(P300_combined)
# Legend line widths mirror the drawn curves: default width (1) for the
# individual columns, 3 for the pooled curve.
legend(
  "topright",
  c("Individual measurements", "All measurements combined"),
  col = c("black", "orange"),
  lwd = c(1, 3)
)