# Package setup ----
# One-off installation, only needed when the packages are missing:
# install.packages("effects")

# Packages the analysis below calls directly:
#   tidyverse -> dplyr (%>%, group_by, summarise) and ggplot2 (ggplot, geom_*)
#   effects   -> allEffects()
library(tidyverse)
library(effects)

# Listed in the original header but not used by the code in this script;
# left disabled.
# library(pacman)
# library(readxl)
# library(WriteXLS)
# library(writexl)
# library(psych)

# Extra packages loaded through pacman (requires pacman to be installed).
pacman::p_load(GGally, ggthemes, ggvis, httr, plotly, rio, rmarkdown, shiny)
#our experimental design is not really appropriate to assess the effect of #the empty vector control itself on the outcome (can you see why?)
#**Answer*: It is so because we have used 5 MOI of the virus as the negative
# control, thus lower values are negligible and do not show any functional
# results.
# Load the raw measurements ----
# Forward slashes are used in the path because a bare backslash inside an R
# string starts an escape sequence.
# NOTE(review): the separator argument was truncated in the source; a
# tab-delimited file is assumed here - confirm against the input file.
data <- read.table(
  "D:/R - Problem Solving/Data/Input data.txt",
  sep = "\t",
  header = TRUE
)
head(data)
data

# Transforming into categorical data: both treatment columns become factors
# with explicit level order, so the first level (0) is the reference level.
data$NGN2_MOI <- factor(data$NGN2_MOI, levels = c(0, 2, 5, 10))
data$NT3_ng <- factor(data$NT3_ng, levels = c(0, 10))
head(data)
#Question: Can you see what has changed in the output?
#**Answer*: The two treatment columns are now factors; this can be checked
# with class(data$NGN2_MOI) and class(data$NT3_ng).
# Mean and standard deviation of fractMAP2 for every NGN2_MOI x NT3_ng
# combination. `.groups = "drop"` returns an ungrouped table so no grouping
# leaks into later steps.
sumdata <- data %>%
  group_by(NGN2_MOI, NT3_ng) %>%
  summarise(
    meanFractMAP2 = mean(fractMAP2),
    sdFractMAP2 = sd(fractMAP2),
    .groups = "drop"
  )

# Dodged bar chart: one bar per NT3 level within each NGN2 dose, with
# error bars of mean +/- 1 SD. The error bars use dodge width 0.9 so they
# line up with the dodged bars.
ggplot(sumdata, aes(x = NGN2_MOI, y = meanFractMAP2, fill = NT3_ng)) +
  geom_bar(position = position_dodge(), stat = "identity", colour = "black") +
  geom_errorbar(
    aes(
      ymin = meanFractMAP2 - sdFractMAP2,
      ymax = meanFractMAP2 + sdFractMAP2
    ),
    width = 0.2,
    position = position_dodge(0.9)
  )
# Same per-condition summary as above (mean and SD of fractMAP2 for every
# NGN2_MOI x NT3_ng combination), this time plotted with NT3 on the x axis.
sumdataNGN2 <- data %>%
  group_by(NGN2_MOI, NT3_ng) %>%
  summarise(
    meanFracMAP2 = mean(fractMAP2),
    sdFracMAP2 = sd(fractMAP2),
    .groups = "drop"
  )

# The table holds one row per NGN2 dose at each NT3 level. Without a fill /
# dodge on NGN2_MOI the bars for the different doses are stacked on top of
# each other while the error bars are drawn at the unstacked means, so bars
# and error bars do not correspond. Filling and dodging by NGN2_MOI draws one
# bar per condition with its own error bar.
ggplot(sumdataNGN2, aes(x = NT3_ng, y = meanFracMAP2, fill = NGN2_MOI)) +
  geom_bar(position = position_dodge(), stat = "identity") +
  geom_errorbar(
    aes(
      ymin = meanFracMAP2 - sdFracMAP2,
      ymax = meanFracMAP2 + sdFracMAP2
    ),
    width = 0.2,
    position = position_dodge(0.9)
  )
#**Question*: Look at the error bars in the plot above. #What do you notice and how would you interpret this result?
#**Answer*: I feel like the error bars give a range of flexibility to
# each observation: there might be some variation in the observations, but
# only up to this limit. I would interpret it as follows: values beyond the
# range are outliers, while data within the error bars but outside the main
# bar are permissible.
#**Question*Please think of the outcome variable and explanatory variables #in our model and input them into the formula below.
# Logistic regression of the MAP2-positive fraction on both treatments
# (additive effects, no interaction term).
# The response is a proportion, so the prior weights supply the number of
# trials behind each proportion.
# NOTE(review): 10000 is presumably the number of cells counted per sample -
# confirm against the experimental protocol.
mod <- glm(
  fractMAP2 ~ NGN2_MOI + NT3_ng,
  family = binomial(link = "logit"),
  data = data,
  weights = rep(10000, nrow(data))
)

summary(mod)
# Effect plots for every term in the model.
plot(allEffects(mod))
#**Question*What treatments/doses have a significant effect #on the outcome compared with the no-treatment control?
#**Answer*All tested treatments have a statistically significant positive #effect on differentiation efficiency compared with the no-treatment control.
#Specifically:
#NGN2 virus at 2 MOI significantly increases MAP2-positive cells
#(Estimate = 2.12, p < 2×10⁻¹⁶)
#NGN2 virus at 5 MOI shows a very strong significant increase #(Estimate = 3.97, p < 2×10⁻¹⁶)
#NGN2 virus at 10 MOI also significantly increases MAP2 expression #(Estimate = 3.11, p < 2×10⁻¹⁶)
#NT3 treatment (10 ng) independently has a significant positive effect #(Estimate = 0.31, p < 2×10⁻¹⁶)
#Thus, both NGN2 virus dose and NT3 treatment significantly #improve differentiation efficiency relative to untreated controls.
#**Question*What treatment/dose has the strongest positive effect #on the outcome compared with the control?
#**Answer*The NGN2 virus at 5 MOI has the strongest positive effect on #differentiation efficiency.
#This is supported by the fact that it has the largest regression coefficient
#(Estimate = 3.97).
#In logistic regression, a larger positive coefficient corresponds to a #larger increase in the log-odds(and therefore probability) of MAP2 expression.
# Refit the model with 5 MOI as the reference level, so the NGN2
# coefficients compare every other dose directly against 5 MOI.
levels(data$NGN2_MOI) # the original level order
data$NGN2_MOI <- relevel(data$NGN2_MOI, ref = "5")

mod3 <- glm(
  fractMAP2 ~ NGN2_MOI + NT3_ng,
  family = binomial(link = "logit"),
  data = data,
  weights = rep(10000, nrow(data))
)

summary(mod3)

# Restore the level order with factor() rather than relevel(): relevel() only
# moves one level to the front and would leave "5" ahead of "2".
data$NGN2_MOI <- factor(data$NGN2_MOI, levels = c("0", "2", "5", "10"))
levels(data$NGN2_MOI) # back to how they were originally
#**Question*Did 5 MOI perform significantly better than 2 and 10 MOI? #**Answer*Yes, #NGN2 at 5 MOI performed significantly better than both 2 MOI and 10 MOI. #The coefficients for NGN2_MOI2 and NGN2_MOI10 are negative, meaning both doses #produce lower MAP2 differentiation efficiency than 5 MOI.
#**Question*: Can you see what condition it is?
#**Answer*: Sample IDs 10, 11 and 12 have an NGN2 dose of 2 MOI and an NT3
# concentration of 10 ng, which shifts the mean towards NT3.
# This produces a standard deviation almost as large as the mean.
# Drop the outlier replicate. The row is removed by position (10th row).
# NOTE(review): this assumes the row order of `data` is unchanged since
# loading - confirm that row 10 is the intended sample.
dataFilt <- data[-10, ]

# Per-condition mean and SD of fractMAP2, recomputed without the outlier.
sumdataFilt <- dataFilt %>%
  group_by(NGN2_MOI, NT3_ng) %>%
  summarise(
    meanFractMAP2 = mean(fractMAP2),
    sdFractMAP2 = sd(fractMAP2),
    .groups = "drop"
  )

# Same dodged bar chart as for the full data set, for direct comparison.
ggplot(sumdataFilt, aes(x = NGN2_MOI, y = meanFractMAP2, fill = NT3_ng)) +
  geom_bar(position = position_dodge(), stat = "identity", colour = "black") +
  geom_errorbar(
    aes(
      ymin = meanFractMAP2 - sdFractMAP2,
      ymax = meanFractMAP2 + sdFractMAP2
    ),
    width = 0.2,
    position = position_dodge(0.9)
  )
# Same model specification as `mod`, fitted on the data without the outlier
# replicate, so the two summaries can be compared coefficient by coefficient.
mod2 <- glm(
  fractMAP2 ~ NGN2_MOI + NT3_ng,
  family = binomial(link = "logit"),
  data = dataFilt,
  weights = rep(10000, nrow(dataFilt))
)

message("With the outlier:")
summary(mod)

message("Without the outlier:")
summary(mod2)
# Refit on the filtered data with 5 MOI as the reference level.
# The reference is selected by name ("5") rather than by position, so the
# result does not depend on the current order of the levels.
levels(dataFilt$NGN2_MOI) # the original level order
dataFilt$NGN2_MOI <- relevel(dataFilt$NGN2_MOI, ref = "5")
levels(dataFilt$NGN2_MOI) # the new level order

modFilt5MOI <- glm(
  fractMAP2 ~ NGN2_MOI + NT3_ng,
  family = binomial(link = "logit"),
  data = dataFilt,
  weights = rep(10000, nrow(dataFilt))
)

message("Without the outlier")
summary(modFilt5MOI)

# note that relevel() won't work as it will put 5 before 2
dataFilt$NGN2_MOI <- factor(dataFilt$NGN2_MOI, levels = c("0", "2", "5", "10"))
levels(dataFilt$NGN2_MOI) # back to how they were originally
#**Questions:*: What regression coefficients have changed?
#**Answer*:
#After removing the outlier replicate:
#The NGN2_MOI2 coefficient shows the largest change
#Its estimated effect becomes stronger and more consistent
#The standard error is reduced
#The NT3_ng10 coefficient remains minimally changed
#Coefficients for NGN2_MOI5 and NGN2_MOI10 are mostly unchanged
#**Questions*: Has the apparent optimal experimental #condition changed after removing the outlier (and if so, how)?
#**Answer*: No. The optimal experimental condition has not changed. #NGN2 at 5 MOI remains the condition with the strongest positive #effect on differentiation efficiency #Releveling the model still shows that: # 2 MOI performs significantly worse than 5 MOI #10 MOI also performs worse than 5 MOI #Removing the outlier strengthens confidence in this #conclusion rather than overturning it
#**Question*:Based on the analyses above, what combination of treatment #doses do you recommend to use in the differentiation protocol?
#**Answer*: #Based on logistic regression analysis of MAP2-positive cell proportions, #NGN2 virus at 5 MOI consistently produced the highest #differentiation efficiency and performed significantly better than both #lower (2 MOI) and higher (10 MOI) doses. NT3 treatment at 10 ng provided an #additional independent positive effect. Therefore, the recommended #differentiation protocol is NGN2 at 5 MOI combined with NT3 at 10 ng, #as this combination yields the most robust and #reproducible neuronal differentiation.