# Package setup ----
# The original line was collapsed into a single comment, so nothing was loaded.
# `effects` provides allEffects() and `tidyverse` provides %>%, group_by(),
# summarise() and ggplot(), all of which are used below.
# install.packages("effects")
# library(effects)
# library(pacman)
# library(tidyverse)
# library(readxl)
# library(WriteXLS)
# library(writexl)
# library(psych)
pacman::p_load(
  effects, tidyverse,
  GGally, ggthemes, ggvis, httr, plotly, rio, rmarkdown, shiny
)

# Our experimental design is not really appropriate to assess the effect of
# the empty vector control itself on the outcome (can you see why?)

# **Answer**: This is because we used 5 MOI of the virus as the negative
# control, so lower values are negligible and do not show any functional
# results.

# Loading the data

# Read the raw measurements into a data frame.
# Forward slashes are used because "\R" is not a valid escape sequence inside
# an R string literal.
# NOTE(review): the separator was truncated in the original source (`sep=`
# followed by a lone quote); a tab is assumed here - confirm against the file.
data <- read.table(
  "D:/R - Problem Solving/Data/Input data.txt",
  sep = "\t",
  header = TRUE
)
head(data)
data

# Transforming into categorical data: both dose columns become factors with
# an explicit level order, so the first level (0) is the reference level.
data$NGN2_MOI <- factor(data$NGN2_MOI, levels = c(0, 2, 5, 10))
data$NT3_ng <- factor(data$NT3_ng, levels = c(0, 10))
head(data)

# **Question**: Can you see what has changed in the output?
# **Answer**: NGN2_MOI and NT3_ng are now factors; this can be checked with
# class(data$NGN2_MOI) and class(data$NT3_ng).

# Summarizing the data

# Mean and standard deviation of fractMAP2 for every combination of NGN2 dose
# and NT3 dose. `.groups = "drop"` returns an ungrouped table and silences the
# regrouping message summarise() emits after a multi-column group_by().
sumdata <- data %>%
  group_by(NGN2_MOI, NT3_ng) %>%
  summarise(
    meanFractMAP2 = mean(fractMAP2),
    sdFractMAP2 = sd(fractMAP2),
    .groups = "drop"
  )

# Plotting

# Dodged bar chart of the mean fractMAP2 per NGN2 dose, one bar per NT3 dose,
# with error bars spanning mean +/- one standard deviation.
# The dodge width of 0.9 on the error bars matches the default bar dodge so
# each error bar sits on its own bar.
ggplot(sumdata, aes(x = NGN2_MOI, y = meanFractMAP2, fill = NT3_ng)) +
  geom_bar(position = position_dodge(), stat = "identity", colour = "black") +
  geom_errorbar(
    aes(
      ymin = meanFractMAP2 - sdFractMAP2,
      ymax = meanFractMAP2 + sdFractMAP2
    ),
    width = 0.2,
    position = position_dodge(0.9)
  )

# Please insert the correct grouping into group_by() for this visualization

# Summary used for the NT3-only plot below. The plot has a single bar per NT3
# dose (x = NT3_ng, no fill), so the data must be grouped by NT3_ng alone.
# Grouping by NGN2_MOI as well would give four rows per NT3 dose, which
# geom_bar(stat = "identity") stacks on top of each other while the four error
# bars are drawn over one another.
sumdataNGN2 <- data %>%
  group_by(NT3_ng) %>%
  summarise(
    meanFracMAP2 = mean(fractMAP2),
    sdFracMAP2 = sd(fractMAP2)
  )

# One bar per NT3 dose with error bars of mean +/- one standard deviation.
ggplot(sumdataNGN2, aes(x = NT3_ng, y = meanFracMAP2)) +
  geom_bar(stat = "identity") +
  geom_errorbar(
    aes(
      ymin = meanFracMAP2 - sdFracMAP2,
      ymax = meanFracMAP2 + sdFracMAP2
    ),
    width = 0.2,
    position = position_dodge(0.9)
  )

# **Question**: Look at the error bars in the plot above.
# What do you notice and how would you interpret this result?

# **Answer**: I feel like the error bars give a range of flexibility to each
# observation: there might be some variation in the observations, but only up
# to this limit. I would interpret values beyond this range as outliers, while
# data within the error bars but outside the main bar are permissible.

# **Question**: Please think of the outcome variable and explanatory variables
# in our model and input them into the formula below.

# Logistic regression of the MAP2 fraction on NGN2 dose and NT3 dose.
# The response is a proportion, so glm() uses `weights` as the number of
# binomial trials behind each proportion (10000 for every row here).
mod <- glm(
  fractMAP2 ~ NGN2_MOI + NT3_ng,
  family = binomial(link = "logit"),
  data = data,
  weights = rep(10000, nrow(data))
)

# Coefficient table, then effect plots for each model term.
summary(mod)
plot(allEffects(mod))

# **Question**: What treatments/doses have a significant effect
# on the outcome compared with the no-treatment control?

# **Answer**: All tested treatments have a statistically significant positive
# effect on differentiation efficiency compared with the no-treatment control.

# Specifically:

# NGN2 virus at 2 MOI significantly increases the fraction of
# MAP2-positive cells
# (Estimate = 2.12, p < 2×10⁻¹⁶)

# NGN2 virus at 5 MOI shows a very strong significant increase
# (Estimate = 3.97, p < 2×10⁻¹⁶)

# NGN2 virus at 10 MOI also significantly increases MAP2 expression
# (Estimate = 3.11, p < 2×10⁻¹⁶)

# NT3 treatment (10 ng) independently has a significant positive effect
# (Estimate = 0.31, p < 2×10⁻¹⁶)

# Thus, both NGN2 virus dose and NT3 treatment significantly
# improve differentiation efficiency relative to untreated controls.

# **Question**: What treatment/dose has the strongest positive effect
# on the outcome compared with the control?

# **Answer**: The NGN2 virus at 5 MOI has the strongest positive effect on
# differentiation efficiency.

# This is supported by the fact that it has the largest regression coefficient:

# NGN2_MOI5: Estimate = 3.97 (largest among all treatments)

# In logistic regression, a larger positive coefficient corresponds to a
# larger increase in the log-odds (and therefore probability) of MAP2
# expression.

# Make 5 MOI the reference level so the other NGN2 doses are compared
# against it instead of against 0 MOI.
levels(data$NGN2_MOI)
data$NGN2_MOI <- relevel(data$NGN2_MOI, ref = "5")

# Same model as before, refitted with the new reference level.
mod3 <- glm(
  fractMAP2 ~ NGN2_MOI + NT3_ng,
  family = binomial(link = "logit"),
  data = data,
  weights = rep(10000, nrow(data))
)

summary(mod3)

# Restore the original order - note that relevel() won't work as it
# will put 5 before 2
data$NGN2_MOI <- factor(data$NGN2_MOI, levels = c("0", "2", "5", "10"))
levels(data$NGN2_MOI) # back to how they were originally

# **Question**: Did 5 MOI perform significantly better than 2 and 10 MOI?
# **Answer**: Yes, NGN2 at 5 MOI performed significantly better than both
# 2 MOI and 10 MOI. The coefficients for NGN2_MOI2 and NGN2_MOI10 are
# negative, meaning both doses produce lower MAP2 differentiation efficiency
# than 5 MOI.

# **Question**: Can you see what condition it is?
# **Answer**: Samples 10, 11 and 12 have an NGN2 MOI of 2 and an NT3
# concentration of 10 ng, which shifts the mean towards NT3.
# This produces a standard deviation almost as large as the mean.

# Drop row 10 (the outlier replicate) and recompute the per-condition summary.
dataFilt <- data[-10, ]

# `.groups = "drop"` returns an ungrouped table and silences the regrouping
# message summarise() emits after a multi-column group_by().
sumdataFilt <- dataFilt %>%
  group_by(NGN2_MOI, NT3_ng) %>%
  summarise(
    meanFractMAP2 = mean(fractMAP2),
    sdFractMAP2 = sd(fractMAP2),
    .groups = "drop"
  )

# Same dodged bar chart as for the full data set, drawn from the filtered
# summary: mean fractMAP2 per NGN2 dose, one bar per NT3 dose, with error bars
# of mean +/- one standard deviation.
ggplot(sumdataFilt, aes(x = NGN2_MOI, y = meanFractMAP2, fill = NT3_ng)) +
  geom_bar(position = position_dodge(), stat = "identity", colour = "black") +
  geom_errorbar(
    aes(
      ymin = meanFractMAP2 - sdFractMAP2,
      ymax = meanFractMAP2 + sdFractMAP2
    ),
    width = 0.2,
    position = position_dodge(0.9)
  )

# Refit the logistic regression on the data without row 10 so the
# coefficients can be compared with the full-data model `mod`.
mod2 <- glm(
  fractMAP2 ~ NGN2_MOI + NT3_ng,
  family = binomial(link = "logit"),
  data = dataFilt,
  weights = rep(10000, nrow(dataFilt))
)

message("With the outlier:")
summary(mod)

message("Without the outlier:")
summary(mod2)

# Make 5 MOI the reference level in the filtered data. The level is named
# explicitly (the original used position 3) so the result does not depend on
# the current level order.
levels(dataFilt$NGN2_MOI) # the original level order
dataFilt$NGN2_MOI <- relevel(dataFilt$NGN2_MOI, ref = "5")
levels(dataFilt$NGN2_MOI) # the new level order

# Same model as `mod2`, refitted with 5 MOI as the reference.
modFilt5MOI <- glm(
  fractMAP2 ~ NGN2_MOI + NT3_ng,
  family = binomial(link = "logit"),
  data = dataFilt,
  weights = rep(10000, nrow(dataFilt))
)

message("Without the outlier")
summary(modFilt5MOI)

# Restore the original order -
# note that relevel() won't work as it will put 5 before 2
dataFilt$NGN2_MOI <- factor(
  dataFilt$NGN2_MOI,
  levels = c("0", "2", "5", "10")
)
levels(dataFilt$NGN2_MOI) # back to how they were originally

# **Question**: What regression coefficients have changed?

# **Answer**:
# After removing the outlier replicate:
# - The NGN2_MOI2 coefficient shows the largest change
# - Its estimated effect becomes stronger and more consistent
# - The standard error is reduced
# - The NT3_ng10 coefficient remains minimally changed
# - Coefficients for NGN2_MOI5 and NGN2_MOI10 are mostly unchanged

# **Question**: Has the apparent optimal experimental
# condition changed after removing the outlier (and if so, how)?

# **Answer**: No. The optimal experimental condition has not changed.
# NGN2 at 5 MOI remains the condition with the strongest positive
# effect on differentiation efficiency.
# Releveling the model still shows that:
# - 2 MOI performs significantly worse than 5 MOI
# - 10 MOI also performs worse than 5 MOI
# Removing the outlier strengthens confidence in this
# conclusion rather than overturning it.

# **Question**: Based on the analyses above, what combination of treatment
# doses do you recommend to use in the differentiation protocol?

# **Answer**:
# Based on logistic regression analysis of MAP2-positive cell proportions,
# NGN2 virus at 5 MOI consistently produced the highest
# differentiation efficiency and performed significantly better than both
# lower (2 MOI) and higher (10 MOI) doses. NT3 treatment at 10 ng provided an
# additional independent positive effect. Therefore, the recommended
# differentiation protocol is NGN2 at 5 MOI combined with NT3 at 10 ng,
# as this combination yields the most robust and
# reproducible neuronal differentiation.

# lifearc_proj