# Restore the objects stored in mice.RData (this provides the `mice` object
# used throughout) and print the complete data set.
load(file = "~/Desktop/Data science/Linear model/mice.RData")
mice
## concentration IgG1
## 1 0.025 3.00225
## 2 0.025 3.47225
## 3 0.025 3.81225
## 4 0.025 1.76225
## 5 0.025 2.84225
## 6 0.025 1.71225
## 7 0.025 2.32225
## 8 0.025 3.24225
## 9 0.025 3.09225
## 10 0.025 2.95225
## 11 0.025 3.53225
## 12 0.025 2.67225
## 13 0.075 3.27675
## 14 0.075 3.29675
## 15 0.075 3.76675
## 16 0.075 2.63675
## 17 0.075 4.23675
## 18 0.075 4.57675
## 19 0.075 4.04675
## 20 0.075 4.32675
## 21 0.075 3.78675
## 22 0.075 2.26675
## 23 0.075 3.58675
## 24 0.075 3.61675
## 25 0.100 3.41900
## 26 0.100 2.26900
## 27 0.100 1.99900
## 28 0.100 2.33900
## 29 0.100 1.40900
## 30 0.100 1.97900
## 31 0.100 1.77900
## 32 0.100 3.13900
## 33 0.100 3.81900
## 34 0.100 2.66900
## 35 0.100 1.19900
## 36 0.100 4.38900
## 37 0.200 2.46800
## 38 0.200 4.15800
## 39 0.200 2.65800
## 40 0.200 2.52800
## 41 0.200 1.68800
## 42 0.200 1.76800
## 43 0.200 2.82800
## 44 0.200 3.56800
## 45 0.200 4.21800
## 46 0.200 3.57800
## 47 0.200 3.60800
## 48 0.200 3.95800
## 49 0.500 3.37500
## 50 0.500 3.21500
## 51 0.500 6.05500
## 52 0.500 3.67500
## 53 0.500 3.02500
## 54 0.500 3.26500
## 55 0.500 4.44500
## 56 0.500 2.72500
## 57 0.500 2.53500
## 58 0.500 2.00500
## 59 0.500 3.40500
## 60 0.500 2.94500
## 61 2.000 5.04000
## 62 2.000 5.30000
# Confirm that the loaded object is a data frame
inherits(mice, "data.frame")
## [1] TRUE
# Show the first six rows of the data set
head(mice, n = 6L)
## concentration IgG1
## 1 0.025 3.00225
## 2 0.025 3.47225
## 3 0.025 3.81225
## 4 0.025 1.76225
## 5 0.025 2.84225
## 6 0.025 1.71225
# Per-column summary statistics (missingness, mean, sd, quantiles, histogram).
# NOTE(review): skim() is presumably skimr::skim -- confirm the package is
# attached earlier in the file.
skim(mice)
| Name | mice |
| Number of rows | 62 |
| Number of columns | 2 |
| _______________________ | |
| Column type frequency: | |
| numeric | 2 |
| ________________________ | |
| Group variables | None |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| concentration | 0 | 1 | 0.24 | 0.37 | 0.03 | 0.07 | 0.10 | 0.20 | 2.00 | ▇▂▁▁▁ |
| IgG1 | 0 | 1 | 3.17 | 0.97 | 1.20 | 2.53 | 3.23 | 3.74 | 6.06 | ▃▇▇▃▁ |
# Scatter plot of IgG1 against the dose of the new drug ADDF17.
# Axis titles are passed to labs() through its `x` and `y` arguments.
ggplot(mice, aes(x = concentration, y = IgG1)) +
  geom_point(color = "blue") +
  labs(
    title = "Relationship between IgG1 and dose of the drug ADDF17",
    x = "Dose of drug ADDF17",
    y = "Blood serum levels of Immunoglobulin IgG1"
  ) +
  theme(
    axis.title = element_text(size = 15),
    axis.text = element_text(size = 15)
  )
# Histogram of the IgG1 measurements; the title and x-axis label are spelled
# out so that they read "IgG1" rather than the extraction expression.
hist(mice$IgG1, main = "Histogram of IgG1", xlab = "IgG1")
# Simple linear regression of IgG1 on the ADDF17 concentration (all mice)
model <- lm(formula = IgG1 ~ concentration, data = mice)
# Keep the summary object: its coefficient table is reused further down
summary_model <- summary(object = model)
summary_model
##
## Call:
## lm(formula = IgG1 ~ concentration, data = mice)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.81822 -0.64593 0.00123 0.59608 2.60921
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.9101 0.1367 21.287 <2e-16 ***
## concentration 1.0714 0.3150 3.401 0.0012 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8989 on 60 degrees of freedom
## Multiple R-squared: 0.1616, Adjusted R-squared: 0.1476
## F-statistic: 11.57 on 1 and 60 DF, p-value: 0.0012
# Estimated intercept and slope
coefficients(model)
## (Intercept) concentration
## 2.910081 1.071418
# 95% confidence intervals for the intercept and slope
confint(model, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 2.6366273 3.183535
## concentration 0.4412574 1.701578
# Standard error of the slope, kept as a named element of the column
coef(summary_model)[, "Std. Error"]["concentration"]
## concentration
## 0.3150333
# Repeat the analysis without the mice that received the 2 microgram dose
mice_1 <- filter(mice, concentration != 2)
# Refit the same regression on the reduced data set
model1 <- lm(formula = IgG1 ~ concentration, data = mice_1)
summary(model1)
##
## Call:
## lm(formula = IgG1 ~ concentration, data = mice_1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.83402 -0.64787 0.02151 0.58186 2.69190
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.9505 0.1717 17.18 <2e-16 ***
## concentration 0.8252 0.6937 1.19 0.239
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9127 on 58 degrees of freedom
## Multiple R-squared: 0.02382, Adjusted R-squared: 0.006985
## F-statistic: 1.415 on 1 and 58 DF, p-value: 0.2391
# Simulation: how often is the slope significant at the 5% level when data
# are generated from a known linear model, using (a) the full set of
# concentrations and (b) only the concentrations below 2?
set.seed(2678)
n_sim <- 1000

# Data-generating model shared by both scenarios
sim_intercept <- 3
sim_slope <- 0.83
sim_sd <- 0.9

# The concentrations are the same in every replicate, so build them once.
# Sample sizes are taken from these vectors rather than hard-coded.
conc2 <- mice$concentration
conc1 <- conc2[conc2 < 2]

# Slope p-values per replicate: p2 = all concentrations, p1 = without 2
p1 <- p2 <- numeric(n_sim)

for (i in seq_len(n_sim)) {
  # Draw order (full design first, reduced design second) is kept fixed so
  # that the seed yields the same random numbers on every run.
  mice2 <- data.frame(
    concentration = conc2,
    IgG1 = sim_intercept + sim_slope * conc2 +
      rnorm(length(conc2), sd = sim_sd)
  )
  mice1 <- data.frame(
    concentration = conc1,
    IgG1 = sim_intercept + sim_slope * conc1 +
      rnorm(length(conc1), sd = sim_sd)
  )

  # Select the slope p-value by name instead of by position or via partial
  # matching of `$coef`.
  m2 <- lm(IgG1 ~ concentration, data = mice2)
  p2[i] <- coef(summary(m2))["concentration", "Pr(>|t|)"]
  m1 <- lm(IgG1 ~ concentration, data = mice1)
  p1[i] <- coef(summary(m1))["concentration", "Pr(>|t|)"]
}
# Share of replicates with a significant slope, all concentrations included
mean(p2 < 0.05)
## [1] 0.707
# Share of replicates with a significant slope, concentration 2 excluded
mean(p1 < 0.05)
## [1] 0.21
The estimated effect of the concentration of ADDF17 on the mean IgG1 blood level is 1.07: the mean IgG1 is estimated to increase by 1.07 for each additional microgram of ADDF17.
Given the estimate above, the standard error of the estimator is 0.32, and the 95% confidence interval is (0.44, 1.70).
There is a significant effect of the concentration of ADDF17 on the mean IgG1 blood level concentrations at the 5% level of significance (p = 0.0012).
Null hypothesis: the mean IgG1 does not linearly depend on the concentration of ADDF17 (\(\beta_1 = 0\))
Alternative hypothesis: the mean IgG1 does linearly depend on the concentration of ADDF17 (\(\beta_1 \neq 0\); the p-value reported by the regression summary is two-sided)
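After removing the mice that received the 2 microgram dose, the major differences are that the estimated regression coefficient drops from 1.07 to 0.83 and its p-value rises from 0.0012 to 0.24, which is higher than the 5% level of significance. This means that we fail to reject the null hypothesis. In conclusion, without the 2 microgram dose the data provide no evidence that the mean IgG1 depends linearly on the concentration of ADDF17.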
In the simulation, we considered two scenarios and repeated the experiment 1000 times in each of them.

* In the first scenario, each simulated experiment has 62 observations. The concentrations of ADDF17 are taken from the original data set in the question, and IgG1 is generated from the linear regression relationship with the concentration of ADDF17 found in question 1D: \(IgG1=3+0.83*conc2+rnorm(62,sd=0.9)\)
* In the second scenario, each simulated experiment has 60 observations. The concentrations of ADDF17 are the values of the original data set that are less than 2 microgram, and IgG1 is generated from the same linear regression relationship from question 1D: \(IgG1=3+0.83*conc1+rnorm(60,sd=0.9)\)
For every simulated data set we then fitted the linear regression and recorded the p-value of the slope. Finally, for each of the two scenarios, we calculated the proportion of the 1000 p-values that are less than 0.05.
The results show that in the first scenario (62 observations, including the 2 microgram dose) 70.7% of the 1000 simulated experiments have a p-value less than 0.05, whereas in the second scenario (60 observations, without the 2 microgram dose) only 21% do. In other words, with the same true effect, the two observations at the 2 microgram dose greatly increase the chance of detecting it.