Example 1: Outlying Observations.
library(ggplot2)
library(plotly)
library(dplyr)
library(readr)
infectionRisk <- read_csv("ST301-infectionRisk.csv")
infectionRisk <- infectionRisk[,1:4]
pairs(infectionRisk[,-1])lm.infecRisk <- lm(InfctRsk ~ . , data=infectionRisk[,-1])
infMeasures <- data.frame(ID=infectionRisk[1:112,1],hatValues = hatvalues(lm.infecRisk),cookDist = cooks.distance(lm.infecRisk))
gpStayIR <- ggplot(infectionRisk, aes(x= InfctRsk, y= Stay))
gpStayIR <- gpStayIR + geom_point() +geom_text(aes(label=ID),hjust=-0.5, vjust=0.1) + theme_classic()
gpStayIRgpAgeIR <- ggplot(infectionRisk, aes(x= InfctRsk, y= Age))
gpAgeIR <- gpAgeIR + geom_point() +geom_text(aes(label=ID),hjust=-0.5, vjust=0.1) + theme_classic()
gpAgeIRplot_ly(x=infectionRisk$Stay, y=infectionRisk$InfctRsk, z=infectionRisk$Age, type="scatter3d", mode="markers") %>%
layout(scene = list(xaxis = list(title = 'Stay'),
yaxis = list(title = 'Infction Risk'),
zaxis = list(title = 'Age')))gpHatVal <- ggplot(infMeasures, aes(x= ID, y= hatValues , label=ID))
gpHatVal <- gpHatVal + geom_point() +geom_text(aes(label=ID),hjust=-0.5, vjust=0.1) + theme_classic()
gpHatVal <- gpHatVal + geom_hline(yintercept=2*mean(infMeasures$hatValues), linetype="dashed", color = "red")
gpHatValgpCookDist <- ggplot(infMeasures, aes(x= ID, y= cookDist , label=ID))
gpCookDist <- gpCookDist + geom_point() +geom_text(aes(label=ID),hjust=-0.5, vjust=0.1) + theme_classic()
gpCookDist#### After removing outlying observarions
infectionRisk_2 <- infectionRisk[c(-47,-112),1:4]
lm.infecRisk_2 <- lm(InfctRsk ~ . , data = infectionRisk_2[,-1])
summary(lm.infecRisk_2)##
## Call:
## lm(formula = InfctRsk ~ ., data = infectionRisk_2[, -1])
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.74426 -0.61422 0.07069 0.61723 2.71618
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.88189 1.45512 1.293 0.199
## Stay 0.42412 0.06669 6.360 5.09e-09 ***
## Age -0.03007 0.02555 -1.177 0.242
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.134 on 107 degrees of freedom
## Multiple R-squared: 0.2767, Adjusted R-squared: 0.2631
## F-statistic: 20.46 on 2 and 107 DF, p-value: 2.983e-08
Example 2:
We are going to select best fitted model for data on age (X) and plasma level of a polyamine (Y ).
Let’s put the data,
assig <- data.frame(x=c(0,0,1,1,2,2,3,3,4,4,5,5,6,6),
y=c(17.0,11.2,9.2,12.6,7.4,10.5,8.3,5.8,4.6,6.5,5.3,3.8,3.2,4.5))Note that our Assumptions are, 1.The Y values(e errors) are independent. 2.The Y values can be expressed as a linear f^n of the X variables. 3.Variation of obs^ns around the regression line(residual) is constant(homoscedasticity)
library(ggplot2)
gpassig <- ggplot(assig, aes(x=x, y=y)) + geom_point() + theme_bw()
gpassig <- gpassig + xlab("age") + ylab("plasma level of a polyamine")
gpassigNow create the Linear Model with the Residuals,
##
## Call:
## lm(formula = y ~ x, data = assig)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.1232 -1.6464 0.3464 0.9317 4.1304
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.8696 0.9048 14.224 7.12e-09 ***
## x -1.6732 0.2509 -6.668 2.30e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.878 on 12 degrees of freedom
## Multiple R-squared: 0.7875, Adjusted R-squared: 0.7698
## F-statistic: 44.46 on 1 and 12 DF, p-value: 2.3e-05
Here we can see a clean pattern
But let’s consider some Transformations,
- Transformation-1
assigT1 <- data.frame(x = assig$x, y = sqrt(assig$y))
gpassigT1 <- ggplot(assigT1, aes(x=x, y=y)) + geom_point() + theme_bw()
gpassigT1 <- gpassigT1 + xlab("age") + ylab("(plasma level of a polyamine)^1/2")
gpassigT1##
## Call:
## lm(formula = y ~ x, data = assigT1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.31729 -0.28037 0.03671 0.20607 0.50781
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.61530 0.13697 26.395 5.36e-12 ***
## x -0.29656 0.03799 -7.807 4.83e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2843 on 12 degrees of freedom
## Multiple R-squared: 0.8355, Adjusted R-squared: 0.8218
## F-statistic: 60.94 on 1 and 12 DF, p-value: 4.826e-06
- Transformation-2
assigT2 <- data.frame(x = assig$x, y = 1/(assig$y))
gpassigT2 <- ggplot(assigT1, aes(x=x, y=y)) + geom_point() + theme_bw()
gpassigT2 <- gpassigT1 + xlab("age") + ylab("1/(plasma level of a polyamine)")
gpassigT2##
## Call:
## lm(formula = y ~ x, data = assigT2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.03789 -0.03319 0.00390 0.02368 0.05618
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.060432 0.015918 3.796 0.00255 **
## x 0.032647 0.004415 7.395 8.33e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.03304 on 12 degrees of freedom
## Multiple R-squared: 0.82, Adjusted R-squared: 0.805
## F-statistic: 54.68 on 1 and 12 DF, p-value: 8.332e-06
- Transformation-3
assigT3 <- data.frame(x = assig$x, y = log10(assig$y))
gpassigT3 <- ggplot(assigT1, aes(x=x, y=y)) + geom_point() + theme_bw()
gpassigT3 <- gpassigT1 + xlab("age") + ylab("log10(plasma level of a polyamine)")
gpassigT3##
## Call:
## lm(formula = y ~ x, data = assigT3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.089394 -0.076368 0.001502 0.070914 0.099807
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.13064 0.04016 28.153 2.50e-12 ***
## x -0.09462 0.01114 -8.495 2.02e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08335 on 12 degrees of freedom
## Multiple R-squared: 0.8574, Adjusted R-squared: 0.8455
## F-statistic: 72.17 on 1 and 12 DF, p-value: 2.023e-06
Finally compare the summary(of linear models),
##
## Call:
## lm(formula = y ~ x, data = assig)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.1232 -1.6464 0.3464 0.9317 4.1304
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.8696 0.9048 14.224 7.12e-09 ***
## x -1.6732 0.2509 -6.668 2.30e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.878 on 12 degrees of freedom
## Multiple R-squared: 0.7875, Adjusted R-squared: 0.7698
## F-statistic: 44.46 on 1 and 12 DF, p-value: 2.3e-05
##
## Call:
## lm(formula = y ~ x, data = assigT1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.31729 -0.28037 0.03671 0.20607 0.50781
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.61530 0.13697 26.395 5.36e-12 ***
## x -0.29656 0.03799 -7.807 4.83e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2843 on 12 degrees of freedom
## Multiple R-squared: 0.8355, Adjusted R-squared: 0.8218
## F-statistic: 60.94 on 1 and 12 DF, p-value: 4.826e-06
##
## Call:
## lm(formula = y ~ x, data = assigT2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.03789 -0.03319 0.00390 0.02368 0.05618
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.060432 0.015918 3.796 0.00255 **
## x 0.032647 0.004415 7.395 8.33e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.03304 on 12 degrees of freedom
## Multiple R-squared: 0.82, Adjusted R-squared: 0.805
## F-statistic: 54.68 on 1 and 12 DF, p-value: 8.332e-06
##
## Call:
## lm(formula = y ~ x, data = assigT3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.089394 -0.076368 0.001502 0.070914 0.099807
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.13064 0.04016 28.153 2.50e-12 ***
## x -0.09462 0.01114 -8.495 2.02e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08335 on 12 degrees of freedom
## Multiple R-squared: 0.8574, Adjusted R-squared: 0.8455
## F-statistic: 72.17 on 1 and 12 DF, p-value: 2.023e-06
So to interpret the best result or best fitted model I’m going to consider about two statistics, 1. Comparing the R-Squred values 2. Comparing the p-value but we can also consider other statistics F-statistic, Coefficient and Resdual standard error as well.
Now let’s check the results,
## X R.squred p.value F.satatistic.1.12df RSE.with.12df
## 1 Mod 1 0.7875 2.30e-05 44.46 1.87800
## 2 Mod 2 (T1) 0.8355 4.83e-06 60.94 0.28430
## 3 Mod 3 (T2) 0.8200 8.33e-06 54.68 0.03304
## 4 Mod 4 (T3) 0.8574 2.02e-06 72.17 0.08335
From the above table we can clearly see highest R-squred value and lowest p-value has been taken by mod4 which is the transformtion 3. i.e. The best fitted model is Transformation model 3.
Another tricky simple way to do,
##
## Call:
## lm(formula = y ~ x, data = assig)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.1232 -1.6464 0.3464 0.9317 4.1304
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.8696 0.9048 14.224 7.12e-09 ***
## x -1.6732 0.2509 -6.668 2.30e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.878 on 12 degrees of freedom
## Multiple R-squared: 0.7875, Adjusted R-squared: 0.7698
## F-statistic: 44.46 on 1 and 12 DF, p-value: 2.3e-05
Example 3:
# help(read.table)
dataset <- read.table(file = "BloodPressure.txt", header = TRUE, sep = "\t")
bloodpressure <- dataset[,c(2,3,4,5,6,7,8)]
attach(bloodpressure)
names(bloodpressure)## [1] "BP" "Age" "Weight" "BSA" "Dur" "Pulse" "Stress"
########################################
# Do Forward selection,
# then at iteration 4 do Backward elimination(drop1())
# then see for significance difference of two models(FM vs RM)
########################################
## 1.step backward,
full.model <- lm(BP ~ ., data = bloodpressure)
# help(step)
step(full.model , direction = "backward")## Start: AIC=-30.55
## BP ~ Age + Weight + BSA + Dur + Pulse + Stress
##
## Df Sum of Sq RSS AIC
## <none> 2.156 -30.551
## - Dur 1 0.330 2.486 -29.698
## - Stress 1 0.442 2.598 -28.820
## - Pulse 1 0.444 2.600 -28.802
## - BSA 1 0.947 3.103 -25.267
## - Age 1 33.331 35.486 23.468
## - Weight 1 39.172 41.328 26.516
##
## Call:
## lm(formula = BP ~ Age + Weight + BSA + Dur + Pulse + Stress,
## data = bloodpressure)
##
## Coefficients:
## (Intercept) Age Weight BSA Dur Pulse
## -12.870476 0.703259 0.969920 3.776491 0.068383 -0.084485
## Stress
## 0.005572
## 2.step forward,
step(lm(BP ~ 1, data = bloodpressure), direction = "forward",
scope = ~Age+Weight+BSA+Dur+Pulse+Stress)## Start: AIC=68.64
## BP ~ 1
##
## Df Sum of Sq RSS AIC
## + Weight 1 505.47 54.53 24.060
## + BSA 1 419.86 140.14 42.938
## + Pulse 1 291.44 268.56 55.946
## + Age 1 243.27 316.73 59.247
## <none> 560.00 68.644
## + Dur 1 48.02 511.98 68.851
## + Stress 1 15.04 544.96 70.099
##
## Step: AIC=24.06
## BP ~ Weight
##
## Df Sum of Sq RSS AIC
## + Age 1 49.704 4.824 -22.443
## + Stress 1 9.660 44.868 22.160
## + Pulse 1 8.940 45.588 22.478
## + Dur 1 6.095 48.433 23.689
## <none> 54.528 24.060
## + BSA 1 2.814 51.714 25.000
##
## Step: AIC=-22.44
## BP ~ Weight + Age
##
## Df Sum of Sq RSS AIC
## + BSA 1 1.76778 3.0561 -29.572
## + Pulse 1 0.49557 4.3284 -22.611
## <none> 4.8239 -22.443
## + Dur 1 0.17835 4.6456 -21.196
## + Stress 1 0.16286 4.6611 -21.130
##
## Step: AIC=-29.57
## BP ~ Weight + Age + BSA
##
## Df Sum of Sq RSS AIC
## + Dur 1 0.33510 2.7210 -29.894
## <none> 3.0561 -29.572
## + Stress 1 0.21774 2.8384 -29.050
## + Pulse 1 0.04111 3.0150 -27.842
##
## Step: AIC=-29.89
## BP ~ Weight + Age + BSA + Dur
##
## Df Sum of Sq RSS AIC
## <none> 2.7210 -29.894
## + Pulse 1 0.12307 2.5980 -28.820
## + Stress 1 0.12077 2.6003 -28.802
##
## Call:
## lm(formula = BP ~ Weight + Age + BSA + Dur, data = bloodpressure)
##
## Coefficients:
## (Intercept) Weight Age BSA Dur
## -12.85206 0.89701 0.68335 4.86037 0.06653
## Start: AIC=-30.55
## BP ~ Age + Weight + BSA + Dur + Pulse + Stress
##
## Df Sum of Sq RSS AIC
## <none> 2.156 -30.551
## - Dur 1 0.330 2.486 -29.698
## - Stress 1 0.442 2.598 -28.820
## - Pulse 1 0.444 2.600 -28.802
## - BSA 1 0.947 3.103 -25.267
## - Age 1 33.331 35.486 23.468
## - Weight 1 39.172 41.328 26.516
##
## Call:
## lm(formula = BP ~ Age + Weight + BSA + Dur + Pulse + Stress,
## data = bloodpressure)
##
## Coefficients:
## (Intercept) Age Weight BSA Dur Pulse
## -12.870476 0.703259 0.969920 3.776491 0.068383 -0.084485
## Stress
## 0.005572
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## The following object is masked from 'package:plotly':
##
## select
## Start: AIC=-30.55
## BP ~ Age + Weight + BSA + Dur + Pulse + Stress
##
## Df Sum of Sq RSS AIC
## <none> 2.156 -30.551
## - Dur 1 0.330 2.486 -29.698
## - Stress 1 0.442 2.598 -28.820
## - Pulse 1 0.444 2.600 -28.802
## - BSA 1 0.947 3.103 -25.267
## - Age 1 33.331 35.486 23.468
## - Weight 1 39.172 41.328 26.516
##
## Call:
## lm(formula = BP ~ Age + Weight + BSA + Dur + Pulse + Stress,
## data = bloodpressure)
##
## Coefficients:
## (Intercept) Age Weight BSA Dur Pulse
## -12.870476 0.703259 0.969920 3.776491 0.068383 -0.084485
## Stress
## 0.005572
Example 4:
library(ggplot2)
library(plotly)
# we analasis our rediused model(RM)
# RM(BP) = b0 + b1*Weight + b2*Age +b3*BSA
## wee need data set with idNO.
dataset <- read.table(file = "BloodPressure.txt", header = TRUE, sep = "\t")
dataset## PtID BP Age Weight BSA Dur Pulse Stress
## 1 1 105 47 85.4 1.75 5.1 63 33
## 2 2 115 49 94.2 2.10 3.8 70 14
## 3 3 116 49 95.3 1.98 8.2 72 10
## 4 4 117 50 94.7 2.01 5.8 73 99
## 5 5 112 51 89.4 1.89 7.0 72 95
## 6 6 121 48 99.5 2.25 9.3 71 10
## 7 7 121 49 99.8 2.25 2.5 69 42
## 8 8 110 47 90.9 1.90 6.2 66 8
## 9 9 110 49 89.2 1.83 7.1 69 62
## 10 10 114 48 92.7 2.07 5.6 64 35
## 11 11 114 47 94.4 2.07 5.3 74 90
## 12 12 115 49 94.1 1.98 5.6 71 21
## 13 13 114 50 91.6 2.05 10.2 68 47
## 14 14 106 45 87.1 1.92 5.6 67 80
## 15 15 125 52 101.3 2.19 10.0 76 98
## 16 16 114 46 94.5 1.98 7.4 69 95
## 17 17 106 46 87.0 1.87 3.6 62 18
## 18 18 113 46 94.5 1.90 4.3 70 12
## 19 19 110 48 90.5 1.88 9.0 71 99
## 20 20 122 56 95.7 2.09 7.0 75 99
## The following objects are masked from bloodpressure (pos = 4):
##
## Age, BP, BSA, Weight
## [1] "PtID" "BP" "Age" "Weight" "BSA"
## PtID BP Age Weight BSA
## 1 1 105 47 85.4 1.75
## 2 2 115 49 94.2 2.10
## 3 3 116 49 95.3 1.98
## 4 4 117 50 94.7 2.01
## 5 5 112 51 89.4 1.89
## 6 6 121 48 99.5 2.25
## 7 7 121 49 99.8 2.25
## 8 8 110 47 90.9 1.90
## 9 9 110 49 89.2 1.83
## 10 10 114 48 92.7 2.07
## 11 11 114 47 94.4 2.07
## 12 12 115 49 94.1 1.98
## 13 13 114 50 91.6 2.05
## 14 14 106 45 87.1 1.92
## 15 15 125 52 101.3 2.19
## 16 16 114 46 94.5 1.98
## 17 17 106 46 87.0 1.87
## 18 18 113 46 94.5 1.90
## 19 19 110 48 90.5 1.88
## 20 20 122 56 95.7 2.09
## BP Age Weight BSA
## 1 105 47 85.4 1.75
## 2 115 49 94.2 2.10
## 3 116 49 95.3 1.98
## 4 117 50 94.7 2.01
## 5 112 51 89.4 1.89
## 6 121 48 99.5 2.25
## 7 121 49 99.8 2.25
## 8 110 47 90.9 1.90
## 9 110 49 89.2 1.83
## 10 114 48 92.7 2.07
## 11 114 47 94.4 2.07
## 12 115 49 94.1 1.98
## 13 114 50 91.6 2.05
## 14 106 45 87.1 1.92
## 15 125 52 101.3 2.19
## 16 114 46 94.5 1.98
## 17 106 46 87.0 1.87
## 18 113 46 94.5 1.90
## 19 110 48 90.5 1.88
## 20 122 56 95.7 2.09
## 1 2 3 4 5 6 7
## 0.23358742 0.11464284 0.14024962 0.08206011 0.19832771 0.28393997 0.24727290
## 8 9 10 11 12 13 14
## 0.09076553 0.15693197 0.13763868 0.10950828 0.07916255 0.18751614 0.30224526
## 15 16 17 18 19 20
## 0.28152669 0.17661546 0.19174974 0.35688026 0.09474927 0.53462960
## 1 2 3 4 5 6
## 2.897873e-02 1.100156e-01 9.856423e-03 3.235619e-02 1.012834e-02 1.458255e-01
## 7 8 9 10 11 12
## 1.573275e-01 2.784216e-02 1.202892e-04 4.712645e-02 2.851731e-02 1.539220e-03
## 13 14 15 16 17 18
## 5.976127e-03 7.942169e-02 5.971442e-02 1.354558e-01 1.725834e-03 2.668880e-07
## 19 20
## 7.140561e-02 1.058192e-03
infbdMes <- data.frame(PtID=bloodpressure[1:20,1], hatVal=hatvalues(lm.RM), cookdist=cooks.distance(lm.RM))
infbdMes## PtID hatVal cookdist
## 1 1 0.23358742 2.897873e-02
## 2 2 0.11464284 1.100156e-01
## 3 3 0.14024962 9.856423e-03
## 4 4 0.08206011 3.235619e-02
## 5 5 0.19832771 1.012834e-02
## 6 6 0.28393997 1.458255e-01
## 7 7 0.24727290 1.573275e-01
## 8 8 0.09076553 2.784216e-02
## 9 9 0.15693197 1.202892e-04
## 10 10 0.13763868 4.712645e-02
## 11 11 0.10950828 2.851731e-02
## 12 12 0.07916255 1.539220e-03
## 13 13 0.18751614 5.976127e-03
## 14 14 0.30224526 7.942169e-02
## 15 15 0.28152669 5.971442e-02
## 16 16 0.17661546 1.354558e-01
## 17 17 0.19174974 1.725834e-03
## 18 18 0.35688026 2.668880e-07
## 19 19 0.09474927 7.140561e-02
## 20 20 0.53462960 1.058192e-03
## for age,
gpAge <- ggplot(bloodpressure, aes(x = BP, y = Age))
gpAge <- gpAge + geom_point() +geom_text(aes(label=PtID),hjust=-0.5, vjust=0.1) + theme_classic()
gpAge## for weight,
gpWeight <- ggplot(bloodpressure, aes(x = BP, y = Weight))
gpWeight <- gpWeight + geom_point() +geom_text(aes(label=PtID),hjust=-0.5, vjust=0.1) + theme_classic()
gpWeight## for bsa,
gpBSA <- ggplot(bloodpressure, aes(x = BP, y = BSA))
gpBSA <- gpBSA + geom_point() +geom_text(aes(label=PtID),hjust=-0.5, vjust=0.1) + theme_classic()
gpBSA## now the 3D build part,
## but we cannt buid a 3D model because this has 4 variable,
gpbdHatval <- ggplot(infbdMes, aes(x= PtID, y= hatVal , label=PtID))
gpbdHatval <- gpbdHatval + geom_point() +geom_text(aes(label=PtID),hjust=-0.5, vjust=0.1) + theme_classic()
gpbdHatval <- gpbdHatval + geom_hline(yintercept=2*mean(infbdMes$hatVal), linetype="dashed", color = "red")
gpbdHatvalgpbdCookdis <- ggplot(infbdMes, aes(x= PtID, y= cookdist , label=PtID))
gpbdCookdis <- gpbdCookdis + geom_point() +geom_text(aes(label=PtID),hjust=-0.5, vjust=0.1) + theme_classic()
gpbdCookdis# 2,6,7,14,15,16,19
## After removing influential points/ Outlyers,
after_data <- bloodpressure[c(-2,-6,-7,-14,-15,-16,-19), 1:5]
after_data## PtID BP Age Weight BSA
## 1 1 105 47 85.4 1.75
## 3 3 116 49 95.3 1.98
## 4 4 117 50 94.7 2.01
## 5 5 112 51 89.4 1.89
## 8 8 110 47 90.9 1.90
## 9 9 110 49 89.2 1.83
## 10 10 114 48 92.7 2.07
## 11 11 114 47 94.4 2.07
## 12 12 115 49 94.1 1.98
## 13 13 114 50 91.6 2.05
## 17 17 106 46 87.0 1.87
## 18 18 113 46 94.5 1.90
## 20 20 122 56 95.7 2.09
##
## Call:
## lm(formula = BP ~ ., data = after_data[, -1])
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.41242 -0.13224 -0.03031 0.16507 0.50658
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -13.18019 2.64098 -4.991 0.000748 ***
## Age 0.73160 0.03716 19.686 1.04e-08 ***
## Weight 0.87407 0.04300 20.328 7.86e-09 ***
## BSA 5.13414 1.42006 3.615 0.005612 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3052 on 9 degrees of freedom
## Multiple R-squared: 0.9966, Adjusted R-squared: 0.9954
## F-statistic: 873.3 on 3 and 9 DF, p-value: 2.076e-11