If you want hire me as a freelancer please use the following links.
data4 <- read.csv("Data4.csv",header = T,sep = ",")
data5 <- read.csv("Data5.csv",header = T,sep = ",")
dataMerge <- read.csv("DataMerge.csv",header = T,sep = ",")
dataMerge <- dataMerge[complete.cases(dataMerge),]data <- dataMerge[1:4504,]
data <- cbind(data,data4[,c(4:9)])
data <- cbind(data,data5[,c(3:4)])
head(data)## X MaskID Visit sulfonylurea meglitinide nphl_insulin reg_insulin la_insulin
## 1 1 100001 BLR 0 0 1 0 1
## 2 2 100001 F12 0 0 1 0 0
## 3 3 100001 F24 0 0 1 0 0
## 4 4 100001 F36 0 0 1 0 1
## 5 5 100001 F48 0 0 1 0 1
## 6 6 100001 F60 0 0 1 0 1
## othbol_insulin premix_insulin hba1c fpg female baseline_age Race edu BMI
## 1 1 0 8.7 96 0 60.8 3 4 35.91218
## 2 1 0 7.7 163 1 55.0 3 1 35.12173
## 3 1 0 7.0 95 1 72.5 3 4 32.02074
## 4 0 0 7.3 215 0 47.1 3 3 35.08068
## 5 0 0 8.0 99 1 56.7 3 2 41.93726
## 6 0 0 8.2 150 0 60.0 3 3 30.62379
## yrsdiab primary primary2
## 1 26 0 0
## 2 14 0 0
## 3 9 0 0
## 4 6 0 0
## 5 5 0 0
## 6 11 0 0
summary(data)## X MaskID Visit sulfonylurea
## Min. : 1 Min. :100001 Length:4504 Min. :0.0000
## 1st Qu.:1277 1st Qu.:100398 Class :character 1st Qu.:0.0000
## Median :2606 Median :100895 Mode :character Median :0.0000
## Mean :2602 Mean :100873 Mean :0.3153
## 3rd Qu.:3913 3rd Qu.:101319 3rd Qu.:1.0000
## Max. :5231 Max. :101773 Max. :1.0000
##
## meglitinide nphl_insulin reg_insulin la_insulin
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:1.0000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.0000 Median :1.0000 Median :0.00000 Median :1.0000
## Mean :0.1139 Mean :0.8302 Mean :0.08059 Mean :0.5653
## 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.00000 Max. :1.0000
##
## othbol_insulin premix_insulin hba1c fpg
## Min. :0.00000 Min. :0.0000 Min. : 4.300 Min. : 30.0
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.: 6.600 1st Qu.:103.0
## Median :0.00000 Median :0.0000 Median : 7.300 Median :136.0
## Mean :0.01976 Mean :0.4338 Mean : 7.441 Mean :144.6
## 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.: 8.100 3rd Qu.:175.0
## Max. :1.00000 Max. :1.0000 Max. :15.100 Max. :726.0
##
## female baseline_age Race edu
## Min. :0.0000 Min. :44.40 Min. :1.00 Min. :1.000
## 1st Qu.:0.0000 1st Qu.:57.50 1st Qu.:3.00 1st Qu.:2.000
## Median :0.0000 Median :61.80 Median :3.00 Median :3.000
## Mean :0.3921 Mean :62.52 Mean :2.68 Mean :2.719
## 3rd Qu.:1.0000 3rd Qu.:66.83 3rd Qu.:3.00 3rd Qu.:4.000
## Max. :1.0000 Max. :79.30 Max. :4.00 Max. :4.000
## NA's :3
## BMI yrsdiab primary primary2
## Min. :18.41 Min. : 0.00 Min. : 0.0000 Min. :0.00000
## 1st Qu.:28.63 1st Qu.: 7.00 1st Qu.: 0.0000 1st Qu.:0.00000
## Median :32.33 Median :12.00 Median : 0.0000 Median :0.00000
## Mean :32.72 Mean :13.19 Mean : 0.1232 Mean :0.08837
## 3rd Qu.:36.65 3rd Qu.:18.00 3rd Qu.: 0.0000 3rd Qu.:0.00000
## Max. :48.50 Max. :35.00 Max. :14.0000 Max. :1.00000
## NA's :3 NA's :46
#Correlation Matrix
res <- cor(data[,c(4:20)])
round(res, 2)## sulfonylurea meglitinide nphl_insulin reg_insulin la_insulin
## sulfonylurea 1.00 0.00 -0.28 -0.14 -0.24
## meglitinide 0.00 1.00 0.03 -0.08 -0.16
## nphl_insulin -0.28 0.03 1.00 0.13 0.47
## reg_insulin -0.14 -0.08 0.13 1.00 -0.30
## la_insulin -0.24 -0.16 0.47 -0.30 1.00
## othbol_insulin -0.06 -0.05 0.05 0.08 -0.08
## premix_insulin -0.19 0.00 0.17 0.03 0.10
## hba1c 0.05 -0.11 -0.14 0.13 -0.15
## fpg -0.01 -0.15 -0.13 0.12 -0.07
## female 0.00 -0.02 0.01 0.02 0.00
## baseline_age 0.00 0.00 -0.01 0.01 -0.02
## Race -0.01 0.01 0.01 0.01 0.00
## edu NA NA NA NA NA
## BMI NA NA NA NA NA
## yrsdiab NA NA NA NA NA
## primary 0.02 -0.02 -0.01 0.00 -0.01
## primary2 -0.01 -0.02 0.01 0.00 0.00
## othbol_insulin premix_insulin hba1c fpg female baseline_age
## sulfonylurea -0.06 -0.19 0.05 -0.01 0.00 0.00
## meglitinide -0.05 0.00 -0.11 -0.15 -0.02 0.00
## nphl_insulin 0.05 0.17 -0.14 -0.13 0.01 -0.01
## reg_insulin 0.08 0.03 0.13 0.12 0.02 0.01
## la_insulin -0.08 0.10 -0.15 -0.07 0.00 -0.02
## othbol_insulin 1.00 -0.11 -0.02 0.02 0.03 0.00
## premix_insulin -0.11 1.00 0.04 0.00 0.01 -0.01
## hba1c -0.02 0.04 1.00 0.53 -0.03 0.02
## fpg 0.02 0.00 0.53 1.00 -0.03 -0.01
## female 0.03 0.01 -0.03 -0.03 1.00 -0.03
## baseline_age 0.00 -0.01 0.02 -0.01 -0.03 1.00
## Race -0.01 0.00 0.00 0.01 -0.12 0.04
## edu NA NA NA NA NA NA
## BMI NA NA NA NA NA NA
## yrsdiab NA NA NA NA NA NA
## primary -0.02 0.03 0.00 0.00 0.01 0.05
## primary2 -0.01 0.04 0.00 0.00 0.02 0.07
## Race edu BMI yrsdiab primary primary2
## sulfonylurea -0.01 NA NA NA 0.02 -0.01
## meglitinide 0.01 NA NA NA -0.02 -0.02
## nphl_insulin 0.01 NA NA NA -0.01 0.01
## reg_insulin 0.01 NA NA NA 0.00 0.00
## la_insulin 0.00 NA NA NA -0.01 0.00
## othbol_insulin -0.01 NA NA NA -0.02 -0.01
## premix_insulin 0.00 NA NA NA 0.03 0.04
## hba1c 0.00 NA NA NA 0.00 0.00
## fpg 0.01 NA NA NA 0.00 0.00
## female -0.12 NA NA NA 0.01 0.02
## baseline_age 0.04 NA NA NA 0.05 0.07
## Race 1.00 NA NA NA -0.09 -0.06
## edu NA 1 NA NA NA NA
## BMI NA NA 1 NA NA NA
## yrsdiab NA NA NA 1 NA NA
## primary -0.09 NA NA NA 1.00 0.80
## primary2 -0.06 NA NA NA 0.80 1.00
#Correlation Plot
corrplot(res, type = "upper",
tl.col = "black", tl.srt = 45)#Correlation chart
chart.Correlation(data[,c(4:20)], histogram=TRUE, pch=19)Regression using panel data may mitigate omitted variable bias when there is no information on variables that correlate with both the regressors of interest and the independent variable and if these variables are constant in the time dimension or across entities. Provided that panel data is available panel regression methods may improve upon multiple regression models which produce results that are not internally valid in such a setting. So, here we have applied build panel modesl on the given dataset. For the setting we have all other as independent variables, while, primary and parimary2 as dependent variables.
We have used plm(), a convenient R function that enables us to estimate linear panel regression models which comes with the package plm (Croissant, Millo, and Tappe 2020). Usage of plm() is very similar as for the function lm() which have been used by the majority of the developer in the literature. Sometimes panel data is also called longitudinal data as it adds a temporal dimension to cross-sectional data. Let us have a look at the dataset by checking its structure and listing the first few observations.
str(data)## 'data.frame': 4504 obs. of 20 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ MaskID : int 100001 100001 100001 100001 100001 100001 100001 100001 100001 100002 ...
## $ Visit : chr "BLR" "F12" "F24" "F36" ...
## $ sulfonylurea : int 0 0 0 0 0 0 0 0 0 0 ...
## $ meglitinide : int 0 0 0 0 0 0 0 0 0 0 ...
## $ nphl_insulin : int 1 1 1 1 1 1 1 1 1 1 ...
## $ reg_insulin : int 0 0 0 0 0 0 0 0 0 1 ...
## $ la_insulin : int 1 0 0 1 1 1 1 1 1 0 ...
## $ othbol_insulin: int 1 1 1 0 0 0 0 0 0 1 ...
## $ premix_insulin: int 0 0 0 0 0 0 0 0 0 0 ...
## $ hba1c : num 8.7 7.7 7 7.3 8 8.2 7.6 7 7.4 8.2 ...
## $ fpg : num 96 163 95 215 99 150 88 79 198 109 ...
## $ female : int 0 1 1 0 1 0 0 0 1 0 ...
## $ baseline_age : num 60.8 55 72.5 47.1 56.7 60 55 70.4 52.7 69.7 ...
## $ Race : int 3 3 3 3 3 3 3 3 3 1 ...
## $ edu : int 4 1 4 3 2 3 4 4 4 4 ...
## $ BMI : num 35.9 35.1 32 35.1 41.9 ...
## $ yrsdiab : int 26 14 9 6 5 11 6 22 14 19 ...
## $ primary : int 0 0 0 0 0 0 0 0 0 0 ...
## $ primary2 : int 0 0 0 0 0 0 0 0 0 0 ...
We find that the dataset consists of 4504 observations on 20 variables. Since all variables are observed for all entities and over all time periods, the panel is balanced. If there were missing data for at least one entity in at least one time period we would call the panel unbalanced. As in the preprocessing step we have made the data balanced by adding picking the complete cases. Now, the data is in the shape that is suitable for the regression analysis.
#Model 1: with primary as dependent
mod1 <- plm(primary2 ~ Visit + sulfonylurea + meglitinide + nphl_insulin + reg_insulin + othbol_insulin + premix_insulin + female + baseline_age + BMI + yrsdiab + fpg + la_insulin + hba1c + Race + edu, data = data, index = c("Visit"), model = "pooling")
summary(mod1)## Pooling Model
##
## Call:
## plm(formula = primary2 ~ Visit + sulfonylurea + meglitinide +
## nphl_insulin + reg_insulin + othbol_insulin + premix_insulin +
## female + baseline_age + BMI + yrsdiab + fpg + la_insulin +
## hba1c + Race + edu, data = data, model = "pooling", index = c("Visit"))
##
## Unbalanced Panel: n = 9, T = 85-721, N = 4452
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.252426 -0.111235 -0.078343 -0.045524 0.988935
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) 1.0008e-01 6.3689e-02 1.5713 0.1161818
## VisitEXIT 3.3455e-03 1.7486e-02 0.1913 0.8482839
## VisitF12 -1.6616e-02 1.6847e-02 -0.9863 0.3240495
## VisitF24 -5.7540e-03 1.7333e-02 -0.3320 0.7399304
## VisitF36 -1.0745e-02 1.7729e-02 -0.6060 0.5445150
## VisitF48 1.7519e-02 1.8587e-02 0.9426 0.3459545
## VisitF60 -5.4969e-03 2.0219e-02 -0.2719 0.7857395
## VisitF72 -4.9481e-02 3.0310e-02 -1.6325 0.1026481
## VisitF84 8.7122e-03 3.3741e-02 0.2582 0.7962562
## sulfonylurea -4.6230e-03 9.8537e-03 -0.4692 0.6389769
## meglitinide -1.5122e-02 1.4017e-02 -1.0788 0.2807225
## nphl_insulin 1.1504e-02 1.4628e-02 0.7865 0.4316455
## reg_insulin -1.1808e-02 1.8237e-02 -0.6475 0.5173687
## othbol_insulin -1.6582e-02 3.1200e-02 -0.5315 0.5951116
## premix_insulin 1.4394e-02 9.7494e-03 1.4764 0.1399062
## female 9.6322e-03 8.8559e-03 1.0877 0.2768062
## baseline_age 1.9413e-03 6.5812e-04 2.9498 0.0031964 **
## BMI -2.8714e-03 7.9591e-04 -3.6076 0.0003124 ***
## yrsdiab 3.0081e-03 5.5214e-04 5.4481 5.367e-08 ***
## fpg 7.5541e-06 8.8405e-05 0.0854 0.9319088
## la_insulin -1.2663e-02 1.1393e-02 -1.1115 0.2664205
## hba1c -5.9730e-04 4.4615e-03 -0.1339 0.8935046
## Race -1.8397e-02 4.8797e-03 -3.7701 0.0001653 ***
## edu -1.1166e-02 4.3001e-03 -2.5966 0.0094468 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 358.31
## Residual Sum of Squares: 349.39
## R-Squared: 0.024879
## Adj. R-Squared: 0.019814
## F-statistic: 4.91198 on 23 and 4428 DF, p-value: 1.2011e-13
yhat <- mod1$df.residual
ggplot(data, aes(x = meglitinide + nphl_insulin + reg_insulin + othbol_insulin + premix_insulin + female + baseline_age + BMI + yrsdiab + fpg + la_insulin + hba1c + Race + edu, y = primary2))+
geom_point() +
geom_smooth(method=lm)## `geom_smooth()` using formula 'y ~ x'
kable(tidy(mod1), digits=3, caption="Pooled model")| term | estimate | std.error | statistic | p.value |
|---|---|---|---|---|
| (Intercept) | 0.100 | 0.064 | 1.571 | 0.116 |
| VisitEXIT | 0.003 | 0.017 | 0.191 | 0.848 |
| VisitF12 | -0.017 | 0.017 | -0.986 | 0.324 |
| VisitF24 | -0.006 | 0.017 | -0.332 | 0.740 |
| VisitF36 | -0.011 | 0.018 | -0.606 | 0.545 |
| VisitF48 | 0.018 | 0.019 | 0.943 | 0.346 |
| VisitF60 | -0.005 | 0.020 | -0.272 | 0.786 |
| VisitF72 | -0.049 | 0.030 | -1.632 | 0.103 |
| VisitF84 | 0.009 | 0.034 | 0.258 | 0.796 |
| sulfonylurea | -0.005 | 0.010 | -0.469 | 0.639 |
| meglitinide | -0.015 | 0.014 | -1.079 | 0.281 |
| nphl_insulin | 0.012 | 0.015 | 0.786 | 0.432 |
| reg_insulin | -0.012 | 0.018 | -0.647 | 0.517 |
| othbol_insulin | -0.017 | 0.031 | -0.531 | 0.595 |
| premix_insulin | 0.014 | 0.010 | 1.476 | 0.140 |
| female | 0.010 | 0.009 | 1.088 | 0.277 |
| baseline_age | 0.002 | 0.001 | 2.950 | 0.003 |
| BMI | -0.003 | 0.001 | -3.608 | 0.000 |
| yrsdiab | 0.003 | 0.001 | 5.448 | 0.000 |
| fpg | 0.000 | 0.000 | 0.085 | 0.932 |
| la_insulin | -0.013 | 0.011 | -1.111 | 0.266 |
| hba1c | -0.001 | 0.004 | -0.134 | 0.894 |
| Race | -0.018 | 0.005 | -3.770 | 0.000 |
| edu | -0.011 | 0.004 | -2.597 | 0.009 |
#Model 2: Fixed Effect Model with primary
mod2 <- plm(primary ~ Visit + sulfonylurea + meglitinide + nphl_insulin + reg_insulin + othbol_insulin + premix_insulin + female + baseline_age + BMI + yrsdiab + fpg + la_insulin + hba1c + Race + edu,
data = data, index = c("Visit"), model = "within")
summary(mod2)## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = primary ~ Visit + sulfonylurea + meglitinide +
## nphl_insulin + reg_insulin + othbol_insulin + premix_insulin +
## female + baseline_age + BMI + yrsdiab + fpg + la_insulin +
## hba1c + Race + edu, data = data, model = "within", index = c("Visit"))
##
## Unbalanced Panel: n = 9, T = 85-721, N = 4452
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.366197 -0.161183 -0.103620 -0.052078 13.708217
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## sulfonylurea 0.02194554 0.01712442 1.2815 0.2000729
## meglitinide -0.03868340 0.02436005 -1.5880 0.1123611
## nphl_insulin 0.00171108 0.02542100 0.0673 0.9463381
## reg_insulin -0.01375097 0.03169416 -0.4339 0.6644080
## othbol_insulin -0.06013299 0.05422176 -1.1090 0.2674821
## premix_insulin 0.03426828 0.01694327 2.0225 0.0431816 *
## female 0.00023663 0.01539050 0.0154 0.9877336
## baseline_age 0.00202976 0.00114372 1.7747 0.0760175 .
## BMI -0.00461218 0.00138320 -3.3344 0.0008618 ***
## yrsdiab 0.00503180 0.00095956 5.2439 1.646e-07 ***
## fpg 0.00010721 0.00015364 0.6978 0.4853225
## la_insulin -0.01996153 0.01979985 -1.0082 0.3134302
## hba1c -0.00621048 0.00775348 -0.8010 0.4231789
## Race -0.04809614 0.00848035 -5.6715 1.506e-08 ***
## edu -0.00781413 0.00747301 -1.0456 0.2957817
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 1079.7
## Residual Sum of Squares: 1055.2
## R-Squared: 0.022634
## Adj. R-Squared: 0.017557
## F-statistic: 6.8362 on 15 and 4428 DF, p-value: 6.7098e-15
yhat <- mod2$df.residual
ggplot(data, aes(x = meglitinide + nphl_insulin + reg_insulin + othbol_insulin + premix_insulin + female + baseline_age + BMI + yrsdiab + fpg + la_insulin + hba1c + Race + edu, y = primary))+
geom_point() +
geom_smooth(method=lm)## `geom_smooth()` using formula 'y ~ x'
kable(tidy(mod2), digits=3, caption="Fixed Effect Model")| term | estimate | std.error | statistic | p.value |
|---|---|---|---|---|
| sulfonylurea | 0.022 | 0.017 | 1.282 | 0.200 |
| meglitinide | -0.039 | 0.024 | -1.588 | 0.112 |
| nphl_insulin | 0.002 | 0.025 | 0.067 | 0.946 |
| reg_insulin | -0.014 | 0.032 | -0.434 | 0.664 |
| othbol_insulin | -0.060 | 0.054 | -1.109 | 0.267 |
| premix_insulin | 0.034 | 0.017 | 2.023 | 0.043 |
| female | 0.000 | 0.015 | 0.015 | 0.988 |
| baseline_age | 0.002 | 0.001 | 1.775 | 0.076 |
| BMI | -0.005 | 0.001 | -3.334 | 0.001 |
| yrsdiab | 0.005 | 0.001 | 5.244 | 0.000 |
| fpg | 0.000 | 0.000 | 0.698 | 0.485 |
| la_insulin | -0.020 | 0.020 | -1.008 | 0.313 |
| hba1c | -0.006 | 0.008 | -0.801 | 0.423 |
| Race | -0.048 | 0.008 | -5.671 | 0.000 |
| edu | -0.008 | 0.007 | -1.046 | 0.296 |
#Model 3: with primary as dependent
mod3 <- plm(primary ~ Visit + sulfonylurea + meglitinide + nphl_insulin + reg_insulin + othbol_insulin + premix_insulin + female + baseline_age + BMI + yrsdiab + fpg + la_insulin + hba1c + Race + edu, data = data, index = c("Visit"), model = "pooling")
summary(mod3)## Pooling Model
##
## Call:
## plm(formula = primary ~ Visit + sulfonylurea + meglitinide +
## nphl_insulin + reg_insulin + othbol_insulin + premix_insulin +
## female + baseline_age + BMI + yrsdiab + fpg + la_insulin +
## hba1c + Race + edu, data = data, model = "pooling", index = c("Visit"))
##
## Unbalanced Panel: n = 9, T = 85-721, N = 4452
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.366197 -0.161183 -0.103620 -0.052078 13.708217
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) 0.25088949 0.11068390 2.2667 0.0234553 *
## VisitEXIT 0.00915279 0.03038929 0.3012 0.7632878
## VisitF12 0.02373331 0.02927743 0.8106 0.4176188
## VisitF24 -0.00136914 0.03012293 -0.0455 0.9637494
## VisitF36 -0.00019669 0.03081115 -0.0064 0.9949068
## VisitF48 0.01893606 0.03230162 0.5862 0.5577534
## VisitF60 -0.00101221 0.03513882 -0.0288 0.9770206
## VisitF72 -0.06147710 0.05267495 -1.1671 0.2432316
## VisitF84 0.00626220 0.05863717 0.1068 0.9149559
## sulfonylurea 0.02194554 0.01712442 1.2815 0.2000729
## meglitinide -0.03868340 0.02436005 -1.5880 0.1123611
## nphl_insulin 0.00171108 0.02542100 0.0673 0.9463381
## reg_insulin -0.01375097 0.03169416 -0.4339 0.6644080
## othbol_insulin -0.06013299 0.05422176 -1.1090 0.2674821
## premix_insulin 0.03426828 0.01694327 2.0225 0.0431816 *
## female 0.00023663 0.01539050 0.0154 0.9877336
## baseline_age 0.00202976 0.00114372 1.7747 0.0760175 .
## BMI -0.00461218 0.00138320 -3.3344 0.0008618 ***
## yrsdiab 0.00503180 0.00095956 5.2439 1.646e-07 ***
## fpg 0.00010721 0.00015364 0.6978 0.4853225
## la_insulin -0.01996153 0.01979985 -1.0082 0.3134302
## hba1c -0.00621048 0.00775348 -0.8010 0.4231789
## Race -0.04809614 0.00848035 -5.6715 1.506e-08 ***
## edu -0.00781413 0.00747301 -1.0456 0.2957817
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 1080.5
## Residual Sum of Squares: 1055.2
## R-Squared: 0.023421
## Adj. R-Squared: 0.018348
## F-statistic: 4.61717 on 23 and 4428 DF, p-value: 1.766e-12
yhat <- mod3$df.residual
yhat## [1] 4428
kable(tidy(mod3), digits=3, caption="Pooled model")| term | estimate | std.error | statistic | p.value |
|---|---|---|---|---|
| (Intercept) | 0.251 | 0.111 | 2.267 | 0.023 |
| VisitEXIT | 0.009 | 0.030 | 0.301 | 0.763 |
| VisitF12 | 0.024 | 0.029 | 0.811 | 0.418 |
| VisitF24 | -0.001 | 0.030 | -0.045 | 0.964 |
| VisitF36 | 0.000 | 0.031 | -0.006 | 0.995 |
| VisitF48 | 0.019 | 0.032 | 0.586 | 0.558 |
| VisitF60 | -0.001 | 0.035 | -0.029 | 0.977 |
| VisitF72 | -0.061 | 0.053 | -1.167 | 0.243 |
| VisitF84 | 0.006 | 0.059 | 0.107 | 0.915 |
| sulfonylurea | 0.022 | 0.017 | 1.282 | 0.200 |
| meglitinide | -0.039 | 0.024 | -1.588 | 0.112 |
| nphl_insulin | 0.002 | 0.025 | 0.067 | 0.946 |
| reg_insulin | -0.014 | 0.032 | -0.434 | 0.664 |
| othbol_insulin | -0.060 | 0.054 | -1.109 | 0.267 |
| premix_insulin | 0.034 | 0.017 | 2.023 | 0.043 |
| female | 0.000 | 0.015 | 0.015 | 0.988 |
| baseline_age | 0.002 | 0.001 | 1.775 | 0.076 |
| BMI | -0.005 | 0.001 | -3.334 | 0.001 |
| yrsdiab | 0.005 | 0.001 | 5.244 | 0.000 |
| fpg | 0.000 | 0.000 | 0.698 | 0.485 |
| la_insulin | -0.020 | 0.020 | -1.008 | 0.313 |
| hba1c | -0.006 | 0.008 | -0.801 | 0.423 |
| Race | -0.048 | 0.008 | -5.671 | 0.000 |
| edu | -0.008 | 0.007 | -1.046 | 0.296 |
#Model 4: Fixed Effect Model with primary
mod4 <- plm(primary2 ~ Visit + sulfonylurea + meglitinide + nphl_insulin + reg_insulin + othbol_insulin + premix_insulin + female + baseline_age + BMI + yrsdiab + fpg + la_insulin + hba1c + Race + edu,
data = data, index = c("Visit"), model = "within")
summary(mod4)## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = primary2 ~ Visit + sulfonylurea + meglitinide +
## nphl_insulin + reg_insulin + othbol_insulin + premix_insulin +
## female + baseline_age + BMI + yrsdiab + fpg + la_insulin +
## hba1c + Race + edu, data = data, model = "within", index = c("Visit"))
##
## Unbalanced Panel: n = 9, T = 85-721, N = 4452
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.252426 -0.111235 -0.078343 -0.045524 0.988935
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## sulfonylurea -4.6230e-03 9.8537e-03 -0.4692 0.6389769
## meglitinide -1.5122e-02 1.4017e-02 -1.0788 0.2807225
## nphl_insulin 1.1504e-02 1.4628e-02 0.7865 0.4316455
## reg_insulin -1.1808e-02 1.8237e-02 -0.6475 0.5173687
## othbol_insulin -1.6582e-02 3.1200e-02 -0.5315 0.5951116
## premix_insulin 1.4394e-02 9.7494e-03 1.4764 0.1399062
## female 9.6322e-03 8.8559e-03 1.0877 0.2768062
## baseline_age 1.9413e-03 6.5812e-04 2.9498 0.0031964 **
## BMI -2.8714e-03 7.9591e-04 -3.6076 0.0003124 ***
## yrsdiab 3.0081e-03 5.5214e-04 5.4481 5.367e-08 ***
## fpg 7.5541e-06 8.8405e-05 0.0854 0.9319088
## la_insulin -1.2663e-02 1.1393e-02 -1.1115 0.2664205
## hba1c -5.9730e-04 4.4615e-03 -0.1339 0.8935046
## Race -1.8397e-02 4.8797e-03 -3.7701 0.0001653 ***
## edu -1.1166e-02 4.3001e-03 -2.5966 0.0094468 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 357.49
## Residual Sum of Squares: 349.39
## R-Squared: 0.022637
## Adj. R-Squared: 0.01756
## F-statistic: 6.83714 on 15 and 4428 DF, p-value: 6.6695e-15
yhat <- mod4$df.residual
yhat## [1] 4428
kable(tidy(mod4), digits=3, caption="Fixed Effect model")| term | estimate | std.error | statistic | p.value |
|---|---|---|---|---|
| sulfonylurea | -0.005 | 0.010 | -0.469 | 0.639 |
| meglitinide | -0.015 | 0.014 | -1.079 | 0.281 |
| nphl_insulin | 0.012 | 0.015 | 0.786 | 0.432 |
| reg_insulin | -0.012 | 0.018 | -0.647 | 0.517 |
| othbol_insulin | -0.017 | 0.031 | -0.531 | 0.595 |
| premix_insulin | 0.014 | 0.010 | 1.476 | 0.140 |
| female | 0.010 | 0.009 | 1.088 | 0.277 |
| baseline_age | 0.002 | 0.001 | 2.950 | 0.003 |
| BMI | -0.003 | 0.001 | -3.608 | 0.000 |
| yrsdiab | 0.003 | 0.001 | 5.448 | 0.000 |
| fpg | 0.000 | 0.000 | 0.085 | 0.932 |
| la_insulin | -0.013 | 0.011 | -1.111 | 0.266 |
| hba1c | -0.001 | 0.004 | -0.134 | 0.894 |
| Race | -0.018 | 0.005 | -3.770 | 0.000 |
| edu | -0.011 | 0.004 | -2.597 | 0.009 |
mod5 <- lmer(primary2 ~ sulfonylurea + meglitinide + nphl_insulin + reg_insulin + othbol_insulin + premix_insulin + female + baseline_age + BMI + yrsdiab + fpg + (1|hba1c), data = data)
summary(mod5)## Linear mixed model fit by REML ['lmerMod']
## Formula: primary2 ~ sulfonylurea + meglitinide + nphl_insulin + reg_insulin +
## othbol_insulin + premix_insulin + female + baseline_age +
## BMI + yrsdiab + fpg + (1 | hba1c)
## Data: data
##
## REML criterion at convergence: 1449.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -0.7615 -0.3804 -0.2828 -0.1859 3.4951
##
## Random effects:
## Groups Name Variance Std.Dev.
## hba1c (Intercept) 6.065e-07 0.0007788
## Residual 7.925e-02 0.2815118
## Number of obs: 4455, groups: hba1c, 87
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -4.840e-03 5.401e-02 -0.090
## sulfonylurea -1.910e-03 9.655e-03 -0.198
## meglitinide -1.147e-02 1.348e-02 -0.851
## nphl_insulin 2.111e-03 1.198e-02 0.176
## reg_insulin -3.176e-03 1.589e-02 -0.200
## othbol_insulin -1.911e-02 3.060e-02 -0.624
## premix_insulin 1.762e-02 8.813e-03 1.999
## female 1.622e-02 8.766e-03 1.851
## baseline_age 1.996e-03 6.554e-04 3.046
## BMI -2.699e-03 7.956e-04 -3.392
## yrsdiab 3.105e-03 5.523e-04 5.621
## fpg 1.691e-05 7.563e-05 0.224
##
## Correlation of Fixed Effects:
## (Intr) slfnyl mgltnd nphl_n rg_nsl othbl_ prmx_n female bsln_g
## sulfonylure -0.133
## meglitinide -0.068 0.006
## nphl_insuln -0.221 0.244 -0.021
## reg_insulin 0.024 0.100 0.058 -0.109
## othbol_nsln -0.004 0.058 0.045 -0.048 -0.064
## premix_nsln -0.063 0.158 0.002 -0.126 0.006 0.131
## female -0.002 -0.001 0.022 -0.002 -0.016 -0.023 -0.010
## baseline_ag -0.806 0.015 0.004 0.014 -0.006 0.000 0.012 0.014
## BMI -0.587 0.006 0.014 -0.008 -0.007 -0.011 0.005 -0.157 0.146
## yrsdiab 0.007 -0.030 0.019 -0.008 -0.016 -0.018 -0.015 -0.026 -0.216
## fpg -0.232 0.035 0.134 0.149 -0.117 -0.009 -0.019 0.041 0.005
## BMI yrsdib
## sulfonylure
## meglitinide
## nphl_insuln
## reg_insulin
## othbol_nsln
## premix_nsln
## female
## baseline_ag
## BMI
## yrsdiab 0.057
## fpg -0.013 0.003
mod6 <- lmer(primary ~ sulfonylurea + meglitinide + nphl_insulin + reg_insulin + othbol_insulin + premix_insulin + female + baseline_age + BMI + yrsdiab + fpg + (1|hba1c), data = data)
summary(mod6)## Linear mixed model fit by REML ['lmerMod']
## Formula: primary ~ sulfonylurea + meglitinide + nphl_insulin + reg_insulin +
## othbol_insulin + premix_insulin + female + baseline_age +
## BMI + yrsdiab + fpg + (1 | hba1c)
## Data: data
##
## REML criterion at convergence: 6368.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -0.6826 -0.3113 -0.2210 -0.1388 28.1809
##
## Random effects:
## Groups Name Variance Std.Dev.
## hba1c (Intercept) 0.0001157 0.01076
## Residual 0.2396552 0.48955
## Number of obs: 4455, groups: hba1c, 87
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 5.554e-02 9.400e-02 0.591
## sulfonylurea 2.555e-02 1.680e-02 1.521
## meglitinide -3.042e-02 2.346e-02 -1.297
## nphl_insulin -9.145e-03 2.084e-02 -0.439
## reg_insulin -7.301e-03 2.765e-02 -0.264
## othbol_insulin -4.540e-02 5.323e-02 -0.853
## premix_insulin 3.032e-02 1.533e-02 1.977
## female 1.326e-02 1.525e-02 0.870
## baseline_age 1.862e-03 1.140e-03 1.633
## BMI -4.225e-03 1.384e-03 -3.053
## yrsdiab 5.286e-03 9.606e-04 5.503
## fpg 3.926e-05 1.320e-04 0.297
##
## Correlation of Fixed Effects:
## (Intr) slfnyl mgltnd nphl_n rg_nsl othbl_ prmx_n female bsln_g
## sulfonylure -0.133
## meglitinide -0.068 0.006
## nphl_insuln -0.220 0.244 -0.021
## reg_insulin 0.024 0.100 0.058 -0.109
## othbol_nsln -0.004 0.058 0.045 -0.048 -0.064
## premix_nsln -0.063 0.158 0.002 -0.126 0.007 0.131
## female -0.002 -0.002 0.022 -0.002 -0.017 -0.023 -0.011
## baseline_ag -0.805 0.015 0.004 0.014 -0.006 0.000 0.012 0.014
## BMI -0.587 0.006 0.014 -0.008 -0.007 -0.011 0.005 -0.157 0.146
## yrsdiab 0.007 -0.030 0.019 -0.008 -0.016 -0.018 -0.015 -0.026 -0.216
## fpg -0.233 0.035 0.133 0.147 -0.114 -0.009 -0.018 0.041 0.005
## BMI yrsdib
## sulfonylure
## meglitinide
## nphl_insuln
## reg_insulin
## othbol_nsln
## premix_nsln
## female
## baseline_ag
## BMI
## yrsdiab 0.057
## fpg -0.012 0.003
The first model is Pooled model with all the independent variables from data1, data2, data 3, data 4 and dependent variables from data5. While, the second is fixed effect model with the same variables. In case of Pooled model, the sampling distribution of the OLS estimator in the pooled regression model is normal in large samples. In this case, each MaskId is allowed its own intercept, so that the slope on the Primary and Primary2 variables can be interpreted as the effect of a change in the other variables of within dataset. The model implicitly assumes that within each MaskID, the Primary and Primary2 have the same marginal impact on other variables. The variance of the estimates can be estimated and we can compute standard errors,t-statistics and confidence intervals for coefficients. As you can see, the estimated coeffients for the most of the independent variables are negative, where, the estimated value for VisitF24 is -0.00136914, for meglitinide is-0.03868340, for reg_insulin is -0.01375097, for BMI is -0.00461218 and hba1c is -0.00621048. Similarly, R suared is 0.023421 and P value is 1.766e-12 which is significant as compare to fixed effect model. Results show that the fixed effect model with proportional odds is not good than the null model as indicated by the p-value.
Similarly, in case of fixed effect model, the P value is less than or equal to 0.01 which is equal to the null hypothesis ans suggested that pooled model is better than the fixed effect model.
In both linear relationship plots, each point represents observations of Primary, Primary2 and all other independent variables for a given MaskID. The regression results indicate a positive relationship between the variables for all the MaskID's. However, a very weak uphil relationhsip exist in 1st Figure with dependent variable Primary. While, in 2nd Figure, it seems that it is near to no relationship, however, exist a very weak uphil relationship. Similarly, in the fixed effect model it is negative relationship. The estimated coefficient on Primary is nagative and not significantly at 5%. The same behaviour can be find in the estimators of fixed effect model, where most of the estimators have negative value.
To decide between fixed effect or pooled model you can run a Hausman test where the null hypothesis is that the preferred model is pooled model vs. the alternative. It basically tests whether the unique errors are correlated with the regressors, the null hypothesis is they are not. If the p-value is significant (for example <0.05) then use pooled model, if not use fixed effects.
phtest(mod1,mod2)##
## Hausman Test
##
## data: primary2 ~ Visit + sulfonylurea + meglitinide + nphl_insulin + ...
## chisq = 40.146, df = 15, p-value = 0.000431
## alternative hypothesis: one model is inconsistent
phtest(mod3,mod4)##
## Hausman Test
##
## data: primary ~ Visit + sulfonylurea + meglitinide + nphl_insulin + ...
## chisq = 40.146, df = 15, p-value = 0.000431
## alternative hypothesis: one model is inconsistent
data4 <- data4[,-3] #Removing the repeating primary variable
fdata <- left_join(data4,data5,"MaskID")
fdata <- left_join(fdata,dataMerge, "MaskID")
head(fdata,2)## X.x MaskID female baseline_age Race edu BMI yrsdiab X.y primary primary2
## 1 1 100001 0 60.8 3 4 35.91218 26 1 0 0
## 2 1 100001 0 60.8 3 4 35.91218 26 1 0 0
## X Visit sulfonylurea meglitinide nphl_insulin reg_insulin la_insulin
## 1 1 BLR 0 0 1 0 1
## 2 2 F12 0 0 1 0 0
## othbol_insulin premix_insulin hba1c fpg
## 1 1 0 8.7 96
## 2 1 0 7.7 163
mod7 <- lmer(primary2 ~ sulfonylurea + meglitinide + nphl_insulin + reg_insulin + othbol_insulin + premix_insulin + female + baseline_age + BMI + yrsdiab + fpg + (1|MaskID), data = fdata)
summary(mod7)## Linear mixed model fit by REML ['lmerMod']
## Formula: primary2 ~ sulfonylurea + meglitinide + nphl_insulin + reg_insulin +
## othbol_insulin + premix_insulin + female + baseline_age +
## BMI + yrsdiab + fpg + (1 | MaskID)
## Data: fdata
##
## REML criterion at convergence: -398227.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.206e-05 -5.617e-06 -3.839e-06 -2.198e-06 2.545e-04
##
## Random effects:
## Groups Name Variance Std.Dev.
## MaskID (Intercept) 1.549e-02 1.245e-01
## Residual 1.760e-11 4.195e-06
## Number of obs: 22664, groups: MaskID, 4445
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 1.423e-02 2.260e-02 0.630
## sulfonylurea -1.565e-11 9.878e-08 0.000
## meglitinide 3.967e-12 1.248e-07 0.000
## nphl_insulin 5.118e-12 1.012e-07 0.000
## reg_insulin 5.963e-12 1.577e-07 0.000
## othbol_insulin -7.573e-12 2.443e-07 0.000
## premix_insulin -2.584e-12 7.044e-08 0.000
## female 1.630e-02 3.875e-03 4.206
## baseline_age 1.872e-03 2.898e-04 6.458
## BMI -2.772e-03 3.518e-04 -7.881
## yrsdiab 3.111e-03 2.441e-04 12.746
## fpg 6.199e-15 6.191e-10 0.000
##
## Correlation of Fixed Effects:
## (Intr) slfnyl mgltnd nphl_n rg_nsl othbl_ prmx_n female bsln_g
## sulfonylure 0.000
## meglitinide 0.000 0.049
## nphl_insuln 0.000 0.188 -0.075
## reg_insulin 0.000 0.054 0.044 -0.090
## othbol_nsln 0.000 0.011 0.032 -0.056 -0.061
## premix_nsln 0.000 0.164 0.050 -0.252 0.073 0.108
## female 0.006 0.000 0.000 0.000 0.000 0.000 0.000
## baseline_ag -0.845 0.000 0.000 0.000 0.000 0.000 0.000 0.013
## BMI -0.622 0.000 0.000 0.000 0.000 0.000 0.000 -0.157 0.144
## yrsdiab 0.004 0.000 0.000 0.000 0.000 0.000 0.000 -0.027 -0.217
## fpg 0.000 0.011 0.071 0.209 -0.107 -0.004 -0.015 0.000 0.000
## BMI yrsdib
## sulfonylure
## meglitinide
## nphl_insuln
## reg_insulin
## othbol_nsln
## premix_nsln
## female
## baseline_ag
## BMI
## yrsdiab 0.056
## fpg 0.000 0.000
## optimizer (nloptwrap) convergence code: -4 (NLOPT_ROUNDOFF_LIMITED: Roundoff errors led to a breakdown of the optimization algorithm. In this case, the returned minimum may still be useful. (e.g. this error occurs in NEWUOA if one tries to achieve a tolerance too close to machine precision.))
## unable to evaluate scaled gradient
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
mod8 <- lmer(primary ~ sulfonylurea + meglitinide + nphl_insulin + reg_insulin + othbol_insulin + premix_insulin + female + baseline_age + BMI + yrsdiab + fpg + (1|MaskID), data = fdata)
summary(mod8)## Linear mixed model fit by REML ['lmerMod']
## Formula: primary ~ sulfonylurea + meglitinide + nphl_insulin + reg_insulin +
## othbol_insulin + premix_insulin + female + baseline_age +
## BMI + yrsdiab + fpg + (1 | MaskID)
## Data: fdata
##
## REML criterion at convergence: -378259.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.849e-05 -3.980e-06 -2.650e-06 -1.400e-06 5.217e-04
##
## Random effects:
## Groups Name Variance Std.Dev.
## MaskID (Intercept) 4.706e-02 2.169e-01
## Residual 4.018e-11 6.339e-06
## Number of obs: 22664, groups: MaskID, 4445
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 8.142e-02 3.930e-02 2.072
## sulfonylurea -1.998e-11 1.493e-07 0.000
## meglitinide -1.205e-12 1.886e-07 0.000
## nphl_insulin 6.553e-12 1.529e-07 0.000
## reg_insulin 6.012e-12 2.382e-07 0.000
## othbol_insulin -4.232e-12 3.691e-07 0.000
## premix_insulin -2.920e-12 1.064e-07 0.000
## female 1.317e-02 6.752e-03 1.950
## baseline_age 1.719e-03 5.043e-04 3.408
## BMI -4.329e-03 6.126e-04 -7.067
## yrsdiab 5.338e-03 4.254e-04 12.546
## fpg 3.236e-14 9.355e-10 0.000
##
## Correlation of Fixed Effects:
## (Intr) slfnyl mgltnd nphl_n rg_nsl othbl_ prmx_n female bsln_g
## sulfonylure 0.000
## meglitinide 0.000 0.049
## nphl_insuln 0.000 0.188 -0.075
## reg_insulin 0.000 0.054 0.044 -0.090
## othbol_nsln 0.000 0.011 0.032 -0.056 -0.061
## premix_nsln 0.000 0.164 0.050 -0.252 0.073 0.108
## female 0.006 0.000 0.000 0.000 0.000 0.000 0.000
## baseline_ag -0.845 0.000 0.000 0.000 0.000 0.000 0.000 0.013
## BMI -0.622 0.000 0.000 0.000 0.000 0.000 0.000 -0.157 0.142
## yrsdiab 0.004 0.000 0.000 0.000 0.000 0.000 0.000 -0.027 -0.217
## fpg 0.000 0.011 0.071 0.209 -0.107 -0.004 -0.015 0.000 0.000
## BMI yrsdib
## sulfonylure
## meglitinide
## nphl_insuln
## reg_insulin
## othbol_nsln
## premix_nsln
## female
## baseline_ag
## BMI
## yrsdiab 0.056
## fpg 0.000 0.000