Service as a Freelancer

If you want hire me as a freelancer please use the following links.

Read the Data

data4 <- read.csv("Data4.csv",header = T,sep = ",")
data5 <- read.csv("Data5.csv",header = T,sep = ",")
dataMerge <- read.csv("DataMerge.csv",header = T,sep = ",")
dataMerge <- dataMerge[complete.cases(dataMerge),]

Prepare the Data

data <- dataMerge[1:4504,]
data <- cbind(data,data4[,c(4:9)])
data <- cbind(data,data5[,c(3:4)])
head(data)
##   X MaskID Visit sulfonylurea meglitinide nphl_insulin reg_insulin la_insulin
## 1 1 100001   BLR            0           0            1           0          1
## 2 2 100001   F12            0           0            1           0          0
## 3 3 100001   F24            0           0            1           0          0
## 4 4 100001   F36            0           0            1           0          1
## 5 5 100001   F48            0           0            1           0          1
## 6 6 100001   F60            0           0            1           0          1
##   othbol_insulin premix_insulin hba1c fpg female baseline_age Race edu      BMI
## 1              1              0   8.7  96      0         60.8    3   4 35.91218
## 2              1              0   7.7 163      1         55.0    3   1 35.12173
## 3              1              0   7.0  95      1         72.5    3   4 32.02074
## 4              0              0   7.3 215      0         47.1    3   3 35.08068
## 5              0              0   8.0  99      1         56.7    3   2 41.93726
## 6              0              0   8.2 150      0         60.0    3   3 30.62379
##   yrsdiab primary primary2
## 1      26       0        0
## 2      14       0        0
## 3       9       0        0
## 4       6       0        0
## 5       5       0        0
## 6      11       0        0

Summary of the Data

summary(data)
##        X            MaskID          Visit            sulfonylurea   
##  Min.   :   1   Min.   :100001   Length:4504        Min.   :0.0000  
##  1st Qu.:1277   1st Qu.:100398   Class :character   1st Qu.:0.0000  
##  Median :2606   Median :100895   Mode  :character   Median :0.0000  
##  Mean   :2602   Mean   :100873                      Mean   :0.3153  
##  3rd Qu.:3913   3rd Qu.:101319                      3rd Qu.:1.0000  
##  Max.   :5231   Max.   :101773                      Max.   :1.0000  
##                                                                     
##   meglitinide      nphl_insulin     reg_insulin        la_insulin    
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.00000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:1.0000   1st Qu.:0.00000   1st Qu.:0.0000  
##  Median :0.0000   Median :1.0000   Median :0.00000   Median :1.0000  
##  Mean   :0.1139   Mean   :0.8302   Mean   :0.08059   Mean   :0.5653  
##  3rd Qu.:0.0000   3rd Qu.:1.0000   3rd Qu.:0.00000   3rd Qu.:1.0000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.00000   Max.   :1.0000  
##                                                                      
##  othbol_insulin    premix_insulin       hba1c             fpg       
##  Min.   :0.00000   Min.   :0.0000   Min.   : 4.300   Min.   : 30.0  
##  1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.: 6.600   1st Qu.:103.0  
##  Median :0.00000   Median :0.0000   Median : 7.300   Median :136.0  
##  Mean   :0.01976   Mean   :0.4338   Mean   : 7.441   Mean   :144.6  
##  3rd Qu.:0.00000   3rd Qu.:1.0000   3rd Qu.: 8.100   3rd Qu.:175.0  
##  Max.   :1.00000   Max.   :1.0000   Max.   :15.100   Max.   :726.0  
##                                                                     
##      female        baseline_age        Race           edu       
##  Min.   :0.0000   Min.   :44.40   Min.   :1.00   Min.   :1.000  
##  1st Qu.:0.0000   1st Qu.:57.50   1st Qu.:3.00   1st Qu.:2.000  
##  Median :0.0000   Median :61.80   Median :3.00   Median :3.000  
##  Mean   :0.3921   Mean   :62.52   Mean   :2.68   Mean   :2.719  
##  3rd Qu.:1.0000   3rd Qu.:66.83   3rd Qu.:3.00   3rd Qu.:4.000  
##  Max.   :1.0000   Max.   :79.30   Max.   :4.00   Max.   :4.000  
##                                                  NA's   :3      
##       BMI           yrsdiab         primary           primary2      
##  Min.   :18.41   Min.   : 0.00   Min.   : 0.0000   Min.   :0.00000  
##  1st Qu.:28.63   1st Qu.: 7.00   1st Qu.: 0.0000   1st Qu.:0.00000  
##  Median :32.33   Median :12.00   Median : 0.0000   Median :0.00000  
##  Mean   :32.72   Mean   :13.19   Mean   : 0.1232   Mean   :0.08837  
##  3rd Qu.:36.65   3rd Qu.:18.00   3rd Qu.: 0.0000   3rd Qu.:0.00000  
##  Max.   :48.50   Max.   :35.00   Max.   :14.0000   Max.   :1.00000  
##  NA's   :3       NA's   :46

Correlation of the Data

#Correlation Matrix
res <- cor(data[,c(4:20)])
round(res, 2)
##                sulfonylurea meglitinide nphl_insulin reg_insulin la_insulin
## sulfonylurea           1.00        0.00        -0.28       -0.14      -0.24
## meglitinide            0.00        1.00         0.03       -0.08      -0.16
## nphl_insulin          -0.28        0.03         1.00        0.13       0.47
## reg_insulin           -0.14       -0.08         0.13        1.00      -0.30
## la_insulin            -0.24       -0.16         0.47       -0.30       1.00
## othbol_insulin        -0.06       -0.05         0.05        0.08      -0.08
## premix_insulin        -0.19        0.00         0.17        0.03       0.10
## hba1c                  0.05       -0.11        -0.14        0.13      -0.15
## fpg                   -0.01       -0.15        -0.13        0.12      -0.07
## female                 0.00       -0.02         0.01        0.02       0.00
## baseline_age           0.00        0.00        -0.01        0.01      -0.02
## Race                  -0.01        0.01         0.01        0.01       0.00
## edu                      NA          NA           NA          NA         NA
## BMI                      NA          NA           NA          NA         NA
## yrsdiab                  NA          NA           NA          NA         NA
## primary                0.02       -0.02        -0.01        0.00      -0.01
## primary2              -0.01       -0.02         0.01        0.00       0.00
##                othbol_insulin premix_insulin hba1c   fpg female baseline_age
## sulfonylurea            -0.06          -0.19  0.05 -0.01   0.00         0.00
## meglitinide             -0.05           0.00 -0.11 -0.15  -0.02         0.00
## nphl_insulin             0.05           0.17 -0.14 -0.13   0.01        -0.01
## reg_insulin              0.08           0.03  0.13  0.12   0.02         0.01
## la_insulin              -0.08           0.10 -0.15 -0.07   0.00        -0.02
## othbol_insulin           1.00          -0.11 -0.02  0.02   0.03         0.00
## premix_insulin          -0.11           1.00  0.04  0.00   0.01        -0.01
## hba1c                   -0.02           0.04  1.00  0.53  -0.03         0.02
## fpg                      0.02           0.00  0.53  1.00  -0.03        -0.01
## female                   0.03           0.01 -0.03 -0.03   1.00        -0.03
## baseline_age             0.00          -0.01  0.02 -0.01  -0.03         1.00
## Race                    -0.01           0.00  0.00  0.01  -0.12         0.04
## edu                        NA             NA    NA    NA     NA           NA
## BMI                        NA             NA    NA    NA     NA           NA
## yrsdiab                    NA             NA    NA    NA     NA           NA
## primary                 -0.02           0.03  0.00  0.00   0.01         0.05
## primary2                -0.01           0.04  0.00  0.00   0.02         0.07
##                 Race edu BMI yrsdiab primary primary2
## sulfonylurea   -0.01  NA  NA      NA    0.02    -0.01
## meglitinide     0.01  NA  NA      NA   -0.02    -0.02
## nphl_insulin    0.01  NA  NA      NA   -0.01     0.01
## reg_insulin     0.01  NA  NA      NA    0.00     0.00
## la_insulin      0.00  NA  NA      NA   -0.01     0.00
## othbol_insulin -0.01  NA  NA      NA   -0.02    -0.01
## premix_insulin  0.00  NA  NA      NA    0.03     0.04
## hba1c           0.00  NA  NA      NA    0.00     0.00
## fpg             0.01  NA  NA      NA    0.00     0.00
## female         -0.12  NA  NA      NA    0.01     0.02
## baseline_age    0.04  NA  NA      NA    0.05     0.07
## Race            1.00  NA  NA      NA   -0.09    -0.06
## edu               NA   1  NA      NA      NA       NA
## BMI               NA  NA   1      NA      NA       NA
## yrsdiab           NA  NA  NA       1      NA       NA
## primary        -0.09  NA  NA      NA    1.00     0.80
## primary2       -0.06  NA  NA      NA    0.80     1.00
#Correlation Plot
corrplot(res, type = "upper", 
         tl.col = "black", tl.srt = 45)

#Correlation chart
chart.Correlation(data[,c(4:20)], histogram=TRUE, pch=19)

Regression Analysis

Regression using panel data may mitigate omitted variable bias when there is no information on variables that correlate with both the regressors of interest and the independent variable and if these variables are constant in the time dimension or across entities. Provided that panel data is available panel regression methods may improve upon multiple regression models which produce results that are not internally valid in such a setting. So, here we have applied build panel modesl on the given dataset. For the setting we have all other as independent variables, while, primary and parimary2 as dependent variables.

We have used plm(), a convenient R function that enables us to estimate linear panel regression models which comes with the package plm (Croissant, Millo, and Tappe 2020). Usage of plm() is very similar as for the function lm() which have been used by the majority of the developer in the literature. Sometimes panel data is also called longitudinal data as it adds a temporal dimension to cross-sectional data. Let us have a look at the dataset by checking its structure and listing the first few observations.

str(data)
## 'data.frame':    4504 obs. of  20 variables:
##  $ X             : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ MaskID        : int  100001 100001 100001 100001 100001 100001 100001 100001 100001 100002 ...
##  $ Visit         : chr  "BLR" "F12" "F24" "F36" ...
##  $ sulfonylurea  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ meglitinide   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ nphl_insulin  : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ reg_insulin   : int  0 0 0 0 0 0 0 0 0 1 ...
##  $ la_insulin    : int  1 0 0 1 1 1 1 1 1 0 ...
##  $ othbol_insulin: int  1 1 1 0 0 0 0 0 0 1 ...
##  $ premix_insulin: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ hba1c         : num  8.7 7.7 7 7.3 8 8.2 7.6 7 7.4 8.2 ...
##  $ fpg           : num  96 163 95 215 99 150 88 79 198 109 ...
##  $ female        : int  0 1 1 0 1 0 0 0 1 0 ...
##  $ baseline_age  : num  60.8 55 72.5 47.1 56.7 60 55 70.4 52.7 69.7 ...
##  $ Race          : int  3 3 3 3 3 3 3 3 3 1 ...
##  $ edu           : int  4 1 4 3 2 3 4 4 4 4 ...
##  $ BMI           : num  35.9 35.1 32 35.1 41.9 ...
##  $ yrsdiab       : int  26 14 9 6 5 11 6 22 14 19 ...
##  $ primary       : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ primary2      : int  0 0 0 0 0 0 0 0 0 0 ...

We find that the dataset consists of 4504 observations on 20 variables. Since all variables are observed for all entities and over all time periods, the panel is balanced. If there were missing data for at least one entity in at least one time period we would call the panel unbalanced. As in the preprocessing step we have made the data balanced by adding picking the complete cases. Now, the data is in the shape that is suitable for the regression analysis.

Pooled Model with Primary2

#Model 1: with primary as dependent
mod1 <- plm(primary2 ~ Visit + sulfonylurea + meglitinide + nphl_insulin + reg_insulin  + othbol_insulin + premix_insulin + female + baseline_age + BMI + yrsdiab + fpg + la_insulin + hba1c +  Race + edu, data = data, index = c("Visit"), model = "pooling")
summary(mod1)
## Pooling Model
## 
## Call:
## plm(formula = primary2 ~ Visit + sulfonylurea + meglitinide + 
##     nphl_insulin + reg_insulin + othbol_insulin + premix_insulin + 
##     female + baseline_age + BMI + yrsdiab + fpg + la_insulin + 
##     hba1c + Race + edu, data = data, model = "pooling", index = c("Visit"))
## 
## Unbalanced Panel: n = 9, T = 85-721, N = 4452
## 
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
## -0.252426 -0.111235 -0.078343 -0.045524  0.988935 
## 
## Coefficients:
##                   Estimate  Std. Error t-value  Pr(>|t|)    
## (Intercept)     1.0008e-01  6.3689e-02  1.5713 0.1161818    
## VisitEXIT       3.3455e-03  1.7486e-02  0.1913 0.8482839    
## VisitF12       -1.6616e-02  1.6847e-02 -0.9863 0.3240495    
## VisitF24       -5.7540e-03  1.7333e-02 -0.3320 0.7399304    
## VisitF36       -1.0745e-02  1.7729e-02 -0.6060 0.5445150    
## VisitF48        1.7519e-02  1.8587e-02  0.9426 0.3459545    
## VisitF60       -5.4969e-03  2.0219e-02 -0.2719 0.7857395    
## VisitF72       -4.9481e-02  3.0310e-02 -1.6325 0.1026481    
## VisitF84        8.7122e-03  3.3741e-02  0.2582 0.7962562    
## sulfonylurea   -4.6230e-03  9.8537e-03 -0.4692 0.6389769    
## meglitinide    -1.5122e-02  1.4017e-02 -1.0788 0.2807225    
## nphl_insulin    1.1504e-02  1.4628e-02  0.7865 0.4316455    
## reg_insulin    -1.1808e-02  1.8237e-02 -0.6475 0.5173687    
## othbol_insulin -1.6582e-02  3.1200e-02 -0.5315 0.5951116    
## premix_insulin  1.4394e-02  9.7494e-03  1.4764 0.1399062    
## female          9.6322e-03  8.8559e-03  1.0877 0.2768062    
## baseline_age    1.9413e-03  6.5812e-04  2.9498 0.0031964 ** 
## BMI            -2.8714e-03  7.9591e-04 -3.6076 0.0003124 ***
## yrsdiab         3.0081e-03  5.5214e-04  5.4481 5.367e-08 ***
## fpg             7.5541e-06  8.8405e-05  0.0854 0.9319088    
## la_insulin     -1.2663e-02  1.1393e-02 -1.1115 0.2664205    
## hba1c          -5.9730e-04  4.4615e-03 -0.1339 0.8935046    
## Race           -1.8397e-02  4.8797e-03 -3.7701 0.0001653 ***
## edu            -1.1166e-02  4.3001e-03 -2.5966 0.0094468 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    358.31
## Residual Sum of Squares: 349.39
## R-Squared:      0.024879
## Adj. R-Squared: 0.019814
## F-statistic: 4.91198 on 23 and 4428 DF, p-value: 1.2011e-13
yhat <- mod1$df.residual
ggplot(data, aes(x = meglitinide + nphl_insulin + reg_insulin  + othbol_insulin + premix_insulin + female + baseline_age + BMI + yrsdiab + fpg + la_insulin + hba1c +  Race + edu, y = primary2))+
   geom_point() +
   geom_smooth(method=lm)
## `geom_smooth()` using formula 'y ~ x'

kable(tidy(mod1), digits=3, caption="Pooled model")
Pooled model
term estimate std.error statistic p.value
(Intercept) 0.100 0.064 1.571 0.116
VisitEXIT 0.003 0.017 0.191 0.848
VisitF12 -0.017 0.017 -0.986 0.324
VisitF24 -0.006 0.017 -0.332 0.740
VisitF36 -0.011 0.018 -0.606 0.545
VisitF48 0.018 0.019 0.943 0.346
VisitF60 -0.005 0.020 -0.272 0.786
VisitF72 -0.049 0.030 -1.632 0.103
VisitF84 0.009 0.034 0.258 0.796
sulfonylurea -0.005 0.010 -0.469 0.639
meglitinide -0.015 0.014 -1.079 0.281
nphl_insulin 0.012 0.015 0.786 0.432
reg_insulin -0.012 0.018 -0.647 0.517
othbol_insulin -0.017 0.031 -0.531 0.595
premix_insulin 0.014 0.010 1.476 0.140
female 0.010 0.009 1.088 0.277
baseline_age 0.002 0.001 2.950 0.003
BMI -0.003 0.001 -3.608 0.000
yrsdiab 0.003 0.001 5.448 0.000
fpg 0.000 0.000 0.085 0.932
la_insulin -0.013 0.011 -1.111 0.266
hba1c -0.001 0.004 -0.134 0.894
Race -0.018 0.005 -3.770 0.000
edu -0.011 0.004 -2.597 0.009

Fixed Effect Model with Primary

#Model 2: Fixed Effect Model with primary
mod2 <- plm(primary ~ Visit + sulfonylurea + meglitinide + nphl_insulin + reg_insulin  + othbol_insulin + premix_insulin + female + baseline_age + BMI + yrsdiab + fpg + la_insulin + hba1c +  Race + edu,
            data = data, index = c("Visit"), model = "within")
summary(mod2)
## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = primary ~ Visit + sulfonylurea + meglitinide + 
##     nphl_insulin + reg_insulin + othbol_insulin + premix_insulin + 
##     female + baseline_age + BMI + yrsdiab + fpg + la_insulin + 
##     hba1c + Race + edu, data = data, model = "within", index = c("Visit"))
## 
## Unbalanced Panel: n = 9, T = 85-721, N = 4452
## 
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
## -0.366197 -0.161183 -0.103620 -0.052078 13.708217 
## 
## Coefficients:
##                   Estimate  Std. Error t-value  Pr(>|t|)    
## sulfonylurea    0.02194554  0.01712442  1.2815 0.2000729    
## meglitinide    -0.03868340  0.02436005 -1.5880 0.1123611    
## nphl_insulin    0.00171108  0.02542100  0.0673 0.9463381    
## reg_insulin    -0.01375097  0.03169416 -0.4339 0.6644080    
## othbol_insulin -0.06013299  0.05422176 -1.1090 0.2674821    
## premix_insulin  0.03426828  0.01694327  2.0225 0.0431816 *  
## female          0.00023663  0.01539050  0.0154 0.9877336    
## baseline_age    0.00202976  0.00114372  1.7747 0.0760175 .  
## BMI            -0.00461218  0.00138320 -3.3344 0.0008618 ***
## yrsdiab         0.00503180  0.00095956  5.2439 1.646e-07 ***
## fpg             0.00010721  0.00015364  0.6978 0.4853225    
## la_insulin     -0.01996153  0.01979985 -1.0082 0.3134302    
## hba1c          -0.00621048  0.00775348 -0.8010 0.4231789    
## Race           -0.04809614  0.00848035 -5.6715 1.506e-08 ***
## edu            -0.00781413  0.00747301 -1.0456 0.2957817    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    1079.7
## Residual Sum of Squares: 1055.2
## R-Squared:      0.022634
## Adj. R-Squared: 0.017557
## F-statistic: 6.8362 on 15 and 4428 DF, p-value: 6.7098e-15
yhat <- mod2$df.residual
ggplot(data, aes(x = meglitinide + nphl_insulin + reg_insulin  + othbol_insulin + premix_insulin + female + baseline_age + BMI + yrsdiab + fpg + la_insulin + hba1c +  Race + edu, y = primary))+
   geom_point() +
   geom_smooth(method=lm)
## `geom_smooth()` using formula 'y ~ x'

kable(tidy(mod2), digits=3, caption="Fixed Effect Model")
Fixed Effect Model
term estimate std.error statistic p.value
sulfonylurea 0.022 0.017 1.282 0.200
meglitinide -0.039 0.024 -1.588 0.112
nphl_insulin 0.002 0.025 0.067 0.946
reg_insulin -0.014 0.032 -0.434 0.664
othbol_insulin -0.060 0.054 -1.109 0.267
premix_insulin 0.034 0.017 2.023 0.043
female 0.000 0.015 0.015 0.988
baseline_age 0.002 0.001 1.775 0.076
BMI -0.005 0.001 -3.334 0.001
yrsdiab 0.005 0.001 5.244 0.000
fpg 0.000 0.000 0.698 0.485
la_insulin -0.020 0.020 -1.008 0.313
hba1c -0.006 0.008 -0.801 0.423
Race -0.048 0.008 -5.671 0.000
edu -0.008 0.007 -1.046 0.296

Pooled Model with Primary

#Model 3: with primary as dependent
mod3 <- plm(primary ~ Visit + sulfonylurea + meglitinide + nphl_insulin + reg_insulin  + othbol_insulin + premix_insulin + female + baseline_age + BMI + yrsdiab + fpg + la_insulin + hba1c +  Race + edu, data = data, index = c("Visit"), model = "pooling")
summary(mod3)
## Pooling Model
## 
## Call:
## plm(formula = primary ~ Visit + sulfonylurea + meglitinide + 
##     nphl_insulin + reg_insulin + othbol_insulin + premix_insulin + 
##     female + baseline_age + BMI + yrsdiab + fpg + la_insulin + 
##     hba1c + Race + edu, data = data, model = "pooling", index = c("Visit"))
## 
## Unbalanced Panel: n = 9, T = 85-721, N = 4452
## 
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
## -0.366197 -0.161183 -0.103620 -0.052078 13.708217 
## 
## Coefficients:
##                   Estimate  Std. Error t-value  Pr(>|t|)    
## (Intercept)     0.25088949  0.11068390  2.2667 0.0234553 *  
## VisitEXIT       0.00915279  0.03038929  0.3012 0.7632878    
## VisitF12        0.02373331  0.02927743  0.8106 0.4176188    
## VisitF24       -0.00136914  0.03012293 -0.0455 0.9637494    
## VisitF36       -0.00019669  0.03081115 -0.0064 0.9949068    
## VisitF48        0.01893606  0.03230162  0.5862 0.5577534    
## VisitF60       -0.00101221  0.03513882 -0.0288 0.9770206    
## VisitF72       -0.06147710  0.05267495 -1.1671 0.2432316    
## VisitF84        0.00626220  0.05863717  0.1068 0.9149559    
## sulfonylurea    0.02194554  0.01712442  1.2815 0.2000729    
## meglitinide    -0.03868340  0.02436005 -1.5880 0.1123611    
## nphl_insulin    0.00171108  0.02542100  0.0673 0.9463381    
## reg_insulin    -0.01375097  0.03169416 -0.4339 0.6644080    
## othbol_insulin -0.06013299  0.05422176 -1.1090 0.2674821    
## premix_insulin  0.03426828  0.01694327  2.0225 0.0431816 *  
## female          0.00023663  0.01539050  0.0154 0.9877336    
## baseline_age    0.00202976  0.00114372  1.7747 0.0760175 .  
## BMI            -0.00461218  0.00138320 -3.3344 0.0008618 ***
## yrsdiab         0.00503180  0.00095956  5.2439 1.646e-07 ***
## fpg             0.00010721  0.00015364  0.6978 0.4853225    
## la_insulin     -0.01996153  0.01979985 -1.0082 0.3134302    
## hba1c          -0.00621048  0.00775348 -0.8010 0.4231789    
## Race           -0.04809614  0.00848035 -5.6715 1.506e-08 ***
## edu            -0.00781413  0.00747301 -1.0456 0.2957817    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    1080.5
## Residual Sum of Squares: 1055.2
## R-Squared:      0.023421
## Adj. R-Squared: 0.018348
## F-statistic: 4.61717 on 23 and 4428 DF, p-value: 1.766e-12
yhat <- mod3$df.residual
yhat
## [1] 4428
kable(tidy(mod3), digits=3, caption="Pooled model")
Pooled model
term estimate std.error statistic p.value
(Intercept) 0.251 0.111 2.267 0.023
VisitEXIT 0.009 0.030 0.301 0.763
VisitF12 0.024 0.029 0.811 0.418
VisitF24 -0.001 0.030 -0.045 0.964
VisitF36 0.000 0.031 -0.006 0.995
VisitF48 0.019 0.032 0.586 0.558
VisitF60 -0.001 0.035 -0.029 0.977
VisitF72 -0.061 0.053 -1.167 0.243
VisitF84 0.006 0.059 0.107 0.915
sulfonylurea 0.022 0.017 1.282 0.200
meglitinide -0.039 0.024 -1.588 0.112
nphl_insulin 0.002 0.025 0.067 0.946
reg_insulin -0.014 0.032 -0.434 0.664
othbol_insulin -0.060 0.054 -1.109 0.267
premix_insulin 0.034 0.017 2.023 0.043
female 0.000 0.015 0.015 0.988
baseline_age 0.002 0.001 1.775 0.076
BMI -0.005 0.001 -3.334 0.001
yrsdiab 0.005 0.001 5.244 0.000
fpg 0.000 0.000 0.698 0.485
la_insulin -0.020 0.020 -1.008 0.313
hba1c -0.006 0.008 -0.801 0.423
Race -0.048 0.008 -5.671 0.000
edu -0.008 0.007 -1.046 0.296

Fixed Effect Model with Primary2

#Model 4: Fixed Effect Model with primary
mod4 <- plm(primary2 ~ Visit + sulfonylurea + meglitinide + nphl_insulin + reg_insulin  + othbol_insulin + premix_insulin + female + baseline_age + BMI + yrsdiab + fpg + la_insulin + hba1c +  Race + edu,
            data = data, index = c("Visit"), model = "within")
summary(mod4)
## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = primary2 ~ Visit + sulfonylurea + meglitinide + 
##     nphl_insulin + reg_insulin + othbol_insulin + premix_insulin + 
##     female + baseline_age + BMI + yrsdiab + fpg + la_insulin + 
##     hba1c + Race + edu, data = data, model = "within", index = c("Visit"))
## 
## Unbalanced Panel: n = 9, T = 85-721, N = 4452
## 
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
## -0.252426 -0.111235 -0.078343 -0.045524  0.988935 
## 
## Coefficients:
##                   Estimate  Std. Error t-value  Pr(>|t|)    
## sulfonylurea   -4.6230e-03  9.8537e-03 -0.4692 0.6389769    
## meglitinide    -1.5122e-02  1.4017e-02 -1.0788 0.2807225    
## nphl_insulin    1.1504e-02  1.4628e-02  0.7865 0.4316455    
## reg_insulin    -1.1808e-02  1.8237e-02 -0.6475 0.5173687    
## othbol_insulin -1.6582e-02  3.1200e-02 -0.5315 0.5951116    
## premix_insulin  1.4394e-02  9.7494e-03  1.4764 0.1399062    
## female          9.6322e-03  8.8559e-03  1.0877 0.2768062    
## baseline_age    1.9413e-03  6.5812e-04  2.9498 0.0031964 ** 
## BMI            -2.8714e-03  7.9591e-04 -3.6076 0.0003124 ***
## yrsdiab         3.0081e-03  5.5214e-04  5.4481 5.367e-08 ***
## fpg             7.5541e-06  8.8405e-05  0.0854 0.9319088    
## la_insulin     -1.2663e-02  1.1393e-02 -1.1115 0.2664205    
## hba1c          -5.9730e-04  4.4615e-03 -0.1339 0.8935046    
## Race           -1.8397e-02  4.8797e-03 -3.7701 0.0001653 ***
## edu            -1.1166e-02  4.3001e-03 -2.5966 0.0094468 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    357.49
## Residual Sum of Squares: 349.39
## R-Squared:      0.022637
## Adj. R-Squared: 0.01756
## F-statistic: 6.83714 on 15 and 4428 DF, p-value: 6.6695e-15
yhat <- mod4$df.residual
yhat
## [1] 4428
kable(tidy(mod4), digits=3, caption="Fixed Effect model")
Fixed Effect model
term estimate std.error statistic p.value
sulfonylurea -0.005 0.010 -0.469 0.639
meglitinide -0.015 0.014 -1.079 0.281
nphl_insulin 0.012 0.015 0.786 0.432
reg_insulin -0.012 0.018 -0.647 0.517
othbol_insulin -0.017 0.031 -0.531 0.595
premix_insulin 0.014 0.010 1.476 0.140
female 0.010 0.009 1.088 0.277
baseline_age 0.002 0.001 2.950 0.003
BMI -0.003 0.001 -3.608 0.000
yrsdiab 0.003 0.001 5.448 0.000
fpg 0.000 0.000 0.085 0.932
la_insulin -0.013 0.011 -1.111 0.266
hba1c -0.001 0.004 -0.134 0.894
Race -0.018 0.005 -3.770 0.000
edu -0.011 0.004 -2.597 0.009

Linear Mixed-Mode: Primary2

mod5 <- lmer(primary2 ~ sulfonylurea + meglitinide + nphl_insulin + reg_insulin  + othbol_insulin + premix_insulin + female + baseline_age + BMI + yrsdiab + fpg + (1|hba1c), data = data)
summary(mod5)
## Linear mixed model fit by REML ['lmerMod']
## Formula: primary2 ~ sulfonylurea + meglitinide + nphl_insulin + reg_insulin +  
##     othbol_insulin + premix_insulin + female + baseline_age +  
##     BMI + yrsdiab + fpg + (1 | hba1c)
##    Data: data
## 
## REML criterion at convergence: 1449.7
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -0.7615 -0.3804 -0.2828 -0.1859  3.4951 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev. 
##  hba1c    (Intercept) 6.065e-07 0.0007788
##  Residual             7.925e-02 0.2815118
## Number of obs: 4455, groups:  hba1c, 87
## 
## Fixed effects:
##                  Estimate Std. Error t value
## (Intercept)    -4.840e-03  5.401e-02  -0.090
## sulfonylurea   -1.910e-03  9.655e-03  -0.198
## meglitinide    -1.147e-02  1.348e-02  -0.851
## nphl_insulin    2.111e-03  1.198e-02   0.176
## reg_insulin    -3.176e-03  1.589e-02  -0.200
## othbol_insulin -1.911e-02  3.060e-02  -0.624
## premix_insulin  1.762e-02  8.813e-03   1.999
## female          1.622e-02  8.766e-03   1.851
## baseline_age    1.996e-03  6.554e-04   3.046
## BMI            -2.699e-03  7.956e-04  -3.392
## yrsdiab         3.105e-03  5.523e-04   5.621
## fpg             1.691e-05  7.563e-05   0.224
## 
## Correlation of Fixed Effects:
##             (Intr) slfnyl mgltnd nphl_n rg_nsl othbl_ prmx_n female bsln_g
## sulfonylure -0.133                                                        
## meglitinide -0.068  0.006                                                 
## nphl_insuln -0.221  0.244 -0.021                                          
## reg_insulin  0.024  0.100  0.058 -0.109                                   
## othbol_nsln -0.004  0.058  0.045 -0.048 -0.064                            
## premix_nsln -0.063  0.158  0.002 -0.126  0.006  0.131                     
## female      -0.002 -0.001  0.022 -0.002 -0.016 -0.023 -0.010              
## baseline_ag -0.806  0.015  0.004  0.014 -0.006  0.000  0.012  0.014       
## BMI         -0.587  0.006  0.014 -0.008 -0.007 -0.011  0.005 -0.157  0.146
## yrsdiab      0.007 -0.030  0.019 -0.008 -0.016 -0.018 -0.015 -0.026 -0.216
## fpg         -0.232  0.035  0.134  0.149 -0.117 -0.009 -0.019  0.041  0.005
##             BMI    yrsdib
## sulfonylure              
## meglitinide              
## nphl_insuln              
## reg_insulin              
## othbol_nsln              
## premix_nsln              
## female                   
## baseline_ag              
## BMI                      
## yrsdiab      0.057       
## fpg         -0.013  0.003

Linear Mixed-Model: Primary

mod6 <- lmer(primary ~ sulfonylurea + meglitinide + nphl_insulin + reg_insulin  + othbol_insulin + premix_insulin + female + baseline_age + BMI + yrsdiab + fpg + (1|hba1c), data = data)
summary(mod6)
## Linear mixed model fit by REML ['lmerMod']
## Formula: primary ~ sulfonylurea + meglitinide + nphl_insulin + reg_insulin +  
##     othbol_insulin + premix_insulin + female + baseline_age +  
##     BMI + yrsdiab + fpg + (1 | hba1c)
##    Data: data
## 
## REML criterion at convergence: 6368.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -0.6826 -0.3113 -0.2210 -0.1388 28.1809 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev.
##  hba1c    (Intercept) 0.0001157 0.01076 
##  Residual             0.2396552 0.48955 
## Number of obs: 4455, groups:  hba1c, 87
## 
## Fixed effects:
##                  Estimate Std. Error t value
## (Intercept)     5.554e-02  9.400e-02   0.591
## sulfonylurea    2.555e-02  1.680e-02   1.521
## meglitinide    -3.042e-02  2.346e-02  -1.297
## nphl_insulin   -9.145e-03  2.084e-02  -0.439
## reg_insulin    -7.301e-03  2.765e-02  -0.264
## othbol_insulin -4.540e-02  5.323e-02  -0.853
## premix_insulin  3.032e-02  1.533e-02   1.977
## female          1.326e-02  1.525e-02   0.870
## baseline_age    1.862e-03  1.140e-03   1.633
## BMI            -4.225e-03  1.384e-03  -3.053
## yrsdiab         5.286e-03  9.606e-04   5.503
## fpg             3.926e-05  1.320e-04   0.297
## 
## Correlation of Fixed Effects:
##             (Intr) slfnyl mgltnd nphl_n rg_nsl othbl_ prmx_n female bsln_g
## sulfonylure -0.133                                                        
## meglitinide -0.068  0.006                                                 
## nphl_insuln -0.220  0.244 -0.021                                          
## reg_insulin  0.024  0.100  0.058 -0.109                                   
## othbol_nsln -0.004  0.058  0.045 -0.048 -0.064                            
## premix_nsln -0.063  0.158  0.002 -0.126  0.007  0.131                     
## female      -0.002 -0.002  0.022 -0.002 -0.017 -0.023 -0.011              
## baseline_ag -0.805  0.015  0.004  0.014 -0.006  0.000  0.012  0.014       
## BMI         -0.587  0.006  0.014 -0.008 -0.007 -0.011  0.005 -0.157  0.146
## yrsdiab      0.007 -0.030  0.019 -0.008 -0.016 -0.018 -0.015 -0.026 -0.216
## fpg         -0.233  0.035  0.133  0.147 -0.114 -0.009 -0.018  0.041  0.005
##             BMI    yrsdib
## sulfonylure              
## meglitinide              
## nphl_insuln              
## reg_insulin              
## othbol_nsln              
## premix_nsln              
## female                   
## baseline_ag              
## BMI                      
## yrsdiab      0.057       
## fpg         -0.012  0.003

Result discussion

The first model is Pooled model with all the independent variables from data1, data2, data 3, data 4 and dependent variables from data5. While, the second is fixed effect model with the same variables. In case of Pooled model, the sampling distribution of the OLS estimator in the pooled regression model is normal in large samples. In this case, each MaskId is allowed its own intercept, so that the slope on the Primary and Primary2 variables can be interpreted as the effect of a change in the other variables of within dataset. The model implicitly assumes that within each MaskID, the Primary and Primary2 have the same marginal impact on other variables. The variance of the estimates can be estimated and we can compute standard errors,t-statistics and confidence intervals for coefficients. As you can see, the estimated coeffients for the most of the independent variables are negative, where, the estimated value for VisitF24 is -0.00136914, for meglitinide is-0.03868340, for reg_insulin is -0.01375097, for BMI is -0.00461218 and hba1c is -0.00621048. Similarly, R suared is 0.023421 and P value is 1.766e-12 which is significant as compare to fixed effect model. Results show that the fixed effect model with proportional odds is not good than the null model as indicated by the p-value.

Similarly, in case of fixed effect model, the P value is less than or equal to 0.01 which is equal to the null hypothesis ans suggested that pooled model is better than the fixed effect model.

In both linear relationship plots, each point represents observations of Primary, Primary2 and all other independent variables for a given MaskID. The regression results indicate a positive relationship between the variables for all the MaskID's. However, a very weak uphil relationhsip exist in 1st Figure with dependent variable Primary. While, in 2nd Figure, it seems that it is near to no relationship, however, exist a very weak uphil relationship. Similarly, in the fixed effect model it is negative relationship. The estimated coefficient on Primary is nagative and not significantly at 5%. The same behaviour can be find in the estimators of fixed effect model, where most of the estimators have negative value.

Testing the models

To decide between fixed effect or pooled model you can run a Hausman test where the null hypothesis is that the preferred model is pooled model vs. the alternative. It basically tests whether the unique errors are correlated with the regressors, the null hypothesis is they are not. If the p-value is significant (for example <0.05) then use pooled model, if not use fixed effects.

phtest(mod1,mod2)
## 
##  Hausman Test
## 
## data:  primary2 ~ Visit + sulfonylurea + meglitinide + nphl_insulin +  ...
## chisq = 40.146, df = 15, p-value = 0.000431
## alternative hypothesis: one model is inconsistent
  • In the results, we can see that p value is less than 0.05 which is 0.000431. Which is showing that pooled model is better to used.
phtest(mod3,mod4)
## 
##  Hausman Test
## 
## data:  primary ~ Visit + sulfonylurea + meglitinide + nphl_insulin +  ...
## chisq = 40.146, df = 15, p-value = 0.000431
## alternative hypothesis: one model is inconsistent
  • In the results, we can see that p value is less than 0.05 which is 0.000431. Which is showing that pooled model is better to used.

Repeating the Dependent Variable

data4 <- data4[,-3] #Removing the repeating primary variable
fdata <- left_join(data4,data5,"MaskID")
fdata <- left_join(fdata,dataMerge, "MaskID")
head(fdata,2)
##   X.x MaskID female baseline_age Race edu      BMI yrsdiab X.y primary primary2
## 1   1 100001      0         60.8    3   4 35.91218      26   1       0        0
## 2   1 100001      0         60.8    3   4 35.91218      26   1       0        0
##   X Visit sulfonylurea meglitinide nphl_insulin reg_insulin la_insulin
## 1 1   BLR            0           0            1           0          1
## 2 2   F12            0           0            1           0          0
##   othbol_insulin premix_insulin hba1c fpg
## 1              1              0   8.7  96
## 2              1              0   7.7 163

Linear Mixed-Mode: Primary2

mod7 <- lmer(primary2 ~ sulfonylurea + meglitinide + nphl_insulin + reg_insulin  + othbol_insulin + premix_insulin + female + baseline_age + BMI + yrsdiab + fpg + (1|MaskID), data = fdata)
summary(mod7)
## Linear mixed model fit by REML ['lmerMod']
## Formula: primary2 ~ sulfonylurea + meglitinide + nphl_insulin + reg_insulin +  
##     othbol_insulin + premix_insulin + female + baseline_age +  
##     BMI + yrsdiab + fpg + (1 | MaskID)
##    Data: fdata
## 
## REML criterion at convergence: -398227.8
## 
## Scaled residuals: 
##        Min         1Q     Median         3Q        Max 
## -5.206e-05 -5.617e-06 -3.839e-06 -2.198e-06  2.545e-04 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev. 
##  MaskID   (Intercept) 1.549e-02 1.245e-01
##  Residual             1.760e-11 4.195e-06
## Number of obs: 22664, groups:  MaskID, 4445
## 
## Fixed effects:
##                  Estimate Std. Error t value
## (Intercept)     1.423e-02  2.260e-02   0.630
## sulfonylurea   -1.565e-11  9.878e-08   0.000
## meglitinide     3.967e-12  1.248e-07   0.000
## nphl_insulin    5.118e-12  1.012e-07   0.000
## reg_insulin     5.963e-12  1.577e-07   0.000
## othbol_insulin -7.573e-12  2.443e-07   0.000
## premix_insulin -2.584e-12  7.044e-08   0.000
## female          1.630e-02  3.875e-03   4.206
## baseline_age    1.872e-03  2.898e-04   6.458
## BMI            -2.772e-03  3.518e-04  -7.881
## yrsdiab         3.111e-03  2.441e-04  12.746
## fpg             6.199e-15  6.191e-10   0.000
## 
## Correlation of Fixed Effects:
##             (Intr) slfnyl mgltnd nphl_n rg_nsl othbl_ prmx_n female bsln_g
## sulfonylure  0.000                                                        
## meglitinide  0.000  0.049                                                 
## nphl_insuln  0.000  0.188 -0.075                                          
## reg_insulin  0.000  0.054  0.044 -0.090                                   
## othbol_nsln  0.000  0.011  0.032 -0.056 -0.061                            
## premix_nsln  0.000  0.164  0.050 -0.252  0.073  0.108                     
## female       0.006  0.000  0.000  0.000  0.000  0.000  0.000              
## baseline_ag -0.845  0.000  0.000  0.000  0.000  0.000  0.000  0.013       
## BMI         -0.622  0.000  0.000  0.000  0.000  0.000  0.000 -0.157  0.144
## yrsdiab      0.004  0.000  0.000  0.000  0.000  0.000  0.000 -0.027 -0.217
## fpg          0.000  0.011  0.071  0.209 -0.107 -0.004 -0.015  0.000  0.000
##             BMI    yrsdib
## sulfonylure              
## meglitinide              
## nphl_insuln              
## reg_insulin              
## othbol_nsln              
## premix_nsln              
## female                   
## baseline_ag              
## BMI                      
## yrsdiab      0.056       
## fpg          0.000  0.000
## optimizer (nloptwrap) convergence code: -4 (NLOPT_ROUNDOFF_LIMITED: Roundoff errors led to a breakdown of the optimization algorithm. In this case, the returned minimum may still be useful. (e.g. this error occurs in NEWUOA if one tries to achieve a tolerance too close to machine precision.))
## unable to evaluate scaled gradient
## Model failed to converge: degenerate  Hessian with 1 negative eigenvalues

Linear Mixed-Model: Primary

mod8 <- lmer(primary ~ sulfonylurea + meglitinide + nphl_insulin + reg_insulin  + othbol_insulin + premix_insulin + female + baseline_age + BMI + yrsdiab + fpg + (1|MaskID), data = fdata)
summary(mod8)
## Linear mixed model fit by REML ['lmerMod']
## Formula: primary ~ sulfonylurea + meglitinide + nphl_insulin + reg_insulin +  
##     othbol_insulin + premix_insulin + female + baseline_age +  
##     BMI + yrsdiab + fpg + (1 | MaskID)
##    Data: fdata
## 
## REML criterion at convergence: -378259.3
## 
## Scaled residuals: 
##        Min         1Q     Median         3Q        Max 
## -3.849e-05 -3.980e-06 -2.650e-06 -1.400e-06  5.217e-04 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev. 
##  MaskID   (Intercept) 4.706e-02 2.169e-01
##  Residual             4.018e-11 6.339e-06
## Number of obs: 22664, groups:  MaskID, 4445
## 
## Fixed effects:
##                  Estimate Std. Error t value
## (Intercept)     8.142e-02  3.930e-02   2.072
## sulfonylurea   -1.998e-11  1.493e-07   0.000
## meglitinide    -1.205e-12  1.886e-07   0.000
## nphl_insulin    6.553e-12  1.529e-07   0.000
## reg_insulin     6.012e-12  2.382e-07   0.000
## othbol_insulin -4.232e-12  3.691e-07   0.000
## premix_insulin -2.920e-12  1.064e-07   0.000
## female          1.317e-02  6.752e-03   1.950
## baseline_age    1.719e-03  5.043e-04   3.408
## BMI            -4.329e-03  6.126e-04  -7.067
## yrsdiab         5.338e-03  4.254e-04  12.546
## fpg             3.236e-14  9.355e-10   0.000
## 
## Correlation of Fixed Effects:
##             (Intr) slfnyl mgltnd nphl_n rg_nsl othbl_ prmx_n female bsln_g
## sulfonylure  0.000                                                        
## meglitinide  0.000  0.049                                                 
## nphl_insuln  0.000  0.188 -0.075                                          
## reg_insulin  0.000  0.054  0.044 -0.090                                   
## othbol_nsln  0.000  0.011  0.032 -0.056 -0.061                            
## premix_nsln  0.000  0.164  0.050 -0.252  0.073  0.108                     
## female       0.006  0.000  0.000  0.000  0.000  0.000  0.000              
## baseline_ag -0.845  0.000  0.000  0.000  0.000  0.000  0.000  0.013       
## BMI         -0.622  0.000  0.000  0.000  0.000  0.000  0.000 -0.157  0.142
## yrsdiab      0.004  0.000  0.000  0.000  0.000  0.000  0.000 -0.027 -0.217
## fpg          0.000  0.011  0.071  0.209 -0.107 -0.004 -0.015  0.000  0.000
##             BMI    yrsdib
## sulfonylure              
## meglitinide              
## nphl_insuln              
## reg_insulin              
## othbol_nsln              
## premix_nsln              
## female                   
## baseline_ag              
## BMI                      
## yrsdiab      0.056       
## fpg          0.000  0.000