0.1 Loading data

load(url("http://www.ms.ut.ee/mart/andmeteadus/births.Rdata"))
data=births

### Available variables 
names(births)
## [1] "id"      "bweight" "lowbw"   "gestwks" "preterm" "matage"  "hyp"    
## [8] "sex"
### dataset looks like (6 first rows)

head(births)
##   id bweight lowbw gestwks preterm matage hyp sex
## 1  1    2974     0   38.52       0     34   0   2
## 2  2    3270     0      NA      NA     30   0   1
## 3  3    2620     0   38.15       0     35   0   2
## 4  4    3751     0   39.80       0     31   0   1
## 5  5    3200     0   38.89       0     33   1   1
## 6  6    3673     0   40.97       0     33   0   2

0.2 define for catergorical variable

data$lowbw=as.factor(data$lowbw) ## Indicator for birth weight less than 2500 g.
data$preterm=as.factor(data$preterm) ## Indicator for gestation period less than 37 weeks
data$hyp=as.factor(data$hyp) ## Indicator for maternal hypertension
data$sex=as.factor(data$sex) ##Sex of baby: 1:Male, 2:Female.

0.3 loading packages need for making combined table

library(table1)
## Warning: package 'table1' was built under R version 4.0.4
## 
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
## 
##     units, units<-
library(compareGroups)
## Warning: package 'compareGroups' was built under R version 4.0.3

0.4 solving for each task

0.4.1 the first oneside table (solved the first question: 1. Table 1: descriptive statistics)

table1(~lowbw+preterm+hyp+sex+bweight+gestwks+matage, data=data)
Overall
(N=500)
lowbw
0 440 (88.0%)
1 60 (12.0%)
preterm
0 427 (85.4%)
1 63 (12.6%)
Missing 10 (2.0%)
hyp
0 428 (85.6%)
1 72 (14.4%)
sex
1 264 (52.8%)
2 236 (47.2%)
bweight
Mean (SD) 3140 (637)
Median [Min, Max] 3190 [628, 4550]
gestwks
Mean (SD) 38.7 (2.31)
Median [Min, Max] 39.1 [24.7, 43.2]
Missing 10 (2.0%)
matage
Mean (SD) 34.0 (3.90)
Median [Min, Max] 34.0 [23.0, 43.0]

0.4.2 2. Table 2: the results of the statistical analysis

0.4.2.1 part a+b

t1=compareGroups(lowbw~preterm+hyp+sex+bweight+gestwks+matage, data=data)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
####### In this result, we will see the first, second, and the 4th collums are presented the for option a, b, d of required task.
createTable(t1)
## 
## --------Summary descriptives table by 'lowbw'---------
## 
## __________________________________________ 
##               0           1      p.overall 
##             N=440       N=60               
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ 
## preterm:                          <0.001   
##     0    406 (94.2%) 21 (35.6%)            
##     1    25 (5.80%)  38 (64.4%)            
## hyp:                              <0.001   
##     0    388 (88.2%) 40 (66.7%)            
##     1    52 (11.8%)  20 (33.3%)            
## sex:                               0.249   
##     1    237 (53.9%) 27 (45.0%)            
##     2    203 (46.1%) 33 (55.0%)            
## bweight  3308 (426)  1884 (522)   <0.001   
## gestwks  39.2 (1.45) 34.9 (3.53)  <0.001   
## matage   34.1 (3.90) 33.8 (3.87)   0.579   
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯

0.4.2.2 results for this command will be reflected the actual test and type of data for each variables. The tests have been used including: chi-square test and independent t-test.

t1   
## 
## 
## -------- Summary of results by groups of 'lowbw'---------
## 
## 
##   var     N   p.value  method            selection
## 1 preterm 490 <0.001** categorical       ALL      
## 2 hyp     500 <0.001** categorical       ALL      
## 3 sex     500 0.249    categorical       ALL      
## 4 bweight 500 <0.001** continuous normal ALL      
## 5 gestwks 490 <0.001** continuous normal ALL      
## 6 matage  500 0.579    continuous normal ALL      
## -----
## Signif. codes:  0 '**' 0.05 '*' 0.1 ' ' 1

0.4.3 3. figure

# Library
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

0.4.4 plot for this task reveals the mean value maternal age by term and preterm

p <- ggplot(data, aes(x=preterm , y=matage, fill=preterm )) + 
  geom_boxplot()+

labs(title="mean value maternal age by term (0) and preterm (1)", x="Indicator for birth weight less than 2500 g", y = "Maternal age (weeks) ")
p

LS0tDQp0aXRsZTogIkhvbWVfdzEiDQphdXRob3I6ICJEciBLaGFuIg0KZGF0ZTogIjMvMTYvMjAyMSINCm91dHB1dDoNCiAgaHRtbF9kb2N1bWVudDoNCiAgICBjb2RlX2Rvd25sb2FkOiB5ZXMNCiAgICBjb2RlX2ZvbGRpbmc6IGhpZGUNCiAgICBudW1iZXJfc2VjdGlvbnM6IHllcw0KICAgIHRoZW1lOiBqb3VybmFsDQogICAgdG9jOiB5ZXMNCiAgICB0b2NfZmxvYXQ6IHllcw0KICB3b3JkX2RvY3VtZW50Og0KICAgIA0KICAgIHRvYzogeWVzDQotLS0NCg0KIyMgTG9hZGluZyBkYXRhDQpgYGB7cn0NCmxvYWQodXJsKCJodHRwOi8vd3d3Lm1zLnV0LmVlL21hcnQvYW5kbWV0ZWFkdXMvYmlydGhzLlJkYXRhIikpDQpgYGANCg0KDQpgYGB7cn0NCmRhdGE9YmlydGhzDQoNCiMjIyBBdmFpbGFibGUgdmFyaWFibGVzIA0KbmFtZXMoYmlydGhzKQ0KDQojIyMgZGF0YXNldCBsb29rcyBsaWtlICg2IGZpcnN0IHJvd3MpDQoNCmhlYWQoYmlydGhzKQ0KYGBgDQoNCg0KIyMgZGVmaW5lIGZvciBjYXRlcmdvcmljYWwgdmFyaWFibGUNCg0KYGBge3J9DQpkYXRhJGxvd2J3PWFzLmZhY3RvcihkYXRhJGxvd2J3KSAjIyBJbmRpY2F0b3IgZm9yIGJpcnRoIHdlaWdodCBsZXNzIHRoYW4gMjUwMCBnLg0KZGF0YSRwcmV0ZXJtPWFzLmZhY3RvcihkYXRhJHByZXRlcm0pICMjIEluZGljYXRvciBmb3IgZ2VzdGF0aW9uIHBlcmlvZCBsZXNzIHRoYW4gMzcgd2Vla3MNCmRhdGEkaHlwPWFzLmZhY3RvcihkYXRhJGh5cCkgIyMgSW5kaWNhdG9yIGZvciBtYXRlcm5hbCBoeXBlcnRlbnNpb24NCmRhdGEkc2V4PWFzLmZhY3RvcihkYXRhJHNleCkgIyNTZXggb2YgYmFieTogMTpNYWxlLCAyOkZlbWFsZS4NCmBgYA0KDQoNCiMjIGxvYWRpbmcgcGFja2FnZXMgbmVlZCBmb3IgbWFraW5nIGNvbWJpbmVkIHRhYmxlDQoNCmBgYHtyfQ0KbGlicmFyeSh0YWJsZTEpDQpsaWJyYXJ5KGNvbXBhcmVHcm91cHMpDQpgYGANCg0KDQojIyBzb2x2aW5nIGZvciBlYWNoIHRhc2sNCg0KIyMjIHRoZSBmaXJzdCBvbmVzaWRlIHRhYmxlIChzb2x2ZWQgdGhlIGZpcnN0IHF1ZXN0aW9uOiAxLiBUYWJsZSAxOiBkZXNjcmlwdGl2ZSBzdGF0aXN0aWNzKQ0KDQpgYGB7cn0NCnRhYmxlMSh+bG93YncrcHJldGVybStoeXArc2V4K2J3ZWlnaHQrZ2VzdHdrcyttYXRhZ2UsIGRhdGE9ZGF0YSkNCmBgYA0KDQoNCg0KIyMjIDIuIFRhYmxlIDI6IHRoZSByZXN1bHRzIG9mIHRoZSBzdGF0aXN0aWNhbCBhbmFseXNpcw0KDQojIyMjIHBhcnQgYStiDQoNCmBgYHtyfQ0KDQp0MT1jb21wYXJlR3JvdXBzKGxvd2J3fnByZXRlcm0raHlwK3NleCtid2VpZ2h0K2dlc3R3a3MrbWF0YWdlLCBkYXRhPWRhdGEpDQoNCmBgYA0KDQpgYGB7cn0NCiMjIyMjIyMgSW4gdGhpcyByZXN1bHQsIHdlIHdpbGwgc2VlIHRoZSBmaXJzdCwgc2Vjb25kLCBhbmQgdGhlIDR0aCBjb2xsdW1zIGFyZSBwcmVzZW50ZWQgdGhlIGZvciBvcHRpb24gYSwgYiwgZCBvZiByZXF1aXJlZCB0YXNrLg0KY3JlYXRlVGFibGUodDEpDQoNCmBgYA0KDQojIyMjIHJlc3VsdHMgZm9yIHRoaXMgY29tbWFuZCB3aWxsIGJlIHJlZmxlY3RlZCB0aGUgYWN0dWFsIHRlc3QgYW5kIHR5cGUgb2YgZGF0YSBmb3IgZWFjaCB2YXJpYWJsZXMuIFRoZSB0ZXN0cyBoYXZlIGJlZW4gdXNlZCBpbmNsdWRpbmc6IGNoaS1zcXVhcmUgdGVzdCBhbmQgaW5kZXBlbmRlbnQgdC10ZXN0LiANCmBgYHtyfQ0KdDEgICANCmBgYA0KDQojIyMgMy4gZmlndXJlIA0KYGBge3J9DQojIExpYnJhcnkNCmxpYnJhcnkoZ2dwbG90MikNCmxpYnJhcnkoZHBseXIpDQpgYGANCg0KIyMjIHBsb3QgZm9yIHRoaXMgdGFzayByZXZlYWxzIHRoZSBtZWFuIHZhbHVlIG1hdGVybmFsIGFnZSBieSB0ZXJtIGFuZCBwcmV0ZXJtDQpgYGB7cn0NCnAgPC0gZ2dwbG90KGRhdGEsIGFlcyh4PXByZXRlcm0gLCB5PW1hdGFnZSwgZmlsbD1wcmV0ZXJtICkpICsgDQogIGdlb21fYm94cGxvdCgpKw0KDQpsYWJzKHRpdGxlPSJtZWFuIHZhbHVlIG1hdGVybmFsIGFnZSBieSB0ZXJtICgwKSBhbmQgcHJldGVybSAoMSkiLCB4PSJJbmRpY2F0b3IgZm9yIGJpcnRoIHdlaWdodCBsZXNzIHRoYW4gMjUwMCBnIiwgeSA9ICJNYXRlcm5hbCBhZ2UgKHdlZWtzKSAiKQ0KcA0KYGBgDQoNCg0K