# Analyse statistique du fichier trial

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.4 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.2      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(labelled)
library(questionr)
library(gtsummary)
# gtsummary tables: French wording, comma as decimal mark, no thousands separator
theme_gtsummary_language(language = "fr", decimal.mark = ",", big.mark = "")
## Setting theme `language: fr`
# Data import ----

# Read the Excel workbook, attach a variable label to `age`, then save the
# labelled table to an .RData file.
trial <- readxl::read_excel("donnees/trial.xlsx") %>%
  set_variable_labels(age = "Age de l'enqueté")
save(trial, file = "donnees/trial.RData")

# Univariate statistics ----

# Quantitative variable: summaries of `age`, with missing values dropped

with(trial, mean(age, na.rm = TRUE))
## [1] 47.2381
with(trial, median(age, na.rm = TRUE))
## [1] 47
with(trial, quantile(age, na.rm = TRUE))
##   0%  25%  50%  75% 100% 
##    6   38   47   57   83
with(trial, min(age, na.rm = TRUE))
## [1] 6
with(trial, max(age, na.rm = TRUE))
## [1] 83
with(trial, var(age, na.rm = TRUE))
## [1] 204.8313
with(trial, sd(age, na.rm = TRUE))
## [1] 14.31193
# Categorical variable: frequency table (n, %, valid %)
# NOTE(review): `age` is numeric, so this lists one row per distinct age value;
# confirm whether a categorical column was intended here.
trial[["age"]] %>% freq()
##     n   % val%
## 6   1 0.5  0.5
## 9   1 0.5  0.5
## 10  1 0.5  0.5
## 17  1 0.5  0.5
## 19  2 1.0  1.1
## 20  1 0.5  0.5
## 21  2 1.0  1.1
## 23  1 0.5  0.5
## 25  2 1.0  1.1
## 26  2 1.0  1.1
## 27  1 0.5  0.5
## 28  2 1.0  1.1
## 30  1 0.5  0.5
## 31  7 3.5  3.7
## 32  2 1.0  1.1
## 33  2 1.0  1.1
## 34  6 3.0  3.2
## 35  2 1.0  1.1
## 36  5 2.5  2.6
## 37  4 2.0  2.1
## 38  7 3.5  3.7
## 39  5 2.5  2.6
## 40  2 1.0  1.1
## 41  3 1.5  1.6
## 42  4 2.0  2.1
## 43  7 3.5  3.7
## 44  6 3.0  3.2
## 45  6 3.0  3.2
## 46  3 1.5  1.6
## 47  7 3.5  3.7
## 48  7 3.5  3.7
## 49  6 3.0  3.2
## 50  4 2.0  2.1
## 51  6 3.0  3.2
## 52  4 2.0  2.1
## 53  6 3.0  3.2
## 54  5 2.5  2.6
## 55  2 1.0  1.1
## 56  3 1.5  1.6
## 57  5 2.5  2.6
## 58  3 1.5  1.6
## 59  1 0.5  0.5
## 60  4 2.0  2.1
## 61  5 2.5  2.6
## 62  1 0.5  0.5
## 63  4 2.0  2.1
## 64  1 0.5  0.5
## 65  3 1.5  1.6
## 66  4 2.0  2.1
## 67  4 2.0  2.1
## 68  3 1.5  1.6
## 69  2 1.0  1.1
## 70  1 0.5  0.5
## 71  3 1.5  1.6
## 74  1 0.5  0.5
## 75  1 0.5  0.5
## 76  2 1.0  1.1
## 78  1 0.5  0.5
## 83  1 0.5  0.5
## NA 11 5.5   NA
# Counts per distinct age value, ordered by increasing frequency
sort(table(trial$age))
## 
##  6  9 10 17 20 23 27 30 59 62 64 70 74 75 78 83 19 21 25 26 28 32 33 35 40 55 
##  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  2  2  2  2  2  2  2  2  2  2 
## 69 76 41 46 56 58 65 68 71 37 42 50 52 60 63 66 67 36 39 54 57 61 34 44 45 49 
##  2  2  3  3  3  3  3  3  3  4  4  4  4  4  4  4  4  5  5  5  5  5  6  6  6  6 
## 51 53 31 38 43 47 48 
##  6  6  7  7  7  7  7
# Descriptive table ----
# Single summary table built from every column of `trial`
trial %>% tbl_summary()
## Caractéristique N = 200¹
## trt
##     Drug A 98 (49%)
##     Drug B 102 (51%)
## Age de l'enqueté 47 (38 – 57)
##     Manquant 11
## marker 0,64 (0,22 – 1,39)
##     Manquant 10
## stage
##     T1 53 (26%)
##     T2 54 (27%)
##     T3 43 (22%)
##     T4 50 (25%)
## grade
##     I 68 (34%)
##     II 68 (34%)
##     III 64 (32%)
## response 61 (32%)
##     Manquant 7
## death 112 (56%)
## ttdeath 22,4 (16,0 – 24,0)
## ¹ n (%); Médiane (EI)
# Bivariate statistics ----

# Bar chart of `stage`, with bars filled by `trt`.
# NOTE(review): `weight = age` makes each bar's height the sum of `age` rather
# than a row count — confirm this weighting is intended.
ggplot(trial, aes(x = stage, fill = trt, weight = age)) +
  geom_bar() +
  labs(
    title = "Titre du graphique",
    subtitle = "Sous-titre",
    caption = "enquete 2021"
  ) +
  scale_fill_hue(direction = 1) +
  theme_minimal()