# Analyse statistique du fichier trial
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(labelled)
library(questionr)
library(gtsummary)
# Localise gtsummary output to French, with a comma as the decimal mark and an
# empty string as the big-number separator.
theme_gtsummary_language(
  language = "fr",
  decimal.mark = ",",
  big.mark = ""
)
## Setting theme `language: fr`
# Data import ----
# Read the trial workbook, attach a variable label to the age column, then
# store the labelled data frame in an .RData file.
trial <- readxl::read_excel(path = "donnees/trial.xlsx")
var_label(trial) <- list(age = "Age de l'enqueté")
save(list = "trial", file = "donnees/trial.RData")
# Univariate statistics ----
# Quantitative variable: centre, quantiles, range and spread of age.
# Missing ages are dropped in every call (na.rm = TRUE).
trial$age %>% mean(na.rm = TRUE)
## [1] 47.2381
trial$age %>% median(na.rm = TRUE)
## [1] 47
trial$age %>% quantile(na.rm = TRUE)
## 0% 25% 50% 75% 100%
## 6 38 47 57 83
trial$age %>% min(na.rm = TRUE)
## [1] 6
trial$age %>% max(na.rm = TRUE)
## [1] 83
trial$age %>% var(na.rm = TRUE)
## [1] 204.8313
trial$age %>% sd(na.rm = TRUE)
## [1] 14.31193
# Categorical variable
# Frequency table of age (columns n, % and val%).
# NOTE(review): age is treated as a quantitative variable just above; confirm
# whether a categorical column such as stage or trt was intended here.
trial$age %>% freq()
## n % val%
## 6 1 0.5 0.5
## 9 1 0.5 0.5
## 10 1 0.5 0.5
## 17 1 0.5 0.5
## 19 2 1.0 1.1
## 20 1 0.5 0.5
## 21 2 1.0 1.1
## 23 1 0.5 0.5
## 25 2 1.0 1.1
## 26 2 1.0 1.1
## 27 1 0.5 0.5
## 28 2 1.0 1.1
## 30 1 0.5 0.5
## 31 7 3.5 3.7
## 32 2 1.0 1.1
## 33 2 1.0 1.1
## 34 6 3.0 3.2
## 35 2 1.0 1.1
## 36 5 2.5 2.6
## 37 4 2.0 2.1
## 38 7 3.5 3.7
## 39 5 2.5 2.6
## 40 2 1.0 1.1
## 41 3 1.5 1.6
## 42 4 2.0 2.1
## 43 7 3.5 3.7
## 44 6 3.0 3.2
## 45 6 3.0 3.2
## 46 3 1.5 1.6
## 47 7 3.5 3.7
## 48 7 3.5 3.7
## 49 6 3.0 3.2
## 50 4 2.0 2.1
## 51 6 3.0 3.2
## 52 4 2.0 2.1
## 53 6 3.0 3.2
## 54 5 2.5 2.6
## 55 2 1.0 1.1
## 56 3 1.5 1.6
## 57 5 2.5 2.6
## 58 3 1.5 1.6
## 59 1 0.5 0.5
## 60 4 2.0 2.1
## 61 5 2.5 2.6
## 62 1 0.5 0.5
## 63 4 2.0 2.1
## 64 1 0.5 0.5
## 65 3 1.5 1.6
## 66 4 2.0 2.1
## 67 4 2.0 2.1
## 68 3 1.5 1.6
## 69 2 1.0 1.1
## 70 1 0.5 0.5
## 71 3 1.5 1.6
## 74 1 0.5 0.5
## 75 1 0.5 0.5
## 76 2 1.0 1.1
## 78 1 0.5 0.5
## 83 1 0.5 0.5
## NA 11 5.5 NA
# Count of each distinct age, ordered from least to most frequent.
sort(table(trial$age))
##
## 6 9 10 17 20 23 27 30 59 62 64 70 74 75 78 83 19 21 25 26 28 32 33 35 40 55
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2
## 69 76 41 46 56 58 65 68 71 37 42 50 52 60 63 66 67 36 39 54 57 61 34 44 45 49
## 2 2 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 5 5 5 5 5 6 6 6 6
## 51 53 31 38 43 47 48
## 6 6 7 7 7 7 7
# Descriptive table ----
# Single-call gtsummary table covering every column of trial.
trial %>% tbl_summary()
## | Caractéristique      | N = 200            |
## |:---------------------|:-------------------|
## | trt                  |                    |
## |     Drug A           | 98 (49%)           |
## |     Drug B           | 102 (51%)          |
## | Age de l'enqueté     | 47 (38 – 57)       |
## |     Manquant         | 11                 |
## | marker               | 0,64 (0,22 – 1,39) |
## |     Manquant         | 10                 |
## | stage                |                    |
## |     T1               | 53 (26%)           |
## |     T2               | 54 (27%)           |
## |     T3               | 43 (22%)           |
## |     T4               | 50 (25%)           |
## | grade                |                    |
## |     I                | 68 (34%)           |
## |     II               | 68 (34%)           |
## |     III              | 64 (32%)           |
## | response             | 61 (32%)           |
## |     Manquant         | 7                  |
## | death                | 112 (56%)          |
## | ttdeath              | 22,4 (16,0 – 24,0) |
# Bivariate statistics ----
# Stacked bar chart counting the rows of trial in each stage, with bars filled
# by trt. No weight aesthetic is mapped: weighting by age would make each bar
# the sum of ages while the axis is still labelled "count".
ggplot(trial) +
  aes(x = stage, fill = trt) +
  geom_bar() +
  scale_fill_hue(direction = 1) +
  labs(
    title = "Titre du graphique",
    subtitle = "Sous-titre",
    caption = "enquete 2021"
  ) +
  theme_minimal()
