Moment R

Statistiques descriptives

Edmond Noack

2027-04-04

📊 Introduction

Cette présentation couvre les notions de base en statistiques descriptives avec R :

  • équivalent de la proc freq
  • équivalent de la proc means
  • équivalent de la proc summary

1- 📁 Chargement des packages

library(gmodels)
library(tidyverse) 
library(survey)   
library(questionr)
library(expss)
library(maditr)
library(janitor)
library(srvyr)

2- 📄 Exemple de jeu de données

Présentation avec une base de l’enquête EFE : Base_efe

# Preview the first six rows of the EFE survey table.
head(Base_efe, n = 6L)
# A tibble: 6 × 1,010
  sirus_id  strate2023         taille7_bds secteur12 sirus_id_2 ech2020 expl2020
  <chr>     <chr>              <chr>       <chr>     <chr>      <chr>      <dbl>
1 423690874 2_02_NA_PME        3           02        423690874  1              1
2 423712934 5_10_AKTO_ETI      7           10        423712934  1              1
3 423728773 2_02_OPCOEP_PME    2           02        423728773  1              1
4 423748276 2_04_OpcoMobilite… 2           04        423748276  1              1
5 423754241 5_07_AFDAS_ETI     7           07        423754241  1             NA
6 423765684 5_10_AKTO_ETI      7           10        423765684  1              1
# ℹ 1,003 more variables: sirus_id_3 <chr>, ech2021 <chr>, expl2021 <dbl>,
#   sirus_id_4 <chr>, ech2022 <chr>, expl2022 <dbl>, sirus_id_5 <chr>,
#   poids_2023 <dbl>, sirus_id_6 <chr>, ech2023 <chr>, num_groupe <dbl>,
#   sirus_id_7 <chr>, sirus_mere <chr>, procedure <chr>, categorie <chr>,
#   questionnaire <chr>, cle_a2tot <dbl>, cle_c0tot <dbl>, cle_c1tot <dbl>,
#   cle_c3tot <dbl>, cle_rec1 <chr>, cle_rec6 <chr>, commentaire <chr>,
#   a2tot <dbl>, c0tot <dbl>, d2f5 <dbl>, d2f6 <dbl>, d2f7 <dbl>, …

3- 📊 Tableaux croisés simples sans package spécifique

⚠️ Attention : ici, on n’utilise pas dplyr (R de base)

   
      01   02   03   04   05   06   07   08   09   10   11   12
  1  391  674  709 1091  253  397  300  468  924  368 1076  861
  2  299  448  275  616  199  225  189  333  385  316  350  375
  3  115  431  212  367  195  141  192  181  265  302  207  478
  4   62  502  498  372  467  249  431  125  538  535  396  596
  5    3  326   76  255  110   25   85   51  172  153   52  303
  6    3  163   40  121   55   19   46   43   94   85   25  188
  7    1  273   60  220   75   37   97  160  115  138   32  265

✅ Ecriture en Dplyr

⚠️ Attention : le résultat est peu lisible, car les deux variables sont en colonnes (format long) ; il faut pivoter la base pour obtenir un tableau croisé

# Long-format contingency table: one row per (taille7_bds, secteur12) pair,
# with the number of matching rows of Base_efe stored in column `n`.
Tab2 <- count(Base_efe, taille7_bds, secteur12)

print(Tab2)
# A tibble: 84 × 3
   taille7_bds secteur12     n
   <chr>       <chr>     <int>
 1 1           01          391
 2 1           02          674
 3 1           03          709
 4 1           04         1091
 5 1           05          253
 6 1           06          397
 7 1           07          300
 8 1           08          468
 9 1           09          924
10 1           10          368
# ℹ 74 more rows

✅ Ecriture en Dplyr

✅ Format tableau croisé

➡️ Utilisation de pivot_wider

# Cross-tabulation in wide format: one row per taille7_bds, one column per
# secteur12 value, cells holding the counts.
pivot_wider(
  count(Base_efe, taille7_bds, secteur12),
  names_from = secteur12, # values that become the new column names
  values_from = n,        # counts that fill the new columns
  values_fill = 0         # use 0 when a combination does not occur
)
# A tibble: 7 × 13
  taille7_bds  `01`  `02`  `03`  `04`  `05`  `06`  `07`  `08`  `09`  `10`  `11`
  <chr>       <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
1 1             391   674   709  1091   253   397   300   468   924   368  1076
2 2             299   448   275   616   199   225   189   333   385   316   350
3 3             115   431   212   367   195   141   192   181   265   302   207
4 4              62   502   498   372   467   249   431   125   538   535   396
5 5               3   326    76   255   110    25    85    51   172   153    52
6 6               3   163    40   121    55    19    46    43    94    85    25
7 7               1   273    60   220    75    37    97   160   115   138    32
# ℹ 1 more variable: `12` <int>

✅ Pourcentage colonne

# Column percentages: share of each taille7_bds within every secteur12,
# shown as a wide table (one column per sector).
Base_efe |>
  group_by(secteur12) |> # group by the column variable (here the sector)
  count(taille7_bds, secteur12) |> # count the rows of each size x sector pair
  mutate(pct=round(100 * n / sum(n),1)) |> # percentage within the current group, 1 decimal
  select(-n)  |>   # drop the raw counts 
  pivot_wider(names_from = secteur12, # pivot: one column per sector
              values_from = pct,
              values_fill = 0)
# A tibble: 7 × 13
  taille7_bds  `01`  `02`  `03`  `04`  `05`  `06`  `07`  `08`  `09`  `10`  `11`
  <chr>       <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1            44.7  23.9  37.9  35.9  18.7  36.3  22.4  34.4  37.1  19.4  50.3
2 2            34.2  15.9  14.7  20.2  14.7  20.6  14.1  24.5  15.4  16.7  16.4
3 3            13.2  15.3  11.3  12.1  14.4  12.9  14.3  13.3  10.6  15.9   9.7
4 4             7.1  17.8  26.6  12.2  34.5  22.8  32.2   9.2  21.6  28.2  18.5
5 5             0.3  11.6   4.1   8.4   8.1   2.3   6.3   3.7   6.9   8.1   2.4
6 6             0.3   5.8   2.1   4     4.1   1.7   3.4   3.2   3.8   4.5   1.2
7 7             0.1   9.7   3.2   7.2   5.5   3.4   7.2  11.8   4.6   7.3   1.5
# ℹ 1 more variable: `12` <dbl>

✅ Pourcentage ligne

# Row percentages: share of each secteur12 within every taille7_bds,
# shown as a wide table (one column per sector).
# Note: the printed result is still grouped by taille7_bds ("# Groups:" line);
# add ungroup() before reusing it in further computations.
Base_efe |>
  group_by(taille7_bds) |> # group by the row variable (here the size class)
  count(taille7_bds, secteur12) |> # count the rows of each size x sector pair
  mutate(pct=round(100 * n / sum(n),1)) |> # percentage within the current group, 1 decimal
  select(-n)  |>   # drop the raw counts 
  pivot_wider(names_from = secteur12, # pivot: one column per sector
              values_from = pct,
              values_fill = 0)
# A tibble: 7 × 13
# Groups:   taille7_bds [7]
  taille7_bds  `01`  `02`  `03`  `04`  `05`  `06`  `07`  `08`  `09`  `10`  `11`
  <chr>       <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1             5.2   9     9.4  14.5   3.4   5.3   4     6.2  12.3   4.9  14.3
2 2             7.5  11.2   6.9  15.4   5     5.6   4.7   8.3   9.6   7.9   8.7
3 3             3.7  14     6.9  11.9   6.3   4.6   6.2   5.9   8.6   9.8   6.7
4 4             1.3  10.5  10.4   7.8   9.8   5.2   9     2.6  11.3  11.2   8.3
5 5             0.2  20.2   4.7  15.8   6.8   1.6   5.3   3.2  10.7   9.5   3.2
6 6             0.3  18.5   4.5  13.7   6.2   2.2   5.2   4.9  10.7   9.6   2.8
7 7             0.1  18.5   4.1  14.9   5.1   2.5   6.6  10.9   7.8   9.4   2.2
# ℹ 1 more variable: `12` <dbl>

✅ Pourcentage total

# Overall percentages: each size x sector cell as a share of all rows.
# There is no grouping here, so sum(n) is the grand total.
Base_efe |>
  count(taille7_bds, secteur12) |>
  mutate(
    pct = round(100 * n / sum(n), 1),
    n = NULL # drop the raw counts once the percentage is computed
  ) |>
  pivot_wider(
    names_from = secteur12,
    values_from = pct,
    values_fill = 0
  )
# A tibble: 7 × 13
  taille7_bds  `01`  `02`  `03`  `04`  `05`  `06`  `07`  `08`  `09`  `10`  `11`
  <chr>       <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1             1.7   2.9   3     4.7   1.1   1.7   1.3   2     4     1.6   4.6
2 2             1.3   1.9   1.2   2.6   0.9   1     0.8   1.4   1.6   1.4   1.5
3 3             0.5   1.8   0.9   1.6   0.8   0.6   0.8   0.8   1.1   1.3   0.9
4 4             0.3   2.2   2.1   1.6   2     1.1   1.8   0.5   2.3   2.3   1.7
5 5             0     1.4   0.3   1.1   0.5   0.1   0.4   0.2   0.7   0.7   0.2
6 6             0     0.7   0.2   0.5   0.2   0.1   0.2   0.2   0.4   0.4   0.1
7 7             0     1.2   0.3   0.9   0.3   0.2   0.4   0.7   0.5   0.6   0.1
# ℹ 1 more variable: `12` <dbl>

🏋️ Avec Pondération

On ajoute l’argument wt = poids_2023 dans count()

# Weighted row percentages: same pipeline as the unweighted row percentages,
# except that count() receives the survey weight through `wt`.
Base_efe |>
  group_by(taille7_bds) |>  # group by the row variable (here the size class)
  count(taille7_bds, secteur12, wt = poids_2023) |>  # n is the sum of poids_2023 per pair, not a row count
  mutate(pct = round(100 * n / sum(n), 1)) |>  # weighted percentage within the current group
  select(-n) |>  # drop the weighted totals
  pivot_wider(names_from = secteur12,  # pivot: one column per sector
              values_from = pct,
              values_fill = 0)
# A tibble: 7 × 13
# Groups:   taille7_bds [7]
  taille7_bds  `01`  `02`  `03`  `04`  `05`  `06`  `07`  `08`  `09`  `10`  `11`
  <chr>       <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1             6.1   6.8  12    17.6   3     6.8   3.9   6.6  13.8   6.1  10.4
2 2             1.7  10    11.4  18     4.7  12     3.7   2.6  13     8.7   6.8
3 3             1.2  12.9  10.5  13.8   6.7   9.1   5.7   2.3   9.1  10.9   4.6
4 4             0.7  18.2   5.5  17.1   7.7   5.6   4.5   1.3   8.1  10.9   4.5
5 5             0.1  24.9   5.4  15.7   5.2   1     5.7   2.6   9.5   7.6   2.9
6 6             0.3  23     4.2  12     6.1   2     4.5   4.7   9.9  12.2   2  
7 7             0.1  18.5   4.1  14.9   5.1   2.5   6.6  10.9   7.8   9.4   2.2
# ℹ 1 more variable: `12` <dbl>

4- 📊 Tableaux croisés simples avec questionr

# Weighted two-way table: secteur12 in rows, taille7_bds in columns,
# each observation weighted by poids_2023.
Table_questionR <- wtd.table(
  x = Base_efe$secteur12,
  y = Base_efe$taille7_bds,
  weights = Base_efe$poids_2023
)

print(Table_questionR)
              1            2            3            4            5
01 4.706185e+04 1.648329e+03 6.664322e+02 2.434912e+02 3.153846e+00
02 5.197706e+04 9.847583e+03 7.338618e+03 6.111454e+03 1.006553e+03
03 9.230302e+04 1.122976e+04 5.960385e+03 1.843610e+03 2.202504e+02
04 1.353282e+05 1.767356e+04 7.860906e+03 5.750496e+03 6.346673e+02
05 2.336567e+04 4.645477e+03 3.834528e+03 2.573192e+03 2.111989e+02
06 5.223819e+04 1.179397e+04 5.193111e+03 1.895287e+03 4.140476e+01
07 2.981856e+04 3.664893e+03 3.255340e+03 1.524095e+03 2.314540e+02
08 5.044481e+04 2.601404e+03 1.286382e+03 4.506342e+02 1.054844e+02
09 1.060156e+05 1.274408e+04 5.203057e+03 2.714119e+03 3.846817e+02
10 4.690996e+04 8.596791e+03 6.182807e+03 3.642339e+03 3.061103e+02
11 7.980305e+04 6.694437e+03 2.608386e+03 1.499829e+03 1.188944e+02
12 5.469811e+04 7.210291e+03 7.530242e+03 5.304700e+03 7.851645e+02
              6            7
01 4.333333e+00 1.000000e+00
02 3.724319e+02 2.730000e+02
03 6.856190e+01 6.000000e+01
04 1.950900e+02 2.200000e+02
05 9.942216e+01 7.500000e+01
06 3.300000e+01 3.700000e+01
07 7.337601e+01 9.700000e+01
08 7.685118e+01 1.600000e+02
09 1.601157e+02 1.150000e+02
10 1.971721e+02 1.380000e+02
11 3.290000e+01 3.200000e+01
12 3.061639e+02 2.650000e+02

⚠️ Attention on obtient une table et non un dataframe

Si on veut repasser en dataframe il faut transformer la table et la pivoter

# Convert the weighted table to a long data frame (columns Var1, Var2, Freq),
# then pivot it so that every level of Var1 becomes a column.
Df_questionR <- as.data.frame(Table_questionR) |> 
  pivot_wider(
    names_from = Var1,  # Var1 is the first table dimension, i.e. secteur12 (not taille7_bds)
    values_from = Freq,        # weighted frequencies
    values_fill = list(Freq = 0) # replace missing combinations with 0
  )

Pourcentage ligne

# Rebuild the weighted secteur12 x taille7_bds table.
Table_questionR <- wtd.table(
  x = Base_efe$secteur12,
  y = Base_efe$taille7_bds,
  weights = Base_efe$poids_2023
)

# Row percentages: every row (one secteur12 value) sums to 100.
Table_questionR_pct_ligne <- 100 * prop.table(Table_questionR, margin = 1)

# Display the row-percentage table.
print(Table_questionR_pct_ligne)
              1            2            3            4            5
01 94.828103633  3.321328903  1.342839192  0.490626882  0.006354898
02 67.566994499 12.801255370  9.539753851  7.944516477  1.308457855
03 82.645414209 10.054801031  5.336753963  1.650714499  0.197205738
04 80.714445301 10.541126927  4.688519052  3.429796789  0.378537739
05 67.134075098 13.347349879 11.017335666  7.393275916  0.606815005
06 73.335318902 16.557135726  7.290422643  2.660725580  0.058126660
07 77.120852578  9.478649059  8.419406733  3.941823757  0.598618056
08 91.508920644  4.719051097  2.333548874  0.817468736  0.191353031
09 83.256161480 10.008172700  4.086062676  2.131451334  0.302098070
10 71.104594283 13.030735215  9.371697059  5.520938433  0.463992065
11 87.898988774  7.373581567  2.873004134  1.651985416  0.130956067
12 71.876934091  9.474798680  9.895235514  6.970726703  1.031757957
              6            7
01  0.008731526  0.002014968
02  0.484138667  0.354883281
03  0.061388319  0.053722240
04  0.116358509  0.131215683
05  0.285659005  0.215489430
06  0.046327516  0.051942973
07  0.189775102  0.250874715
08  0.139411152  0.290246467
09  0.125741979  0.090311761
10  0.298867047  0.209175898
11  0.036237673  0.035246369
12  0.402319554  0.348227502

Pourcentage colonne

# Column percentages: every column (one taille7_bds value) sums to 100.
Table_questionR_pct_col <- 100 * prop.table(Table_questionR, margin = 2)

print(Table_questionR_pct_col)
             1           2           3           4           5           6
01  6.11221389  1.67597266  1.17081852  0.72568595  0.07789163  0.26758582
02  6.75058237 10.01273605 12.89281962 18.21419598 24.85920036 22.99788342
03 11.98796392 11.41809664 10.47147706  5.49458034  5.43960030  4.23373689
04 17.57590654 17.96995865 13.81039980 17.13841887 15.67459859 12.04692136
05  3.03464458  4.72338597  6.73667355  7.66898049  5.21605198  6.13937556
06  6.78449676 11.99176834  9.12349542  5.64859497  1.02258778  2.03776890
07  3.87272087  3.72635636  5.71912992  4.54231734  5.71629933  4.53101047
08  6.55157936  2.64503133  2.25997467  1.34304208  2.60518576  4.74560456
09 13.76890628 12.95780434  9.14096786  8.08899146  9.50061649  9.88723392
10  6.09248740  8.74096666 10.86223805 10.85539863  7.56011334 12.17549053
11 10.36451555  6.80670901  4.58253170  4.46999702  2.93637508  2.03159385
12  7.10398249  7.33121399 13.22947384 15.80979687 19.39147934 18.90579472
             7
01  0.06788866
02 18.53360489
03  4.07331976
04 14.93550577
05  5.09164969
06  2.51188052
07  6.58520027
08 10.86218601
09  7.80719620
10  9.36863544
11  2.17243720
12 17.99049559

Pourcentage total

# Overall percentages: all cells of the weighted table together sum to 100.
Table_questionR_pct_tot <- 100 * prop.table(Table_questionR)

print(Table_questionR_pct_tot)
              1            2            3            4            5
01 4.872183e+00 1.706469e-01 6.899387e-02 2.520797e-02 3.265089e-04
02 5.381041e+00 1.019493e+00 7.597467e-01 6.327019e-01 1.042057e-01
03 9.555875e+00 1.162586e+00 6.170621e-01 1.908638e-01 2.280191e-02
04 1.401015e+01 1.829694e+00 8.138178e-01 5.953329e-01 6.570534e-02
05 2.418983e+00 4.809333e-01 3.969780e-01 2.663954e-01 2.186483e-02
06 5.408074e+00 1.220997e+00 5.376284e-01 1.962138e-01 4.286520e-03
07 3.087033e+00 3.794162e-01 3.370163e-01 1.577853e-01 2.396179e-02
08 5.222411e+00 2.693161e-01 1.331755e-01 4.665291e-02 1.092051e-02
09 1.097550e+01 1.319359e+00 5.386580e-01 2.809852e-01 3.982502e-02
10 4.856458e+00 8.900019e-01 6.400888e-01 3.770812e-01 3.169075e-02
11 8.261788e+00 6.930565e-01 2.700389e-01 1.552731e-01 1.230880e-02
12 5.662744e+00 7.464614e-01 7.795850e-01 5.491809e-01 8.128589e-02
              6            7
01 4.486180e-04 1.035272e-04
02 3.855684e-02 2.826293e-02
03 7.098023e-03 6.211633e-03
04 2.019713e-02 2.277599e-02
05 1.029290e-02 7.764542e-03
06 3.416398e-03 3.830507e-03
07 7.596414e-03 1.004214e-02
08 7.956189e-03 1.656436e-02
09 1.657633e-02 1.190563e-02
10 2.041268e-02 1.428676e-02
11 3.406046e-03 3.312871e-03
12 3.169630e-02 2.743471e-02

Avec sommes en ligne et en colonne

# Append marginal sums to the column-percentage table.
# NOTE(review): addmargins() is called without `margin`, so sums are added on
# both dimensions. On column percentages only the "Sum" row (100 per column)
# is meaningful; the "Sum" column adds percentages that have different bases.
# Consider addmargins(..., margin = 1) -- confirm the intent.
Table_questionR_with_margins <- addmargins(Table_questionR_pct_col)
print(Table_questionR_with_margins)
               1            2            3            4            5
01    6.11221389   1.67597266   1.17081852   0.72568595   0.07789163
02    6.75058237  10.01273605  12.89281962  18.21419598  24.85920036
03   11.98796392  11.41809664  10.47147706   5.49458034   5.43960030
04   17.57590654  17.96995865  13.81039980  17.13841887  15.67459859
05    3.03464458   4.72338597   6.73667355   7.66898049   5.21605198
06    6.78449676  11.99176834   9.12349542   5.64859497   1.02258778
07    3.87272087   3.72635636   5.71912992   4.54231734   5.71629933
08    6.55157936   2.64503133   2.25997467   1.34304208   2.60518576
09   13.76890628  12.95780434   9.14096786   8.08899146   9.50061649
10    6.09248740   8.74096666  10.86223805  10.85539863   7.56011334
11   10.36451555   6.80670901   4.58253170   4.46999702   2.93637508
12    7.10398249   7.33121399  13.22947384  15.80979687  19.39147934
Sum 100.00000000 100.00000000 100.00000000 100.00000000 100.00000000
               6            7          Sum
01    0.26758582   0.06788866  10.09805713
02   22.99788342  18.53360489 114.26102268
03    4.23373689   4.07331976  53.11877491
04   12.04692136  14.93550577 109.15170960
05    6.13937556   5.09164969  38.61076182
06    2.03776890   2.51188052  39.12059269
07    4.53101047   6.58520027  34.69303456
08    4.74560456  10.86218601  31.01260378
09    9.88723392   7.80719620  71.15171656
10   12.17549053   9.36863544  65.65533004
11    2.03159385   2.17243720  33.36415939
12   18.90579472  17.99049559  99.76223684
Sum 100.00000000 100.00000000 700.00000000

5- 📊 Tableaux croisés simples avec janitor

pourcentage ligne

# Row percentages of naf_interim x naf_asso, formatted as percentage strings.
Base_efe |>
  tabyl(naf_interim, naf_asso) |>
  adorn_percentages(denominator = "row") |>
  adorn_pct_formatting()
 naf_interim     0     1
           0 83.9% 16.1%
           1 98.9%  1.1%

Pourcentage colonne

# Column percentages of naf_interim x naf_asso, formatted as percentage strings.
Base_efe |>
  tabyl(naf_interim, naf_asso) |>
  adorn_percentages(denominator = "col") |>
  adorn_pct_formatting()
 naf_interim     0     1
           0 99.1% 99.9%
           1  0.9%  0.1%

Pourcentage colonne avec somme

# Column percentages with totals: the totals row and column are added to the
# counts first, then everything is converted to column percentages.
Base_efe |>
  tabyl(naf_interim, naf_asso) |>
  adorn_totals(where = "row") |>
  adorn_totals(where = "col") |>
  adorn_percentages(denominator = "col") |>
  adorn_pct_formatting()
 naf_interim      0      1  Total
           0  99.1%  99.9%  99.3%
           1   0.9%   0.1%   0.7%
       Total 100.0% 100.0% 100.0%

Pourcentage ligne avec somme

# Row percentages with totals: the totals row and column are added to the
# counts first, then everything is converted to row percentages.
Base_efe |>
  tabyl(naf_interim, naf_asso) |>
  adorn_totals(where = "row") |>
  adorn_totals(where = "col") |>
  adorn_percentages(denominator = "row") |>
  adorn_pct_formatting()
 naf_interim     0     1  Total
           0 83.9% 16.1% 100.0%
           1 98.9%  1.1% 100.0%
       Total 84.0% 16.0% 100.0%

6- 📊 Tableaux croisés simples avec gmodels

le plus proche de SAS

# Unweighted cross-tabulation of naf_interim x naf_asso with row, column and
# table proportions plus a chi-square test.
CrossTable(
  Base_efe$naf_interim,
  Base_efe$naf_asso,
  prop.r = TRUE,
  prop.c = TRUE,
  prop.t = TRUE,
  chisq = TRUE
)

 
   Cell Contents
|-------------------------|
|                       N |
| Chi-square contribution |
|           N / Row Total |
|           N / Col Total |
|         N / Table Total |
|-------------------------|

 
Total Observations in Table:  23345 

 
                     | Base_efe$naf_asso 
Base_efe$naf_interim |         0 |         1 | Row Total | 
---------------------|-----------|-----------|-----------|
                   0 |     19448 |      3723 |     23171 | 
                     |     0.034 |     0.180 |           | 
                     |     0.839 |     0.161 |     0.993 | 
                     |     0.991 |     0.999 |           | 
                     |     0.833 |     0.159 |           | 
---------------------|-----------|-----------|-----------|
                   1 |       172 |         2 |       174 | 
                     |     4.539 |    23.908 |           | 
                     |     0.989 |     0.011 |     0.007 | 
                     |     0.009 |     0.001 |           | 
                     |     0.007 |     0.000 |           | 
---------------------|-----------|-----------|-----------|
        Column Total |     19620 |      3725 |     23345 | 
                     |     0.840 |     0.160 |           | 
---------------------|-----------|-----------|-----------|

 
Statistics for All Table Factors


Pearson's Chi-squared test 
------------------------------------------------------------
Chi^2 =  28.66078     d.f. =  1     p =  8.623242e-08 

Pearson's Chi-squared test with Yates' continuity correction 
------------------------------------------------------------
Chi^2 =  27.55914     d.f. =  1     p =  1.523635e-07 

 

Avec Pondération

# Weighted contingency table: each cell is the sum of poids_2023.
table_pond <- xtabs(poids_2023 ~ naf_interim + naf_asso, data = Base_efe)

# Same CrossTable layout, applied to the weighted table.
# NOTE(review): the chi-square test is computed on the weighted totals as if
# they were observed counts -- confirm this is intended.
CrossTable(
  table_pond,
  prop.r = TRUE,
  prop.c = TRUE,
  prop.t = TRUE,
  chisq = TRUE
)

 
   Cell Contents
|-------------------------|
|                       N |
| Chi-square contribution |
|           N / Row Total |
|           N / Col Total |
|         N / Table Total |
|-------------------------|

 
Total Observations in Table:  965929.5 

 
             | naf_asso 
 naf_interim |                0 |                1 |        Row Total | 
-------------|------------------|------------------|------------------|
           0 |           873275 |            88725 |           962000 | 
             |            0.143 |            1.418 |                  | 
             |            0.908 |            0.092 |            0.996 | 
             |            0.996 |            1.000 |                  | 
             |            0.904 |            0.092 |                  | 
-------------|------------------|------------------|------------------|
           1 |             3922 |                6 |             3928 | 
             |           35.116 |          347.152 |                  | 
             |            0.998 |            0.002 |            0.004 | 
             |            0.004 |            0.000 |                  | 
             |            0.004 |            0.000 |                  | 
-------------|------------------|------------------|------------------|
Column Total |           877197 |            88732 |           965929 | 
             |            0.908 |            0.092 |                  | 
-------------|------------------|------------------|------------------|

 
Statistics for All Table Factors


Pearson's Chi-squared test 
------------------------------------------------------------
Chi^2 =  383.8294     d.f. =  1     p =  1.824954e-85 

Pearson's Chi-squared test with Yates' continuity correction 
------------------------------------------------------------
Chi^2 =  382.7458     d.f. =  1     p =  3.141672e-85 

 

7- 📊 Tableaux croisés simples avec survey

# Survey design: no clusters (ids = ~1), rows taken from Base_efe and
# weighted by poids_2023.
des <- svydesign(
  ids = ~1,
  data = Base_efe,
  weights = ~poids_2023
)

# Weighted naf_interim x naf_asso table, displayed as column percentages.
freq_table <- svytable(formula = ~ naf_interim + naf_asso, design = des)
100 * prop.table(freq_table, margin = 2)
           naf_asso
naf_interim            0            1
          0 99.552891348 99.992164747
          1  0.447108652  0.007835253
# Weighted row percentages with marginal sums.
# NOTE(review): addmargins() is called without `margin`, so a "Sum" row is
# added as well; it adds percentages computed on different row totals and is
# not meaningful. Consider margin = 2 to keep only the "Sum" column -- confirm.
addmargins(prop.table(freq_table, margin = 1) * 100)
           naf_asso
naf_interim           0           1         Sum
        0    90.7770215   9.2229785 100.0000000
        1    99.8230486   0.1769514 100.0000000
        Sum 190.6000702   9.3999298 200.0000000

8- 📊 Moyenne, Médiane, Quantile sans package spécifique

# Unweighted summary statistics of a2tot for each value of naf_interim.
# Missing values of a2tot are ignored in every statistic.
Base_efe |>
  group_by(naf_interim) |>
  summarise(
    moyenne = mean(a2tot, na.rm = TRUE),
    mediane = median(a2tot, na.rm = TRUE),
    q1 = quantile(a2tot, probs = 0.25, na.rm = TRUE),
    q3 = quantile(a2tot, probs = 0.75, na.rm = TRUE),
    min = min(a2tot, na.rm = TRUE),
    max = max(a2tot, na.rm = TRUE)
  )
# A tibble: 2 × 7
  naf_interim moyenne mediane    q1    q3   min     max
        <int>   <dbl>   <dbl> <dbl> <dbl> <dbl>   <dbl>
1           0    468.      19   5     91      1 4798211
2           1   2633.      91  48.8  253.     1  121336

9- 📊 Moyenne, Médiane, Quantile avec survey

       mean     SE
a2tot 49.49 32.417
$a2tot
    quantile ci.2.5 ci.97.5        se
0.1        1      1       2 0.2550935
0.5        3      3       4 0.2550935
0.9       19     19      20 0.2550935

attr(,"hasci")
[1] TRUE
attr(,"class")
[1] "newsvyquantile"
# A tibble: 2 × 9
  naf_asso moyenne moyenne_se mediane_q10 mediane_q50 mediane_q75 mediane_q10_se
     <int>   <dbl>      <dbl>       <dbl>       <dbl>       <dbl>          <dbl>
1        0    52.6     35.7             1           3           7          0.255
2        1    18.8      0.862           1           3          10          0.255
# ℹ 2 more variables: mediane_q50_se <dbl>, mediane_q75_se <dbl>