#library(gstat)
#library(spacetime)
library(vegan)

## Loading required package: permute

## Loading required package: lattice

## This is vegan 2.6-4

library(GGally)

## Loading required package: ggplot2

## Warning: package 'ggplot2' was built under R version 4.3.3

## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2

library(ggplot2)
library(ggthemes)
library(tidyverse)

## Warning: package 'lubridate' was built under R version 4.3.3

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.4     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(dplyr)
library(tidyr)
library(readxl)
library(openxlsx)

## Warning: package 'openxlsx' was built under R version 4.3.3

library(sf)

## Warning: package 'sf' was built under R version 4.3.3

## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE

library(readxl)
library(lubridate)
library(mgcv)

## Loading required package: nlme

## Warning: package 'nlme' was built under R version 4.3.3

## 
## Attaching package: 'nlme'
## 
## The following object is masked from 'package:dplyr':
## 
##     collapse
## 
## This is mgcv 1.9-1. For overview type 'help("mgcv-package")'.

library(mice)

## 
## Attaching package: 'mice'
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     cbind, rbind

library(reshape2)

## 
## Attaching package: 'reshape2'
## 
## The following object is masked from 'package:tidyr':
## 
##     smiths

library(caret)

## 
## Attaching package: 'caret'
## 
## The following object is masked from 'package:purrr':
## 
##     lift
## 
## The following object is masked from 'package:vegan':
## 
##     tolerance

library(stringr)
#library(rgdal)
#library(automap)
library(MapGAM)

## Loading required package: sp

## Warning: package 'sp' was built under R version 4.3.3

## Loading required package: gam
## Loading required package: splines
## Loading required package: foreach
## 
## Attaching package: 'foreach'
## 
## The following objects are masked from 'package:purrr':
## 
##     accumulate, when
## 
## Loaded gam 1.22-3
## 
## 
## Attaching package: 'gam'
## 
## The following objects are masked from 'package:mgcv':
## 
##     gam, gam.control, gam.fit, s
## 
## Loading required package: survival
## 
## Attaching package: 'survival'
## 
## The following object is masked from 'package:caret':
## 
##     cluster

#library(gstat)
library(forecast)

## Warning: package 'forecast' was built under R version 4.3.3

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo 
## 
## Attaching package: 'forecast'
## 
## The following object is masked from 'package:nlme':
## 
##     getResponse

library(zoo)

## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

#library(flextable)
library(parallel)
library(cowplot)

## 
## Attaching package: 'cowplot'
## 
## The following object is masked from 'package:lubridate':
## 
##     stamp
## 
## The following object is masked from 'package:ggthemes':
## 
##     theme_map

library(fields)

## Warning: package 'fields' was built under R version 4.3.3

## Loading required package: spam
## Spam version 2.10-0 (2023-10-23) is loaded.
## Type 'help( Spam)' or 'demo( spam)' for a short introduction 
## and overview of this package.
## Help for individual functions is also obtained by adding the
## suffix '.spam' to the function name, e.g. 'help( chol.spam)'.
## 
## Attaching package: 'spam'
## 
## The following objects are masked from 'package:base':
## 
##     backsolve, forwardsolve
## 
## Loading required package: viridisLite
## 
## Try help(fields) to get started.

library(FRK)

## 
## Attaching package: 'FRK'
## 
## The following object is masked from 'package:stats':
## 
##     simulate

library(xtable)

## 
## Attaching package: 'xtable'
## 
## The following object is masked from 'package:spam':
## 
##     display

library(leaps)

## Warning: package 'leaps' was built under R version 4.3.3

library(lmtest) 
library(nlme)
library(maps)

## 
## Attaching package: 'maps'
## 
## The following object is masked from 'package:purrr':
## 
##     map

library(animation)
library(rstatix)

## 
## Attaching package: 'rstatix'
## 
## The following object is masked from 'package:stats':
## 
##     filter

library(ggmap)

## ℹ Google's Terms of Service: <https://mapsplatform.google.com>
##   Stadia Maps' Terms of Service: <https://stadiamaps.com/terms-of-service/>
##   OpenStreetMap's Tile Usage Policy: <https://operations.osmfoundation.org/policies/tiles/>
## ℹ Please cite ggmap if you use it! Use `citation("ggmap")` for details.
## 
## Attaching package: 'ggmap'
## 
## 
## The following object is masked from 'package:cowplot':
## 
##     theme_nothing

library(sf)
library(leaflet)

## 
## Attaching package: 'leaflet'
## 
## The following object is masked from 'package:fields':
## 
##     addLegend

#Base Accessibility and Affordability

Acces=read_excel("Acces.xlsx")

## New names:
## • `` -> `...1`

Afford=read_excel("Afford.xlsx")

## New names:
## • `` -> `...1`

Variables1=names(Acces)
Variables2=names(Afford)

#DEPENDENT VARIABLES Accessibility UNMET_ACCES Affordability UNMET_AFFORD

#INDEPENDENT VARIABLES Country (Country), AGE_group, Education_new, Employment_new, Marital status (Marstat), Degree of urbanization (Degurb) Body Mass Index (BMI_group), Smoking,Phisycal activity (PA_MET_group), Self-perceived health status (SPHS), Limitation in activities because of health problems (Limit)

table(Acces$INCOME_new)

## 
##     1     2     3     4     5 
## 23597 25159 23764 23586 22122

table(Acces$COUNTRY)

## 
##    AT    BE    BG    CY    CZ    DE    DK    EE    EL    ES    FI    FR    HR 
##  6745  3100  2653  2598  3847  9643  1710  2144  2907  9568  2063  6648  1458 
##    HU    IE    IS    IT    LT    LU    LV    MT    NL    NO    PL    PT    RO 
##  2781   880  1274 14446  2238  1151  2776  1936  2297  3593  6655  7096  7171 
##    SE    SI    SK 
##  2611  3692  2547

table(Acces$AGE_group)

## 
##  Adolescents Adults older Adults young      Elderly 
##         4258        41199        33108        39663

table(Acces$Education_new)

## 
##   Primary Secondary  Tertiary 
##     38797     44598     34833

table(Acces$Employment_new)

## 
##   Employed    Retired Unemployed 
##      50362      38792      29074

table(Acces$Marstat)

## 
## Divorced  Married    Never  Widowed 
##    11424    59608    26887    20309

table(Acces$Degurb)

## 
##            Cities       Rural areas Towns and suburbs 
##             43809             35874             38545

table(Acces$BMI_new)

## 
## Normal weight         Obese    Overweight 
##         59735         21068         37425

table(Acces$Smoking_new)

## 
##      Daily No smoking Occasional 
##      15997      97741       4490

table(Acces$PA_MET_group)

## 
##     High Inactive      Low Moderate 
##    29041    17324    20256    51607

table(Acces$SPHS)

## 
##       Bad      Fair      Good Very good 
##     13432     34429     47600     22767

table(Acces$Limit)

## 
##          Limited      Not limited Severely limited 
##            30032            77308            10888

table(Acces$Nutrition)

## 
## Insufficient     Moderate   Sufficient 
##        12466        23803        81959

#Extraction of Accessibility Data

Data=Acces%>%dplyr::select("UNMET_ACCES","COUNTRY","AGE_group","Education_new","Employment_new","Marstat","Degurb","BMI_new","Smoking_new","PA_MET_group","SPHS","Limit","INCOME_new","Nutrition")

Analysis of the distribution of participant by Country

###Acces

Count_df <- as.data.frame(table(Acces$COUNTRY))
names(Count_df) <- c("COUNTRY", "Count")

Count_df2<- as.data.frame(table(Afford$COUNTRY))
names(Count_df2) <- c("COUNTRY", "Count")

ggplot(Count_df, aes(x = Count, y = COUNTRY, fill = COUNTRY)) +
  geom_col() +
  theme_minimal() +
  labs(x = "Participant in Acces Data", y = "Country") +
  theme(legend.position = "none")  # Cache la légende si elle est redondante

#Analysing the global distribution of UNMET_ACCES

library(ggplot2)
df=Data
# Distribution cible
ggplot(Data, aes(x=UNMET_ACCES)) + geom_bar() + theme_minimal()

##Proportion by country of UNMET_ACCES

ggplot(df, aes(x=COUNTRY, fill=UNMET_ACCES)) + geom_bar(position="fill") +
  ylab("Proportion by country") + theme_minimal()

## Warning: The following aesthetics were dropped during statistical transformation: fill.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

UNMET_ACCES accordin to the BMI_new

ggplot(df, aes(x=BMI_new, fill=UNMET_ACCES)) + geom_bar(position="fill") +
  ylab("Proportion of UNMET_ACCES wrt BMI ") + theme_minimal()

## Warning: The following aesthetics were dropped during statistical transformation: fill.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

ggplot(df, aes(x=Marstat, fill=UNMET_ACCES)) + geom_bar(position="fill") +
  ylab("Proportion of UNMET_ACCES wrt Marstat ") + theme_minimal()

## Warning: The following aesthetics were dropped during statistical transformation: fill.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

ggplot(df, aes(x=Degurb, fill=UNMET_ACCES)) + geom_bar(position="fill") +
  ylab("Proportion of UNMET_ACCES wrt Degurb") + theme_minimal()

## Warning: The following aesthetics were dropped during statistical transformation: fill.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

ggplot(df, aes(x=Smoking_new, fill=UNMET_ACCES)) + geom_bar(position="fill") +
  ylab("Proportion of UNMET_ACCES wrt Smoking_new") + theme_minimal()

## Warning: The following aesthetics were dropped during statistical transformation: fill.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

ggplot(df, aes(x=PA_MET_group, fill=UNMET_ACCES)) + geom_bar(position="fill") +
  ylab("Proportion of UNMET_ACCES wrt PA_MET_group") + theme_minimal()

## Warning: The following aesthetics were dropped during statistical transformation: fill.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

ggplot(df, aes(x=SPHS, fill=UNMET_ACCES)) + geom_bar(position="fill") +
  ylab("Proportion of UNMET_ACCES wrt SPHS") + theme_minimal()

## Warning: The following aesthetics were dropped during statistical transformation: fill.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

ggplot(df, aes(x=Education_new, fill=UNMET_ACCES)) + geom_bar(position="fill") +
  ylab("Proportion of UNMET_ACCES wrt Education_new") + theme_minimal()

## Warning: The following aesthetics were dropped during statistical transformation: fill.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

ggplot(df, aes(x=Employment_new, fill=UNMET_ACCES)) + geom_bar(position="fill") +
  ylab("Proportion of UNMET_ACCES wrt Employment_new") + theme_minimal()

## Warning: The following aesthetics were dropped during statistical transformation: fill.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

##Proportion by Age group

ggplot(df, aes(x = COUNTRY, fill = UNMET_ACCES)) + 
  geom_bar(position = "fill") +
  ylab("Proportion by Age group") + 
  theme_minimal() +
  facet_wrap(~ AGE_group)

## Warning: The following aesthetics were dropped during statistical transformation: fill.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## The following aesthetics were dropped during statistical transformation: fill.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## The following aesthetics were dropped during statistical transformation: fill.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## The following aesthetics were dropped during statistical transformation: fill.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

ggplot(df, aes(x = COUNTRY, fill = UNMET_ACCES)) + 
  geom_bar(position = position_dodge()) +
  ylab("Count by country") +
  theme_minimal() +
  facet_wrap(~ AGE_group)

## Warning: The following aesthetics were dropped during statistical transformation: fill.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## The following aesthetics were dropped during statistical transformation: fill.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## The following aesthetics were dropped during statistical transformation: fill.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## The following aesthetics were dropped during statistical transformation: fill.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

library(ggplot2)
library(gridExtra)

## 
## Attaching package: 'gridExtra'

## The following object is masked from 'package:dplyr':
## 
##     combine

# Liste des variables catégorielles à analyser
vars <- c("Marstat", "Degurb", "BMI_new", "Smoking_new", "Nutrition", 
          "PA_MET_group", "SPHS", "Limit", "AGE_group", "Education_new", "Employment_new")

vars1 <- c("Marstat", "Degurb", "Smoking_new", "Nutrition")
# Génération des graphiques à barres empilées en proportion
plots <- lapply(vars1, function(v) {
  ggplot(df, aes_string(x = v, fill = "UNMET_ACCES")) +
    geom_bar(position = "fill") +
    ylab("Proportion") +
    xlab(v) +
    scale_fill_manual(values = c("0" = "#619CFF", "1" = "#F8766D"), name = "UNMET_ACCES") +
    theme_minimal() +
    ggtitle(paste("Impact de", v, "sur UNMET_ACCES")) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
})

## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Afficher les graphiques 3 par 3 (ajuster selon ta résolution)
gridExtra::grid.arrange(grobs = plots, ncol = 3)

## Warning: The following aesthetics were dropped during statistical transformation: fill.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's fill values.

## Warning: The following aesthetics were dropped during statistical transformation: fill.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's fill values.

## Warning: The following aesthetics were dropped during statistical transformation: fill.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's fill values.

## Warning: The following aesthetics were dropped during statistical transformation: fill.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's fill values.

library(randomForest)

## Warning: package 'randomForest' was built under R version 4.3.3

## randomForest 4.7-1.2

## Type rfNews() to see new features/changes/bug fixes.

## 
## Attaching package: 'randomForest'

## The following object is masked from 'package:gridExtra':
## 
##     combine

## The following object is masked from 'package:dplyr':
## 
##     combine

## The following object is masked from 'package:ggplot2':
## 
##     margin

library(ggplot2)
library(dplyr)

# Variables mises à jour
vars <- c("Marstat", "Degurb", "BMI_new", "Smoking_new", "Nutrition", 
          "PA_MET_group", "SPHS", "Limit", "AGE_group", "Education_new", "Employment_new")

# Sous-ensemble du jeu de données
df_rf <- df %>%
  select(all_of(c("UNMET_ACCES", vars))) %>%
  mutate(UNMET_ACCES = factor(UNMET_ACCES))

# Supprimer les lignes incomplètes
df_rf <- na.omit(df_rf)

# set.seed(123)  # Pour reproductibilité
# 
# rf_model <- randomForest(UNMET_ACCES ~ ., data = df_rf, importance = TRUE, ntree = 500)

# Extraire l’importance des variables
# importance_df <- data.frame(
#   Variable = rownames(rf_model$importance),
#   MeanDecreaseGini = rf_model$importance[, "MeanDecreaseGini"]
# )
# 
# # Trier par ordre décroissant
# importance_df <- importance_df %>%
#   arrange(desc(MeanDecreaseGini))
# 
# # Afficher le graphique
# ggplot(importance_df, aes(x = reorder(Variable, MeanDecreaseGini), y = MeanDecreaseGini)) +
#   geom_col(fill = "steelblue") +
#   coord_flip() +
#   labs(title = " Variable Importance - Random Forest Models",
#        y = "Mean Decrease in Gini",
#        x = "Variable") +
#   theme_minimal()

# Régression logistique
mod <- glm(UNMET_ACCES ~ AGE_group + Education_new + Employment_new + Marstat + Degurb + BMI_new + Smoking_new + PA_MET_group, data = Acces, family = binomial())

# Résumé du modèle
summary(mod)

## 
## Call:
## glm(formula = UNMET_ACCES ~ AGE_group + Education_new + Employment_new + 
##     Marstat + Degurb + BMI_new + Smoking_new + PA_MET_group, 
##     family = binomial(), data = Acces)
## 
## Coefficients:
##                           Estimate Std. Error z value Pr(>|z|)    
## (Intercept)              -1.573533   0.058260 -27.009  < 2e-16 ***
## AGE_groupAdults older     0.466604   0.048727   9.576  < 2e-16 ***
## AGE_groupAdults young     0.466930   0.047409   9.849  < 2e-16 ***
## AGE_groupElderly          0.406750   0.052175   7.796 6.40e-15 ***
## Education_newSecondary   -0.086211   0.018507  -4.658 3.19e-06 ***
## Education_newTertiary     0.065120   0.020991   3.102 0.001920 ** 
## Employment_newRetired    -0.061562   0.026532  -2.320 0.020328 *  
## Employment_newUnemployed  0.152623   0.019597   7.788 6.81e-15 ***
## MarstatMarried           -0.228785   0.024431  -9.365  < 2e-16 ***
## MarstatNever             -0.091608   0.028512  -3.213 0.001314 ** 
## MarstatWidowed           -0.167599   0.030175  -5.554 2.79e-08 ***
## DegurbRural areas        -0.056135   0.017816  -3.151 0.001628 ** 
## DegurbTowns and suburbs  -0.052586   0.017249  -3.049 0.002299 ** 
## BMI_newObese              0.271690   0.019690  13.798  < 2e-16 ***
## BMI_newOverweight         0.064515   0.016872   3.824 0.000131 ***
## Smoking_newNo smoking    -0.108852   0.020922  -5.203 1.96e-07 ***
## Smoking_newOccasional     0.076875   0.039872   1.928 0.053849 .  
## PA_MET_groupInactive      0.155962   0.024484   6.370 1.89e-10 ***
## PA_MET_groupLow           0.060932   0.023045   2.644 0.008193 ** 
## PA_MET_groupModerate     -0.005156   0.018417  -0.280 0.779519    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 122022  on 118227  degrees of freedom
## Residual deviance: 121279  on 118208  degrees of freedom
## AIC: 121319
## 
## Number of Fisher Scoring iterations: 4

##Odds ratios et intervalles de confiance

# library(broom)
# # Odds ratios
# tidy(mod, exponentiate = TRUE, conf.int = TRUE)

#Évaluation du modèle : ROC, AUC

# library(pROC)
# 
# # Probabilités prédites
# probs <- predict(mod, type = "response")
# 
# # ROC curve
# roc_obj <- roc(Acces$UNMET_ACCES, probs)
# plot(roc_obj, col = "blue", main = "Courbe ROC")
# auc(roc_obj)

# Modèle avec interaction entre Age_group et Education_new
modele_inter <- glm(UNMET_ACCES ~ AGE_group * Education_new + Employment_new + BMI_new,
                    data = Acces, family = binomial)
summary(modele_inter)

## 
## Call:
## glm(formula = UNMET_ACCES ~ AGE_group * Education_new + Employment_new + 
##     BMI_new, family = binomial, data = Acces)
## 
## Coefficients:
##                                              Estimate Std. Error z value
## (Intercept)                                  -1.87103    0.05263 -35.549
## AGE_groupAdults older                         0.54280    0.05536   9.806
## AGE_groupAdults young                         0.41849    0.06239   6.707
## AGE_groupElderly                              0.48317    0.05570   8.674
## Education_newSecondary                        0.25917    0.09475   2.735
## Education_newTertiary                         0.51708    0.28084   1.841
## Employment_newRetired                        -0.06903    0.02664  -2.592
## Employment_newUnemployed                      0.15578    0.01969   7.912
## BMI_newObese                                  0.27212    0.01955  13.919
## BMI_newOverweight                             0.05452    0.01680   3.245
## AGE_groupAdults older:Education_newSecondary -0.36497    0.09924  -3.678
## AGE_groupAdults young:Education_newSecondary -0.27596    0.10403  -2.653
## AGE_groupElderly:Education_newSecondary      -0.36758    0.09899  -3.713
## AGE_groupAdults older:Education_newTertiary  -0.53590    0.28264  -1.896
## AGE_groupAdults young:Education_newTertiary  -0.28466    0.28394  -1.003
## AGE_groupElderly:Education_newTertiary       -0.58749    0.28317  -2.075
##                                              Pr(>|z|)    
## (Intercept)                                   < 2e-16 ***
## AGE_groupAdults older                         < 2e-16 ***
## AGE_groupAdults young                        1.98e-11 ***
## AGE_groupElderly                              < 2e-16 ***
## Education_newSecondary                       0.006233 ** 
## Education_newTertiary                        0.065600 .  
## Employment_newRetired                        0.009555 ** 
## Employment_newUnemployed                     2.54e-15 ***
## BMI_newObese                                  < 2e-16 ***
## BMI_newOverweight                            0.001176 ** 
## AGE_groupAdults older:Education_newSecondary 0.000235 ***
## AGE_groupAdults young:Education_newSecondary 0.007984 ** 
## AGE_groupElderly:Education_newSecondary      0.000204 ***
## AGE_groupAdults older:Education_newTertiary  0.057951 .  
## AGE_groupAdults young:Education_newTertiary  0.316079    
## AGE_groupElderly:Education_newTertiary       0.038015 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 122022  on 118227  degrees of freedom
## Residual deviance: 121474  on 118212  degrees of freedom
## AIC: 121506
## 
## Number of Fisher Scoring iterations: 4

# install.packages("interactions")
# library(interactions)
# 
# # Visualiser l'interaction dans un modèle glm
# interact_plot(modele_inter, pred = "AGE_group", modx = "Education_new")

# Sélection de variables avec interactions
# Modèle complet avec toutes les variables et interactions choisies
modele_complet <- glm(UNMET_ACCES ~ (AGE_group + Education_new + Employment_new + BMI_new)^2, 
                      data = Acces, family = binomial)

# Sélection backward sur AIC
modele_sel <- step(modele_complet, direction = "backward")

## Start:  AIC=121466.8
## UNMET_ACCES ~ (AGE_group + Education_new + Employment_new + BMI_new)^2
## 
##                                Df Deviance    AIC
## - Education_new:BMI_new         4   121389 121461
## <none>                              121387 121467
## - Employment_new:BMI_new        4   121396 121468
## - AGE_group:BMI_new             6   121403 121471
## - AGE_group:Education_new       6   121414 121482
## - Education_new:Employment_new  4   121411 121483
## - AGE_group:Employment_new      6   121432 121500
## 
## Step:  AIC=121461.1
## UNMET_ACCES ~ AGE_group + Education_new + Employment_new + BMI_new + 
##     AGE_group:Education_new + AGE_group:Employment_new + AGE_group:BMI_new + 
##     Education_new:Employment_new + Employment_new:BMI_new
## 
##                                Df Deviance    AIC
## <none>                              121389 121461
## - Employment_new:BMI_new        4   121399 121463
## - AGE_group:BMI_new             6   121405 121465
## - AGE_group:Education_new       6   121416 121476
## - Education_new:Employment_new  4   121413 121477
## - AGE_group:Employment_new      6   121435 121495

summary(modele_sel)

## 
## Call:
## glm(formula = UNMET_ACCES ~ AGE_group + Education_new + Employment_new + 
##     BMI_new + AGE_group:Education_new + AGE_group:Employment_new + 
##     AGE_group:BMI_new + Education_new:Employment_new + Employment_new:BMI_new, 
##     family = binomial, data = Acces)
## 
## Coefficients:
##                                                 Estimate Std. Error z value
## (Intercept)                                     -1.19237    0.13698  -8.705
## AGE_groupAdults older                           -0.08359    0.13902  -0.601
## AGE_groupAdults young                           -0.16801    0.14190  -1.184
## AGE_groupElderly                                -0.26495    0.16280  -1.627
## Education_newSecondary                           0.04238    0.10498   0.404
## Education_newTertiary                            0.19695    0.28585   0.689
## Employment_newRetired                           -8.53843   36.19875  -0.236
## Employment_newUnemployed                        -0.59375    0.13599  -4.366
## BMI_newObese                                     0.26868    0.21768   1.234
## BMI_newOverweight                                0.30055    0.12785   2.351
## AGE_groupAdults older:Education_newSecondary    -0.22939    0.10292  -2.229
## AGE_groupAdults young:Education_newSecondary    -0.14996    0.10726  -1.398
## AGE_groupElderly:Education_newSecondary         -0.30824    0.11330  -2.721
## AGE_groupAdults older:Education_newTertiary     -0.28909    0.28562  -1.012
## AGE_groupAdults young:Education_newTertiary     -0.07872    0.28652  -0.275
## AGE_groupElderly:Education_newTertiary          -0.38208    0.29280  -1.305
## AGE_groupAdults older:Employment_newRetired      8.44276   36.19876   0.233
## AGE_groupAdults young:Employment_newRetired      8.42364   36.20022   0.233
## AGE_groupElderly:Employment_newRetired           8.54085   36.19883   0.236
## AGE_groupAdults older:Employment_newUnemployed   0.73253    0.13804   5.306
## AGE_groupAdults young:Employment_newUnemployed   0.53350    0.13883   3.843
## AGE_groupElderly:Employment_newUnemployed        0.78340    0.16004   4.895
## AGE_groupAdults older:BMI_newObese              -0.04999    0.21717  -0.230
## AGE_groupAdults young:BMI_newObese               0.09112    0.21877   0.417
## AGE_groupElderly:BMI_newObese                    0.05272    0.22202   0.237
## AGE_groupAdults older:BMI_newOverweight         -0.23869    0.12744  -1.873
## AGE_groupAdults young:BMI_newOverweight         -0.20559    0.12879  -1.596
## AGE_groupElderly:BMI_newOverweight              -0.08663    0.13343  -0.649
## Education_newSecondary:Employment_newRetired     0.16268    0.06670   2.439
## Education_newTertiary:Employment_newRetired      0.11705    0.07973   1.468
## Education_newSecondary:Employment_newUnemployed  0.17399    0.04949   3.515
## Education_newTertiary:Employment_newUnemployed   0.26128    0.05569   4.692
## Employment_newRetired:BMI_newObese              -0.02948    0.06895  -0.428
## Employment_newUnemployed:BMI_newObese           -0.05005    0.05158  -0.970
## Employment_newRetired:BMI_newOverweight         -0.17799    0.06120  -2.908
## Employment_newUnemployed:BMI_newOverweight      -0.07241    0.04471  -1.620
##                                                 Pr(>|z|)    
## (Intercept)                                      < 2e-16 ***
## AGE_groupAdults older                           0.547654    
## AGE_groupAdults young                           0.236413    
## AGE_groupElderly                                0.103637    
## Education_newSecondary                          0.686443    
## Education_newTertiary                           0.490840    
## Employment_newRetired                           0.813529    
## Employment_newUnemployed                        1.26e-05 ***
## BMI_newObese                                    0.217096    
## BMI_newOverweight                               0.018731 *  
## AGE_groupAdults older:Education_newSecondary    0.025825 *  
## AGE_groupAdults young:Education_newSecondary    0.162064    
## AGE_groupElderly:Education_newSecondary         0.006517 ** 
## AGE_groupAdults older:Education_newTertiary     0.311475    
## AGE_groupAdults young:Education_newTertiary     0.783517    
## AGE_groupElderly:Education_newTertiary          0.191913    
## AGE_groupAdults older:Employment_newRetired     0.815580    
## AGE_groupAdults young:Employment_newRetired     0.815998    
## AGE_groupElderly:Employment_newRetired          0.813477    
## AGE_groupAdults older:Employment_newUnemployed  1.12e-07 ***
## AGE_groupAdults young:Employment_newUnemployed  0.000122 ***
## AGE_groupElderly:Employment_newUnemployed       9.83e-07 ***
## AGE_groupAdults older:BMI_newObese              0.817933    
## AGE_groupAdults young:BMI_newObese              0.677021    
## AGE_groupElderly:BMI_newObese                   0.812296    
## AGE_groupAdults older:BMI_newOverweight         0.061075 .  
## AGE_groupAdults young:BMI_newOverweight         0.110421    
## AGE_groupElderly:BMI_newOverweight              0.516174    
## Education_newSecondary:Employment_newRetired    0.014729 *  
## Education_newTertiary:Employment_newRetired     0.142078    
## Education_newSecondary:Employment_newUnemployed 0.000439 ***
## Education_newTertiary:Employment_newUnemployed  2.71e-06 ***
## Employment_newRetired:BMI_newObese              0.668945    
## Employment_newUnemployed:BMI_newObese           0.331860    
## Employment_newRetired:BMI_newOverweight         0.003635 ** 
## Employment_newUnemployed:BMI_newOverweight      0.105302    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 122022  on 118227  degrees of freedom
## Residual deviance: 121389  on 118192  degrees of freedom
## AIC: 121461
## 
## Number of Fisher Scoring iterations: 8

library(glmnet)

## Loading required package: Matrix

## 
## Attaching package: 'Matrix'

## The following object is masked from 'package:spam':
## 
##     det

## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

## Loaded glmnet 4.1-8

# Création de la matrice de design avec interactions
X <- model.matrix(UNMET_ACCES ~ (AGE_group + Education_new + Employment_new + BMI_new)^2, data = Acces)[,-1]
y <- Acces$UNMET_ACCES

# Modèle LASSO pénalisé
cvfit <- cv.glmnet(X, y, family = "binomial", alpha = 1)

# Coefficients sélectionnés au lambda optimal
coef(cvfit, s = "lambda.min")

## 40 x 1 sparse Matrix of class "dgCMatrix"
##                                                           s1
## (Intercept)                                     -1.443459733
## AGE_groupAdults older                            0.098580335
## AGE_groupAdults young                            0.056519526
## AGE_groupElderly                                 .          
## Education_newSecondary                          -0.051515152
## Education_newTertiary                            .          
## Employment_newRetired                            .          
## Employment_newUnemployed                        -0.175753642
## BMI_newObese                                     0.221837610
## BMI_newOverweight                                0.048507708
## AGE_groupAdults older:Education_newSecondary    -0.040302407
## AGE_groupAdults young:Education_newSecondary     .          
## AGE_groupElderly:Education_newSecondary         -0.049927872
## AGE_groupAdults older:Education_newTertiary     -0.022726564
## AGE_groupAdults young:Education_newTertiary      0.146460445
## AGE_groupElderly:Education_newTertiary          -0.081918056
## AGE_groupAdults older:Employment_newRetired     -0.012455237
## AGE_groupAdults young:Employment_newRetired      .          
## AGE_groupElderly:Employment_newRetired           .          
## AGE_groupAdults older:Employment_newUnemployed   0.342690253
## AGE_groupAdults young:Employment_newUnemployed   0.150576528
## AGE_groupElderly:Employment_newUnemployed        0.345231444
## AGE_groupAdults older:BMI_newObese               .          
## AGE_groupAdults young:BMI_newObese               0.117194874
## AGE_groupElderly:BMI_newObese                    0.048930453
## AGE_groupAdults older:BMI_newOverweight         -0.006452737
## AGE_groupAdults young:BMI_newOverweight          0.003610472
## AGE_groupElderly:BMI_newOverweight               0.056913825
## Education_newSecondary:Employment_newRetired     .          
## Education_newTertiary:Employment_newRetired      .          
## Education_newSecondary:Employment_newUnemployed  0.089245482
## Education_newTertiary:Employment_newUnemployed   0.195947150
## Education_newSecondary:BMI_newObese             -0.015365110
## Education_newTertiary:BMI_newObese               .          
## Education_newSecondary:BMI_newOverweight        -0.005236298
## Education_newTertiary:BMI_newOverweight          0.035045803
## Employment_newRetired:BMI_newObese               0.007260170
## Employment_newUnemployed:BMI_newObese           -0.002897701
## Employment_newRetired:BMI_newOverweight         -0.084719563
## Employment_newUnemployed:BMI_newOverweight       .

# packages nécessaires
#install.packages(c("glmnet", "ggplot2"))
library(glmnet)
library(ggplot2)

# Exemple avec un dataframe df, variable cible UNMET_ACESS
# Préparer la matrice de design avec interactions 2 à 2 :
X <- model.matrix(UNMET_ACCES ~ (AGE_group + Education_new + Employment_new + BMI_new)^2, data = Acces)[,-1]
y <- Acces$UNMET_ACCES

# Ajuster Lasso (régression logistique)
cvfit <- cv.glmnet(X, y, family = "binomial", alpha = 1)

# Extraire coefficients au lambda.min
coef_lasso <- coef(cvfit, s = "lambda.min")
# Convertir en dataframe
coef_df <- data.frame(
  variable = rownames(coef_lasso),
  coefficient = as.numeric(coef_lasso)
)
# Retirer l'intercept
coef_df <- coef_df[coef_df$variable != "(Intercept)", ]

# Ne garder que les coefficients non nuls
coef_df <- coef_df[coef_df$coefficient != 0, ]

# Ordre décroissant de l'importance absolue
coef_df$importance <- abs(coef_df$coefficient)
coef_df <- coef_df[order(coef_df$importance, decreasing = TRUE), ]

# Graphique avec ggplot2
ggplot(coef_df, aes(x = reorder(variable, importance), y = coefficient, fill = coefficient > 0)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  labs(title = "Importance des variables par Lasso (coefficients non nuls)",
       x = "Variable",
       y = "Coefficient") +
  theme_minimal() +
  scale_fill_manual(name = "Signe", values = c("TRUE" = "blue", "FALSE" = "red"),
                    labels = c("Négatif", "Positif"))

ggplot(Count_df2, aes(x = Count, y = COUNTRY, fill = COUNTRY)) +
  geom_col() +
  theme_minimal() +
  labs(x = "Participant in Afford Data", y = "Country") +
  theme(legend.position = "none")  # Cache la légende si elle est redondante

Acces %>%  ggplot(aes(x = "", AGE_group, col = COUNTRY)) + 
  geom_boxplot() + facet_wrap(~AGE_group, scales = "free")

Acces %>% ggplot(aes(UNMET_ACCES,AGE_group)) + geom_col(position = "dodge") + #scale_x_date(date_labels = "%B") +
  xlab("") + ylab("") + ggtitle("") + theme_bw()

Acces %>% ggplot(aes(UNMET_ACCES, AGE_group, fill = AGE_group)) + geom_col() + ylab("")

library(corrplot)

## corrplot 0.92 loaded

library(GGally)
library(knitr)
library(lattice)
library(ggpubr)

## 
## Attaching package: 'ggpubr'

## The following object is masked from 'package:cowplot':
## 
##     get_legend

## The following object is masked from 'package:forecast':
## 
##     gghistogram

library("FactoMineR")
library("factoextra")

## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

library(PBSmapping)

## 
## -----------------------------------------------------------
## PBS Mapping 2.73.4 -- Copyright (C) 2003-2025 Fisheries and Oceans Canada
## 
## PBS Mapping comes with ABSOLUTELY NO WARRANTY;
## for details see the file COPYING.
## This is free software, and you are welcome to redistribute
## it under certain conditions, as outlined in the above file.
## 
## A complete user guide 'PBSmapping-UG.pdf' is located at 
## /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/PBSmapping/doc/PBSmapping-UG.pdf
## 
## Packaged on 2023-11-03
## Pacific Biological Station, Nanaimo
## 
## All available PBS packages can be found at
## https://github.com/pbs-software
## 
## To see demos, type '.PBSfigs()'.
## Note: function 'importShapefile()' is temporarily unavailable.
## -----------------------------------------------------------

Analyse factorielle

Pourcentage variance expliquee

# quali.var <- get_famd_var(res.famd, "quali.var")
# #quali.var
# quali.var$coord

# quali.var <- get_famd_var(res.famd, "quali.var")
# #quali.var
# quali.var$coord

MIEUX VOIR LES CONTRIBUTIONS POUR LES COMMENTAIRES

REPRESENTATION CLUSTERS

# resCAH <-HCPC(res.famd, graph = FALSE)
# Cluster=resCAH[["data.clust"]][["clust"]]
# X$Classe=Cluster
# #Y=cbind(coordinat,Y)
# fviz_cluster(resCAH,repel = TRUE, show.clust.cent = TRUE, # Show cluster centers
#              palette = "jco",ggtheme = theme_minimal(),main = "Factor map",geom = "point"
# )

#summary(Cluster)

Data-EHIS

Solym Manou-Abi

2024-06-12