## Warning: le package 'tidyverse' a été compilé avec la version R 4.5.3
## Warning: le package 'ggplot2' a été compilé avec la version R 4.5.3
## Warning: le package 'tibble' a été compilé avec la version R 4.5.3
## Warning: le package 'tidyr' a été compilé avec la version R 4.5.3
## Warning: le package 'readr' a été compilé avec la version R 4.5.3
## Warning: le package 'dplyr' a été compilé avec la version R 4.5.3
## Warning: le package 'stringr' a été compilé avec la version R 4.5.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(FactoMineR) # Analyses factorielles (ACP, etc.)
library(factoextra) # Visualisation des résultats d'ACP## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
## Warning: le package 'corrplot' a été compilé avec la version R 4.5.3
## corrplot 0.95 loaded
## Warning: le package 'data.table' a été compilé avec la version R 4.5.3
##
## Attachement du package : 'data.table'
##
## Les objets suivants sont masqués depuis 'package:lubridate':
##
## hour, isoweek, isoyear, mday, minute, month, quarter, second, wday,
## week, yday, year
##
## Les objets suivants sont masqués depuis 'package:dplyr':
##
## between, first, last
##
## L'objet suivant est masqué depuis 'package:purrr':
##
## transpose
## Warning: le package 'psych' a été compilé avec la version R 4.5.3
##
## Attachement du package : 'psych'
##
## Les objets suivants sont masqués depuis 'package:ggplot2':
##
## %+%, alpha
## Warning: le package 'ggrepel' a été compilé avec la version R 4.5.3
## Warning: le package 'haven' a été compilé avec la version R 4.5.3
my_data <- read_dta("C:/Users/USER/Documents/Data/guns_crimes.dta")
### Vérification de la structure des données
str(my_data)## tibble [1,173 × 14] (S3: tbl_df/tbl/data.frame)
## $ year : num [1:1173] 1977 1978 1979 1980 1981 ...
## ..- attr(*, "format.stata")= chr "%8.0g"
## $ violent : num [1:1173] 414 419 413 448 470 ...
## ..- attr(*, "format.stata")= chr "%9.0g"
## $ murder : num [1:1173] 14.2 13.3 13.2 13.2 11.9 ...
## ..- attr(*, "format.stata")= chr "%9.0g"
## $ robbery : num [1:1173] 96.8 99.1 109.5 132.1 126.5 ...
## ..- attr(*, "format.stata")= chr "%9.0g"
## $ prisoners : num [1:1173] 83 94 144 141 149 183 215 243 256 267 ...
## ..- attr(*, "format.stata")= chr "%8.0g"
## $ afam : num [1:1173] 8.38 8.35 8.33 8.41 8.48 ...
## ..- attr(*, "format.stata")= chr "%9.0g"
## $ cauc : num [1:1173] 55.1 55.1 55.1 54.9 54.9 ...
## ..- attr(*, "format.stata")= chr "%9.0g"
## $ male : num [1:1173] 18.2 18 17.8 17.7 17.7 ...
## ..- attr(*, "format.stata")= chr "%9.0g"
## $ population: num [1:1173] 3.78 3.83 3.87 3.9 3.92 ...
## ..- attr(*, "format.stata")= chr "%9.0g"
## $ income : num [1:1173] 9563 9932 9877 9541 9548 ...
## ..- attr(*, "format.stata")= chr "%9.0g"
## $ density : num [1:1173] 0.0746 0.0756 0.0762 0.0768 0.0772 ...
## ..- attr(*, "format.stata")= chr "%9.0g"
## $ state : chr [1:1173] "Alabama" "Alabama" "Alabama" "Alabama" ...
## ..- attr(*, "format.stata")= chr "%20s"
## $ law : chr [1:1173] "no" "no" "no" "no" ...
## ..- attr(*, "format.stata")= chr "%9s"
## $ Etat : dbl+lbl [1:1173] 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## ..@ format.stata: chr "%20.0g"
## ..@ labels : Named num [1:51] 1 2 3 4 5 6 7 8 9 10 ...
## .. ..- attr(*, "names")= chr [1:51] "Alabama" "Alaska" "Arizona" "Arkansas" ...
## # A tibble: 1,173 × 14
## year violent murder robbery prisoners afam cauc male population income
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1977 414. 14.2 96.8 83 8.38 55.1 18.2 3.78 9563.
## 2 1978 419. 13.3 99.1 94 8.35 55.1 18.0 3.83 9932
## 3 1979 413. 13.2 110. 144 8.33 55.1 17.8 3.87 9877.
## 4 1980 448. 13.2 132. 141 8.41 54.9 17.7 3.90 9541.
## 5 1981 470. 11.9 126. 149 8.48 54.9 17.7 3.92 9548.
## 6 1982 448. 10.6 112 183 8.51 54.9 17.5 3.93 9479.
## 7 1983 416 9.20 98.4 215 8.55 54.8 17.4 3.93 9783
## 8 1984 431. 9.40 96.1 243 8.56 54.8 17.1 3.95 10357.
## 9 1985 458. 9.80 105. 256 8.56 54.7 16.9 3.97 10726.
## 10 1986 558 10.1 112. 267 8.57 54.5 16.6 3.99 11092.
## # ℹ 1,163 more rows
## # ℹ 4 more variables: density <dbl>, state <chr>, law <chr>, Etat <dbl+lbl>
## tibble [1,173 × 14] (S3: tbl_df/tbl/data.frame)
## $ year : num [1:1173] 1977 1978 1979 1980 1981 ...
## ..- attr(*, "format.stata")= chr "%8.0g"
## $ violent : num [1:1173] 414 419 413 448 470 ...
## ..- attr(*, "format.stata")= chr "%9.0g"
## $ murder : num [1:1173] 14.2 13.3 13.2 13.2 11.9 ...
## ..- attr(*, "format.stata")= chr "%9.0g"
## $ robbery : num [1:1173] 96.8 99.1 109.5 132.1 126.5 ...
## ..- attr(*, "format.stata")= chr "%9.0g"
## $ prisoners : num [1:1173] 83 94 144 141 149 183 215 243 256 267 ...
## ..- attr(*, "format.stata")= chr "%8.0g"
## $ afam : num [1:1173] 8.38 8.35 8.33 8.41 8.48 ...
## ..- attr(*, "format.stata")= chr "%9.0g"
## $ cauc : num [1:1173] 55.1 55.1 55.1 54.9 54.9 ...
## ..- attr(*, "format.stata")= chr "%9.0g"
## $ male : num [1:1173] 18.2 18 17.8 17.7 17.7 ...
## ..- attr(*, "format.stata")= chr "%9.0g"
## $ population: num [1:1173] 3.78 3.83 3.87 3.9 3.92 ...
## ..- attr(*, "format.stata")= chr "%9.0g"
## $ income : num [1:1173] 9563 9932 9877 9541 9548 ...
## ..- attr(*, "format.stata")= chr "%9.0g"
## $ density : num [1:1173] 0.0746 0.0756 0.0762 0.0768 0.0772 ...
## ..- attr(*, "format.stata")= chr "%9.0g"
## $ state : chr [1:1173] "Alabama" "Alabama" "Alabama" "Alabama" ...
## ..- attr(*, "format.stata")= chr "%20s"
## $ law : chr [1:1173] "no" "no" "no" "no" ...
## ..- attr(*, "format.stata")= chr "%9s"
## $ Etat : dbl+lbl [1:1173] 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## ..@ format.stata: chr "%20.0g"
## ..@ labels : Named num [1:51] 1 2 3 4 5 6 7 8 9 10 ...
## .. ..- attr(*, "names")= chr [1:51] "Alabama" "Alaska" "Arizona" "Arkansas" ...
## [1] 1173 14
## year violent murder robbery
## Min. :1977 Min. : 47.0 Min. : 0.200 Min. : 6.4
## 1st Qu.:1982 1st Qu.: 283.1 1st Qu.: 3.700 1st Qu.: 71.1
## Median :1988 Median : 443.0 Median : 6.400 Median : 124.1
## Mean :1988 Mean : 503.1 Mean : 7.665 Mean : 161.8
## 3rd Qu.:1994 3rd Qu.: 650.9 3rd Qu.: 9.800 3rd Qu.: 192.7
## Max. :1999 Max. :2921.8 Max. :80.600 Max. :1635.1
## prisoners afam cauc male
## Min. : 19.0 Min. : 0.2482 Min. :21.78 Min. :12.21
## 1st Qu.: 114.0 1st Qu.: 2.2022 1st Qu.:59.94 1st Qu.:14.65
## Median : 187.0 Median : 4.0262 Median :65.06 Median :15.90
## Mean : 226.6 Mean : 5.3362 Mean :62.95 Mean :16.08
## 3rd Qu.: 291.0 3rd Qu.: 6.8507 3rd Qu.:69.20 3rd Qu.:17.53
## Max. :1913.0 Max. :26.9796 Max. :76.53 Max. :22.35
## population income density state
## Min. : 0.4028 Min. : 8555 Min. :7.071e-04 Length:1173
## 1st Qu.: 1.1877 1st Qu.:11935 1st Qu.:3.191e-02 Class :character
## Median : 3.2713 Median :13402 Median :8.157e-02 Mode :character
## Mean : 4.8163 Mean :13725 Mean :3.520e-01
## 3rd Qu.: 5.6856 3rd Qu.:15271 3rd Qu.:1.777e-01
## Max. :33.1451 Max. :23647 Max. :1.110e+01
## law Etat
## Length:1173 Min. : 1
## Class :character 1st Qu.:13
## Mode :character Median :26
## Mean :26
## 3rd Qu.:39
## Max. :51
## $year
## [1] "numeric"
##
## $violent
## [1] "numeric"
##
## $murder
## [1] "numeric"
##
## $robbery
## [1] "numeric"
##
## $prisoners
## [1] "numeric"
##
## $afam
## [1] "numeric"
##
## $cauc
## [1] "numeric"
##
## $male
## [1] "numeric"
##
## $population
## [1] "numeric"
##
## $income
## [1] "numeric"
##
## $density
## [1] "numeric"
##
## $state
## [1] "character"
##
## $law
## [1] "character"
##
## $Etat
## [1] "haven_labelled" "vctrs_vctr" "double"
### Sélection d'une année de référence (1977)
data_1977 <- my_data %>% filter(year == 1977)
### Variables potentiellement log-normales : violent, murder, robbery, prisoners, income, density
par(mfrow = c(2, 3))
hist(data_1977$violent, main = "Violent", col = "lightblue")
hist(data_1977$murder, main = "Murder", col = "lightblue")
hist(data_1977$robbery, main = "Robbery", col = "lightblue")
hist(data_1977$prisoners, main = "Prisoners", col = "lightblue")
hist(data_1977$income, main = "Income", col = "lightblue")
hist(data_1977$density, main = "Density", col = "lightblue")### Création des variables log-transformées
my_data <- my_data %>%
mutate(
ln_violent = log(violent),
ln_murder = log(murder),
ln_robbery = log(robbery),
ln_prisoners = log(prisoners),
ln_income = log(income),
ln_density = log(density + 0.001) # +0.001 pour éviter log(0)
)
### Vérification après transformation
data_1977_trans <- my_data %>% filter(year == 1977)
par(mfrow = c(2, 3))
hist(data_1977_trans$ln_violent, main = "ln(Violent)", col = "lightgreen")
hist(data_1977_trans$ln_murder, main = "ln(Murder)", col = "lightgreen")
hist(data_1977_trans$ln_robbery, main = "ln(Robbery)", col = "lightgreen")
hist(data_1977_trans$ln_prisoners, main = "ln(Prisoners)", col = "lightgreen")
hist(data_1977_trans$ln_income, main = "ln(Income)", col = "lightgreen")
hist(data_1977_trans$ln_density, main = "ln(Density)", col = "lightgreen")
### Definition: Une série temporelle est un ensemble
d’observations d’un même phénomène, ordonnées dans le temps.
### Code 10 = Floride (d'après nos données)
florida_data <- my_data %>% filter(state == "Florida") %>% arrange(year)
head(florida_data)## # A tibble: 6 × 20
## year violent murder robbery prisoners afam cauc male population income
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1977 687. 10.2 188. 211 5.09 60.0 16.4 8.86 11524.
## 2 1978 766. 11 206 221 5.04 59.9 16.1 9.10 12103.
## 3 1979 834. 12.2 249. 239 5.02 60.1 16.0 9.43 12182.
## 4 1980 984. 14.5 356. 220 5.08 59.9 15.7 9.84 12149.
## 5 1981 965. 15 349. 208 5.14 59.9 15.7 10.2 12270.
## 6 1982 897. 13.5 298. 224 5.18 59.8 15.6 10.5 12166.
## # ℹ 10 more variables: density <dbl>, state <chr>, law <chr>, Etat <dbl+lbl>,
## # ln_violent <dbl>, ln_murder <dbl>, ln_robbery <dbl>, ln_prisoners <dbl>,
## # ln_income <dbl>, ln_density <dbl>
### Sélection des variables pour l'ACP
acp_vars <- florida_data %>%
select(ln_violent, ln_murder, ln_robbery, ln_prisoners,
afam, cauc, male, ln_income, ln_density)
# Nommer les lignes par l'année
rownames(acp_vars) <- florida_data$year## Warning: Setting row names on a tibble is deprecated.
### ACP
acp_florida <- PCA(acp_vars, scale.unit = TRUE, graph = FALSE)
### Résultats
fviz_eig(acp_florida, addlabels = TRUE)## Warning in geom_bar(stat = "identity", fill = barfill, color = barcolor, :
## Ignoring empty aesthetic: `width`.
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the ggpubr package.
## Please report the issue at <https://github.com/kassambara/ggpubr/issues>.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the factoextra package.
## Please report the issue at <https://github.com/kassambara/factoextra/issues>.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
### Trajectoire temporelle
fviz_pca_ind(acp_florida, repel = TRUE) +
geom_path(aes(color = florida_data$year), arrow = arrow(type = "closed"))### Évolution simple de la criminalité
ggplot(florida_data, aes(x = year, y = violent)) +
geom_line(color = "blue") +
geom_vline(xintercept = 1988, linetype = "dashed", color = "red") +
labs(title = "Évolution de la criminalité en Floride",
subtitle = "Ligne verticale = adoption loi CCW (1988)")
#### Interprétation :
### Préparation des données panel
panel_data <- my_data %>%
select(ln_violent, ln_murder, ln_robbery, ln_prisoners,
afam, cauc, male, ln_income, ln_density)
### Identifiants : état + année
rownames(panel_data) <- paste(my_data$state, my_data$year, sep = "_")## Warning: Setting row names on a tibble is deprecated.
### ACP
acp_panel <- PCA(panel_data, scale.unit = TRUE, graph = FALSE)
### Résultats
fviz_eig(acp_panel, addlabels = TRUE)## Warning in geom_bar(stat = "identity", fill = barfill, color = barcolor, :
## Ignoring empty aesthetic: `width`.
fviz_pca_ind(acp_panel,
geom = "point",
col.ind = as.factor(my_data$state),
palette = "Set1",
legend.title = "State")## Ignoring unknown labels:
## • fill : "State"
## • linetype : "State"
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Set1 is 9
## Returning the palette you asked for with that many colors
## Warning: Removed 966 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 42 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '26'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '26'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '27'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '27'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '28'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '28'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '29'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '29'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '30'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '30'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '31'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '31'
### Constat : Les points d’un même État sont regroupés dans l’espace
factoriel, indiquant des caractéristiques structurelles propres à chaque
État. Certains États (Californie, New York, District of Columbia) sont
éloignés des autres (forte criminalité).
fviz_pca_ind(acp_panel,
geom = "point",
col.ind = my_data$year,
gradient.cols = c("blue", "red"),
legend.title = "Year")## Ignoring unknown labels:
## • fill : "Year"
## • linetype : "Year"
## • shape : "Year"
### Constat : Les années se superposent largement, mais on observe une
tendance temporelle : les années récentes (1990-1999) ont tendance à se
situer dans une certaine région du plan, suggérant une évolution commune
de la criminalité dans tous les États.
fviz_pca_ind(acp_panel, axes = c(1, 3),
geom = "point",
col.ind = as.factor(my_data$state),
palette = "Set1")## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Set1 is 9
## Returning the palette you asked for with that many colors
## Warning: Removed 966 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 42 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '26'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '26'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '27'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '27'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '28'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '28'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '29'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '29'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '30'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '30'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '31'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '31'
### Constat : L’axe 3 capture des facteurs spécifiques comme le taux
d’incarcération ou la composition démographique. Les États avec des
politiques pénales très différentes se séparent sur cet axe.
### Coordonnées des individus
coord_ind <- as.data.frame(acp_panel$ind$coord)
coord_ind$law <- my_data$law
### Moyennes par groupe
coord_law_0 <- coord_ind %>% filter(law == "no") %>% summarise(across(starts_with("Dim"), mean))
coord_law_1 <- coord_ind %>% filter(law == "yes") %>% summarise(across(starts_with("Dim"), mean))
print("Coordonnées moyennes pour law = 0 :")## [1] "Coordonnées moyennes pour law = 0 :"
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## 1 0.2873594 0.2245298 0.08597448 0.1008676 0.04038231
## [1] "Coordonnées moyennes pour law = 1 :"
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## 1 -0.8953514 -0.6995875 -0.2678784 -0.3142823 -0.1258228
### Graphique avec points supplémentaires
fviz_pca_ind(acp_panel, repel = TRUE) +
geom_point(data = coord_law_0, aes(x = Dim.1, y = Dim.2),
color = "red", size = 5, shape = 18) +
geom_point(data = coord_law_1, aes(x = Dim.1, y = Dim.2),
color = "green", size = 5, shape = 18) +
annotate("text", x = coord_law_0$Dim.1, y = coord_law_0$Dim.2,
label = "law = 0", vjust = -1, color = "red") +
annotate("text", x = coord_law_1$Dim.1, y = coord_law_1$Dim.2,
label = "law = 1", vjust = -1, color = "green")
### Constat : Les points moyens pour law = 0 et law = 1 sont séparés,
indiquant que les États avec loi CCW ont des caractéristiques
différentes en moyenne.
### Données avec année
panel_data_year <- my_data %>%
select(ln_violent, ln_murder, ln_robbery, ln_prisoners,
afam, cauc, male, ln_income, ln_density, year)
### ACP avec année en variable supplémentaire
acp_panel_sup <- PCA(panel_data_year,
scale.unit = TRUE,
quanti.sup = 10, # year est la 10e colonne
graph = FALSE)
### Cercle des corrélations
fviz_pca_var(acp_panel_sup,
col.var = "black",
col.quanti.sup = "blue",
repel = TRUE)
### Interprétation : Le vecteur “year” indique la direction de
l’évolution temporelle. Sa position près de l’axe 1 signifie que les
variables de criminalité évoluent dans le temps principalement selon cet
axe.
state_means <- my_data %>%
group_by(state) %>%
summarise(
ln_violent = mean(ln_violent, na.rm = TRUE),
ln_murder = mean(ln_murder, na.rm = TRUE),
ln_robbery = mean(ln_robbery, na.rm = TRUE),
ln_prisoners = mean(ln_prisoners, na.rm = TRUE),
afam = mean(afam, na.rm = TRUE),
cauc = mean(cauc, na.rm = TRUE),
male = mean(male, na.rm = TRUE),
ln_income = mean(ln_income, na.rm = TRUE),
ln_density = mean(ln_density, na.rm = TRUE),
law_proportion = mean(as.numeric(law == "yes"), na.rm = TRUE)
)
head(state_means)## # A tibble: 6 × 11
## state ln_violent ln_murder ln_robbery ln_prisoners afam cauc male ln_income
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Alab… 6.30 2.37 4.85 5.60 8.74 55.1 16.1 9.34
## 2 Alas… 6.38 2.24 4.60 5.51 8.38 61.4 18.2 9.78
## 3 Ariz… 6.40 2.13 5.06 5.63 3.87 65.0 16.3 9.46
## 4 Arka… 6.06 2.18 4.53 5.39 5.56 60.0 15.5 9.29
## 5 Cali… 6.77 2.38 5.78 5.35 6.75 61.1 16.8 9.66
## 6 Colo… 6.17 1.74 4.76 5.08 2.61 71.2 16.4 9.60
## # ℹ 2 more variables: ln_density <dbl>, law_proportion <dbl>
### Données actives : moyennes par État (sans state, year, law_proportion)
active_data <- state_means %>% select(-state, -year, -law_proportion)
### ACP sur les moyennes
acp_states <- PCA(active_data, scale.unit = TRUE, graph = FALSE)
### Projection des observations originales
all_obs <- my_data %>%
select(ln_violent, ln_murder, ln_robbery, ln_prisoners,
afam, cauc, male, ln_income, ln_density)
coord_sup <- predict(acp_states, newdata = all_obs)
### Ajout des coordonnées
obs_with_coords <- my_data %>%
select(state, year) %>%
mutate(Dim1_sup = coord_sup$coord[, 1],
Dim2_sup = coord_sup$coord[, 2])### Graphique ACP sur moyennes (États)
p1 <- fviz_pca_ind(acp_states,
geom = "point",
col.ind = "blue",
repel = TRUE,
title = "ACP sur les moyennes par État")
### Graphique avec projection des observations annuelles
p2 <- ggplot(obs_with_coords, aes(x = Dim1_sup, y = Dim2_sup, color = as.factor(year))) +
geom_point(alpha = 0.6) +
labs(title = "Projection des observations annuelles",
x = "Dim1", y = "Dim2", color = "Year") +
theme_minimal()##
## Attachement du package : 'gridExtra'
## L'objet suivant est masqué depuis 'package:dplyr':
##
## combine