Pour faire des crêpes nous avons besoin de :
Les étapes sont :
Pima <- read.csv2(here::here("data/Pima.csv"))
summary(Pima)
## pregnant glucose pressure triceps
## Min. : 0.000 Min. : 56.0 Min. : 24.00 Min. : 7.00
## 1st Qu.: 1.000 1st Qu.: 99.0 1st Qu.: 62.00 1st Qu.:21.00
## Median : 2.000 Median :119.0 Median : 70.00 Median :29.00
## Mean : 3.301 Mean :122.6 Mean : 70.66 Mean :29.15
## 3rd Qu.: 5.000 3rd Qu.:143.0 3rd Qu.: 78.00 3rd Qu.:37.00
## Max. :17.000 Max. :198.0 Max. :110.00 Max. :63.00
## insulin mass pedigree age
## Min. : 14.00 Min. :18.20 Min. :0.0850 Min. :21.00
## 1st Qu.: 76.75 1st Qu.:28.40 1st Qu.:0.2697 1st Qu.:23.00
## Median :125.50 Median :33.20 Median :0.4495 Median :27.00
## Mean :156.06 Mean :33.09 Mean :0.5230 Mean :30.86
## 3rd Qu.:190.00 3rd Qu.:37.10 3rd Qu.:0.6870 3rd Qu.:36.00
## Max. :846.00 Max. :67.10 Max. :2.4200 Max. :81.00
## diabetes
## Length:392
## Class :character
## Mode :character
##
##
##
by(Pima, Pima$diabetes, summary)
## Pima$diabetes: neg
## pregnant glucose pressure triceps
## Min. : 0.000 Min. : 56.0 Min. : 24.00 Min. : 7.00
## 1st Qu.: 1.000 1st Qu.: 94.0 1st Qu.: 60.00 1st Qu.:18.25
## Median : 2.000 Median :107.5 Median : 70.00 Median :27.00
## Mean : 2.721 Mean :111.4 Mean : 68.97 Mean :27.25
## 3rd Qu.: 4.000 3rd Qu.:126.0 3rd Qu.: 76.00 3rd Qu.:34.00
## Max. :13.000 Max. :197.0 Max. :106.00 Max. :60.00
## insulin mass pedigree age
## Min. : 15.0 Min. :18.20 Min. :0.0850 Min. :21.00
## 1st Qu.: 66.0 1st Qu.:26.12 1st Qu.:0.2610 1st Qu.:22.00
## Median :105.0 Median :31.25 Median :0.4135 Median :25.00
## Mean :130.9 Mean :31.75 Mean :0.4722 Mean :28.35
## 3rd Qu.:163.8 3rd Qu.:36.10 3rd Qu.:0.6242 3rd Qu.:30.00
## Max. :744.0 Max. :57.30 Max. :2.3290 Max. :81.00
## diabetes
## Length:262
## Class :character
## Mode :character
##
##
##
## ------------------------------------------------------------
## Pima$diabetes: pos
## pregnant glucose pressure triceps
## Min. : 0.000 Min. : 78.0 Min. : 30.00 Min. : 7.00
## 1st Qu.: 1.000 1st Qu.:124.2 1st Qu.: 66.50 1st Qu.:26.00
## Median : 3.000 Median :144.5 Median : 74.00 Median :33.00
## Mean : 4.469 Mean :145.2 Mean : 74.08 Mean :32.96
## 3rd Qu.: 7.000 3rd Qu.:171.8 3rd Qu.: 82.00 3rd Qu.:39.75
## Max. :17.000 Max. :198.0 Max. :110.00 Max. :63.00
## insulin mass pedigree age
## Min. : 14.0 Min. :22.90 Min. :0.1270 Min. :21.00
## 1st Qu.:127.5 1st Qu.:31.60 1st Qu.:0.3297 1st Qu.:27.25
## Median :169.5 Median :34.60 Median :0.5460 Median :33.00
## Mean :206.8 Mean :35.78 Mean :0.6256 Mean :35.94
## 3rd Qu.:239.2 3rd Qu.:38.35 3rd Qu.:0.7865 3rd Qu.:43.00
## Max. :846.0 Max. :67.10 Max. :2.4200 Max. :60.00
## diabetes
## Length:130
## Class :character
## Mode :character
##
##
##
library(ggplot2)
ggplot(Pima, aes(y=glucose, x=diabetes, fill=diabetes, colour=diabetes))+
geom_jitter(height=0, width=0.25)+
geom_boxplot(alpha=0.5, outlier.alpha=0, notch=TRUE)

library(ggplot2)
ggplot(Pima, aes(y=insulin, x=diabetes, fill=diabetes, colour=diabetes))+
geom_jitter(height=0, width=0.25)+
geom_boxplot(alpha=0.5, outlier.alpha=0, notch=TRUE)

\[ \sigma^{2}=\frac{1}{n} \sum_{i=1}^{n}\left(x_{i}-\mu\right)^{2} \]
Blablabla 1
Tralala 2
La sélection de variables est un sujet toujours d’actualité [@Heinze2017].