Reading data
# Load the osteoporosis dataset from a local copy of the CSV file.
# The same file is published at
# https://github.com/tuanvnguyen/Regression-Book/blob/master/Osteo%20data.csv
df <- read.csv(
  file = "~/Dropbox/_Books and websites/Regression analysis (sach Viet)/Datasets for book/Osteo data.csv"
)
# Preview the first six rows to check that the columns were read as expected.
head(df, n = 6L)
## id lean.mass fat.mass pcfat age height weight bmi osta osteo osteo.group
## 1 1 27.98 16.49 37.09 76 156.0 45.0 18.5 6.2 2 Osteoporosis
## 2 8 29.02 27.54 48.70 54 153.0 56.0 23.9 -0.4 1 Osteopenia
## 3 21 31.72 20.65 39.43 56 158.2 51.5 20.6 0.9 1 Osteopenia
## 4 38 35.96 21.96 37.92 54 154.0 51.0 21.5 0.6 1 Osteopenia
## 5 39 35.00 26.29 42.89 60 159.5 60.0 23.6 0.0 1 Osteopenia
## 6 53 32.58 19.82 37.82 53 156.0 51.0 21.0 0.4 1 Osteopenia
# Subset df to the seven continuous columns used by the correlation plots.
df1 <- df[c(
  "age", "height", "weight", "bmi",
  "lean.mass", "fat.mass", "pcfat"
)]
Simple correlation matrix plot using package ‘psych’
# Draw a pairs plot of every column in df1 with pairs.panels() from 'psych'.
# The call is namespace-qualified so it also works when 'psych' has not been
# attached with library(); the package still has to be installed.
psych::pairs.panels(df1)

Using package ‘corrplot’
# Correlation matrix of the columns of df1, computed with cor() defaults.
cor.matrix <- cor(df1)

# Plot the same matrix three times, changing only the `method` glyph.
# Calls are namespace-qualified so they work without library(corrplot).
corrplot::corrplot(cor.matrix, method = "circle")

corrplot::corrplot(cor.matrix, method = "ellipse")

corrplot::corrplot(cor.matrix, method = "pie")

# Mixed display; lower.col and number.cex are passed exactly as before.
corrplot::corrplot.mixed(cor.matrix, lower.col = "black", number.cex = 0.7)

Using function ggpairs from package ‘GGally’
# ggpairs() takes the data frame directly, so no correlation matrix has to be
# computed first. Calls are namespace-qualified (like ggplot2::aes below) so
# they work without library(GGally).
GGally::ggpairs(df1)

# Grouped version: keep osteo.group next to the continuous columns and map it
# to the colour aesthetic.
df2 <- df[c(
  "osteo.group",
  "age", "height", "weight", "bmi",
  "lean.mass", "fat.mass", "pcfat"
)]
# NOTE(review): this call prints `stat_bin()` messages about the default
# `bins = 30`; presumably they come from histogram panels -- consider setting
# the number of bins explicitly.
GGally::ggpairs(df2, mapping = ggplot2::aes(colour = osteo.group))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
