# Per-country indicator vectors. Every vector holds 15 values, listed in the
# same order as `country`, so position i in each vector refers to country i.
country <- c(
  "Argentina", "Cameroon", "Chile", "Colombia", "Cuba",
  "Ghana", "Guyana", "Latvia", "Malaysia", "Mali",
  "Mongolia", "Morocco", "Senegal", "Serbia", "Thailand"
)
life.exp <- c(
  75.592, 53.288, 78.936, 73.213, 78.552,
  60.375, 65.560, 71.736, 74.306, 53.358,
  66.564, 70.012, 62.653, 73.532, 73.627
)
education <- c(
  16.841, 7.137, 17.356, 15.589, 44.173,
  11.365, 8.220, 31.364, 14.621, 14.979,
  15.121, 16.930, 17.703, 61.638, 24.351
)
infant.mor <- c(
  13.500, 67.700, 7.800, 16.500, 4.800,
  52.500, 31.200, 8.500, 7.100, 85.500,
  26.400, 29.900, 48.700, 6.900, 12.700
)
health <- c(
  9.525, 4.915, 8.400, 7.600, 12.100,
  5.000, 6.200, 6.600, 4.600, 5.500,
  5.700, 5.200, 5.700, 10.500, 4.200
)
health.usd <- c(
  734.093, 60.412, 801.915, 391.859, 672.204,
  54.471, 166.718, 756.401, 316.478, 33.089,
  96.537, 151.513, 59.658, 576.494, 160.136
)

# Combine the vectors into a single data frame (one row per country) and
# display it.
mydata <- data.frame(
  country = country,
  life.exp = life.exp,
  infant.mor = infant.mor,
  education = education,
  health = health,
  health.usd = health.usd
)
mydata
## country life.exp infant.mor education health health.usd
## 1 Argentina 75.592 13.5 16.841 9.525 734.093
## 2 Cameroon 53.288 67.7 7.137 4.915 60.412
## 3 Chile 78.936 7.8 17.356 8.400 801.915
## 4 Colombia 73.213 16.5 15.589 7.600 391.859
## 5 Cuba 78.552 4.8 44.173 12.100 672.204
## 6 Ghana 60.375 52.5 11.365 5.000 54.471
## 7 Guyana 65.560 31.2 8.220 6.200 166.718
## 8 Latvia 71.736 8.5 31.364 6.600 756.401
## 9 Malaysia 74.306 7.1 14.621 4.600 316.478
## 10 Mali 53.358 85.5 14.979 5.500 33.089
## 11 Mongolia 66.564 26.4 15.121 5.700 96.537
## 12 Morocco 70.012 29.9 16.930 5.200 151.513
## 13 Senegal 62.653 48.7 17.703 5.700 59.658
## 14 Serbia 73.532 6.9 61.638 10.500 576.494
## 15 Thailand 73.627 12.7 24.351 4.200 160.136
# Compute the correlation coefficient between life expectancy and infant
# mortality by hand: the covariance of the two vectors divided by the product
# of their standard deviations.
corr.le.im <- cov(life.exp, infant.mor) /
  (sd(life.exp) * sd(infant.mor))
print(paste0("Corelation coeficient = ", corr.le.im))
## [1] "Corelation coeficient = -0.96073302571571"
# Scatter plot of life.exp against infant.mor, with a fitted linear regression
# line overlaid to visualise the correlation between the two features.
library(ggplot2)
ggplot(mydata, aes(life.exp, infant.mor)) +
  geom_point(shape = 1, size = 4, colour = "red", fill = 3) +
  geom_smooth(formula = y ~ x, method = "lm")
# Correlation panel for pairs(): writes the correlation of the two panel
# variables (rounded to 3 digits, sign kept) in the centre of the panel, with
# a text size that grows with the strength of the correlation.
#
# Args:
#   x, y: numeric vectors for the current panel, as supplied by pairs().
#   ...:  further arguments forwarded by pairs(); accepted so the call does
#         not fail with "unused argument", but otherwise ignored.
panel.cor <- function(x, y, ...) {
  # Save the panel's user coordinates and restore them on exit. The value has
  # to be passed by name (usr = ...); an unnamed numeric vector is not treated
  # by par() as a setting, so the coordinates would not be restored.
  usr <- par("usr")
  on.exit(par(usr = usr), add = TRUE)

  # Switch to unit coordinates so (0.5, 0.5) is the centre of the panel.
  par(usr = c(0, 1, 0, 1))

  r <- round(cor(x, y), digits = 3)
  txt <- as.character(r)

  # Base expansion chosen so the label spans roughly 80% of the panel width.
  cex.cor <- 0.8 / strwidth(txt)

  # Scale by |r| rather than r: a negative multiplier yields a non-positive
  # cex, which is not a valid expansion factor, so negative correlations
  # would lose the size scaling.
  text(0.5, 0.5, txt, cex = cex.cor * abs(r))
}
# Scatter-plot matrix of every numeric indicator (the country column is
# dropped); the upper panels are drawn by panel.cor instead of the default.
pairs(subset(mydata, select = -country), upper.panel = panel.cor)
First, look at HEALTH.USD: it has high positive correlation values with LIFE_EXPECTANCY (0.776) and HEALTH (0.76), and a high negative correlation value with INFANT_MORTALITY. This implies that people in countries that spend more money on health have better health, longer lives, and a lower infant death rate.
Secondly, the correlation value of 0.689 between EDUCATION and HEALTH also tells us that health and education have a strong relationship (but we cannot talk about causality here).
All other pairs have relatively high mutual correlation values (all absolute values are greater than 0.5), which also tells us there are notable correlations between them.