library(ggplot2)
library(titanic)
data("titanic_train")
titanic_data <- titanic_train
head(titanic_data)
## Calculates principal components with given formula on the data.
calc_pc <- function(formula, data) {
print('Starting PCA')
prcomp(formula, data)
}
predict_pc <- function(prcompValues, newdata, ...) {
print('Predicting PC values')
predict(prcompValues, newdata, ...)
}
show_pc <- function(data, formula) {
print('Starting show PC')
pca <- calc_pc(formula, data)
print(pca)
print(pca_plot(pca, 'PC1', 'PC2'))
print(pca_plot(pca, 'PC3', 'PC4'))
pca
}
pca_plot <- function(pca, byX = 'PC1', byY = 'PC2') {
loadings <- data.frame(pca$rotation, .names = row.names(pca$rotation))
theta <- seq(0, 2*pi, length.out = 100)
circle <- data.frame(x = cos(theta), y = sin(theta))
columnNameWithOffsetX <- paste(byX, '+ 0.2')
columnNameWithOffsetY <- paste(byY, '- 0.1')
p <- ggplot(circle, aes(x, y)) +
geom_path() +
geom_text(data=loadings,
mapping=aes_string(x = columnNameWithOffsetX, y = columnNameWithOffsetY,
label = '.names', color = '.names')) +
geom_jitter(data = loadings, mapping = aes_string(x = byX, y = byY, color = '.names'), shape = 2) +
coord_fixed(ratio = 1) +
labs(x = byX, y = byY)
}
formula <- ~ Age + Fare + Pclass + SibSp + Parch
LS0tCnRpdGxlOiAiUENBIG9uIFRpdGFuaWMgRGF0YXNldCIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKCmBgYHtyfQpsaWJyYXJ5KGdncGxvdDIpCmxpYnJhcnkodGl0YW5pYykgIAoKCmRhdGEoInRpdGFuaWNfdHJhaW4iKQp0aXRhbmljX2RhdGEgPC0gdGl0YW5pY190cmFpbgoKaGVhZCh0aXRhbmljX2RhdGEpCgojIyBDYWxjdWxhdGVzIHByaW5jaXBhbCBjb21wb25lbnRzIHdpdGggZ2l2ZW4gZm9ybXVsYSBvbiB0aGUgZGF0YS4KY2FsY19wYyA8LSBmdW5jdGlvbihmb3JtdWxhLCBkYXRhKSB7CiAgcHJpbnQoJ1N0YXJ0aW5nIFBDQScpCiAgcHJjb21wKGZvcm11bGEsIGRhdGEpCn0KCgpgYGAKCgpgYGAge3J9CnByZWRpY3RfcGMgPC0gZnVuY3Rpb24ocHJjb21wVmFsdWVzLCBuZXdkYXRhLCAuLi4pIHsKICBwcmludCgnUHJlZGljdGluZyBQQyB2YWx1ZXMnKQogIHByZWRpY3QocHJjb21wVmFsdWVzLCBuZXdkYXRhLCAuLi4pCn0KCmBgYAoKCgpgYGB7cn0Kc2hvd19wYyA8LSBmdW5jdGlvbihkYXRhLCBmb3JtdWxhKSB7CiAgcHJpbnQoJ1N0YXJ0aW5nIHNob3cgUEMnKQogIHBjYSA8LSBjYWxjX3BjKGZvcm11bGEsIGRhdGEpCgogIHByaW50KHBjYSkKICBwcmludChwY2FfcGxvdChwY2EsICdQQzEnLCAnUEMyJykpCiAgcHJpbnQocGNhX3Bsb3QocGNhLCAnUEMzJywgJ1BDNCcpKQogIHBjYQp9CgpwY2FfcGxvdCA8LSBmdW5jdGlvbihwY2EsIGJ5WCA9ICdQQzEnLCBieVkgPSAnUEMyJykgewogIGxvYWRpbmdzIDwtIGRhdGEuZnJhbWUocGNhJHJvdGF0aW9uLCAubmFtZXMgPSByb3cubmFtZXMocGNhJHJvdGF0aW9uKSkKCiAgdGhldGEgPC0gc2VxKDAsIDIqcGksIGxlbmd0aC5vdXQgPSAxMDApCiAgY2lyY2xlIDwtIGRhdGEuZnJhbWUoeCA9IGNvcyh0aGV0YSksIHkgPSBzaW4odGhldGEpKQoKICBjb2x1bW5OYW1lV2l0aE9mZnNldFggPC0gcGFzdGUoYnlYLCAnKyAwLjInKQogIGNvbHVtbk5hbWVXaXRoT2Zmc2V0WSA8LSBwYXN0ZShieVksICctIDAuMScpCiAgcCA8LSBnZ3Bsb3QoY2lyY2xlLCBhZXMoeCwgeSkpICsKICAgICAgZ2VvbV9wYXRoKCkgKwogICAgICBnZW9tX3RleHQoZGF0YT1sb2FkaW5ncywKICAgICAgICAgIG1hcHBpbmc9YWVzX3N0cmluZyh4ID0gY29sdW1uTmFtZVdpdGhPZmZzZXRYLCB5ID0gY29sdW1uTmFtZVdpdGhPZmZzZXRZLAogICAgICAgICAgICAgIGxhYmVsID0gJy5uYW1lcycsIGNvbG9yID0gJy5uYW1lcycpKSArCiAgICAgIGdlb21faml0dGVyKGRhdGEgPSBsb2FkaW5ncywgbWFwcGluZyA9IGFlc19zdHJpbmcoeCA9IGJ5WCwgeSA9IGJ5WSwgY29sb3IgPSAnLm5hbWVzJyksIHNoYXBlID0gMikgKwogICAgICBjb29yZF9maXhlZChyYXRpbyA9IDEpICsKICAgICAgbGFicyh4ID0gYnlYLCB5ID0gYnlZKQp9CgoKZm9ybXVsYSA8LSB+IEFnZSArIEZhcmUgKyBQY2xhc3MgKyBTaWJTcCArIFBhcmNoCgoKCmBgYAoKCiMgQ2FsbCBzaG93X3BjIHdpdGggdGhlIHNwZWNpZmllZCBmb3JtdWxhCmBgYHtyfQoKc2hvd19wYyh0aXRhbmljX2RhdGEsIGZvcm11bGEpCgpgYGAKCg==