# Load the grades data set (成績データ) from the remote CSV and preview it,
# first on the console and then as an interactive table.
d <- read.csv("https://stats.dip.jp/01_ds/data/seiseki_jp.csv")
head(d)
library(DT)
datatable(d, caption = "成績データ")

# Principal component analysis on every column except the first, with each
# variable standardised to unit variance.
# The argument is written as `scale.` (the parameter name prcomp actually
# declares) instead of relying on partial matching of `scale`, and `TRUE` is
# used because `T` is an ordinary variable that can be reassigned.
# NOTE(review): the dropped first column is presumably an identifier column;
# confirm against the CSV.
r <- prcomp(d[, -1], scale. = TRUE)
summary(r)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.4508 1.0479 0.70060 0.63795 0.54796 0.47059 0.42754
## Proportion of Variance 0.6674 0.1220 0.05454 0.04522 0.03336 0.02461 0.02031
## Cumulative Proportion 0.6674 0.7894 0.84394 0.88916 0.92252 0.94713 0.96744
## PC8 PC9
## Standard deviation 0.41376 0.34909
## Proportion of Variance 0.01902 0.01354
## Cumulative Proportion 0.98646 1.00000
# Print the eigenvalues and their shares with a single significant digit.
# The previous `digits` setting is saved and put back afterwards so that the
# reduced precision does not leak into every later numeric print-out.
old_opts <- options(digits = 1)
# Variance of each principal component (squared standard deviation).
(variance <- r$sdev^2)
## [1] 6.0 1.1 0.5 0.4 0.3 0.2 0.2 0.2 0.1
# Share of the total variance carried by each component.
(proportion_variance <- variance / sum(variance))
## [1] 0.67 0.12 0.05 0.05 0.03 0.02 0.02 0.02 0.01
options(old_opts)

# Rotation matrix of the PCA (one column of loadings per component), shown
# rounded to two decimals.
evec <- r$rotation
datatable(round(evec, 2))

# Use the 学籍番号 column of the data as row names of the score matrix, then
# show the scores rounded to two decimals.
rownames(r$x) <- d$学籍番号
datatable(round(r$x, 2))
library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

# Scree plot of the PCA result with a value label on each bar.
# `TRUE` is spelled out throughout: `T` is a plain variable and can be
# overwritten, which would silently flip these switches.
fviz_screeplot(r, addlabels = TRUE)

# Variable contributions to the first and second component (top 5 each).
fviz_contrib(r, choice = "var", axes = 1, top = 5)

fviz_contrib(r, choice = "var", axes = 2, top = 5)

library("corrplot")
## corrplot 0.92 loaded

# Correlation between the original variables and the components, drawn as a
# correlation plot with the coefficients printed in gray.
var <- get_pca_var(r)
corrplot(var$cor, is.corr = TRUE, addCoef.col = "gray")

# Variable plot coloured by contribution; `repel` asks for non-overlapping
# text labels.
fviz_pca_var(r,
  col.var = "contrib",
  repel = TRUE
)

# Biplot of individuals and variables, individuals coloured by contribution.
fviz_pca_biplot(r, col.ind = "contrib", repel = TRUE)

library(DT)

# Read the handwritten-digit table from the remote CSV and show it as an
# interactive table.
d0 <- read.csv("https://stats.dip.jp/01_ds/data/hand_writing_numbers0-9.csv")
datatable(d0)

# Keep the `number` column as a separate vector, and keep everything except
# the first column as the data used for plotting and PCA.
# NOTE(review): this assumes the first column is the `number` column; confirm.
number <- d0[["number"]]
d <- d0[, -1]
#' Draw a range of rows of `img` as small grayscale images.
#'
#' Each selected row is reshaped row-wise into a matrix with 8 rows, divided
#' by 16 and painted with `rasterImage()` into its own panel of a 5 x 5 grid.
#' Every panel is titled "Fig. <i - 1>", where `i` is the row index.
#'
#' The graphical parameters changed here (`mfrow`, `mar`, `cex.main`) are
#' restored when the function exits, so later base-graphics plots are not
#' squeezed into the 5 x 5 layout.
#'
#' @param img Data frame or matrix with one image per row. Each row is
#'   expected to hold 64 values (8 x 8) in the range 0..16, since
#'   `rasterImage()` needs intensities in [0, 1] after the division by 16.
#' @param i.fr Index of the first row to draw.
#' @param i.to Index of the last row to draw.
#' @return `NULL`, invisibly; called for its plotting side effect.
draw.images <- function(img, i.fr, i.to) {
  DX <- 8    # panel width in plot units (image columns)
  DY <- 8    # panel height in plot units (image rows)
  BIT <- 16  # divisor that maps the raw pixel values into [0, 1]

  # Fail early with a readable message instead of an obscure error from
  # matrix()/rasterImage() when an index points outside the table.
  n_img <- nrow(img)
  if (any(c(i.fr, i.to) < 1) || any(c(i.fr, i.to) > n_img)) {
    stop("i.fr and i.to must lie between 1 and nrow(img)", call. = FALSE)
  }

  old_par <- par(
    mfrow = c(5, 5),
    mar = c(0, 0, 1, 0) + 0.1,
    cex.main = 0.9
  )
  on.exit(par(old_par), add = TRUE)

  for (i in i.fr:i.to) {
    # Empty, axis-less canvas that only carries the panel title.
    plot(NA,
      type = "n", axes = FALSE,
      xlim = c(0, DX),
      ylim = c(0, DY),
      xlab = "",
      ylab = "",
      main = paste("Fig.", i - 1)
    )
    m <- matrix(unlist(img[i, ]) / BIT, nrow = DY, byrow = TRUE)
    rasterImage(m, 0, 0, DX, DY)
  }

  invisible(NULL)
}
# Draw the first 20 rows of the digit data as images.
draw.images(img = d, i.fr = 1, i.to = 20)

# PCA on the unscaled data, keeping only the first two components
# (`rank. = 2`).
r <- prcomp(d, rank. = 2)

# `TRUE` is spelled out below: `T` is a plain variable and can be overwritten.
fviz_screeplot(r, addlabels = TRUE)

# Variable contributions to the first and second component (top 5 each).
fviz_contrib(r, choice = "var", axes = 1, top = 5)

fviz_contrib(r, choice = "var", axes = 2, top = 5)

# Variable plot coloured by contribution.
fviz_pca_var(r,
  col.var = "contrib",
  repel = TRUE
)

# Individuals plot grouped by `number`, with a 95% ellipse per group.
# NOTE(review): factoextra documents `label` as a selector such as "all",
# "ind", "var" or "none"; passing the digit vector probably just suppresses
# the point labels. Confirm, and consider `label = "none"` if that is the
# intent.
fviz_pca_ind(r,
  label = d0$number,
  habillage = number,
  addEllipses = TRUE,
  ellipse.level = 0.95
)

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
# Load the handwritten-digit table and keep every column except "number"
# as the feature matrix.
d0 = pd.read_csv("https://stats.dip.jp/01_ds/data/hand_writing_numbers0-9.csv")
d1 = d0.loc[:, d0.columns != "number"]

# Project the features onto two principal components.
pca = PCA(2)
r = pca.fit_transform(d1)
d = pd.DataFrame(data=r, columns=['pc1', 'pc2'])

# Scatter plot of the two component scores, coloured by the "number" column
# with a 10-entry 'Spectral' colormap.
# plt.get_cmap is used instead of plt.cm.get_cmap: the latter was deprecated
# in Matplotlib 3.7 and removed in 3.9, so the original call fails there.
plt.scatter(d["pc1"], d["pc2"],
            c=d0["number"],
            edgecolor='none', alpha=0.5,
            cmap=plt.get_cmap('Spectral', 10))
plt.xlabel('第1主成分')
plt.ylabel('第2主成分')
plt.colorbar()
## <matplotlib.colorbar.Colorbar object at 0x0000023471701670>

plt.show()

# Load the United Nations data set and preview it.
# NOTE(review): this is an absolute path into one user's Downloads folder, so
# the script only runs on that machine. Point it at a shared location or a
# project-relative path once the canonical source of UN_jp.csv is known.
d <- read.csv("C:/Users/naruk/Downloads/UN_jp.csv")
head(d)
library(DT)
datatable(d, caption = "United Nations")

# PCA on columns 4 through the last, standardised to unit variance.
# `scale. = TRUE` names prcomp's real parameter instead of relying on partial
# matching of `scale`, and avoids the reassignable shorthand `T`.
r <- prcomp(d[, 4:ncol(d)], scale. = TRUE)
summary(r)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5
## Standard deviation 1.901 0.855 0.6381 0.4287 0.2497
## Proportion of Variance 0.723 0.146 0.0814 0.0368 0.0125
## Cumulative Proportion 0.723 0.869 0.9508 0.9875 1.0000
# Use the 国名 column of the data as row names of the score matrix, then show
# the scores rounded to two decimals.
rownames(r$x) <- d$国名
datatable(round(r$x, 2))

library(factoextra)

# Scree plot with a value label on each bar.
# `TRUE` is spelled out throughout: `T` is a plain variable and can be
# overwritten, which would silently flip these switches.
fviz_screeplot(r, addlabels = TRUE)

# Variable contributions to the first and second component (top 5 each).
fviz_contrib(r, choice = "var", axes = 1, top = 5)

fviz_contrib(r, choice = "var", axes = 2, top = 5)

library("corrplot")

# Correlation between the original variables and the components, drawn as a
# correlation plot with the coefficients printed in gray.
var <- get_pca_var(r)
corrplot(var$cor, is.corr = TRUE, addCoef.col = "gray")

# Variable plot coloured by contribution.
fviz_pca_var(r,
  col.var = "contrib",
  repel = TRUE
)

# Biplot of individuals and variables, individuals coloured by contribution.
fviz_pca_biplot(r, col.ind = "contrib", repel = TRUE)
