## KIDSDRIV AGE HOMEKIDS YOJ INCOME HOME_VAL MVR_PTS REPEAT5
## 1 0 60 0 11 67349 0 3 1
## 2 0 43 0 11 91449 257252 0 0
## 3 0 48 0 11 52881 0 2 0
## 4 0 35 1 10 16039 124191 3 1
## 7 0 34 1 12 125301 0 0 0
## 9 1 40 1 11 50815 0 2 1
Komentaras
# Principal component analysis of `insurance_data_cor`.
# `scale. = TRUE` rescales every column to unit variance before the
# decomposition. The argument is written out in full: `scale = TRUE` only
# reaches `prcomp()` through partial argument matching of `scale.`.
# NOTE(review): the printed data shows REPEAT5 among the 8 input columns, and
# the later plots colour points by REPEAT5 -- confirm that including it in the
# PCA input is intended.
pca <- prcomp(insurance_data_cor, scale. = TRUE)

# Scree plot of the variance explained by each component, with value labels.
fviz_screeplot(pca, addlabels = TRUE, choice = "variance")

# Per-component standard deviation, proportion and cumulative proportion of
# variance.
summary(pca)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 1.4528 1.2341 1.1828 0.9368 0.88535 0.7222 0.65350
## Proportion of Variance 0.2638 0.1904 0.1749 0.1097 0.09798 0.0652 0.05338
## Cumulative Proportion 0.2638 0.4542 0.6291 0.7388 0.83675 0.9020 0.95534
## PC8
## Standard deviation 0.59775
## Proportion of Variance 0.04466
## Cumulative Proportion 1.00000
# Keep the factoextra PCA result object for later inspection.
pcaDat <- get_pca(pca)

# Biplot of individuals and variables; only the variables are labelled, and
# `repel = TRUE` asks for non-overlapping label placement.
fviz_pca_biplot(pca, repel = TRUE, label = "var")

# Variable plot. The original call ended with a stray trailing comma
# (`repel = TRUE,)`), which passes an empty argument; it is removed here.
fviz_pca_var(pca, repel = TRUE)
# Variable plot with arrows coloured by each variable's contribution to the
# displayed components, using a three-colour gradient.
fviz_pca_var(
  pca,
  repel = TRUE, # keep variable labels from overlapping
  col.var = "contrib",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07")
)
# Biplot with fixed colours: blue for variables, grey for individuals.
fviz_pca_biplot(
  pca,
  col.ind = "#696969", # individuals
  col.var = "#2E9FDF", # variables
  repel = TRUE
)
## Warning: ggrepel: 8151 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# Plot of individuals, coloured by cos2 (quality of representation) on a
# three-colour gradient.
# NOTE(review): the echoed ggrepel warning reports thousands of unlabeled
# points, so labels are of little use at this sample size.
fviz_pca_ind(
  pca,
  repel = TRUE, # avoid overlapping text
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  col.ind = "cos2"
)
## Warning: ggrepel: 8151 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# Convert the PCA input data to a tibble and preview its first rows.
insurance_data_TIB <- insurance_data_cor %>%
  as_tibble()
head(insurance_data_TIB)
## # A tibble: 6 x 8
## KIDSDRIV AGE HOMEKIDS YOJ INCOME HOME_VAL MVR_PTS REPEAT5
## <int> <int> <int> <int> <dbl> <dbl> <int> <dbl>
## 1 0 60 0 11 67349 0 3 1
## 2 0 43 0 11 91449 257252 0 0
## 3 0 48 0 11 52881 0 2 0
## 4 0 35 1 10 16039 124191 3 1
## 5 0 34 1 12 125301 0 0 0
## 6 1 40 1 11 50815 0 2 1
# Attach the scores of the first two principal components to the data as
# columns PCA1 and PCA2, then preview the result.
insurance_data_TIB_PCA <- mutate(
  insurance_data_TIB,
  PCA1 = pca$x[, 1],
  PCA2 = pca$x[, 2]
)
head(insurance_data_TIB_PCA)
## # A tibble: 6 x 10
## KIDSDRIV AGE HOMEKIDS YOJ INCOME HOME_VAL MVR_PTS REPEAT5 PCA1 PCA2
## <int> <int> <int> <int> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0 60 0 11 67349 0 3 1 -0.101 1.07
## 2 0 43 0 11 91449 257252 0 0 -1.33 0.0790
## 3 0 48 0 11 52881 0 2 0 0.0193 1.04
## 4 0 35 1 10 16039 124191 3 1 1.53 0.143
## 5 0 34 1 12 125301 0 0 0 -0.0280 -0.269
## 6 1 40 1 11 50815 0 2 1 1.64 -0.772
# Most informative pair: scatter of the first two component scores,
# coloured by REPEAT5.
# NOTE(review): REPEAT5 is a double column in the printed tibble, so ggplot
# maps it to a continuous colour scale; consider factor(REPEAT5) if two
# discrete colours are intended.
ggplot(
  insurance_data_TIB_PCA,
  aes(x = PCA1, y = PCA2, colour = REPEAT5)
) +
  geom_point() +
  theme_bw()
Neinformatyviausia pora:
# Rebuild the augmented tibble with the scores of the last two components
# (7 and 8) instead; this overwrites the earlier PCA1/PCA2 version.
insurance_data_TIB_PCA <- mutate(
  insurance_data_TIB,
  PCA7 = pca$x[, 7],
  PCA8 = pca$x[, 8]
)

# Scatter of the two lowest-variance component scores, coloured by REPEAT5.
ggplot(
  insurance_data_TIB_PCA,
  aes(x = PCA7, y = PCA8, colour = REPEAT5)
) +
  geom_point() +
  theme_bw()
# Scatter of HOME_VAL against HOMEKIDS from the raw data, coloured by REPEAT5.
ggplot(
  insurance_data,
  aes(x = HOMEKIDS, y = HOME_VAL, colour = REPEAT5)
) +
  geom_point() +
  labs(
    title = "Relation between KIDS IN HOME, Home value",
    x = "KIDS IN HOME",
    y = "Home value, $"
  )
# Scatter of MVR_PTS against YOJ from the raw data, coloured by REPEAT5.
ggplot(
  insurance_data,
  aes(x = YOJ, y = MVR_PTS, colour = REPEAT5)
) +
  geom_point() +
  labs(
    title = "Relation between Motor vehicle record points (demerits), Years on job",
    x = "Years on job",
    y = "record points"
  )