#install.packages("readxl")
library(readxl)
####Ładowanie zbioru danych####
dane <- read_excel("C:/Users/majko/OneDrive/Dokumenty/DOKTORAT/5_semestr/Przygotowanie_zajec_R_analiza_skupien/Dane_AW_zanieczyszczenie.xlsx", sheet = "Dane")dane<-as.data.frame(dane)str(dane)## 'data.frame': 16 obs. of 8 variables:
## $ Województwo: chr "DOLNOŚLĄSKIE" "KUJAWSKO-POMORSKIE" "LUBELSKIE" "LUBUSKIE" ...
## $ X1 : num 3.81 5.94 2.9 2.65 14.56 ...
## $ X2 : num 2.55 7.46 2.78 2.41 12.72 ...
## $ X3 : num 4.4 4.45 2.38 2.14 16.14 ...
## $ X4 : num 6.31 5.13 4.48 2.4 5.85 5.36 7.24 3.26 3.41 2.11 ...
## $ X5 : num 4.49 4.87 3.75 7.2 6.74 ...
## $ X6 : num 7.99 7 2.74 2.62 5.27 ...
## $ X7 : num 4.18 3.3 4.16 3.75 3.8 ...
województwa=dane[,1]
dane<-dane[,-1]row.names(dane)<-województwa
dane## X1 X2 X3 X4 X5 X6 X7
## DOLNOŚLĄSKIE 3.813 2.546 4.399 6.31 4.494 7.990 4.177
## KUJAWSKO-POMORSKIE 5.936 7.464 4.451 5.13 4.866 7.002 3.301
## LUBELSKIE 2.903 2.776 2.380 4.48 3.755 2.737 4.161
## LUBUSKIE 2.655 2.406 2.138 2.40 7.195 2.620 3.751
## ŁÓDZKIE 14.561 12.721 16.143 5.85 6.740 5.271 3.796
## MAŁOPOLSKIE 4.432 4.339 2.950 5.36 2.974 16.193 2.523
## MAZOWIECKIE 5.463 2.888 5.346 7.24 4.038 6.862 3.948
## OPOLSKIE 13.284 20.023 12.422 3.26 6.868 6.072 2.982
## PODKARPACKIE 2.254 2.154 1.311 3.41 4.489 3.922 2.571
## PODLASKIE 1.992 2.342 1.853 2.11 4.627 2.004 3.569
## POMORSKIE 3.247 2.262 2.938 4.83 3.783 7.135 4.198
## ŚLĄSKIE 9.376 34.142 8.317 23.61 7.400 25.602 5.682
## ŚWIĘTOKRZYSKIE 17.349 33.184 11.132 4.84 9.801 5.483 2.119
## WARMIŃSKO-MAZURSKIE 1.676 1.919 1.100 2.12 4.741 2.077 2.174
## WIELKOPOLSKIE 6.368 2.140 4.411 12.71 11.724 8.160 5.945
## ZACHODNIOPOMORSKIE 5.730 1.857 4.920 6.34 6.410 4.934 4.654
summary(dane)## X1 X2 X3 X4
## Min. : 1.676 Min. : 1.857 Min. : 1.100 Min. : 2.110
## 1st Qu.: 2.841 1st Qu.: 2.235 1st Qu.: 2.320 1st Qu.: 3.373
## Median : 4.947 Median : 2.661 Median : 4.405 Median : 4.985
## Mean : 6.315 Mean : 8.448 Mean : 5.388 Mean : 6.250
## 3rd Qu.: 7.120 3rd Qu.: 8.778 3rd Qu.: 6.089 3rd Qu.: 6.317
## Max. :17.349 Max. :34.142 Max. :16.143 Max. :23.610
## X5 X6 X7
## Min. : 2.974 Min. : 2.004 Min. :2.119
## 1st Qu.: 4.376 1st Qu.: 3.626 1st Qu.:2.879
## Median : 4.803 Median : 5.777 Median :3.773
## Mean : 5.869 Mean : 7.129 Mean :3.722
## 3rd Qu.: 6.950 3rd Qu.: 7.349 3rd Qu.:4.182
## Max. :11.724 Max. :25.602 Max. :5.945
library(laeken)## Warning: pakiet 'laeken' został zbudowany w wersji R 4.1.3
GINI<-gini(dane$X1)
GINI$value/100## [1] 0.3864801
pairs(dane)library(ggplot2)## Warning: pakiet 'ggplot2' został zbudowany w wersji R 4.1.3
library(gridExtra)
X1<-ggplot(dane, aes(x=X1)) +
geom_histogram(color="black", fill="grey", bins=5)
X2<-ggplot(dane, aes(x=X2)) +
geom_histogram(color="black", fill="grey", bins=5)
X3<-ggplot(dane, aes(x=X3)) +
geom_histogram(color="black", fill="grey", bins=5)
X4<-ggplot(dane, aes(x=X4)) +
geom_histogram(color="black", fill="grey", bins=5)
grid.arrange(X1, X2, X3, X4)X5<-ggplot(dane, aes(x=X5)) +
geom_histogram(color="black", fill="grey", bins=5)
X6<-ggplot(dane, aes(x=X6)) +
geom_histogram(color="black", fill="grey", bins=5)
X7<-ggplot(dane, aes(x=X7)) +
geom_histogram(color="black", fill="grey", bins=5)
grid.arrange( X5, X6, X7)boxplot(dane)kor=cor(dane)library(corrplot)## corrplot 0.92 loaded
corrplot(kor, method = "circle")corrplot(kor, method = "square")corrplot(kor, method = "ellipse")corrplot(kor, method = "number")corrplot(kor, method = "color")corrplot(kor, method = "pie") ## WYKRES KORELACJI MIESZANY
corrplot.mixed(kor,lower = "ellipse", upper = "number")dist(x = dane, method = "euclidean")^2## DOLNOŚLĄSKIE KUJAWSKO-POMORSKIE LUBELSKIE LUBUSKIE
## KUJAWSKO-POMORSKIE 31.970861
## LUBELSKIE 36.446647 56.052120
## LUBUSKIE 58.074562 73.979859 16.598757
## ŁÓDZKIE 369.765903 246.001200 441.573664 463.431437
## MAŁOPOLSKIE 78.935436 102.992971 190.237051 219.866316
## MAZOWIECKIE 6.133934 27.540423 40.121983 69.834074
## OPOLSKIE 479.559024 283.729991 529.757369 542.449180
## PODKARPACKIE 39.657774 64.732653 7.566827 12.338374
## PODLASKIE 63.699322 82.768973 8.561043 7.616194
## POMORSKIE 5.962920 38.665881 20.160953 39.142878
## ŚLĄSKIE 1664.787314 1438.055957 1965.338245 2072.332788
## ŚWIĘTOKRZYSKIE 1208.089191 864.553540 1258.324880 1267.695985
## WARMIŃSKO-MAZURSKIE 72.435986 94.725305 14.803943 11.155348
## WIELKOPOLSKIE 103.080629 141.353464 180.365128 181.336831
## ZACHODNIOPOMORSKIE 17.659672 41.656115 30.866573 39.806380
## ŁÓDZKIE MAŁOPOLSKIE MAZOWIECKIE OPOLSKIE
## KUJAWSKO-POMORSKIE
## LUBELSKIE
## LUBUSKIE
## ŁÓDZKIE
## MAŁOPOLSKIE 482.243283
## MAZOWIECKIE 307.823991 102.673860
## OPOLSKIE 76.824455 536.283102 430.252598
## PODKARPACKIE 497.452989 168.881700 52.529692 574.889944
## PODLASKIE 499.084689 226.861764 74.954632 575.068560
## POMORSKIE 425.189651 91.506668 17.111150 520.731496
## ŚLĄSKIE 1279.749499 1392.627172 1634.425226 1034.586074
## ŚWIĘTOKRZYSKIE 464.864528 1227.573683 1136.821459 203.590825
## WARMIŃSKO-MAZURSKIE 539.739699 229.877582 86.060756 613.124782
## WIELKOPOLSKIE 401.587012 217.542391 96.921063 557.831215
## ZACHODNIOPOMORSKIE 323.167519 155.798766 11.967730 457.133268
## PODKARPACKIE PODLASKIE POMORSKIE ŚLĄSKIE
## KUJAWSKO-POMORSKIE
## LUBELSKIE
## LUBUSKIE
## ŁÓDZKIE
## MAŁOPOLSKIE
## MAZOWIECKIE
## OPOLSKIE
## PODKARPACKIE
## PODLASKIE 6.781524
## POMORSKIE 19.130176 37.592188
## ŚLĄSKIE 2019.253706 2138.816654 1791.836116
## ŚWIĘTOKRZYSKIE 1320.075235 1321.595371 1265.445893 847.957549
## WARMIŃSKO-MAZURSKIE 5.723068 2.810244 43.905738 2184.318888
## WIELKOPOLSKIE 194.714937 232.002665 141.181769 1490.232413
## ZACHODNIOPOMORSKIE 42.833840 54.448472 24.491204 1794.610158
## ŚWIĘTOKRZYSKIE WARMIŃSKO-MAZURSKIE WIELKOPOLSKIE
## KUJAWSKO-POMORSKIE
## LUBELSKIE
## LUBUSKIE
## ŁÓDZKIE
## MAŁOPOLSKIE
## MAZOWIECKIE
## OPOLSKIE
## PODKARPACKIE
## PODLASKIE
## POMORSKIE
## ŚLĄSKIE
## ŚWIĘTOKRZYSKIE
## WARMIŃSKO-MAZURSKIE 1368.390039
## WIELKOPOLSKIE 1216.923572 245.160145
## ZACHODNIOPOMORSKIE 1175.447541 65.937970 81.635467
# dist(x = dane, method = "maximum")
# dist(x = dane, method = "manhattan")
# dist(x = dane, method = "minkowski")a_dend<-hclust(dist(x = dane, method = "euclidean")^2,method = "ward.D")a_dend$height## [1] 2.810244 5.962920 7.399647 11.967730 12.913434 14.458804
## [7] 23.732655 47.600651 76.824455 162.425241 187.391481 366.660746
## [13] 420.028750 1456.933260 4238.259587
plot(a_dend,hang=-1)odl<-dist(x = dane, method = "euclidean")^2
a_dend<-hclust(odl,method = "ward.D")
b_dend<-hclust(odl,method = "single")
c_dend<-hclust(odl,method = "complete")
d_dend<-hclust(odl,method = "average")
par(mfrow=c(2,2))
plot(a_dend,hang=-1)
plot(b_dend,hang=-1)
plot(c_dend,hang=-1)
plot(d_dend,hang=-1)#### metoda Warda
c1<-cophenetic(a_dend)
kof1<-cor(c1,odl)
### metoda najbliższego sąsiada
c2<-cophenetic(b_dend)
kof2<-cor(c2,odl)
### metoda najdalszego sąsiada
c3<-cophenetic(c_dend)
kof3<-cor(c3,odl)
#### metoda średnich
c4<-cophenetic(d_dend)
kof4<-cor(c4,odl)kofenetyczna<-cbind(kof1, kof2, kof3, kof4)
kofenetyczna## kof1 kof2 kof3 kof4
## [1,] 0.7446699 0.8868346 0.7606007 0.9057624
colnames(kofenetyczna)<-c("Ward","single","complete","average")
kofenetyczna## Ward single complete average
## [1,] 0.7446699 0.8868346 0.7606007 0.9057624
plot(d_dend, hang=-1)source("http://addictedtor.free.fr/packages/A2R/lastVersion/R/code.R")
A2Rplot(d_dend,k=3, boxes = TRUE, col.up="grey",
col.down = c("darkgrey","pink", "darkgreen"),
main="Dendogram")