library(gsheet)
library(dplyr)
# Toy data: four two-dimensional points, one point per row.
df <- rbind(
  c(4.1, 4.1),
  c(4.1, 2.7),
  c(2.7, 4.1),
  c(2.5, 2.5)
)
df
## [,1] [,2]
## [1,] 4.1 4.1
## [2,] 4.1 2.7
## [3,] 2.7 4.1
## [4,] 2.5 2.5
# Scatter plot of the rows of df (column 1 on the x axis, column 2 on the y
# axis), with an arrow from the origin to every point so that the angles
# between the row vectors can be compared by eye.
#
# Base-graphics functions draw as a side effect and return NULL, so they are
# issued as separate statements; chaining them with `+` is ggplot2 syntax and
# only ever produced an empty vector here.
plot(df,
  xlim = c(0, 4.5),
  ylim = c(0, 4.5),
  xlab = "Econ",
  ylab = "Poli"
)
# arrows() is vectorised over the end points, `col` and `lwd`, so a single call
# draws one arrow per row. par("fg") is the default colour the first arrow used.
arrows(0, 0, df[, 1], df[, 2],
  length = 0.1,
  col = c(par("fg"), "red", "blue", "green"),
  lwd = c(2, 2, 2, 3)
)
\[ \cos(\theta) = \frac{\sum_{i=1}^n a_i\cdot b_i} {\sqrt{\sum_{i=1}^n a_i\cdot a_i}\cdot\sqrt{\sum_{i=1}^n b_i\cdot b_i}} \]
#' Cosine similarity between two vectors
#'
#' Computes `sum(x * y) / (sqrt(sum(x * x)) * sqrt(sum(y * y)))`, i.e. the dot
#' product of `x` and `y` divided by the product of their Euclidean norms.
#'
#' @param x A numeric vector.
#' @param y A numeric vector with the same length as `x`.
#' @return A single number. The result is `NaN` when either vector has zero
#'   norm, because the expression then evaluates to `0 / 0`.
#' @examples
#' cosine(c(0, 1), c(1, 0)) # 0
cosine <- function(x, y) {
  # Without this check `x * y` would silently recycle the shorter vector and
  # return a number that is not a cosine similarity at all.
  if (length(x) != length(y)) {
    stop("`x` and `y` must have the same length", call. = FALSE)
  }
  sum(x * y) / (sqrt(sum(x * x)) * sqrt(sum(y * y)))
}
# Two perpendicular unit vectors.
cosine(c(0, 1), c(1, 0))
## [1] 0
# Every pair of rows of df: row 1 against rows 2, 3 and 4 ...
cosine(df[1, ], df[2, ])
## [1] 0.979457
cosine(df[1, ], df[3, ])
## [1] 0.979457
cosine(df[1, ], df[4, ])
## [1] 1
# ... row 2 against rows 3 and 4 ...
cosine(df[2, ], df[3, ])
## [1] 0.9186722
cosine(df[2, ], df[4, ])
## [1] 0.979457
# ... and row 3 against row 4.
cosine(df[3, ], df[4, ])
## [1] 0.979457
# All pairwise cosine similarities between the rows of df in one step.
Matrix <- as.matrix(df)
# Dividing by the per-row Euclidean norm rescales every row to unit length
# (the norm vector is recycled down the columns, so row i is divided by norm i).
sim <- Matrix / sqrt(rowSums(Matrix^2))
# Entry [i, j] of the product is the dot product of unit rows i and j.
sim <- sim %*% t(sim)
sim
## [,1] [,2] [,3] [,4]
## [1,] 1.000000 0.9794570 0.9794570 1.000000
## [2,] 0.979457 1.0000000 0.9186722 0.979457
## [3,] 0.979457 0.9186722 1.0000000 0.979457
## [4,] 1.000000 0.9794570 0.9794570 1.000000
# Correlation matrix of the two columns of df.
cor(df)
## [,1] [,2]
## [1,] 1.0000000 0.1365639
## [2,] 0.1365639 1.0000000
# Alternative source, disabled: read a local CSV picked through a file dialog.
# data <- read.csv(choose.files(), header = TRUE)

# Read the table from a shared Google Sheet.
data <- gsheet2tbl("https://docs.google.com/spreadsheets/d/1GHX-CtPwHbfwcX4wvs4m24WqfbwJ1ghVC7jgnwVWJ1U/edit?usp=sharing")
# Number of rows and columns that were read.
print(dim(data))
## [1] 6 10
data
## # A tibble: 6 x 10
## univ policy econ business law etc num tuition ratio1 ratio2
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 khu 0.4 0.3 0.2 0.05 0.05 120 300 10 20
## 2 yonsei 0.3 0.3 0.3 0.1 0 203 700 22 10
## 3 korea 0.4 0.4 0.2 0 0 80 400 17 25
## 4 hufs 0.5 0.3 0.1 0.1 0 45 300 20 20
## 5 hanyang 0.2 0.3 0.3 0.1 0.1 50 400 15 30
## 6 ewha 0.5 0.3 0.1 0.1 0 95 500 10 15
# Reshape data so that every entry of its first column (univ) becomes a column
# of df and every remaining column of data becomes a row of df.
#
# The value columns are converted to numeric directly and stacked as rows.
# Transposing the whole table instead would pass it through as.matrix(), which
# turns every number into formatted text because of the character id column;
# parsing that text back with as.numeric() can lose precision beyond the
# `digits` option.
df <- as.data.frame(do.call(rbind, lapply(data[-1], as.numeric)))
# Label the columns with the identifiers held in the first column of data.
colnames(df) <- as.character(data[[1]])
# Pairwise cosine similarities between the columns of df (one per university).
# Transposing puts each university on its own row of Matrix.
Matrix <- as.matrix(t(df))
# Rescale every row to unit Euclidean length ...
sim <- Matrix / sqrt(rowSums(Matrix^2))
# ... so that the row-by-row dot products are the cosine similarities.
sim <- sim %*% t(sim)
sim
## khu yonsei korea hufs hanyang ewha
## khu 1.0000000 0.9940354 0.9833101 0.9728614 0.9674651 0.9809516
## yonsei 0.9940354 1.0000000 0.9952219 0.9891772 0.9857535 0.9953643
## korea 0.9833101 0.9952219 1.0000000 0.9985303 0.9972546 0.9992097
## hufs 0.9728614 0.9891772 0.9985303 1.0000000 0.9992560 0.9975280
## hanyang 0.9674651 0.9857535 0.9972546 0.9992560 1.0000000 0.9968427
## ewha 0.9809516 0.9953643 0.9992097 0.9975280 0.9968427 1.0000000
# Correlation matrix of the columns of df.
cor(df)
## khu yonsei korea hufs hanyang ewha
## khu 1.0000000 0.9935781 0.9802924 0.9675563 0.9621013 0.9790002
## yonsei 0.9935781 1.0000000 0.9941584 0.9868130 0.9829980 0.9947214
## korea 0.9802924 0.9941584 1.0000000 0.9982190 0.9968986 0.9993288
## hufs 0.9675563 0.9868130 0.9982190 1.0000000 0.9992354 0.9971981
## hanyang 0.9621013 0.9829980 0.9968986 0.9992354 1.0000000 0.9962173
## ewha 0.9790002 0.9947214 0.9993288 0.9971981 0.9962173 1.0000000