require(plyr)
## Loading required package: plyr
require(reshape2)
## Loading required package: reshape2
require(ggplot2)
## Loading required package: ggplot2
require(e1071)
## Loading required package: e1071
## Loading required package: class
require(rpart)
## Loading required package: rpart
require(partykit)
## Loading required package: partykit
## Loading required package: grid
require(party)
## Loading required package: party
## Loading required package: survival
## Loading required package: splines
## Loading required package: modeltools
## Loading required package: stats4
## Attaching package: 'modeltools'
## The following object(s) are masked from 'package:plyr':
##
## empty
## Loading required package: coin
## Loading required package: mvtnorm
## Loading required package: zoo
## Attaching package: 'zoo'
## The following object(s) are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
## Loading required package: strucchange
## Loading required package: vcd
## Loading required package: MASS
## Loading required package: colorspace
## Attaching package: 'party'
## The following object(s) are masked from 'package:partykit':
##
## ctree, ctree_control, edge_simple, node_barplot, node_boxplot, node_inner,
## node_surv, node_terminal
require(cluster)
## Loading required package: cluster
require(pvclust)
## Loading required package: pvclust
# Load the three CSV inputs (comma-separated, "." as decimal mark, double-quote
# quoting) and attach the columns of `users` to the per-user rows of `data`.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
users <- read.table("users.csv", header = TRUE, sep = ",", dec = ".", quote = "\"")
artists <- read.table("a_data.csv", header = TRUE, sep = ",", dec = ".", quote = "\"")
data <- read.table("data.csv", header = TRUE, sep = ",", dec = ".", quote = "\"")
# plyr::join() performs a left join by default, so every row of `data` is kept.
data <- join(data, users, by = "user_id")
# Aggregates over the artists table by genre, origin, mood and era.
# The table is melted to long form once and reused by every dcast() call below
# (it used to be re-melted for each aggregate, repeating the same work and message).
artists_long <- melt(artists)
## Using name, genre, mood, origin, era as id variables

# Artists per genre: count the rows of one measure (max_similarity) per genre.
genres <- dcast(artists_long, genre ~ ., length, subset = .(variable == "max_similarity"))
names(genres) <- c("genre", "artists_count")

# Sum of the `users` measure per genre.
genres_users <- dcast(artists_long, genre ~ ., sum, na.rm = TRUE, subset = .(variable == "users"))
names(genres_users) <- c("genre", "users_count")

# Sum of the `tracks` measure per genre.
genres_tracks <- dcast(artists_long, genre ~ ., sum, na.rm = TRUE, subset = .(variable == "tracks"))
names(genres_tracks) <- c("genre", "tracks_count")

# Per-genre summary table; `genre` is the only column the two tables share,
# so naming it explicitly gives the same join without the "Joining by" message.
genres_data <- join(genres, genres_tracks, by = "genre")
genres_data$avg_tracks_per_artist <- genres_data$tracks_count / genres_data$artists_count

# Cross-tabulations: `users` summed by origin x genre and by genre x mood.
genre_origins <- dcast(artists_long, origin ~ genre, sum, na.rm = TRUE, subset = .(variable == "users"))
genre_moods <- dcast(artists_long, genre ~ mood, sum, na.rm = TRUE, subset = .(variable == "users"))

# Era tables: artist counts per era and per era x genre, and `tracks` summed by era x mood.
eras <- dcast(artists_long, era ~ ., length, subset = .(variable == "tracks"))
eras_genres <- dcast(artists_long, era ~ genre, length, subset = .(variable == "tracks"))
eras_moods <- dcast(artists_long, era ~ mood, sum, na.rm = TRUE, subset = .(variable == "tracks"))
Жанры
genres_data
## genre artists_count tracks_count avg_tracks_per_artist
## 1 alternative & punk 3944 33524 8.500
## 2 classical 424 1580 3.726
## 3 electronica 10746 34003 3.164
## 4 jazz 568 1797 3.164
## 5 other 2297 9651 4.202
## 6 pop 5448 32653 5.994
## 7 rock 3291 22756 6.915
## 8 soundtrack 1049 2874 2.740
## 9 traditional 1011 3140 3.106
## 10 unknown 1417 12350 8.716
## 11 urban 4023 19692 4.895
Подготовка данных
# Build the analysis sample: users whose passed_threshold count exceeds 25 and
# makes up more than half of their total_tracks.
users_passed <- subset(data, passed_threshold > 25)
users_passed$perc_threshold <- users_passed$passed_threshold / users_passed$total_tracks
users_passed <- subset(users_passed, perc_threshold > 0.5)

# Derived per-user features. Age is measured against the fixed reference year 2013.
users_passed$age <- 2013 - users_passed$byear
users_passed$music_per_friend <- users_passed$total_friends_music / users_passed$total_friends
# perc_threshold used to be assigned a second time here with the identical
# expression; the redundant assignment was removed (the column is unchanged).

# Genre count columns (genre_*) and their shares of passed_threshold (perc_genre_*).
all_genres <- c("urban", "alternative...punk", "traditional", "classical", "electronica",
                "unknown", "jazz", "pop", "soundtrack", "other", "rock")
all_genres <- paste0("genre_", all_genres)
p_genres <- paste0("perc_", all_genres)
users_passed[, p_genres] <- users_passed[, all_genres] / users_passed$passed_threshold

# Row-wise kurtosis of each user's genre shares
# (kurtosis() is presumably e1071's, the attached package that provides one).
users_passed$genre_kurtosis <- apply(users_passed[, p_genres], 1, kurtosis)
users_passed$log_total_friends <- log(users_passed$total_friends)
users_passed$log_total_tracks <- log(users_passed$total_tracks)

# Keep a copy from before the friends/age filters are applied.
users_2 <- users_passed
users_passed <- subset(users_passed, total_friends > 0)
users_passed <- subset(users_passed, age < 70)
# users_passed <- subset(users_passed, users_passed$total_tracks>0)
dim(users_passed)
## [1] 144 98
# Working copy for the tree models, without the IQR columns (either spelling)
# should they be present.
up <- users_passed[, setdiff(names(users_passed), c("genre_iqr", "genre_IQR")), drop = FALSE]
# up$genre_kurtosis <- NULL

# Regression tree for genre_kurtosis on all remaining columns.
r <- rpart(genre_kurtosis ~ ., data = up)
plotcp(r)
# printcp() prints the complexity table; its return value is kept as a data
# frame so that a CP value can be looked up by number of splits later.
cp_table <- printcp(r)
cp <- as.data.frame(cp_table)
##
## Regression tree:
## rpart(formula = genre_kurtosis ~ ., data = up)
##
## Variables actually used in tree construction:
## [1] mood_sensual mood_unknown
## [3] perc_genre_alternative...punk perc_genre_other
## [5] perc_genre_pop perc_genre_rock
## [7] perc_genre_unknown perc_genre_urban
## [9] total_artists
##
## Root node error: 527/144 = 3.7
##
## n= 144
##
## CP nsplit rel error xerror xstd
## 1 0.208 0 1.00 1.01 0.077
## 2 0.081 1 0.79 0.96 0.094
## 3 0.071 2 0.71 0.96 0.107
## 4 0.061 3 0.64 1.00 0.110
## 5 0.043 4 0.58 0.93 0.106
## 6 0.040 5 0.54 0.94 0.107
## 7 0.034 6 0.50 0.94 0.107
## 8 0.030 7 0.46 0.91 0.108
## 9 0.028 8 0.43 0.90 0.111
## 10 0.025 9 0.40 0.89 0.109
## 11 0.025 10 0.38 0.89 0.108
## 12 0.010 11 0.35 0.90 0.109
# Prune the tree back to the complexity parameter recorded for 5 splits and
# draw the pruned tree through its party representation.
cp_at_5_splits <- cp[cp$nsplit == 5, "CP"][1]
rp <- prune(r, cp = as.numeric(cp_at_5_splits))
plot(as.party(rp))
# Refit the tree without genre-derived predictors. Every column whose name
# contains "perc" or "genre" is dropped, except the response genre_kurtosis
# (note: the "perc" pattern also matches perc_threshold when that column exists).
genre.columns <- c(grep("perc", names(up), value = TRUE),
                   grep("genre", names(up), value = TRUE))
genre.columns <- setdiff(genre.columns, "genre_kurtosis")
non.genre.columns <- setdiff(names(up), genre.columns)
up <- up[, non.genre.columns]
r2 <- rpart(genre_kurtosis ~ ., data = up)
# Plot the complexity curve of the model just fitted; this used to call
# plotcp(r), which redrew the curve of the earlier tree instead.
plotcp(r2)
# User table extended with the friend and track totals from `data`.
users_test <- join(users, data[, c("user_id", "total_friends", "total_tracks")],
                   by = "user_id")
# Exploratory trees for the two totals. The friends model gets its own name;
# it used to be stored in `tt` and overwritten on the very next line, so it was
# never available. `tt` still ends up holding the total_tracks model.
tt_friends <- rpart(total_friends ~ ., data = users_test)
tt <- rpart(total_tracks ~ ., data = users_test)
plot(users_test$total_tracks, users_test$total_friends)

# Users with a non-empty university city, flagged by whether it equals their
# own city, restricted to ages below 50 (age is relative to 2013).
students <- subset(users_test, uni_city != "")
students$uni_same_city <- ifelse(students$city == students$uni_city, 1, 0)
students$age <- 2013 - students$byear
students <- subset(students, age < 50)
Анализ линейными моделями
# Histogram of pdict_size (labelled as the number of filled-in questionnaire items).
hist(users_passed[["pdict_size"]], main = "", col = "grey", breaks = 31,
     xlab = "Заполнено пунктов анкеты", ylab = "Частота")
# Histogram of user age.
hist(users_passed[["age"]], main = "", col = "grey", breaks = 100,
     xlab = "Возраст", ylab = "Частота")
m <- lm(pdict_size ~ age, data = users_passed)
summary(m)
##
## Call:
## lm(formula = pdict_size ~ age, data = users_passed)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.86 -5.23 -4.05 2.73 23.38
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.966 3.628 1.09 0.28
## age 0.140 0.138 1.02 0.31
##
## Residual standard error: 8.29 on 142 degrees of freedom
## Multiple R-squared: 0.00721, Adjusted R-squared: 0.000221
## F-statistic: 1.03 on 1 and 142 DF, p-value: 0.312
# Draw the scatter plot, then overlay the fitted line as a separate call.
# Base graphics draw by side effect and are not composed with `+`; chaining
# them only added two NULLs and printed a stray numeric(0).
plot(users_passed$pdict_size ~ users_passed$age, col = "black", main = "",
     xlab = "Возраст", ylab = "Заполнено пунктов анкеты")
abline(m, col = "red")
m <- lm(log_total_tracks ~ pdict_size, data = users_passed)
summary(m)
##
## Call:
## lm(formula = log_total_tracks ~ pdict_size, data = users_passed)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.6324 -0.7653 -0.0749 0.6694 2.7830
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.21828 0.10937 47.71 <2e-16 ***
## pdict_size -0.01017 0.00975 -1.04 0.3
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.967 on 142 degrees of freedom
## Multiple R-squared: 0.00761, Adjusted R-squared: 0.000618
## F-statistic: 1.09 on 1 and 142 DF, p-value: 0.299
# Scatter plot first, regression line second: base graphics calls are separate
# statements (joining them with `+` only printed a stray numeric(0)).
plot(users_passed$log_total_tracks ~ users_passed$pdict_size, col = "black",
     main = "", xlab = "Заполнено пунктов анкеты", ylab = "Всего аудиозаписей (log)")
abline(m, col = "red")
# Regress the (log) friend count on pdict_size so that the model matches the
# scatter plot drawn below. This step used to refit
# log_total_tracks ~ pdict_size, so the red line described a different response
# variable than the one on the y axis.
m <- lm(log_total_friends ~ pdict_size, data = users_passed)
summary(m)
## (recorded output dropped: it was a repeat of the log_total_tracks ~ pdict_size
## summary; re-run the script to obtain the summary of this model)
# Scatter plot first, regression line second: base graphics calls are separate
# statements (joining them with `+` only printed a stray numeric(0)).
plot(users_passed$log_total_friends ~ users_passed$pdict_size, col = "black",
     main = "", xlab = "Заполнено пунктов анкеты", ylab = "Всего друзей (log)")
abline(m, col = "red")
m <- lm(log_total_tracks ~ log_total_friends, data = users_passed)
summary(m)
##
## Call:
## lm(formula = log_total_tracks ~ log_total_friends, data = users_passed)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.657 -0.773 -0.104 0.625 2.582
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.8381 0.4310 8.91 2.3e-15 ***
## log_total_friends 0.2627 0.0854 3.07 0.0025 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.94 on 142 degrees of freedom
## Multiple R-squared: 0.0624, Adjusted R-squared: 0.0558
## F-statistic: 9.45 on 1 and 142 DF, p-value: 0.00253
# The model above is log_total_tracks ~ log_total_friends, so tracks go on the
# y axis and friends on the x axis. The axes used to be the other way round,
# which made abline(m) draw a line that did not belong to the plotted relation.
# The two calls are separate statements: base graphics are not composed with `+`.
plot(users_passed$log_total_tracks ~ users_passed$log_total_friends, col = "black",
     main = "", xlab = "Всего друзей (log)", ylab = "Всего аудиозаписей (log)")
abline(m, col = "red")
m <- lm(log_total_tracks ~ age, data = users_passed)
summary(m)
##
## Call:
## lm(formula = log_total_tracks ~ age, data = users_passed)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.5091 -0.8117 0.0146 0.5852 2.5808
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.4849 0.4089 15.86 <2e-16 ***
## age -0.0521 0.0156 -3.35 0.001 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.935 on 142 degrees of freedom
## Multiple R-squared: 0.0731, Adjusted R-squared: 0.0666
## F-statistic: 11.2 on 1 and 142 DF, p-value: 0.00104
# Scatter plot first, regression line second: base graphics calls are separate
# statements (joining them with `+` only printed a stray numeric(0)).
plot(users_passed$log_total_tracks ~ users_passed$age, col = "black", main = "",
     xlab = "Возраст", ylab = "Всего аудиозаписей (log)")
abline(m, col = "red")
m <- lm(log_total_friends ~ age, data = users_passed)
summary(m)
##
## Call:
## lm(formula = log_total_friends ~ age, data = users_passed)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.568 -0.458 -0.010 0.455 2.339
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.3602 0.3858 16.5 < 2e-16 ***
## age -0.0543 0.0147 -3.7 0.00031 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.882 on 142 degrees of freedom
## Multiple R-squared: 0.0878, Adjusted R-squared: 0.0813
## F-statistic: 13.7 on 1 and 142 DF, p-value: 0.000312
# Scatter plot first, regression line second: base graphics calls are separate
# statements (joining them with `+` only printed a stray numeric(0)).
plot(users_passed$log_total_friends ~ users_passed$age, col = "black", main = "",
     xlab = "Возраст", ylab = "Всего друзей (log)")
abline(m, col = "red")
# log(music_per_friend) is not finite for some users (presumably zero or
# missing values), which made lm() stop with "NA/NaN/Inf in 'y'". The script
# then summarised and plotted the previous model left in `m`. Fit and plot on
# the rows where the logarithm is defined instead.
friends_music <- subset(users_passed,
                        is.finite(music_per_friend) & music_per_friend > 0)
m <- lm(log(music_per_friend) ~ log_total_tracks, data = friends_music)
summary(m)
## (recorded output dropped: it showed the lm() error followed by the summary of
## the earlier log_total_friends ~ age model; re-run the script to regenerate it)
# Scatter plot first, regression line second: base graphics calls are separate
# statements (joining them with `+` only printed a stray numeric(0)).
plot(log(friends_music$music_per_friend) ~ friends_music$log_total_tracks, col = "black",
     main = "", xlab = "Всего аудиозаписей (log)", ylab = "Аудиозаписей у друзей (среднее, log)")
abline(m, col = "red")
dim(users_passed)
## [1] 144 98
Кластеризация
m <- lm(net_average_clustering ~ log_total_friends, data = users_passed)
summary(m)
##
## Call:
## lm(formula = net_average_clustering ~ log_total_friends, data = users_passed)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.3900 -0.0547 0.0121 0.0684 0.3161
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.3900 0.0515 7.57 4.4e-12 ***
## log_total_friends 0.0149 0.0102 1.46 0.15
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.112 on 142 degrees of freedom
## Multiple R-squared: 0.0149, Adjusted R-squared: 0.00792
## F-statistic: 2.14 on 1 and 142 DF, p-value: 0.146
# Scatter plot first, regression line second: base graphics calls are separate
# statements (joining them with `+` only printed a stray numeric(0)).
plot(users_passed$net_average_clustering ~ users_passed$log_total_friends, col = "black",
     main = "", xlab = "Всего друзей (log)", ylab = "Средний коэффициент кластеризации")
abline(m, col = "red")
m <- lm(net_average_clustering ~ log_total_tracks, data = users_passed)
summary(m)
##
## Call:
## lm(formula = net_average_clustering ~ log_total_tracks, data = users_passed)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4656 -0.0532 0.0190 0.0703 0.3057
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.53069 0.05089 10.43 <2e-16 ***
## log_total_tracks -0.01295 0.00973 -1.33 0.19
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.113 on 142 degrees of freedom
## Multiple R-squared: 0.0123, Adjusted R-squared: 0.00537
## F-statistic: 1.77 on 1 and 142 DF, p-value: 0.185
# Scatter plot first, regression line second: base graphics calls are separate
# statements (joining them with `+` only printed a stray numeric(0)).
plot(users_passed$net_average_clustering ~ users_passed$log_total_tracks, col = "black",
     main = "", xlab = "Всего аудиозаписей (log)", ylab = "Средний коэффициент кластеризации")
abline(m, col = "red")
m <- lm(net_average_clustering ~ pdict_size, data = users_passed)
summary(m)
##
## Call:
## lm(formula = net_average_clustering ~ pdict_size, data = users_passed)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4617 -0.0614 0.0172 0.0670 0.2972
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.460881 0.012802 36.00 <2e-16 ***
## pdict_size 0.000426 0.001141 0.37 0.71
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.113 on 142 degrees of freedom
## Multiple R-squared: 0.000981, Adjusted R-squared: -0.00605
## F-statistic: 0.139 on 1 and 142 DF, p-value: 0.709
# Scatter plot first, regression line second: base graphics calls are separate
# statements (joining them with `+` only printed a stray numeric(0)).
plot(users_passed$net_average_clustering ~ users_passed$pdict_size, col = "black",
     main = "", xlab = "Заполнено пунктов анкеты", ylab = "Средний коэффициент кластеризации")
abline(m, col = "red")
m <- lm(net_average_clustering ~ age, data = users_passed)
summary(m)
##
## Call:
## lm(formula = net_average_clustering ~ age, data = users_passed)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4390 -0.0485 0.0162 0.0733 0.3089
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.55347 0.04896 11.31 <2e-16 ***
## age -0.00347 0.00186 -1.86 0.065 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.112 on 142 degrees of freedom
## Multiple R-squared: 0.0238, Adjusted R-squared: 0.0169
## F-statistic: 3.46 on 1 and 142 DF, p-value: 0.065
# Scatter plot first, regression line second: base graphics calls are separate
# statements (joining them with `+` only printed a stray numeric(0)).
plot(users_passed$net_average_clustering ~ users_passed$age, col = "black",
     main = "", xlab = "Возраст", ylab = "Средний коэффициент кластеризации")
abline(m, col = "red")
Коэффициент эксцесса для жанров
m <- lm(genre_kurtosis ~ age, data = users_passed)
summary(m)
##
## Call:
## lm(formula = genre_kurtosis ~ age, data = users_passed)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.713 -1.657 -0.565 1.807 3.735
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.133 0.841 0.16 0.87
## age 0.027 0.032 0.84 0.40
##
## Residual standard error: 1.92 on 142 degrees of freedom
## Multiple R-squared: 0.00499, Adjusted R-squared: -0.00202
## F-statistic: 0.712 on 1 and 142 DF, p-value: 0.4
# Scatter plot first, regression line second: base graphics calls are separate
# statements (joining them with `+` only printed a stray numeric(0)).
plot(users_passed$genre_kurtosis ~ users_passed$age, col = "black", main = "",
     xlab = "Возраст", ylab = "Коэффициент эксцесса (жанровое распределение)")
abline(m, col = "red")
m <- lm(genre_kurtosis ~ log_total_tracks, data = users_passed)
summary(m)
##
## Call:
## lm(formula = genre_kurtosis ~ log_total_tracks, data = users_passed)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.73 -1.60 -0.57 1.85 3.68
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.9144 0.8709 1.05 0.30
## log_total_tracks -0.0166 0.1665 -0.10 0.92
##
## Residual standard error: 1.93 on 142 degrees of freedom
## Multiple R-squared: 7.04e-05, Adjusted R-squared: -0.00697
## F-statistic: 0.01 on 1 and 142 DF, p-value: 0.92
# Scatter plot first, regression line second: base graphics calls are separate
# statements (joining them with `+` only printed a stray numeric(0)).
plot(users_passed$genre_kurtosis ~ users_passed$log_total_tracks, col = "black",
     main = "", xlab = "Всего аудиозаписей (log)", ylab = "Коэффициент эксцесса (жанровое распределение)")
abline(m, col = "red")
m <- lm(genre_kurtosis ~ log_total_friends, data = users_passed)
summary(m)
##
## Call:
## lm(formula = genre_kurtosis ~ log_total_friends, data = users_passed)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.730 -1.627 -0.559 1.804 3.880
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.548 0.881 1.76 0.081 .
## log_total_friends -0.145 0.175 -0.83 0.408
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.92 on 142 degrees of freedom
## Multiple R-squared: 0.00483, Adjusted R-squared: -0.00218
## F-statistic: 0.689 on 1 and 142 DF, p-value: 0.408
# Scatter plot first, regression line second: base graphics calls are separate
# statements (joining them with `+` only printed a stray numeric(0)).
plot(users_passed$genre_kurtosis ~ users_passed$log_total_friends, col = "black",
     main = "", xlab = "Всего друзей (log)", ylab = "Коэффициент эксцесса (жанровое распределение)")
abline(m, col = "red")
m <- lm(genre_kurtosis ~ net_average_clustering, data = users_passed)
summary(m)
##
## Call:
## lm(formula = genre_kurtosis ~ net_average_clustering, data = users_passed)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.74 -1.61 -0.57 1.85 3.68
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.770 0.682 1.13 0.26
## net_average_clustering 0.127 1.427 0.09 0.93
##
## Residual standard error: 1.93 on 142 degrees of freedom
## Multiple R-squared: 5.58e-05, Adjusted R-squared: -0.00699
## F-statistic: 0.00792 on 1 and 142 DF, p-value: 0.929
# Scatter plot first, regression line second: base graphics calls are separate
# statements (joining them with `+` only printed a stray numeric(0)).
plot(users_passed$genre_kurtosis ~ users_passed$net_average_clustering, col = "black",
     main = "", xlab = "Коэффициент кластеризации", ylab = "Коэффициент эксцесса (жанровое распределение)")
abline(m, col = "red")
Жанры
POP
m <- lm(perc_genre_pop ~ net_average_clustering, data = users_passed)
summary(m)
##
## Call:
## lm(formula = perc_genre_pop ~ net_average_clustering, data = users_passed)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.2090 -0.1244 -0.0396 0.0902 0.6332
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1574 0.0549 2.87 0.0047 **
## net_average_clustering 0.0821 0.1149 0.71 0.4759
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.155 on 142 degrees of freedom
## Multiple R-squared: 0.00359, Adjusted R-squared: -0.00343
## F-statistic: 0.511 on 1 and 142 DF, p-value: 0.476
# Scatter plot first, regression line second: base graphics calls are separate
# statements (joining them with `+` only printed a stray numeric(0)).
plot(users_passed$perc_genre_pop ~ users_passed$net_average_clustering, col = "black",
     main = "", xlab = "Коэффициент кластеризации", ylab = "содержание жанра POP")
abline(m, col = "red")
m <- lm(perc_genre_pop ~ log_total_friends, data = users_passed)
summary(m)
##
## Call:
## lm(formula = perc_genre_pop ~ log_total_friends, data = users_passed)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.2176 -0.1266 -0.0395 0.0901 0.6497
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.14707 0.07109 2.07 0.04 *
## log_total_friends 0.00977 0.01409 0.69 0.49
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.155 on 142 degrees of freedom
## Multiple R-squared: 0.00337, Adjusted R-squared: -0.00365
## F-statistic: 0.48 on 1 and 142 DF, p-value: 0.489
# Scatter plot first, regression line second: base graphics calls are separate
# statements (joining them with `+` only printed a stray numeric(0)).
plot(users_passed$perc_genre_pop ~ users_passed$log_total_friends, col = "black",
     main = "", xlab = "Всего друзей (log)", ylab = "содержание жанра POP")
abline(m, col = "red")
m <- lm(perc_genre_pop ~ age, data = users_passed)
summary(m)
##
## Call:
## lm(formula = perc_genre_pop ~ age, data = users_passed)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.2381 -0.1247 -0.0409 0.1048 0.6378
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.09962 0.06746 1.48 0.14
## age 0.00372 0.00257 1.45 0.15
##
## Residual standard error: 0.154 on 142 degrees of freedom
## Multiple R-squared: 0.0146, Adjusted R-squared: 0.00761
## F-statistic: 2.1 on 1 and 142 DF, p-value: 0.15
# Scatter plot first, regression line second: base graphics calls are separate
# statements (joining them with `+` only printed a stray numeric(0)).
plot(users_passed$perc_genre_pop ~ users_passed$age, col = "black", main = "",
     xlab = "Возраст", ylab = "содержание жанра POP")
abline(m, col = "red")
ROCK
m <- lm(perc_genre_rock ~ net_average_clustering, data = users_passed)
summary(m)
##
## Call:
## lm(formula = perc_genre_rock ~ net_average_clustering, data = users_passed)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.1564 -0.1047 -0.0415 0.0434 0.5167
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1752 0.0511 3.43 0.0008 ***
## net_average_clustering -0.0467 0.1071 -0.44 0.6633
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.145 on 142 degrees of freedom
## Multiple R-squared: 0.00134, Adjusted R-squared: -0.00569
## F-statistic: 0.19 on 1 and 142 DF, p-value: 0.663
# Scatter plot first, regression line second: base graphics calls are separate
# statements (joining them with `+` only printed a stray numeric(0)).
plot(users_passed$perc_genre_rock ~ users_passed$net_average_clustering, col = "black",
     main = "", xlab = "Коэффициент кластеризации", ylab = "содержание жанра ROCK")
abline(m, col = "red")
m <- lm(perc_genre_rock ~ log_total_friends, data = users_passed)
summary(m)
##
## Call:
## lm(formula = perc_genre_rock ~ log_total_friends, data = users_passed)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.1875 -0.1022 -0.0375 0.0464 0.5401
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.2687 0.0656 4.10 7e-05 ***
## log_total_friends -0.0232 0.0130 -1.79 0.076 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.143 on 142 degrees of freedom
## Multiple R-squared: 0.022, Adjusted R-squared: 0.0151
## F-statistic: 3.19 on 1 and 142 DF, p-value: 0.0761
# Scatter plot first, regression line second: base graphics calls are separate
# statements (joining them with `+` only printed a stray numeric(0)).
plot(users_passed$perc_genre_rock ~ users_passed$log_total_friends, col = "black",
     main = "", xlab = "Всего друзей (log)", ylab = "содержание жанра ROCK")
abline(m, col = "red")
m <- lm(perc_genre_rock ~ age, data = users_passed)
summary(m)
##
## Call:
## lm(formula = perc_genre_rock ~ age, data = users_passed)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.2114 -0.1065 -0.0372 0.0565 0.5037
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.00957 0.06206 0.15 0.88
## age 0.00559 0.00236 2.36 0.02 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.142 on 142 degrees of freedom
## Multiple R-squared: 0.0378, Adjusted R-squared: 0.031
## F-statistic: 5.58 on 1 and 142 DF, p-value: 0.0195
# Scatter plot first, regression line second: base graphics calls are separate
# statements (joining them with `+` only printed a stray numeric(0)).
plot(users_passed$perc_genre_rock ~ users_passed$age, col = "black", main = "",
     xlab = "Возраст", ylab = "содержание жанра ROCK")
abline(m, col = "red")
CLASSICAL
m <- lm(perc_genre_classical ~ net_average_clustering, data = users_passed)
summary(m)
##
## Call:
## lm(formula = perc_genre_classical ~ net_average_clustering, data = users_passed)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.00780 -0.00721 -0.00696 -0.00047 0.09121
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.00605 0.00566 1.07 0.29
## net_average_clustering 0.00230 0.01184 0.19 0.85
##
## Residual standard error: 0.016 on 142 degrees of freedom
## Multiple R-squared: 0.000265, Adjusted R-squared: -0.00678
## F-statistic: 0.0377 on 1 and 142 DF, p-value: 0.846
# Scatter plot first, regression line second: base graphics calls are separate
# statements (joining them with `+` only printed a stray numeric(0)).
plot(users_passed$perc_genre_classical ~ users_passed$net_average_clustering,
     col = "black", main = "", xlab = "Коэффициент кластеризации",
     ylab = "содержание жанра CLASSICAL")
abline(m, col = "red")
m <- lm(perc_genre_classical ~ log_total_friends, data = users_passed)
summary(m)
##
## Call:
## lm(formula = perc_genre_classical ~ log_total_friends, data = users_passed)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.01174 -0.00735 -0.00672 -0.00028 0.09059
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.01174 0.00732 1.60 0.11
## log_total_friends -0.00093 0.00145 -0.64 0.52
##
## Residual standard error: 0.016 on 142 degrees of freedom
## Multiple R-squared: 0.00289, Adjusted R-squared: -0.00413
## F-statistic: 0.411 on 1 and 142 DF, p-value: 0.522
# Scatter plot first, regression line second: base graphics calls are separate
# statements (joining them with `+` only printed a stray numeric(0)).
plot(users_passed$perc_genre_classical ~ users_passed$log_total_friends, col = "black",
     main = "", xlab = "Всего друзей (log)", ylab = "содержание жанра CLASSICAL")
abline(m, col = "red")
m <- lm(perc_genre_classical ~ age, data = users_passed)
summary(m)
##
## Call:
## lm(formula = perc_genre_classical ~ age, data = users_passed)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.01763 -0.00726 -0.00482 0.00128 0.08805
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.008603 0.006863 -1.25 0.212
## age 0.000610 0.000261 2.33 0.021 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0157 on 142 degrees of freedom
## Multiple R-squared: 0.0369, Adjusted R-squared: 0.0302
## F-statistic: 5.45 on 1 and 142 DF, p-value: 0.021
# Scatter plot first, regression line second: base graphics calls are separate
# statements (joining them with `+` only printed a stray numeric(0)).
plot(users_passed$perc_genre_classical ~ users_passed$age, col = "black", main = "",
     xlab = "Возраст", ylab = "содержание жанра CLASSICAL")
abline(m, col = "red")
URBAN
m <- lm(perc_genre_urban ~ net_average_clustering, data = users_passed)
summary(m)
##
## Call:
## lm(formula = perc_genre_urban ~ net_average_clustering, data = users_passed)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.1361 -0.0633 -0.0198 0.0366 0.4213
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1556 0.0312 4.99 1.7e-06 ***
## net_average_clustering -0.1380 0.0653 -2.11 0.036 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0881 on 142 degrees of freedom
## Multiple R-squared: 0.0305, Adjusted R-squared: 0.0237
## F-statistic: 4.47 on 1 and 142 DF, p-value: 0.0363
# Scatter plot first, regression line second: base graphics calls are separate
# statements (joining them with `+` only printed a stray numeric(0)).
plot(users_passed$perc_genre_urban ~ users_passed$net_average_clustering, col = "black",
     main = "", xlab = "Коэффициент кластеризации", ylab = "содержание жанра URBAN")
abline(m, col = "red")
m <- lm(perc_genre_urban ~ log_total_friends, data = users_passed)
summary(m)
##
## Call:
## lm(formula = perc_genre_urban ~ log_total_friends, data = users_passed)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0965 -0.0634 -0.0272 0.0385 0.4548
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.03648 0.04075 0.90 0.37
## log_total_friends 0.01110 0.00808 1.37 0.17
##
## Residual standard error: 0.0889 on 142 degrees of freedom
## Multiple R-squared: 0.0131, Adjusted R-squared: 0.00617
## F-statistic: 1.89 on 1 and 142 DF, p-value: 0.172
# Scatter plot first, regression line second: base graphics calls are separate
# statements (joining them with `+` only printed a stray numeric(0)).
plot(users_passed$perc_genre_urban ~ users_passed$log_total_friends, col = "black",
     main = "", xlab = "Всего друзей (log)", ylab = "содержание жанра URBAN")
abline(m, col = "red")
m <- lm(perc_genre_urban ~ age, data = users_passed)
summary(m)
##
## Call:
## lm(formula = perc_genre_urban ~ age, data = users_passed)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.1111 -0.0608 -0.0274 0.0401 0.4443
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.16584 0.03863 4.29 3.2e-05 ***
## age -0.00288 0.00147 -1.96 0.052 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0883 on 142 degrees of freedom
## Multiple R-squared: 0.0263, Adjusted R-squared: 0.0195
## F-statistic: 3.84 on 1 and 142 DF, p-value: 0.052
# Scatter plot first, regression line second: base graphics calls are separate
# statements (joining them with `+` only printed a stray numeric(0)).
plot(users_passed$perc_genre_urban ~ users_passed$age, col = "black", main = "",
     xlab = "Возраст", ylab = "содержание жанра URBAN")
abline(m, col = "red")