friend network

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(igraph)                    # This is the package to analyze the network

Warning: package 'igraph' was built under R version 4.0.5


Attaching package: 'igraph'

The following objects are masked from 'package:lubridate':

    %--%, union

The following objects are masked from 'package:dplyr':

    as_data_frame, groups, union

The following objects are masked from 'package:purrr':

    compose, simplify

The following object is masked from 'package:tidyr':

    crossing

The following object is masked from 'package:tibble':

    as_data_frame

The following objects are masked from 'package:stats':

    decompose, spectrum

The following object is masked from 'package:base':

    union

library(visNetwork)                # Creates visualizations of the network
library(DT)
library(plotly)


Attaching package: 'plotly'

The following object is masked from 'package:igraph':

    groups

The following object is masked from 'package:ggplot2':

    last_plot

The following object is masked from 'package:stats':

    filter

The following object is masked from 'package:graphics':

    layout

library(broom)                     # includes some of our regression functions

students <- read_csv("friendship_nodes.csv")

Rows: 84 Columns: 2
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
dbl (2): id, name

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

student_links <- read_csv("friendships.csv")

Rows: 6972 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
dbl (4): nominator, nominated, score, expected_score

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

student_links %>% 
  datatable(rownames = F,
            caption = 'Nominator and Nominated Scores and Expected Scores')

The data table shown above shows scores for both the nominator and the nominated from a data set on closeness within friendships.

friendship_model <- lm(expected_score ~ score, data = student_links)
tidy(friendship_model)

# A tibble: 2 × 5
  term        estimate std.error statistic  p.value
  <chr>          <dbl>     <dbl>     <dbl>    <dbl>
1 (Intercept)   0.0987   0.00739      13.4 3.22e-40
2 score         0.940    0.00352     267.  0

glance(friendship_model)

# A tibble: 1 × 12
  r.squared adj.r.squared sigma statistic p.value    df logLik   AIC   BIC
      <dbl>         <dbl> <dbl>     <dbl>   <dbl> <dbl>  <dbl> <dbl> <dbl>
1     0.911         0.911 0.339    71178.       0     1 -2353. 4713. 4733.
# ℹ 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>

student_links %>% 
  plot_ly(x = ~score, y = ~expected_score) %>% 
  add_markers() %>% 
  add_lines(y = fitted(friendship_model))

The plot above shows how well one can predict expected scores versus the actual score of perceived closeness.

friends <- student_links %>% 
  filter(score > 3) %>% 
  select(nominator, nominated) %>% 
  rename(from = nominator) %>% 
  rename(to = nominated)

friendship_network <- graph_from_data_frame(friends, students, directed = T)

students %>% 
  mutate(named_friends = degree(friendship_network), mode = "out") %>% 
  mutate(named_by_others = degree((friendship_network), mode = "in"))%>% 
  mutate(popularity = named_by_others - named_friends) %>% 
  select(named_friends, named_by_others, popularity) %>% 
  datatable(rownames = F)

The dataset above shows a network object of popularity which was found by subtracting the amount of times they named other people by the amount of times they were named by others.

students %>% 
  mutate(named_friends = degree((friendship_network), mode = "out")) %>% 
  mutate(named_by_others = degree(friendship_network), mode = "in") %>% 
  plot_ly(x = ~named_friends) %>% 
  add_histogram() %>% 
  layout(title = "Amount of Friends Students Named")

The histogram shows the amount of times students named other students as friends.

reciprocity(friendship_network, mode = "ratio")

[1] 0.5205479

Students nominated each other on a reciprocal level 52% of the time.

bestfriends <- student_links %>% 
  filter(score > 4) %>% 
  select(nominator, nominated) %>% 
  rename(from = nominator) %>% 
  rename (to = nominated)

bestfriendship_network <- graph_from_data_frame(bestfriends, students, directed = T)

students %>% 
  mutate(group = membership(infomap.community(bestfriendship_network))) %>% 
  mutate(value = degree(bestfriendship_network)) %>% 
  visNetwork(friends, main = "Network of Best Friendships") %>% 
  visIgraphLayout(layout = "layout_nicely") %>% 
  visEdges(arrows = "to") %>% 
  visOptions(highlightNearest = T, nodesIdSelection = T)

The Network of Best Friendships show groups of people who similarly rated one another via color and number of best friendships based on size.