Lab1_ClaytonSmith

EDCI 6306: Lab 1 Independent Analysis

Author

Clayton Smith

Published

February 8, 2026

Research Question: Are high-achieving girls more central in the network compared to low-achieving girls?

For this lab, I will be using data from the case study.

Package Loading

library(tidyverse)
library(tidygraph)
library(readxl)
library(ggraph)

Importing student data

student_friends <- read_excel("data/student-reported-friends.xlsx", 
                              col_names = FALSE)

Double checking import

student_friends

# A tibble: 27 × 27
    ...1  ...2  ...3  ...4  ...5  ...6  ...7  ...8  ...9 ...10 ...11 ...12 ...13
   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
 1     0     1     0     1     1     1     1     1     1     0     0     1     0
 2     1     0     0     0     1     0     0     0     0     1     1     0     0
 3     1     0     0     1     0     0     0     1     0     1     0     0     0
 4     1     0     0     0     0     0     0     0     0     0     0     1     0
 5     1     1     0     1     0     1     1     1     1     0     1     1     1
 6     1     0     0     0     1     0     0     0     1     0     1     1     1
 7     1     0     1     1     0     0     0     0     1     0     0     0     1
 8     1     0     1     1     1     0     1     0     1     1     1     0     1
 9     1     0     0     0     0     1     1     0     0     0     1     0     1
10     1     1     1     1     1     0     1     1     0     0     1     1     1
# ℹ 17 more rows
# ℹ 14 more variables: ...14 <dbl>, ...15 <dbl>, ...16 <dbl>, ...17 <dbl>,
#   ...18 <dbl>, ...19 <dbl>, ...20 <dbl>, ...21 <dbl>, ...22 <dbl>,
#   ...23 <dbl>, ...24 <dbl>, ...25 <dbl>, ...26 <dbl>, ...27 <dbl>

Assigning names

rownames(student_friends) <- 1:27

colnames(student_friends) <- 1:27

Assigning attributes

student_attributes <- read_excel("data/student-attributes.xlsx")

student_attributes

# A tibble: 27 × 5
      id gender achievement gender_num achievement_num
   <dbl> <chr>  <chr>            <dbl>           <dbl>
 1     1 female high                 1               1
 2     2 male   average              0               2
 3     3 female average              1               2
 4     4 male   high                 0               1
 5     5 female average              1               2
 6     6 female average              1               2
 7     7 female high                 1               1
 8     8 female average              1               2
 9     9 female high                 1               1
10    10 male   low                  0               3
# ℹ 17 more rows

Importing teacher data

teacher_friends <- read_excel("data/teacher-reported-friends.xlsx", 
                              col_names = FALSE)

rownames(teacher_friends) <- 1:27

colnames(teacher_friends) <- 1:27

teacher_friends

# A tibble: 27 × 27
     `1`   `2`   `3`   `4`   `5`   `6`   `7`   `8`   `9`  `10`  `11`  `12`  `13`
 * <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
 1     0     0     0     1     0     0     0     0     0     0     0     1     0
 2     0     0     1     0     0     0     0     1     0     1     0     0     0
 3     0     1     0     0     0     0     0     1     0     1     0     0     0
 4     0     0     0     0     0     0     0     0     0     0     0     0     0
 5     1     0     0     0     0     0     0     0     0     0     0     1     1
 6     0     0     0     0     0     0     0     0     0     0     0     0     1
 7     0     0     0     0     0     0     0     0     0     0     0     0     1
 8     0     1     1     0     0     0     0     0     0     1     0     0     0
 9     0     0     0     0     0     0     0     0     0     0     1     0     0
10     0     1     1     0     0     0     0     1     0     0     0     0     0
# ℹ 17 more rows
# ℹ 14 more variables: `14` <dbl>, `15` <dbl>, `16` <dbl>, `17` <dbl>,
#   `18` <dbl>, `19` <dbl>, `20` <dbl>, `21` <dbl>, `22` <dbl>, `23` <dbl>,
#   `24` <dbl>, `25` <dbl>, `26` <dbl>, `27` <dbl>

Double checking import

student_friends

# A tibble: 27 × 27
     `1`   `2`   `3`   `4`   `5`   `6`   `7`   `8`   `9`  `10`  `11`  `12`  `13`
 * <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
 1     0     1     0     1     1     1     1     1     1     0     0     1     0
 2     1     0     0     0     1     0     0     0     0     1     1     0     0
 3     1     0     0     1     0     0     0     1     0     1     0     0     0
 4     1     0     0     0     0     0     0     0     0     0     0     1     0
 5     1     1     0     1     0     1     1     1     1     0     1     1     1
 6     1     0     0     0     1     0     0     0     1     0     1     1     1
 7     1     0     1     1     0     0     0     0     1     0     0     0     1
 8     1     0     1     1     1     0     1     0     1     1     1     0     1
 9     1     0     0     0     0     1     1     0     0     0     1     0     1
10     1     1     1     1     1     0     1     1     0     0     1     1     1
# ℹ 17 more rows
# ℹ 14 more variables: `14` <dbl>, `15` <dbl>, `16` <dbl>, `17` <dbl>,
#   `18` <dbl>, `19` <dbl>, `20` <dbl>, `21` <dbl>, `22` <dbl>, `23` <dbl>,
#   `24` <dbl>, `25` <dbl>, `26` <dbl>, `27` <dbl>

Matrix conversion

student_matrix <- as.matrix(student_friends)

Network conversion

student_network <- as_tbl_graph(student_matrix, 
                                directed = TRUE)

Extracting edge list via pipe function

student_edges <- student_network |>
  activate(edges) |>
  as_tibble()

student_edges

# A tibble: 203 × 3
    from    to weight
   <int> <int>  <dbl>
 1     1     2      1
 2     1     4      1
 3     1     5      1
 4     1     6      1
 5     1     7      1
 6     1     8      1
 7     1     9      1
 8     1    12      1
 9     1    17      1
10     1    21      1
# ℹ 193 more rows

Saving edges

write_csv(student_edges, "data/student-edges.csv")

Edge list creation

student_network <- tbl_graph(edges = student_edges, # specifies edges
                             nodes = student_attributes, # specifies nodes
                             directed = TRUE) # specifies directionality

student_network

# A tbl_graph: 27 nodes and 203 edges
#
# A directed simple graph with 2 components
#
# Node Data: 27 × 5 (active)
      id gender achievement gender_num achievement_num
   <dbl> <chr>  <chr>            <dbl>           <dbl>
 1     1 female high                 1               1
 2     2 male   average              0               2
 3     3 female average              1               2
 4     4 male   high                 0               1
 5     5 female average              1               2
 6     6 female average              1               2
 7     7 female high                 1               1
 8     8 female average              1               2
 9     9 female high                 1               1
10    10 male   low                  0               3
# ℹ 17 more rows
#
# Edge Data: 203 × 3
   from    to weight
  <int> <int>  <dbl>
1     1     2      1
2     1     4      1
3     1     5      1
# ℹ 200 more rows

Teacher network creation

teacher_matrix <- as.matrix(teacher_friends)

teacher_network <- as_tbl_graph(teacher_matrix,
                                directed = TRUE)

teacher_edges <- teacher_network |>
  activate(edges) |>
  as_tibble()

teacher_network <- tbl_graph(edges = teacher_edges, 
                             nodes = student_attributes, 
                             directed = TRUE)

teacher_network

# A tbl_graph: 27 nodes and 69 edges
#
# A directed simple graph with 6 components
#
# Node Data: 27 × 5 (active)
      id gender achievement gender_num achievement_num
   <dbl> <chr>  <chr>            <dbl>           <dbl>
 1     1 female high                 1               1
 2     2 male   average              0               2
 3     3 female average              1               2
 4     4 male   high                 0               1
 5     5 female average              1               2
 6     6 female average              1               2
 7     7 female high                 1               1
 8     8 female average              1               2
 9     9 female high                 1               1
10    10 male   low                  0               3
# ℹ 17 more rows
#
# Edge Data: 69 × 3
   from    to weight
  <int> <int>  <dbl>
1     1     4      1
2     1    12      1
3     1    27      1
# ℹ 66 more rows

Trial run from GPT on spreading the teacher data out based on code from the case study

set.seed(42)

ggraph(teacher_network, layout = "kk") + 
  geom_edge_link(
    arrow = arrow(length = unit(1.5, "mm")),
    end_cap = circle(2, "mm"),
    start_cap = circle(2, "mm"),
    alpha = 0.08,
    width = 0.25,
    colour = "grey65"
  ) +
  geom_node_point(aes(size = local_size(), color = gender), alpha = 0.95) +
  geom_node_text(aes(label = id), repel = TRUE, size = 3) +
  scale_size_continuous(range = c(2, 7), name = "Local size") +
  scale_color_manual(
    name = "Gender",
    breaks = c("male", "female"),
    values = c(male = "royalblue", female = "pink")
  ) +
  theme_graph() +
  theme(legend.position = "bottom")

Darkening the arrows by fixing alpha and width

set.seed(42)

ggraph(teacher_network, layout = "kk") + 
  geom_edge_link(
    arrow = arrow(length = unit(1.5, "mm")),
    end_cap = circle(2, "mm"),
    start_cap = circle(2, "mm"),
    alpha = 0.25,
    width = 0.5,
    colour = "grey35"
  ) +
  geom_node_point(aes(size = local_size(), color = gender), alpha = 0.95) +
  geom_node_text(aes(label = id), repel = TRUE, size = 3) +
  scale_size_continuous(range = c(2, 7), name = "Local size") +
  scale_color_manual(
    name = "Gender",
    breaks = c("male", "female"),
    values = c(male = "royalblue", female = "pink")
  ) +
  theme_graph() +
  theme(legend.position = "top")

Not bad. Now I want to isolate achievement colors based on gender. GPT suggests using “case”. Let’s see what happens. The case_when code was directly imported from GPT. By only coloring high and low achieving girls, I can make the crowded scatterplot feel easier to read.

set.seed(42)
teacher_plot <- ggraph(teacher_network, layout = "stress") + 
  geom_edge_link(
    arrow = arrow(length = unit(1.5, "mm")),
    end_cap = circle(2, "mm"),
    start_cap = circle(2, "mm"),
    alpha = 0.25,
    width = 0.5,
    colour = "grey35"
  ) +
  geom_node_point(
    aes(
      size = local_size(),
      color = dplyr::case_when(
        gender == "male" ~ "boys",
        gender == "female" & achievement == "low" ~ "girls_low",
        gender == "female" & achievement == "average" ~ "girls_average",
        gender == "female" & achievement == "high" ~ "girls_high",
        TRUE ~ "other"
      )
    ),
    alpha = 0.95
  ) +
  geom_node_text(aes(label = id), repel = TRUE, size = 3) +
  scale_size_continuous(range = c(2, 7), name = "Local size") +
  scale_color_manual(
    name = "Group",
    values = c(
      boys = "grey70",
      girls_low = "red",
      girls_average = "grey70",
      girls_high = "green",
      other = "grey70"
    ),
    breaks = c("girls_low", "girls_average", "girls_high", "boys")
  ) +
  theme_graph() +
  theme(legend.position = "top")

Inspecting after naming

teacher_plot

I am a fan of this sociogram. I believe this is the one I will stick with. I learned a ton during this lab. For example: set.seed (42) helped tremendously on spacing.

Now, to run the same graph for my student network.

set.seed(42)
 student_plot <- ggraph(student_network, layout = "stress") + 
  geom_edge_link(
    arrow = arrow(length = unit(1.5, "mm")),
    end_cap = circle(2, "mm"),
    start_cap = circle(2, "mm"),
    alpha = 0.25,
    width = 0.5,
    colour = "grey35"
  ) +
  geom_node_point(
    aes(
      size = local_size(),
      color = dplyr::case_when(
        gender == "male" ~ "boys",
        gender == "female" & achievement == "low" ~ "girls_low",
        gender == "female" & achievement == "average" ~ "girls_average",
        gender == "female" & achievement == "high" ~ "girls_high",
        TRUE ~ "other"
      )
    ),
    alpha = 0.95
  ) +
  geom_node_text(aes(label = id), repel = TRUE, size = 3) +
  scale_size_continuous(range = c(2, 7), name = "Local size") +
  scale_color_manual(
    name = "Group",
    values = c(
      boys = "grey70",
      girls_low = "red",
      girls_average = "grey70",
      girls_high = "green",
      other = "grey70"
    ),
    breaks = c("girls_low", "girls_average", "girls_high", "boys")
  ) +
  theme_graph() +
  theme(legend.position = "top")

Inspecting after naming

student_plot

Interesting… this is the opposite of the teacher perceptions. The high achieving girls are more central and the lower achieving girls are more peripheral.

Realistically, the teacher sociogram is irrelevant. It would be like polling people on who they thought their family voted for and using that as exit poll data on election night. So, based on the student sociogram, for this specific class with this specific data set, I would conclude that higher-achieving girls have more interactions than lower achieving girls.

Saving plots

ggsave("TeacherPlot_Lab1NAC.jpeg", plot = teacher_plot, width = 8, height = 6, dpi = 300)

ggsave("StudentPlot_Lab1NAC.jpeg", plot = student_plot, width = 8, height = 6, dpi = 300)