library(tidyverse)
library(tidygraph)
library(readxl)
library(ggraph)Lab1_ClaytonSmith
EDCI 6306: Lab 1 Independent Analysis
Research Question: Are high-achieving girls more central in the network compared to low-achieving girls?
For this lab, I will be using data from the case study.
Package Loading
Importing student data
student_friends <- read_excel("data/student-reported-friends.xlsx",
col_names = FALSE)Double checking import
student_friends# A tibble: 27 × 27
...1 ...2 ...3 ...4 ...5 ...6 ...7 ...8 ...9 ...10 ...11 ...12 ...13
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 0 1 0 1 1 1 1 1 1 0 0 1 0
2 1 0 0 0 1 0 0 0 0 1 1 0 0
3 1 0 0 1 0 0 0 1 0 1 0 0 0
4 1 0 0 0 0 0 0 0 0 0 0 1 0
5 1 1 0 1 0 1 1 1 1 0 1 1 1
6 1 0 0 0 1 0 0 0 1 0 1 1 1
7 1 0 1 1 0 0 0 0 1 0 0 0 1
8 1 0 1 1 1 0 1 0 1 1 1 0 1
9 1 0 0 0 0 1 1 0 0 0 1 0 1
10 1 1 1 1 1 0 1 1 0 0 1 1 1
# ℹ 17 more rows
# ℹ 14 more variables: ...14 <dbl>, ...15 <dbl>, ...16 <dbl>, ...17 <dbl>,
# ...18 <dbl>, ...19 <dbl>, ...20 <dbl>, ...21 <dbl>, ...22 <dbl>,
# ...23 <dbl>, ...24 <dbl>, ...25 <dbl>, ...26 <dbl>, ...27 <dbl>
Assigning names
rownames(student_friends) <- 1:27
colnames(student_friends) <- 1:27Assigning attributes
student_attributes <- read_excel("data/student-attributes.xlsx")
student_attributes# A tibble: 27 × 5
id gender achievement gender_num achievement_num
<dbl> <chr> <chr> <dbl> <dbl>
1 1 female high 1 1
2 2 male average 0 2
3 3 female average 1 2
4 4 male high 0 1
5 5 female average 1 2
6 6 female average 1 2
7 7 female high 1 1
8 8 female average 1 2
9 9 female high 1 1
10 10 male low 0 3
# ℹ 17 more rows
Importing teacher data
teacher_friends <- read_excel("data/teacher-reported-friends.xlsx",
col_names = FALSE)
rownames(teacher_friends) <- 1:27
colnames(teacher_friends) <- 1:27
teacher_friends# A tibble: 27 × 27
`1` `2` `3` `4` `5` `6` `7` `8` `9` `10` `11` `12` `13`
* <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 0 0 0 1 0 0 0 0 0 0 0 1 0
2 0 0 1 0 0 0 0 1 0 1 0 0 0
3 0 1 0 0 0 0 0 1 0 1 0 0 0
4 0 0 0 0 0 0 0 0 0 0 0 0 0
5 1 0 0 0 0 0 0 0 0 0 0 1 1
6 0 0 0 0 0 0 0 0 0 0 0 0 1
7 0 0 0 0 0 0 0 0 0 0 0 0 1
8 0 1 1 0 0 0 0 0 0 1 0 0 0
9 0 0 0 0 0 0 0 0 0 0 1 0 0
10 0 1 1 0 0 0 0 1 0 0 0 0 0
# ℹ 17 more rows
# ℹ 14 more variables: `14` <dbl>, `15` <dbl>, `16` <dbl>, `17` <dbl>,
# `18` <dbl>, `19` <dbl>, `20` <dbl>, `21` <dbl>, `22` <dbl>, `23` <dbl>,
# `24` <dbl>, `25` <dbl>, `26` <dbl>, `27` <dbl>
Double checking import
student_friends# A tibble: 27 × 27
`1` `2` `3` `4` `5` `6` `7` `8` `9` `10` `11` `12` `13`
* <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 0 1 0 1 1 1 1 1 1 0 0 1 0
2 1 0 0 0 1 0 0 0 0 1 1 0 0
3 1 0 0 1 0 0 0 1 0 1 0 0 0
4 1 0 0 0 0 0 0 0 0 0 0 1 0
5 1 1 0 1 0 1 1 1 1 0 1 1 1
6 1 0 0 0 1 0 0 0 1 0 1 1 1
7 1 0 1 1 0 0 0 0 1 0 0 0 1
8 1 0 1 1 1 0 1 0 1 1 1 0 1
9 1 0 0 0 0 1 1 0 0 0 1 0 1
10 1 1 1 1 1 0 1 1 0 0 1 1 1
# ℹ 17 more rows
# ℹ 14 more variables: `14` <dbl>, `15` <dbl>, `16` <dbl>, `17` <dbl>,
# `18` <dbl>, `19` <dbl>, `20` <dbl>, `21` <dbl>, `22` <dbl>, `23` <dbl>,
# `24` <dbl>, `25` <dbl>, `26` <dbl>, `27` <dbl>
Matrix conversion
student_matrix <- as.matrix(student_friends)Network conversion
student_network <- as_tbl_graph(student_matrix,
directed = TRUE)Extracting edge list via pipe function
student_edges <- student_network |>
activate(edges) |>
as_tibble()
student_edges# A tibble: 203 × 3
from to weight
<int> <int> <dbl>
1 1 2 1
2 1 4 1
3 1 5 1
4 1 6 1
5 1 7 1
6 1 8 1
7 1 9 1
8 1 12 1
9 1 17 1
10 1 21 1
# ℹ 193 more rows
Saving edges
write_csv(student_edges, "data/student-edges.csv")Edge list creation
student_network <- tbl_graph(edges = student_edges, # specifies edges
nodes = student_attributes, # specifies nodes
directed = TRUE) # specifies directionality
student_network# A tbl_graph: 27 nodes and 203 edges
#
# A directed simple graph with 2 components
#
# Node Data: 27 × 5 (active)
id gender achievement gender_num achievement_num
<dbl> <chr> <chr> <dbl> <dbl>
1 1 female high 1 1
2 2 male average 0 2
3 3 female average 1 2
4 4 male high 0 1
5 5 female average 1 2
6 6 female average 1 2
7 7 female high 1 1
8 8 female average 1 2
9 9 female high 1 1
10 10 male low 0 3
# ℹ 17 more rows
#
# Edge Data: 203 × 3
from to weight
<int> <int> <dbl>
1 1 2 1
2 1 4 1
3 1 5 1
# ℹ 200 more rows
Teacher network creation
teacher_matrix <- as.matrix(teacher_friends)
teacher_network <- as_tbl_graph(teacher_matrix,
directed = TRUE)
teacher_edges <- teacher_network |>
activate(edges) |>
as_tibble()
teacher_network <- tbl_graph(edges = teacher_edges,
nodes = student_attributes,
directed = TRUE)
teacher_network# A tbl_graph: 27 nodes and 69 edges
#
# A directed simple graph with 6 components
#
# Node Data: 27 × 5 (active)
id gender achievement gender_num achievement_num
<dbl> <chr> <chr> <dbl> <dbl>
1 1 female high 1 1
2 2 male average 0 2
3 3 female average 1 2
4 4 male high 0 1
5 5 female average 1 2
6 6 female average 1 2
7 7 female high 1 1
8 8 female average 1 2
9 9 female high 1 1
10 10 male low 0 3
# ℹ 17 more rows
#
# Edge Data: 69 × 3
from to weight
<int> <int> <dbl>
1 1 4 1
2 1 12 1
3 1 27 1
# ℹ 66 more rows
Trial run from GPT on spreading the teacher data out based on code from the case study
set.seed(42)
ggraph(teacher_network, layout = "kk") +
geom_edge_link(
arrow = arrow(length = unit(1.5, "mm")),
end_cap = circle(2, "mm"),
start_cap = circle(2, "mm"),
alpha = 0.08,
width = 0.25,
colour = "grey65"
) +
geom_node_point(aes(size = local_size(), color = gender), alpha = 0.95) +
geom_node_text(aes(label = id), repel = TRUE, size = 3) +
scale_size_continuous(range = c(2, 7), name = "Local size") +
scale_color_manual(
name = "Gender",
breaks = c("male", "female"),
values = c(male = "royalblue", female = "pink")
) +
theme_graph() +
theme(legend.position = "bottom")Darkening the arrows by fixing alpha and width
set.seed(42)
ggraph(teacher_network, layout = "kk") +
geom_edge_link(
arrow = arrow(length = unit(1.5, "mm")),
end_cap = circle(2, "mm"),
start_cap = circle(2, "mm"),
alpha = 0.25,
width = 0.5,
colour = "grey35"
) +
geom_node_point(aes(size = local_size(), color = gender), alpha = 0.95) +
geom_node_text(aes(label = id), repel = TRUE, size = 3) +
scale_size_continuous(range = c(2, 7), name = "Local size") +
scale_color_manual(
name = "Gender",
breaks = c("male", "female"),
values = c(male = "royalblue", female = "pink")
) +
theme_graph() +
theme(legend.position = "top")Not bad. Now I want to isolate achievement colors based on gender. GPT suggests using “case”. Let’s see what happens. The case_when code was directly imported from GPT. By only coloring high and low achieving girls, I can make the crowded scatterplot feel easier to read.
set.seed(42)
teacher_plot <- ggraph(teacher_network, layout = "stress") +
geom_edge_link(
arrow = arrow(length = unit(1.5, "mm")),
end_cap = circle(2, "mm"),
start_cap = circle(2, "mm"),
alpha = 0.25,
width = 0.5,
colour = "grey35"
) +
geom_node_point(
aes(
size = local_size(),
color = dplyr::case_when(
gender == "male" ~ "boys",
gender == "female" & achievement == "low" ~ "girls_low",
gender == "female" & achievement == "average" ~ "girls_average",
gender == "female" & achievement == "high" ~ "girls_high",
TRUE ~ "other"
)
),
alpha = 0.95
) +
geom_node_text(aes(label = id), repel = TRUE, size = 3) +
scale_size_continuous(range = c(2, 7), name = "Local size") +
scale_color_manual(
name = "Group",
values = c(
boys = "grey70",
girls_low = "red",
girls_average = "grey70",
girls_high = "green",
other = "grey70"
),
breaks = c("girls_low", "girls_average", "girls_high", "boys")
) +
theme_graph() +
theme(legend.position = "top")Inspecting after naming
teacher_plotI am a fan of this sociogram. I believe this is the one I will stick with. I learned a ton during this lab. For example: set.seed (42) helped tremendously on spacing.
Now, to run the same graph for my student network.
set.seed(42)
student_plot <- ggraph(student_network, layout = "stress") +
geom_edge_link(
arrow = arrow(length = unit(1.5, "mm")),
end_cap = circle(2, "mm"),
start_cap = circle(2, "mm"),
alpha = 0.25,
width = 0.5,
colour = "grey35"
) +
geom_node_point(
aes(
size = local_size(),
color = dplyr::case_when(
gender == "male" ~ "boys",
gender == "female" & achievement == "low" ~ "girls_low",
gender == "female" & achievement == "average" ~ "girls_average",
gender == "female" & achievement == "high" ~ "girls_high",
TRUE ~ "other"
)
),
alpha = 0.95
) +
geom_node_text(aes(label = id), repel = TRUE, size = 3) +
scale_size_continuous(range = c(2, 7), name = "Local size") +
scale_color_manual(
name = "Group",
values = c(
boys = "grey70",
girls_low = "red",
girls_average = "grey70",
girls_high = "green",
other = "grey70"
),
breaks = c("girls_low", "girls_average", "girls_high", "boys")
) +
theme_graph() +
theme(legend.position = "top")Inspecting after naming
student_plotInteresting… this is the opposite of the teacher perceptions. The high achieving girls are more central and the lower achieving girls are more peripheral.
Realistically, the teacher sociogram is irrelevant. It would be like polling people on who they thought their family voted for and using that as exit poll data on election night. So, based on the student sociogram, for this specific class with this specific data set, I would conclude that higher-achieving girls have more interactions than lower achieving girls.
Saving plots
ggsave("TeacherPlot_Lab1NAC.jpeg", plot = teacher_plot, width = 8, height = 6, dpi = 300)ggsave("StudentPlot_Lab1NAC.jpeg", plot = student_plot, width = 8, height = 6, dpi = 300)