# title: "Job Placement"
# author: "Maisha Subin"
# format: html

# Console printout of six rows (row names 1-6) of the placement data set.
# Wide columns wrap onto continuation blocks; kept as comments so the
# script still parses.
#
#   id            name gender age     degree                        stream
# 1  1        John Doe   Male  25 Bachelor's              Computer Science
# 2  2      Jane Smith Female  24 Bachelor's        Electrical Engineering
# 3  3 Michael Johnson   Male  26 Bachelor's        Mechanical Engineering
# 4  4     Emily Davis Female  23 Bachelor's        Information Technology
# 5  5     David Brown   Male  24 Bachelor's              Computer Science
# 6  6    Sarah Wilson Female  25 Bachelor's Electronics and Communication
#                            college_name placement_status salary gpa
# 1                    Harvard University           Placed  60000 3.7
# 2 Massachusetts Institute of Technology           Placed  65000 3.6
# 3                   Stanford University           Placed  58000 3.8
# 4                       Yale University       Not Placed      0 3.5
# 5                  Princeton University           Placed  62000 3.9
# 6                   Columbia University           Placed  63000 3.7
#   years_of_experience
# 1                   2
# 2                   1
# 3                   3
# 4                   2
# 5                   2
# 6                   1
# Box plot of GPA by placement status (Placed / Not Placed)
#
# ggplot2 is attached here because this is the first plot in the script and
# the script's other library() calls that provide ggplot() come after this
# point; attaching a package a second time later on is harmless.
library(ggplot2)

# NOTE(review): `data` is not created anywhere in the visible script; it is
# assumed to be a data frame with `placement_status` and `gpa` columns that
# was loaded beforehand -- confirm.
ggplot(data, aes(x = placement_status, y = gpa, fill = placement_status)) +
  geom_boxplot() +
  labs(
    title = "GPA vs Placement Status",
    x = "Placement Status",
    y = "GPA"
  ) +
  theme_minimal()

# Sankey diagram: stream/major vs. placement status
library(networkD3)
library(dplyr)

# Flow table: one row per (stream, placement_status) combination present in
# `data`, with the number of matching rows in column `n`.
stream_flow <- data %>%
  count(stream, placement_status)

# Node table: every distinct label used as a flow source (stream) or target
# (placement status). Both columns are coerced to character first, because
# c() applied to factor columns can yield integer codes rather than labels
# (e.g. on R versions before 4.1), which would corrupt the node names and
# make the match() lookups below return NA.
nodes <- data.frame(
  name = unique(c(
    as.character(stream_flow$stream),
    as.character(stream_flow$placement_status)
  ))
)

# Link table: `source` and `target` are row positions in `nodes`, shifted by
# one so that they are zero-based; `value` is the flow width (row count).
links <- stream_flow %>%
  mutate(
    source = match(stream, nodes$name) - 1,
    target = match(placement_status, nodes$name) - 1,
    value = n
  )

# Draw the diagram; the string arguments name the columns of `links` and
# `nodes` to use.
sankeyNetwork(
  Links = links,
  Nodes = nodes,
  Source = "source",
  Target = "target",
  Value = "value",
  NodeID = "name",
  fontSize = 12,
  nodeWidth = 30
)
library(tidyverse)
library(ggplot2)
library(networkD3)

# Sankey diagram: gender vs. placement status

# Flow table: one row per (gender, placement_status) combination present in
# `data`, with the number of matching rows in column `n`.
gender_flow <- data %>%
  count(gender, placement_status)

# Node table: every distinct label used as a flow source (gender) or target
# (placement status). Both columns are coerced to character first, because
# c() applied to factor columns can yield integer codes rather than labels
# (e.g. on R versions before 4.1), which would corrupt the node names and
# make the match() lookups below return NA.
nodes2 <- data.frame(
  name = unique(c(
    as.character(gender_flow$gender),
    as.character(gender_flow$placement_status)
  ))
)

# Link table: `source` and `target` are row positions in `nodes2`, shifted by
# one so that they are zero-based; `value` is the flow width (row count).
links2 <- gender_flow %>%
  mutate(
    source = match(gender, nodes2$name) - 1,
    target = match(placement_status, nodes2$name) - 1,
    value = n
  )

# Draw the diagram; the string arguments name the columns of `links2` and
# `nodes2` to use.
sankeyNetwork(
  Links = links2,
  Nodes = nodes2,
  Source = "source",
  Target = "target",
  Value = "value",
  NodeID = "name",
  fontSize = 12,
  nodeWidth = 30
)