# Load every survey extract used in this report.
# The directory is stated once so that moving the project (or running it on
# another machine) only requires changing this single line.
data_dir <- "C:\\Users\\etfie\\OneDrive\\Data Visualization"

data_inst <- read.csv(file.path(data_dir, "data_institution.csv"))
data_wide <- read.csv(file.path(data_dir, "data_wide.csv"))
data_long <- read.csv(file.path(data_dir, "data_long.csv"))
data_pre <- read.csv(file.path(data_dir, "data_pre.csv"))
data_post <- read.csv(file.path(data_dir, "data_post.csv"))

1. In what ways are different groups of students impacted by their experience in FYS?

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)

library(dplyr)
library(ggplot2)

# Pre/post comparison of individual NEOS items, split by gender.
#
# These charts are built from the survey responses already loaded into
# `data_pre` and `data_post`. Randomly generated placeholder values must not
# be used here: they would make the charts change on every run and say
# nothing about the actual students. The loaded data frames are left
# untouched so later sections still see all of their columns.
# NOTE(review): assumes data_pre carries the same Gender / NEOS columns that
# str(data_post) reports -- confirm against the pre-survey file.

# Count responses per Group (Pre/Post), Gender and score for one survey item.
#
# pre, post: data frames containing a `Gender` column and the `item` column.
# item:      name of the score column, as a string (e.g. "NEOS4").
# Returns a data frame with columns Group, Gender, <item>, Count.
summarise_pre_post <- function(pre, post, item) {
  bind_rows(
    Pre = select(pre, Gender, all_of(item)),
    Post = select(post, Gender, all_of(item)),
    .id = "Group"
  ) %>%
    # Unanswered items would otherwise show up as an "NA" score category.
    filter(!is.na(.data[[item]])) %>%
    # Fix the facet order so "Pre" is drawn to the left of "Post".
    mutate(Group = factor(Group, levels = c("Pre", "Post"))) %>%
    count(Group, Gender, .data[[item]], name = "Count")
}

# Dodged bar chart of the counts from summarise_pre_post(), one facet per Group.
#
# summary_df: output of summarise_pre_post().
# item:       name of the score column, as a string.
# title:      plot title.
plot_pre_post <- function(summary_df, item, title) {
  ggplot(
    summary_df,
    aes(x = as.factor(.data[[item]]), y = Count, fill = Gender)
  ) +
    geom_col(position = "dodge") +
    facet_wrap(~ Group, nrow = 1) +
    labs(
      title = title,
      x = paste(item, "Score"),
      y = "Count",
      fill = "Gender"
    ) +
    theme_minimal() +
    theme(
      axis.text.x = element_text(size = 10),
      legend.position = "top"
    )
}

NEOS4_summary <- summarise_pre_post(data_pre, data_post, "NEOS4")
plot_pre_post(
  NEOS4_summary,
  "NEOS4",
  "Accepting Failures as a Necessary Part of Problem Solving (Pre and Post)"
)

NEOS7_summary <- summarise_pre_post(data_pre, data_post, "NEOS7")
plot_pre_post(
  NEOS7_summary,
  "NEOS7",
  "Finding More Than One Way to Solve a Problem (Pre and Post)"
)

2. What groups of students are most at risk for retention?

# Start this section from the post-survey file as stored on disk.
data_post <- read.csv(
  file = "C:\\Users\\etfie\\OneDrive\\Data Visualization\\data_post.csv"
)

library(tidyr)
library(dplyr)
library(ggplot2)

# Print the column names and types before building the plots below.
str(object = data_post)
## 'data.frame':    155 obs. of  27 variables:
##  $ major_1             : chr  "History" "Biology" "Psychology" "Environmental Science" ...
##  $ major_2             : chr  NA NA NA NA ...
##  $ minor_1             : chr  NA NA NA NA ...
##  $ GPA_Career          : num  3.29 1.25 3.06 2.51 3.59 3.52 4 3.33 3.17 3.35 ...
##  $ FYS_Section         : int  10 13 15 5 12 10 3 9 14 2 ...
##  $ FYS_Section_Name    : chr  "Section 10 - Taylor - MW 11:15am - 12:15 pm" "Section 13 - Monroe - Online" "Section 15 - Moore - MW 5:30-6:30 pm" "Section 5 - Coleman - TR 1:30 - 2:30 pm" ...
##  $ Fall_Year_2_Enrolled: chr  "YES" "NO" "NO" "NO" ...
##  $ Gender              : chr  "Transgender" "Prefer not to say" "Woman" "Woman" ...
##  $ GMS1                : int  6 3 5 5 5 5 5 4 6 4 ...
##  $ GMS2                : int  6 5 4 5 4 6 5 4 6 6 ...
##  $ GMS3                : int  6 6 3 4 5 6 5 5 5 5 ...
##  $ SMM1                : int  2 2 1 1 3 3 0 3 0 1 ...
##  $ SMM2                : int  2 3 2 3 2 1 2 4 4 2 ...
##  $ SMM3                : int  2 0 1 1 1 2 2 3 0 2 ...
##  $ NEOS1               : int  8 5 10 5 5 7 8 8 2 10 ...
##  $ NEOS2               : int  9 3 10 4 8 9 9 9 3 6 ...
##  $ NEOS3               : int  7 3 10 5 3 10 10 7 7 8 ...
##  $ NEOS4               : int  9 4 10 6 6 9 9 7 9 8 ...
##  $ NEOS5               : int  8 4 10 4 6 10 10 9 4 6 ...
##  $ NEOS6               : int  10 1 7 5 1 9 9 6 7 4 ...
##  $ NEOS7               : int  9 5 10 6 7 10 10 8 8 8 ...
##  $ NEOS8               : int  9 5 9 5 7 10 10 9 8 8 ...
##  $ NEOS9               : int  8 6 10 7 6 7 10 8 5 7 ...
##  $ Value1              : int  NA NA 5 4 4 5 5 5 NA 5 ...
##  $ Value2              : int  NA 3 5 5 4 5 4 NA 3 3 ...
##  $ Value3              : int  NA NA 2 5 4 4 5 NA 3 3 ...
##  $ Value4              : int  NA 5 1 4 4 1 3 2 2 3 ...
# Recode the second-year enrollment flag to numeric: "YES" -> 1, "NO" -> 0,
# anything else (including missing values) -> NA.
data_post <- data_post %>%
  mutate(
    Fall_Year_2_Enrolled = case_when(
      Fall_Year_2_Enrolled == "YES" ~ 1,
      Fall_Year_2_Enrolled == "NO" ~ 0,
      TRUE ~ NA_real_
    )
  )

# One point per student: enrollment flag on x, first major on y. Points are
# jittered slightly in both directions so overlapping students stay visible.
ggplot(data = data_post, mapping = aes(x = Fall_Year_2_Enrolled, y = major_1)) +
  geom_point(
    mapping = aes(color = as.factor(Fall_Year_2_Enrolled)),
    position = position_jitter(width = 0.05, height = 0.2)
  ) +
  ggtitle("Scatter Plot of Fall Year 2 Enrollment by Major") +
  xlab("Fall Year 2 Enrollment (0 = NO, 1 = YES)") +
  ylab("Major") +
  labs(color = "Enrollment Status") +
  theme_minimal() +
  theme(axis.text.y = element_text(size = 10), legend.position = "bottom")

3. What differences (if any) in student outcomes do you observe between the different sections offered?

# Career GPA of every student, grouped by FYS section number. Jitter is
# horizontal only (height = 0), so each point's GPA value is not shifted.
ggplot(data = data_post, mapping = aes(x = FYS_Section, y = GPA_Career)) +
  geom_point(
    position = position_jitter(width = 0.2, height = 0),
    color = "steelblue",
    alpha = 0.6
  ) +
  # Label each of the section numbers 1 through 16 on the axis.
  scale_x_continuous(breaks = 1:16, labels = 1:16) +
  ggtitle("Individual GPA by Section") +
  xlab("Section") +
  ylab("GPA") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

4. What differences (if any) in student outcomes do you observe between the different types of instructors teaching FYS (adjunct, staff, or faculty)?

# Re-read the post-survey file so Fall_Year_2_Enrolled holds the values
# stored in the CSV for this section.
data_post <- read.csv(
  file = "C:\\Users\\etfie\\OneDrive\\Data Visualization\\data_post.csv"
)

library(ggplot2)
library(dplyr)

# Number of students for each combination of section and enrollment status.
enrolled_summary <- summarise(
  group_by(data_post, FYS_Section, Fall_Year_2_Enrolled),
  Count = n(),
  .groups = "drop"
)

# Side-by-side bars per section, one bar per enrollment status.
ggplot(
  data = enrolled_summary,
  mapping = aes(x = FYS_Section, y = Count, fill = Fall_Year_2_Enrolled)
) +
  geom_col(position = "dodge") +
  scale_x_continuous(breaks = 1:16, labels = 1:16) +
  ggtitle("Fall Year 2 Enrolled by FYS Section") +
  xlab("FYS Section") +
  ylab("Count of Students") +
  labs(fill = "Enrollment Status") +
  theme_minimal() +
  theme(
    legend.position = "top",
    axis.text.x = element_text(size = 10, angle = 45, hjust = 1)
  )

5. What differences (if any) in student outcomes do you observe between the different formats of FYS (online or in-person)?

library(ggplot2)

# Bar chart of the Value1 rating across all rows of data_wide.
#
# data_wide is the data frame read from data_wide.csv at the top of the file.
# It is not a dataset shipped with the mosaicData package, so no data() call
# is made here (that call only produced a "data set not found" warning).
# NOTE(review): nothing in this chunk restricts the rows to in-person
# sections -- confirm that Value1 is the in-person item the title implies.
ggplot(data_wide, aes(x = Value1)) +
  # Students who left the item blank are dropped without a warning.
  geom_bar(na.rm = TRUE) +
  labs(
    title = "FYS in person",
    subtitle = "In person class",
    x = "Not helpful to helpful 1-5",
    y = "# People"
  )

library(ggplot2)

# Bar chart of the Value2 rating across all rows of data_wide.
#
# data_wide is the data frame read from data_wide.csv at the top of the file.
# It is not a dataset shipped with the mosaicData package, so no data() call
# is made here (that call only produced a "data set not found" warning).
# NOTE(review): nothing in this chunk restricts the rows to online
# sections -- confirm that Value2 is the online item the title implies.
ggplot(data_wide, aes(x = Value2)) +
  # Students who left the item blank are dropped without a warning.
  geom_bar(na.rm = TRUE) +
  labs(
    title = "FYS online",
    subtitle = "Online Class",
    x = "Not helpful to helpful 1-5",
    y = "# People"
  )