Final Project

# Folder that holds the project's CSV exports. Change this single value to run
# the analysis from a different machine or directory.
data_dir <- "C:\\Users\\etfie\\OneDrive\\Data Visualization"

# Read one CSV from data_dir. fsep = "\\" keeps the resulting paths identical
# to the fully spelled-out Windows paths used elsewhere in this document.
read_project_csv <- function(file_name) {
  read.csv(file.path(data_dir, file_name, fsep = "\\"))
}

data_inst <- read_project_csv("data_institution.csv")
data_wide <- read_project_csv("data_wide.csv")
data_long <- read_project_csv("data_long.csv")
data_pre <- read_project_csv("data_pre.csv")
data_post <- read_project_csv("data_post.csv")

1. In what ways are different groups of students impacted by their experience in FYS?

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggplot2)

# Pre/post comparison of NEOS4 responses by gender, built from the survey data
# loaded at the top of the document. This chunk previously replaced data_pre
# and data_post with sample()-generated values, so the chart showed random
# noise rather than student responses; it also left the loaded data frames
# overwritten for later chunks.
# NOTE(review): assumes data_pre has Gender and NEOS4 columns like data_post
# does -- confirm against data_pre.csv.
combined_data <- bind_rows(
  data_post %>% select(Gender, NEOS4) %>% mutate(Group = "Post"),
  data_pre %>% select(Gender, NEOS4) %>% mutate(Group = "Pre")
) %>%
  # Unanswered items would otherwise show up as an "NA" score bar.
  filter(!is.na(NEOS4)) %>%
  # Fix the facet order so "Pre" is drawn to the left of "Post".
  mutate(Group = factor(Group, levels = c("Pre", "Post")))

# Number of students per survey wave, gender, and score.
NEOS4_summary <- combined_data %>%
  count(Group, Gender, NEOS4, name = "Count")

# Counts are precomputed, so geom_col() draws them as-is.
ggplot(NEOS4_summary, aes(x = as.factor(NEOS4), y = Count, fill = Gender)) +
  geom_col(position = "dodge") +
  facet_wrap(~ Group, nrow = 1) +
  labs(
    title = "Accepting Failures as a Necessary Part of Problem Solving (Pre and Post)",
    x = "NEOS4 Score",
    y = "Count",
    fill = "Gender"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(size = 10),
    legend.position = "top"
  )

library(dplyr)
library(ggplot2)

# Pre/post comparison of NEOS7 responses by gender, built from the survey data
# loaded at the top of the document. This chunk previously replaced data_pre
# and data_post with sample()-generated values, so the chart showed random
# noise rather than student responses; it also left the loaded data frames
# overwritten for later chunks.
# NOTE(review): assumes data_pre has Gender and NEOS7 columns like data_post
# does -- confirm against data_pre.csv.
combined_data <- bind_rows(
  data_post %>% select(Gender, NEOS7) %>% mutate(Group = "Post"),
  data_pre %>% select(Gender, NEOS7) %>% mutate(Group = "Pre")
) %>%
  # Unanswered items would otherwise show up as an "NA" score bar.
  filter(!is.na(NEOS7)) %>%
  # Fix the facet order so "Pre" is drawn to the left of "Post".
  mutate(Group = factor(Group, levels = c("Pre", "Post")))

# Number of students per survey wave, gender, and score.
NEOS7_summary <- combined_data %>%
  count(Group, Gender, NEOS7, name = "Count")

# Counts are precomputed, so geom_col() draws them as-is.
ggplot(NEOS7_summary, aes(x = as.factor(NEOS7), y = Count, fill = Gender)) +
  geom_col(position = "dodge") +
  facet_wrap(~ Group, nrow = 1) +
  labs(
    title = "Finding More Than One Way to Solve a Problem (Pre and Post)",
    x = "NEOS7 Score",
    y = "Count",
    fill = "Gender"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(size = 10),
    legend.position = "top"
  )

2. What groups of students are most at risk for retention?

# Re-read the post-survey file so this section starts from the CSV contents
# rather than whatever data_post held after the previous section.
data_post <- read.csv(
  file = "C:\\Users\\etfie\\OneDrive\\Data Visualization\\data_post.csv"
)

library(tidyr)
library(dplyr)
library(ggplot2)

# Show each column's name, type, and first few values.
str(object = data_post)

## 'data.frame':    155 obs. of  27 variables:
##  $ major_1             : chr  "History" "Biology" "Psychology" "Environmental Science" ...
##  $ major_2             : chr  NA NA NA NA ...
##  $ minor_1             : chr  NA NA NA NA ...
##  $ GPA_Career          : num  3.29 1.25 3.06 2.51 3.59 3.52 4 3.33 3.17 3.35 ...
##  $ FYS_Section         : int  10 13 15 5 12 10 3 9 14 2 ...
##  $ FYS_Section_Name    : chr  "Section 10 - Taylor - MW 11:15am - 12:15 pm" "Section 13 - Monroe - Online" "Section 15 - Moore - MW 5:30-6:30 pm" "Section 5 - Coleman - TR 1:30 - 2:30 pm" ...
##  $ Fall_Year_2_Enrolled: chr  "YES" "NO" "NO" "NO" ...
##  $ Gender              : chr  "Transgender" "Prefer not to say" "Woman" "Woman" ...
##  $ GMS1                : int  6 3 5 5 5 5 5 4 6 4 ...
##  $ GMS2                : int  6 5 4 5 4 6 5 4 6 6 ...
##  $ GMS3                : int  6 6 3 4 5 6 5 5 5 5 ...
##  $ SMM1                : int  2 2 1 1 3 3 0 3 0 1 ...
##  $ SMM2                : int  2 3 2 3 2 1 2 4 4 2 ...
##  $ SMM3                : int  2 0 1 1 1 2 2 3 0 2 ...
##  $ NEOS1               : int  8 5 10 5 5 7 8 8 2 10 ...
##  $ NEOS2               : int  9 3 10 4 8 9 9 9 3 6 ...
##  $ NEOS3               : int  7 3 10 5 3 10 10 7 7 8 ...
##  $ NEOS4               : int  9 4 10 6 6 9 9 7 9 8 ...
##  $ NEOS5               : int  8 4 10 4 6 10 10 9 4 6 ...
##  $ NEOS6               : int  10 1 7 5 1 9 9 6 7 4 ...
##  $ NEOS7               : int  9 5 10 6 7 10 10 8 8 8 ...
##  $ NEOS8               : int  9 5 9 5 7 10 10 9 8 8 ...
##  $ NEOS9               : int  8 6 10 7 6 7 10 8 5 7 ...
##  $ Value1              : int  NA NA 5 4 4 5 5 5 NA 5 ...
##  $ Value2              : int  NA 3 5 5 4 5 4 NA 3 3 ...
##  $ Value3              : int  NA NA 2 5 4 4 5 NA 3 3 ...
##  $ Value4              : int  NA 5 1 4 4 1 3 2 2 3 ...

# Recode the enrollment flag to numbers: "YES" becomes 1, "NO" becomes 0, and
# anything else (including missing values) becomes NA.
data_post <- mutate(
  data_post,
  Fall_Year_2_Enrolled = case_when(
    Fall_Year_2_Enrolled == "YES" ~ 1,
    Fall_Year_2_Enrolled == "NO" ~ 0
  )
)

# One jittered point per student: enrollment flag on x, first major on y,
# coloured by the enrollment flag.
ggplot(data = data_post, mapping = aes(x = Fall_Year_2_Enrolled, y = major_1)) +
  geom_jitter(
    mapping = aes(color = as.factor(Fall_Year_2_Enrolled)),
    width = 0.05,
    height = 0.2
  ) +
  ggtitle("Scatter Plot of Fall Year 2 Enrollment by Major") +
  xlab("Fall Year 2 Enrollment (0 = NO, 1 = YES)") +
  ylab("Major") +
  labs(color = "Enrollment Status") +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    axis.text.y = element_text(size = 10)
  )

3. What differences (if any) in student outcomes do you observe between the different sections offered?

# One point per student: career GPA against FYS section number. Points are
# jittered horizontally only (height = 0), so GPA values are drawn exactly.
ggplot(data = data_post, mapping = aes(x = FYS_Section, y = GPA_Career)) +
  geom_jitter(color = "steelblue", alpha = 0.6, width = 0.2, height = 0) +
  labs(
    title = "Individual GPA by Section",
    x = "Section",
    y = "GPA"
  ) +
  # Force a tick and label for each of sections 1 through 16.
  scale_x_continuous(breaks = seq_len(16), labels = seq_len(16)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

4. What differences (if any) in student outcomes do you observe between the different types of instructors teaching FYS (adjunct, staff, or faculty)?

# Re-read the post-survey file so Fall_Year_2_Enrolled comes straight from the
# CSV rather than from the recoded copy made in an earlier section.
data_post <- read.csv(
  file = "C:\\Users\\etfie\\OneDrive\\Data Visualization\\data_post.csv"
)

library(ggplot2)
library(dplyr)

# Number of students for each section / enrollment-status combination.
enrolled_summary <- summarise(
  group_by(data_post, FYS_Section, Fall_Year_2_Enrolled),
  Count = n(),
  .groups = "drop"
)

# Side-by-side bars per section, one bar per enrollment status.
ggplot(
  data = enrolled_summary,
  mapping = aes(x = FYS_Section, y = Count, fill = Fall_Year_2_Enrolled)
) +
  geom_col(position = "dodge") +
  scale_x_continuous(breaks = seq_len(16), labels = seq_len(16)) +
  ggtitle("Fall Year 2 Enrolled by FYS Section") +
  xlab("FYS Section") +
  ylab("Count of Students") +
  labs(fill = "Enrollment Status") +
  theme_minimal() +
  theme(
    legend.position = "top",
    axis.text.x = element_text(size = 10, angle = 45, hjust = 1)
  )

5. What differences (if any) in student outcomes do you observe between the different formats of FYS (online or in-person)?

library(ggplot2)

# data_wide is the data frame read from data_wide.csv at the top of this
# document. The former data(data_wide, package = "mosaicData") call was
# removed: mosaicData has no dataset of that name, so the call only produced a
# "data set 'data_wide' not found" warning and loaded nothing.
#
# Bar chart of how many respondents gave each Value1 rating.
# NOTE(review): this counts Value1 over every row of data_wide; no filter on
# class format is applied here, so confirm that Value1 really corresponds to
# the in-person experience named in the title.
ggplot(data_wide, aes(x = Value1)) +
  geom_bar() +
  labs(
    title = "FYS in person",
    subtitle = "In person class",
    x = "Not helpful to helpful 1-5",
    y = "# People"
  )

## Warning: Removed 11 rows containing non-finite outside the scale range
## (`stat_count()`).

library(ggplot2)

# data_wide is the data frame read from data_wide.csv at the top of this
# document. The former data(data_wide, package = "mosaicData") call was
# removed: mosaicData has no dataset of that name, so the call only produced a
# "data set 'data_wide' not found" warning and loaded nothing.
#
# Bar chart of how many respondents gave each Value2 rating.
# NOTE(review): this counts Value2 over every row of data_wide; no filter on
# class format is applied here, so confirm that Value2 really corresponds to
# the online experience named in the title.
ggplot(data_wide, aes(x = Value2)) +
  geom_bar() +
  labs(
    title = "FYS online",
    subtitle = "Online Class",
    x = "Not helpful to helpful 1-5",
    y = "# People"
  )

## Warning: Removed 12 rows containing non-finite outside the scale range
## (`stat_count()`).

Final Project

Emma Fields

2024-11-25

1. In what ways are different groups of students impacted by their experience in FYS?

2. What groups of students are most at risk for retention?

3. What differences (if any) in student outcomes do you observe between the different sections offered?

4. What differences (if any) in student outcomes do you observe between the different types of instructors teaching FYS (adjunct, staff, or faculty)?

5. What differences (if any) in student outcomes do you observe between the different formats of FYS (online or in-person)?