R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(tsibble)
## 
## Attaching package: 'tsibble'
## 
## The following object is masked from 'package:lubridate':
## 
##     interval
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, union
library(forecast)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
# Switch the session to the folder that holds the data set, then load the
# semicolon-separated CSV; its first row supplies the column names.
# NOTE(review): the absolute path is specific to one machine, and setwd()
# changes the working directory for everything that runs afterwards.
setwd("/Users/saitejaravulapalli/Documents/IUPUI_SEM 01/Intro to Statistic in R/DATA SET")
data <- read.csv(file = "student dropout.csv", header = TRUE, sep = ";")

# Approximate a birth date per row by stepping back from today's date by the
# enrollment age, converted to days at 365.25 days per year.
# Assumes Age.at.enrollment is expressed in years.
# The offset is not rounded, so the resulting Date can carry a fractional day
# whenever age * 365.25 is not a whole number.
data$Date_of_Birth <- Sys.Date() - 365.25 * as.numeric(data$Age.at.enrollment)

# Collapse the rows to one per Date_of_Birth, keeping the average
# Unemployment.rate of each group (NA values are dropped before averaging).
data_summary <- summarise(
  group_by(data, Date_of_Birth),
  Unemployment_rate_mean = mean(Unemployment.rate, na.rm = TRUE)
)

# Turn the summary table into a tsibble that uses Date_of_Birth as its index
data_ts <- as_tsibble(data_summary, index = Date_of_Birth)

# Make the series regular and complete, in two steps:
#   1. fill_gaps() turns implicit gaps in the index into explicit rows whose
#      Unemployment_rate_mean is NA.
#   2. fill() then carries the last observed value forward into those rows
#      (and into any other missing means).
# The order matters: calling fill() before fill_gaps() would leave the rows
# created by fill_gaps() as NA, and the line plot would break at each of them.
data_ts <- data_ts %>%
  fill_gaps() %>%
  fill(Unemployment_rate_mean, .direction = "down")

# Line chart of the mean unemployment rate against the Date_of_Birth index
ggplot(
  data = data_ts,
  mapping = aes(x = Date_of_Birth, y = Unemployment_rate_mean)
) +
  geom_line() +
  labs(
    title = "Unemployment Rate Over Time",
    x = "Date",
    y = "Unemployment Rate"
  )