This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(tsibble)
##
## Attaching package: 'tsibble'
##
## The following object is masked from 'package:lubridate':
##
## interval
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, union
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
# Read the student dropout data (semicolon-delimited CSV with a header row).
# The file is addressed by its full path rather than by calling setwd(), so
# the session's working directory is left untouched. Edit `data_dir` to point
# at the folder that holds the CSV on your machine.
data_dir <- "/Users/saitejaravulapalli/Documents/IUPUI_SEM 01/Intro to Statistic in R/DATA SET"
data <- read.csv(
  file.path(data_dir, "student dropout.csv"),
  sep = ";",
  header = TRUE
)
# Approximate each student's date of birth by stepping back
# Age.at.enrollment years from today's date (assumes the column is in years).
# The offset is rounded to whole days: age * 365.25 is fractional for most
# ages, and a Date that carries a fraction of a day still prints as a plain
# calendar date while holding a non-integer value underneath, which is
# fragile as a grouping key and as a time-series index.
# NOTE(review): the result is anchored to the day the script runs, not to the
# enrollment date, so it shifts from run to run - confirm this is intended.
data$Date_of_Birth <- Sys.Date() -
  round(as.numeric(data$Age.at.enrollment) * 365.25)
# Collapse the data to one row per Date_of_Birth, holding the mean of
# Unemployment.rate for that date (NA values are ignored in the mean).
data_summary <- summarise(
  group_by(data, Date_of_Birth),
  Unemployment_rate_mean = mean(Unemployment.rate, na.rm = TRUE)
)
# Turn the per-date summary into a tsibble indexed by Date_of_Birth.
data_ts <- as_tsibble(data_summary, index = Date_of_Birth)
# Step 1: make implicit gaps explicit. fill_gaps() inserts a row for every
# index value missing from the series, with the measured column set to NA.
data_ts <- data_ts %>% fill_gaps()
# Step 2: carry the last observed value forward into the NA rows. This has to
# run after fill_gaps(); done the other way round, the rows inserted in
# step 1 would stay NA and the line plot would have nothing to connect.
data_ts <- data_ts %>% fill(Unemployment_rate_mean)
# Line chart of the mean unemployment rate against the series index.
# NOTE(review): the x-axis variable is Date_of_Birth, while the title and axis
# label describe it as time/date - confirm the wording is what is intended.
ggplot(
  data = data_ts,
  mapping = aes(x = Date_of_Birth, y = Unemployment_rate_mean)
) +
  geom_line() +
  ggtitle("Unemployment Rate Over Time") +
  xlab("Date") +
  ylab("Unemployment Rate")