la1

Author

Deekshitha and Harshitha

Student Attendance Analysis

This program analyzes daily student attendance data and visualizes patterns across weeks and schools using a heatmap, a trend line chart, and a weekly bar chart.


Load Libraries

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.2.0     ✔ readr     2.1.6
✔ forcats   1.0.1     ✔ stringr   1.6.0
✔ ggplot2   4.0.2     ✔ tibble    3.3.1
✔ lubridate 1.9.5     ✔ tidyr     1.3.2
✔ purrr     1.2.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
# ❗ IMPORTANT: the CSV is read through a relative path, so keep it in the
# same folder as the .qmd file.
attendance <- read.csv(file = "2018-2019_Daily_Attendance_20240429.csv")
# List the column names of the imported data frame.
colnames(attendance)
[1] "School.DBN" "Date"       "Enrolled"   "Absent"     "Present"   
[6] "Released"  
# Parse the Date column.
# The column evidently holds compact YYYYMMDD values that read.csv() imports
# as integers. as.Date() on a number treats it as a day count since
# 1970-01-01 and ignores `format`, which is what produced years such as
# 57223 and meaningless week numbers. ymd() reads the digits as
# year-month-day instead (and also accepts "YYYY-MM-DD" strings).
attendance$Date <- ymd(attendance$Date)
# Fail fast if nothing could be parsed, rather than plotting an all-NA axis.
stopifnot(!all(is.na(attendance$Date)))
# week() is lubridate's week-of-year index (1-53), counted from January 1.
attendance$week <- week(attendance$Date)
# Average the daily Present count for each school within each week.
weekly_data <- attendance %>%
  group_by(School.DBN, week) %>%
  summarise(avg_present = mean(Present, na.rm = TRUE), .groups = "drop")
# Preview the first rows, including the parsed Date and derived week columns.
head(attendance)
  School.DBN        Date Enrolled Absent Present Released week
1     01M015 57223-06-11      172     19     153        0   24
2     01M015 57223-06-12      171     17     154        0   24
3     01M015 57223-06-13      172     14     158        0   24
4     01M015 57223-06-18      173      7     166        0   25
5     01M015 57223-06-19      173      9     164        0   25
6     01M015 57223-06-20      173     11     162        0   25
# Show the structure of attendance: each column's type and first few values.
str(attendance)
'data.frame':   277153 obs. of  7 variables:
 $ School.DBN: chr  "01M015" "01M015" "01M015" "01M015" ...
 $ Date      : Date, format: "57223-06-11" "57223-06-12" ...
 $ Enrolled  : int  172 171 172 173 173 173 173 174 174 174 ...
 $ Absent    : int  19 17 14 7 9 11 10 7 7 8 ...
 $ Present   : int  153 154 158 166 164 162 163 167 167 166 ...
 $ Released  : int  0 0 0 0 0 0 0 0 0 0 ...
 $ week      : num  24 24 24 25 25 25 25 25 26 26 ...
# Print per-column summary statistics (min, quartiles, mean, max) for attendance.
summary(attendance)
  School.DBN             Date                Enrolled        Absent      
 Length:277153      Min.   :57223-06-10   Min.   :   1   Min.   :   0.0  
 Class :character   1st Qu.:57224-01-05   1st Qu.: 329   1st Qu.:  23.0  
 Mode  :character   Median :57248-09-11   Median : 476   Median :  38.0  
                    Mean   :57239-03-03   Mean   : 597   Mean   :  50.5  
                    3rd Qu.:57249-06-19   3rd Qu.: 684   3rd Qu.:  59.0  
                    Max.   :57250-01-21   Max.   :5955   Max.   :2151.0  
    Present          Released             week      
 Min.   :   1.0   Min.   :   0.000   Min.   : 1.00  
 1st Qu.: 291.0   1st Qu.:   0.000   1st Qu.:13.00  
 Median : 430.0   Median :   0.000   Median :27.00  
 Mean   : 544.5   Mean   :   1.983   Mean   :27.22  
 3rd Qu.: 640.0   3rd Qu.:   0.000   3rd Qu.:40.00  
 Max.   :5847.0   Max.   :5904.000   Max.   :53.00  
# Heatmap of weekly average attendance: one tile per (week, school) pair,
# shaded from yellow (low avg_present) to red (high avg_present).
weekly_data %>%
  ggplot(mapping = aes(x = factor(week), y = School.DBN, fill = avg_present)) +
  geom_tile() +
  scale_fill_gradient(high = "red", low = "yellow") +
  labs(
    fill = "Avg Present",
    y = "Schools",
    x = "Weeks",
    title = "Student Attendance Heatmap"
  ) +
  theme_minimal() +
  # Hide the per-school labels on the y axis.
  theme(axis.text.y = element_blank())

# Attendance trend over time.
# attendance has one row per school per day, so feeding it straight to
# geom_line() draws a single zig-zag path through every school's value on
# each date. Sum Present across schools first so there is exactly one point
# per date and the line shows the overall trend.
attendance %>%
  group_by(Date) %>%
  summarise(Present = sum(Present, na.rm = TRUE), .groups = "drop") %>%
  ggplot(aes(x = Date, y = Present)) +
  geom_line(color = "blue") +
  labs(title = "Attendance Trend Over Time",
       x = "Date",
       y = "Present")

# Average weekly attendance.
# weekly_data holds one row per (school, week). geom_col() stacks every row
# that shares an x value, so plotting it directly makes each bar the SUM of
# all schools' averages. Collapse to one row per week first; each bar is then
# the mean of the per-school weekly averages (every school weighted equally).
weekly_data %>%
  group_by(week) %>%
  summarise(avg_present = mean(avg_present, na.rm = TRUE), .groups = "drop") %>%
  ggplot(aes(x = factor(week), y = avg_present)) +
  geom_col(fill = "orange") +
  labs(title = "Average Weekly Attendance",
       x = "Week",
       y = "Average Present")