Introduction

This project analyzes the impact of COVID-19 on student education, focusing on internet access, dropout rates, and financial conditions. The goal is to identify key factors contributing to educational disruption and predict dropout risk.

Load Libraries and Data

library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.3
library(moments)
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.5.3
# Location of the survey CSV. Defaults to the author's original path; set the
# COVID_EDU_CSV environment variable to run the report on another machine.
csv_path <- Sys.getenv(
  "COVID_EDU_CSV",
  unset = "C:/Users/PANASHE/OneDrive/open_one_time_covid_education_impact.csv"
)
# Fail early with a readable message instead of read.csv()'s connection error.
if (!file.exists(csv_path)) {
  stop("Survey CSV not found: ", csv_path, call. = FALSE)
}
data <- read.csv(csv_path)

Data Exploration

# Print each column's storage type together with its first few values.
str(data)
## 'data.frame':    4436 obs. of  27 variables:
##  $ submission_id                                                         : num  4.57e+15 6.44e+15 5.00e+15 5.52e+15 5.03e+15 ...
##  $ submission_date                                                       : chr  "2021-03-17" "2021-03-29" "2021-03-18" "2021-03-24" ...
##  $ gender                                                                : chr  "Female" "Male" "Female" "Male" ...
##  $ age                                                                   : chr  "Over 45 years old" "26 to 35 years old" "26 to 35 years old" "36 to 45 years old" ...
##  $ geography                                                             : chr  "Suburban/Peri-urban" "Suburban/Peri-urban" "City center or metropolitan area" "Suburban/Peri-urban" ...
##  $ financial_situation                                                   : chr  "I can afford food and regular expenses, but nothing else" "I cannot afford enough food for my family" "I can comfortably afford food, clothes, and furniture, and I have savings" "I can afford food, but nothing else" ...
##  $ education                                                             : chr  "University or college degree completed" "University or college degree completed" "University or college degree completed" "University or college degree completed" ...
##  $ employment_status                                                     : chr  "I am unemployed" "I am unemployed" "I work full-time, either as an employee or self-employed" "I work full-time, either as an employee or self-employed" ...
##  $ submission_state                                                      : chr  "Miranda" "Miranda" "Miranda" "Miranda" ...
##  $ are_there_children_0_to_2_yrs_out_of_educational_system               : int  0 0 1 0 0 0 0 0 0 1 ...
##  $ were_children_3_to_17_yrs_enrolled_and_did_not_return_to_school       : int  1 1 1 0 1 0 1 0 0 1 ...
##  $ are_there_children_who_stopped_enrolling_in_primary_education         : int  1 0 1 0 0 1 0 0 0 0 ...
##  $ are_there_children_who_stopped_enrolling_in_secondary_education       : int  0 0 1 0 0 1 0 0 0 0 ...
##  $ are_children_attending_face_to_face_classes                           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ can_children_observe_deterioration_of_basic_services_of_school        : int  1 1 1 1 1 0 1 1 1 1 ...
##  $ do_children_3_and_17_yrs_receive_regular_school_meals                 : chr  "Every day" "No" "No" "No" ...
##  $ are_there_teachers_at_scheduled_class_hours                           : chr  "Irregularly" "Irregularly" "There are not enough" "There are enough" ...
##  $ are_children_3_to_17_yrs_dealing_with_irregular_school_activity       : int  0 1 1 1 1 0 1 1 0 0 ...
##  $ are_children_being_teached_by_unqualified_people                      : int  0 0 1 1 0 1 0 0 1 0 ...
##  $ did_teachers_leave_the_educational_system                             : int  0 1 1 1 1 1 0 1 1 0 ...
##  $ do_school_and_the_teachers_have_internet_connection                   : int  1 0 0 0 0 1 1 0 1 1 ...
##  $ do_children_have_internet_connection                                  : int  1 1 1 1 1 0 1 0 0 1 ...
##  $ do_children_3_to_17_yrs_miss_virtual_class_due_to_lack_of_electricity : int  0 1 0 0 1 0 0 1 1 0 ...
##  $ does_home_shows_severe_deficit_of_electricity                         : int  0 0 1 0 0 0 0 0 0 1 ...
##  $ does_home_shows_severe_deficit_of_internet                            : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ do_children_3_to_17_yrs_miss_class_or_in_lower_grade                  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ are_children_promoted_with_a_modality_different_from_formal_evaluation: int  0 0 1 0 1 1 0 0 1 0 ...
# Per-column summary: quartiles and mean for numeric columns, length/class/mode
# for character columns.
summary(data)
##  submission_id       submission_date       gender              age           
##  Min.   :4.504e+15   Length:4436        Length:4436        Length:4436       
##  1st Qu.:5.077e+15   Class :character   Class :character   Class :character  
##  Median :5.642e+15   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :5.633e+15                                                           
##  3rd Qu.:6.188e+15                                                           
##  Max.   :6.755e+15                                                           
##   geography         financial_situation  education         employment_status 
##  Length:4436        Length:4436         Length:4436        Length:4436       
##  Class :character   Class :character    Class :character   Class :character  
##  Mode  :character   Mode  :character    Mode  :character   Mode  :character  
##                                                                              
##                                                                              
##                                                                              
##  submission_state   are_there_children_0_to_2_yrs_out_of_educational_system
##  Length:4436        Min.   :0.0000                                         
##  Class :character   1st Qu.:0.0000                                         
##  Mode  :character   Median :0.0000                                         
##                     Mean   :0.2949                                         
##                     3rd Qu.:1.0000                                         
##                     Max.   :1.0000                                         
##  were_children_3_to_17_yrs_enrolled_and_did_not_return_to_school
##  Min.   :0.0000                                                 
##  1st Qu.:0.0000                                                 
##  Median :1.0000                                                 
##  Mean   :0.6132                                                 
##  3rd Qu.:1.0000                                                 
##  Max.   :1.0000                                                 
##  are_there_children_who_stopped_enrolling_in_primary_education
##  Min.   :0.0000                                               
##  1st Qu.:0.0000                                               
##  Median :0.0000                                               
##  Mean   :0.2065                                               
##  3rd Qu.:0.0000                                               
##  Max.   :1.0000                                               
##  are_there_children_who_stopped_enrolling_in_secondary_education
##  Min.   :0.0000                                                 
##  1st Qu.:0.0000                                                 
##  Median :0.0000                                                 
##  Mean   :0.1943                                                 
##  3rd Qu.:0.0000                                                 
##  Max.   :1.0000                                                 
##  are_children_attending_face_to_face_classes
##  Min.   :0.0000                             
##  1st Qu.:0.0000                             
##  Median :0.0000                             
##  Mean   :0.1637                             
##  3rd Qu.:0.0000                             
##  Max.   :1.0000                             
##  can_children_observe_deterioration_of_basic_services_of_school
##  Min.   :0.0000                                                
##  1st Qu.:1.0000                                                
##  Median :1.0000                                                
##  Mean   :0.8005                                                
##  3rd Qu.:1.0000                                                
##  Max.   :1.0000                                                
##  do_children_3_and_17_yrs_receive_regular_school_meals
##  Length:4436                                          
##  Class :character                                     
##  Mode  :character                                     
##                                                       
##                                                       
##                                                       
##  are_there_teachers_at_scheduled_class_hours
##  Length:4436                                
##  Class :character                           
##  Mode  :character                           
##                                             
##                                             
##                                             
##  are_children_3_to_17_yrs_dealing_with_irregular_school_activity
##  Min.   :0.0000                                                 
##  1st Qu.:0.0000                                                 
##  Median :1.0000                                                 
##  Mean   :0.6431                                                 
##  3rd Qu.:1.0000                                                 
##  Max.   :1.0000                                                 
##  are_children_being_teached_by_unqualified_people
##  Min.   :0.0000                                  
##  1st Qu.:0.0000                                  
##  Median :0.0000                                  
##  Mean   :0.3165                                  
##  3rd Qu.:1.0000                                  
##  Max.   :1.0000                                  
##  did_teachers_leave_the_educational_system
##  Min.   :0.0000                           
##  1st Qu.:0.0000                           
##  Median :1.0000                           
##  Mean   :0.6643                           
##  3rd Qu.:1.0000                           
##  Max.   :1.0000                           
##  do_school_and_the_teachers_have_internet_connection
##  Min.   :0.0000                                     
##  1st Qu.:0.0000                                     
##  Median :1.0000                                     
##  Mean   :0.5604                                     
##  3rd Qu.:1.0000                                     
##  Max.   :1.0000                                     
##  do_children_have_internet_connection
##  Min.   :0.0000                      
##  1st Qu.:0.0000                      
##  Median :1.0000                      
##  Mean   :0.6285                      
##  3rd Qu.:1.0000                      
##  Max.   :1.0000                      
##  do_children_3_to_17_yrs_miss_virtual_class_due_to_lack_of_electricity
##  Min.   :0.0000                                                       
##  1st Qu.:0.0000                                                       
##  Median :1.0000                                                       
##  Mean   :0.6655                                                       
##  3rd Qu.:1.0000                                                       
##  Max.   :1.0000                                                       
##  does_home_shows_severe_deficit_of_electricity
##  Min.   :0.0000                               
##  1st Qu.:0.0000                               
##  Median :0.0000                               
##  Mean   :0.2845                               
##  3rd Qu.:1.0000                               
##  Max.   :1.0000                               
##  does_home_shows_severe_deficit_of_internet
##  Min.   :0.0000                            
##  1st Qu.:0.0000                            
##  Median :1.0000                            
##  Mean   :0.5791                            
##  3rd Qu.:1.0000                            
##  Max.   :1.0000                            
##  do_children_3_to_17_yrs_miss_class_or_in_lower_grade
##  Min.   :0.0000                                      
##  1st Qu.:0.0000                                      
##  Median :0.0000                                      
##  Mean   :0.2464                                      
##  3rd Qu.:0.0000                                      
##  Max.   :1.0000                                      
##  are_children_promoted_with_a_modality_different_from_formal_evaluation
##  Min.   :0.0000                                                        
##  1st Qu.:0.0000                                                        
##  Median :0.0000                                                        
##  Mean   :0.4272                                                        
##  3rd Qu.:1.0000                                                        
##  Max.   :1.0000
# List the column names in file order.
colnames(data)
##  [1] "submission_id"                                                         
##  [2] "submission_date"                                                       
##  [3] "gender"                                                                
##  [4] "age"                                                                   
##  [5] "geography"                                                             
##  [6] "financial_situation"                                                   
##  [7] "education"                                                             
##  [8] "employment_status"                                                     
##  [9] "submission_state"                                                      
## [10] "are_there_children_0_to_2_yrs_out_of_educational_system"               
## [11] "were_children_3_to_17_yrs_enrolled_and_did_not_return_to_school"       
## [12] "are_there_children_who_stopped_enrolling_in_primary_education"         
## [13] "are_there_children_who_stopped_enrolling_in_secondary_education"       
## [14] "are_children_attending_face_to_face_classes"                           
## [15] "can_children_observe_deterioration_of_basic_services_of_school"        
## [16] "do_children_3_and_17_yrs_receive_regular_school_meals"                 
## [17] "are_there_teachers_at_scheduled_class_hours"                           
## [18] "are_children_3_to_17_yrs_dealing_with_irregular_school_activity"       
## [19] "are_children_being_teached_by_unqualified_people"                      
## [20] "did_teachers_leave_the_educational_system"                             
## [21] "do_school_and_the_teachers_have_internet_connection"                   
## [22] "do_children_have_internet_connection"                                  
## [23] "do_children_3_to_17_yrs_miss_virtual_class_due_to_lack_of_electricity" 
## [24] "does_home_shows_severe_deficit_of_electricity"                         
## [25] "does_home_shows_severe_deficit_of_internet"                            
## [26] "do_children_3_to_17_yrs_miss_class_or_in_lower_grade"                  
## [27] "are_children_promoted_with_a_modality_different_from_formal_evaluation"
# Number of rows (responses) and columns (variables).
dim(data)
## [1] 4436   27
# Total number of NA cells across the whole data frame.
# NOTE: is.na() does not flag the literal answer "Not Available" that appears in
# age, geography and financial_situation; those are ordinary strings here.
sum(is.na(data))
## [1] 0
# NA count per column (same caveat: "Not Available" strings are not NA).
colSums(is.na(data))
##                                                          submission_id 
##                                                                      0 
##                                                        submission_date 
##                                                                      0 
##                                                                 gender 
##                                                                      0 
##                                                                    age 
##                                                                      0 
##                                                              geography 
##                                                                      0 
##                                                    financial_situation 
##                                                                      0 
##                                                              education 
##                                                                      0 
##                                                      employment_status 
##                                                                      0 
##                                                       submission_state 
##                                                                      0 
##                are_there_children_0_to_2_yrs_out_of_educational_system 
##                                                                      0 
##        were_children_3_to_17_yrs_enrolled_and_did_not_return_to_school 
##                                                                      0 
##          are_there_children_who_stopped_enrolling_in_primary_education 
##                                                                      0 
##        are_there_children_who_stopped_enrolling_in_secondary_education 
##                                                                      0 
##                            are_children_attending_face_to_face_classes 
##                                                                      0 
##         can_children_observe_deterioration_of_basic_services_of_school 
##                                                                      0 
##                  do_children_3_and_17_yrs_receive_regular_school_meals 
##                                                                      0 
##                            are_there_teachers_at_scheduled_class_hours 
##                                                                      0 
##        are_children_3_to_17_yrs_dealing_with_irregular_school_activity 
##                                                                      0 
##                       are_children_being_teached_by_unqualified_people 
##                                                                      0 
##                              did_teachers_leave_the_educational_system 
##                                                                      0 
##                    do_school_and_the_teachers_have_internet_connection 
##                                                                      0 
##                                   do_children_have_internet_connection 
##                                                                      0 
##  do_children_3_to_17_yrs_miss_virtual_class_due_to_lack_of_electricity 
##                                                                      0 
##                          does_home_shows_severe_deficit_of_electricity 
##                                                                      0 
##                             does_home_shows_severe_deficit_of_internet 
##                                                                      0 
##                   do_children_3_to_17_yrs_miss_class_or_in_lower_grade 
##                                                                      0 
## are_children_promoted_with_a_modality_different_from_formal_evaluation 
##                                                                      0

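The dataset contains 4,436 responses and 27 variables, a mix of categorical (text) answers and 0/1 indicator columns. Missing values are checked to ensure data reliability before analysis: no NA values are present, although a few responses carry the literal answer "Not Available" (for example in age, geography, and financial situation), which the NA check does not count as missing.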

Data Preparation

# Demographic text columns that should be treated as categorical variables.
factor_cols <- c("gender", "age", "geography", "education", "employment_status")

# Convert them all in one pass; every other column is left untouched.
data[factor_cols] <- lapply(data[factor_cols], as.factor)

Categorical variables are converted into factors to ensure proper statistical analysis and modeling.

Descriptive Analysis

# Share (in percent) of responses where children have an internet connection;
# the column is coded 0/1, so its mean is the proportion of 1s.
100 * mean(data[["do_children_have_internet_connection"]])
## [1] 62.84941
# Counts of the 0/1 answers for missing virtual classes due to lack of electricity.
table(data[["do_children_3_to_17_yrs_miss_virtual_class_due_to_lack_of_electricity"]])
## 
##    0    1 
## 1484 2952
# Counts of the 0/1 answers for enrolled children who did not return to school.
table(data[["were_children_3_to_17_yrs_enrolled_and_did_not_return_to_school"]])
## 
##    0    1 
## 1716 2720
# Same indicator expressed as a percentage of all responses.
100 * mean(data[["were_children_3_to_17_yrs_enrolled_and_did_not_return_to_school"]])
## [1] 61.3165
# Counts of the 0/1 answers for attending face-to-face classes.
table(data[["are_children_attending_face_to_face_classes"]])
## 
##    0    1 
## 3710  726
# Respondent age group (rows) against the did-not-return indicator (columns).
table(
  data[["age"]],
  data[["were_children_3_to_17_yrs_enrolled_and_did_not_return_to_school"]]
)
##                     
##                        0   1
##   16 to 25 years old 549 753
##   26 to 35 years old 464 840
##   36 to 45 years old 413 674
##   Not Available        1   2
##   Over 45 years old  289 450
##   Under 16             0   1
# Number of responses per self-reported financial situation.
table(data[["financial_situation"]])
## 
##                        I can afford food and regular expenses, but nothing else 
##                                                                            1060 
##                                             I can afford food, but nothing else 
##                                                                            1445 
##              I can afford food, regular expenses, and clothes, but nothing else 
##                                                                             244 
##       I can comfortably afford food, clothes, and furniture, and I have savings 
##                                                                             157 
## I can comfortably afford food, clothes, and furniture, but I don’t have savings 
##                                                                             127 
##                                       I cannot afford enough food for my family 
##                                                                            1163 
##                                                                   Not Available 
##                                                                               1 
##                                                            Prefer not to answer 
##                                                                             239

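About 37% of respondents report that their children have no internet connection (62.8% have one), indicating a digital divide, and 61.3% report enrolled children who did not return to school. Financial conditions vary widely: roughly 59% of households can afford food at most, which may influence access to education and return rates.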

Diagnostic Analysis

# Children's internet connection (rows) against missing virtual classes for
# lack of electricity (columns).
table(
  data[["do_children_have_internet_connection"]],
  data[["do_children_3_to_17_yrs_miss_virtual_class_due_to_lack_of_electricity"]]
)
##    
##        0    1
##   0  485 1163
##   1  999 1789
# Severe home electricity deficit (rows) against missing virtual classes for
# lack of electricity (columns).
table(
  data[["does_home_shows_severe_deficit_of_electricity"]],
  data[["do_children_3_to_17_yrs_miss_virtual_class_due_to_lack_of_electricity"]]
)
##    
##        0    1
##   0 1270 1904
##   1  214 1048
# Financial situation (rows) against the did-not-return indicator (columns).
table(
  data[["financial_situation"]],
  data[["were_children_3_to_17_yrs_enrolled_and_did_not_return_to_school"]]
)
##                                                                                  
##                                                                                     0
##   I can afford food and regular expenses, but nothing else                        442
##   I can afford food, but nothing else                                             530
##   I can afford food, regular expenses, and clothes, but nothing else               89
##   I can comfortably afford food, clothes, and furniture, and I have savings        56
##   I can comfortably afford food, clothes, and furniture, but I don’t have savings  54
##   I cannot afford enough food for my family                                       434
##   Not Available                                                                     1
##   Prefer not to answer                                                            110
##                                                                                  
##                                                                                     1
##   I can afford food and regular expenses, but nothing else                        618
##   I can afford food, but nothing else                                             915
##   I can afford food, regular expenses, and clothes, but nothing else              155
##   I can comfortably afford food, clothes, and furniture, and I have savings       101
##   I can comfortably afford food, clothes, and furniture, but I don’t have savings  73
##   I cannot afford enough food for my family                                       729
##   Not Available                                                                     0
##   Prefer not to answer                                                            129
# Severe home internet deficit (rows) against the did-not-return indicator (columns).
table(
  data[["does_home_shows_severe_deficit_of_internet"]],
  data[["were_children_3_to_17_yrs_enrolled_and_did_not_return_to_school"]]
)
##    
##        0    1
##   0  828 1039
##   1  888 1681
# Geography (rows) against the did-not-return indicator (columns).
table(
  data[["geography"]],
  data[["were_children_3_to_17_yrs_enrolled_and_did_not_return_to_school"]]
)
##                                   
##                                       0    1
##   City center or metropolitan area  748 1172
##   Not Available                       1    0
##   Rural                             406  735
##   Suburban/Peri-urban               561  813
# Irregular school activity (rows) against the did-not-return indicator (columns).
table(
  data[["are_children_3_to_17_yrs_dealing_with_irregular_school_activity"]],
  data[["were_children_3_to_17_yrs_enrolled_and_did_not_return_to_school"]]
)
##    
##        0    1
##   0  748  835
##   1  968 1885
# Teachers leaving the system (rows) against irregular school activity (columns).
table(
  data[["did_teachers_leave_the_educational_system"]],
  data[["are_children_3_to_17_yrs_dealing_with_irregular_school_activity"]]
)
##    
##        0    1
##   0  934  555
##   1  649 2298

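Financial status on its own shows no clear gradient: excluding the single "Not Available" response, the share of households whose children did not return ranges from about 54% to 64% across financial groups, and the most comfortable group (64%) is close to the poorest (63%). The clearer contrasts involve infrastructure and schooling conditions: homes with a severe internet deficit (65% did not return vs 56% otherwise), children dealing with irregular school activity (66% vs 53%), and homes with a severe electricity deficit, where 83% miss virtual classes compared with 60% otherwise.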

Predictive Analysis

# Factor copies of the survey columns used for modelling and plotting
# (new column name -> source column), added in this order.
factor_sources <- c(
  internet_access = "do_children_have_internet_connection",
  return_to_school = "were_children_3_to_17_yrs_enrolled_and_did_not_return_to_school",
  irregular_activity = "are_children_3_to_17_yrs_dealing_with_irregular_school_activity",
  electricity_issue = "does_home_shows_severe_deficit_of_electricity",
  financial_status = "financial_situation"
)
for (new_col in names(factor_sources)) {
  data[[new_col]] <- as.factor(data[[factor_sources[[new_col]]]])
}

# Logistic regression on internet access, electricity deficit, financial status
# and geography.
# NOTE: despite its name, return_to_school is built from the "did not return to
# school" column. With a factor response glm() models every level except the
# first ("0") as the event, so the fitted value is P(did not return).
model2 <- glm(
  return_to_school ~ internet_access + electricity_issue + financial_status + geography,
  family = binomial(),
  data = data
)

# Fitted probabilities on the response scale, stored as the dropout risk score.
data$dropout_risk <- fitted(model2)

# ================================
# CORRELATION ANALYSIS
# ================================
# Numeric copies of the modelling factors for a Pearson correlation matrix.
# as.numeric() on a factor returns the internal level index (1, 2, ...) rather
# than the label, so the binary factors go through as.character() first to
# recover their original 0/1 codes.
data$internet_num <- as.numeric(as.character(data$internet_access))
data$return_num <- as.numeric(as.character(data$return_to_school))
data$electricity_num <- as.numeric(as.character(data$electricity_issue))

# financial_status is a nominal factor whose levels are sorted alphabetically,
# so its level index carries no poorer/richer meaning. Rank the answers
# explicitly from 1 (cannot afford food) to 6 (comfortable, with savings).
# Answers not listed here ("Not Available", "Prefer not to answer") become NA.
financial_order <- c(
  "I cannot afford enough food for my family",
  "I can afford food, but nothing else",
  "I can afford food and regular expenses, but nothing else",
  "I can afford food, regular expenses, and clothes, but nothing else",
  # \u2019 is the typographic apostrophe used in the survey answer text.
  paste0(
    "I can comfortably afford food, clothes, and furniture, ",
    "but I don\u2019t have savings"
  ),
  "I can comfortably afford food, clothes, and furniture, and I have savings"
)
data$financial_num <- as.numeric(
  match(as.character(data$financial_status), financial_order)
)

# Pairwise deletion stops the NA financial ranks from turning every
# financial_num entry of the matrix into NA.
cor_matrix <- cor(
  data[, c("internet_num", "return_num", "electricity_num", "financial_num", "dropout_risk")],
  use = "pairwise.complete.obs"
)
cor_matrix
##                 internet_num   return_num electricity_num financial_num
## internet_num     1.000000000  0.009096232     -0.11804453  -0.049153182
## return_num       0.009096232  1.000000000      0.09149783  -0.002601488
## electricity_num -0.118044528  0.091497829      1.00000000   0.050075172
## financial_num   -0.049153182 -0.002601488      0.05007517   1.000000000
## dropout_risk     0.079628692  0.114286379      0.80097447  -0.022773434
##                 dropout_risk
## internet_num      0.07962869
## return_num        0.11428638
## electricity_num   0.80097447
## financial_num    -0.02277343
## dropout_risk      1.00000000
# Reshape the correlation matrix to long form: one row per (Var1, Var2) pair
# with the coefficient in `value`.
cor_melt <- melt(cor_matrix)

# Tile heatmap of the coefficients, each cell labelled with its rounded value.
cor_heatmap <- ggplot(
  data = cor_melt,
  mapping = aes(x = Var1, y = Var2, fill = value)
) +
  geom_tile() +
  geom_text(mapping = aes(label = round(value, digits = 2))) +
  labs(title = "Correlation Heatmap") +
  theme_minimal()
cor_heatmap

# ================================
# PRESCRIPTIVE ANALYSIS
# ================================
# Row-wise proportions: for each internet_access level, the split between the
# two return_to_school levels (each row sums to 1).
prop.table(
  table(data[["internet_access"]], data[["return_to_school"]]),
  margin = 1
)
##    
##             0         1
##   0 0.3925971 0.6074029
##   1 0.3834290 0.6165710
# Rows whose predicted dropout risk is strictly above the 0.7 cut-off.
risk_cutoff <- 0.7
is_high_risk <- data[["dropout_risk"]] > risk_cutoff
high_risk_students <- data[is_high_risk, ]

# How many rows were flagged.
nrow(high_risk_students)
## [1] 381
# Mean predicted dropout risk for every observed combination of financial
# status and geography.
aggregate(
  dropout_risk ~ financial_status + geography,
  data = data,
  FUN = mean
)
##                                                                   financial_status
## 1                         I can afford food and regular expenses, but nothing else
## 2                                              I can afford food, but nothing else
## 3               I can afford food, regular expenses, and clothes, but nothing else
## 4        I can comfortably afford food, clothes, and furniture, and I have savings
## 5  I can comfortably afford food, clothes, and furniture, but I don’t have savings
## 6                                        I cannot afford enough food for my family
## 7                                                             Prefer not to answer
## 8                                                                    Not Available
## 9                         I can afford food and regular expenses, but nothing else
## 10                                             I can afford food, but nothing else
## 11              I can afford food, regular expenses, and clothes, but nothing else
## 12       I can comfortably afford food, clothes, and furniture, and I have savings
## 13 I can comfortably afford food, clothes, and furniture, but I don’t have savings
## 14                                       I cannot afford enough food for my family
## 15                                                            Prefer not to answer
## 16                        I can afford food and regular expenses, but nothing else
## 17                                             I can afford food, but nothing else
## 18              I can afford food, regular expenses, and clothes, but nothing else
## 19       I can comfortably afford food, clothes, and furniture, and I have savings
## 20 I can comfortably afford food, clothes, and furniture, but I don’t have savings
## 21                                       I cannot afford enough food for my family
## 22                                                            Prefer not to answer
##                           geography dropout_risk
## 1  City center or metropolitan area 5.856496e-01
## 2  City center or metropolitan area 6.288990e-01
## 3  City center or metropolitan area 6.378863e-01
## 4  City center or metropolitan area 6.459355e-01
## 5  City center or metropolitan area 5.726864e-01
## 6  City center or metropolitan area 6.237103e-01
## 7  City center or metropolitan area 5.373316e-01
## 8                     Not Available 9.482496e-06
## 9                             Rural 6.161654e-01
## 10                            Rural 6.597386e-01
## 11                            Rural 6.578040e-01
## 12                            Rural 6.699145e-01
## 13                            Rural 6.086975e-01
## 14                            Rural 6.499037e-01
## 15                            Rural 5.712774e-01
## 16              Suburban/Peri-urban 5.652749e-01
## 17              Suburban/Peri-urban 6.128347e-01
## 18              Suburban/Peri-urban 6.207245e-01
## 19              Suburban/Peri-urban 6.225738e-01
## 20              Suburban/Peri-urban 5.633299e-01
## 21              Suburban/Peri-urban 6.026952e-01
## 22              Suburban/Peri-urban 5.130942e-01
# ================================
# VISUALIZATION
# ================================
# Response counts by children's internet access.
ggplot(data = data, mapping = aes(x = internet_access, fill = internet_access)) +
  geom_bar()

# Response counts by the return_to_school indicator.
ggplot(data = data, mapping = aes(x = return_to_school, fill = return_to_school)) +
  geom_bar()

# Age-group counts with the return_to_school levels side by side.
ggplot(data = data, mapping = aes(x = age, fill = return_to_school)) +
  geom_bar(position = "dodge")

# Within-group proportions of return_to_school for each financial status.
ggplot(data = data, mapping = aes(x = financial_status, fill = return_to_school)) +
  geom_bar(position = "fill")

# Histogram of predicted risk; the dashed red line marks the 0.7 cut-off.
ggplot(data = data, mapping = aes(x = dropout_risk)) +
  geom_histogram(bins = 20, fill = "purple", alpha = 0.7) +
  geom_vline(xintercept = 0.7, colour = "red", linetype = "dashed")

# Mean predicted risk within each return_to_school level, drawn as bars.
ggplot(
  data = data,
  mapping = aes(x = return_to_school, y = dropout_risk, fill = return_to_school)
) +
  stat_summary(geom = "bar", fun = mean)

# Overlaid density of predicted risk for the two return_to_school levels.
ggplot(data = data, mapping = aes(x = dropout_risk, fill = return_to_school)) +
  geom_density(alpha = 0.4)

# Predicted risk against the numeric financial code, with a linear trend line.
ggplot(data = data, mapping = aes(x = financial_num, y = dropout_risk)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", colour = "blue")
## `geom_smooth()` using formula = 'y ~ x'

# Skewness of the predicted dropout risk (0 means a symmetric distribution).
skewness(data[["dropout_risk"]])
## [1] 0.003830186
# Kurtosis of the predicted dropout risk. moments::kurtosis() reports Pearson's
# measure, for which a normal distribution scores 3.
kurtosis(data[["dropout_risk"]])
## [1] 5.629831

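The distribution of predicted dropout risk is almost perfectly symmetric (skewness ≈ 0.004) but heavy-tailed (kurtosis ≈ 5.6, against 3 for a normal distribution): predicted risk is concentrated in a narrow band around the centre, with a small number of extreme cases for specific groups.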

Conclusion

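The analysis examined internet access, electricity availability, and financial status as potential drivers of student dropout. In this sample, a severe electricity deficit shows the clearest (though still modest) association with children not returning to school, whereas children's internet access and household financial status show only weak associations. Students from disadvantaged backgrounds may nonetheless be at higher risk. Predictive modeling helps identify vulnerable groups, allowing targeted interventions to reduce educational inequality.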