file.path(C:/PISA/PISA 2012/DATA) setwd(dir= file[path) getwd() install.packages(“intsvy”) library(“intsvy”) pisa.var.label(folder = “C:/PISA/PISA 2012/Data”, school.file = “SC.sav”, student.file = “ST.sav”) pisa <- pisa.select.merge(folder = “C:/PISA/PISA 2012/Data”, school.file = “SC.sav”, student.file = “ST.sav”, student = c(“ST01Q01”, “ST04Q01”, “ST08Q01”, “ST09Q01”, “ST42Q01”, “ST86Q04”, “ESCS”, “ANXMAT”, “INTMAT”, “ST42Q05”,“PARED”, “PV1MATH”, “PV2MATH”, “PV3MATH”, “PV4MATH”, “PV5MATH”, “W_FSTUWT”), school = c(“SCHOOLID”, “SC01Q01”, “CLSIZE”, “SC40Q02”, “W_FSCHWT”)) #———————————————————————————————— install.packages(“survey”) library(survey) # Create survey design object with sample weights pisa_design <- svydesign(ids = ~1, data = pisa, weights = ~W_FSTUWT)

Compute descriptive statistics for each PV1MATH to PV5MATH variable

desc_stats_pv <- data.frame( PV1MATH = svymean(~PV1MATH, design = pisa_design), PV2MATH = svymean(~PV2MATH, design = pisa_design), PV3MATH = svymean(~PV3MATH, design = pisa_design), PV4MATH = svymean(~PV4MATH, design = pisa_design), PV5MATH = svymean(~PV5MATH, design = pisa_design) )

Round the mean and standard error values for better presentation

desc_stats_pv <- round(desc_stats_pv, digits = 2)

Create a tidy data frame for presentable output

tidy_desc_stats_pv <- data.frame( Variable = c(“PV1MATH”, “PV2MATH”, “PV3MATH”, “PV4MATH”, “PV5MATH”), Mean = c(desc_stats_pv\(PV1MATH.mean, desc_stats_pv\)PV2MATH.mean, desc_stats_pv\(PV3MATH.mean, desc_stats_pv\)PV4MATH.mean, desc_stats_pv\(PV5MATH.mean), Standard_Error = c(desc_stats_pv\)PV1MATH.PV1MATH, desc_stats_pv\(PV2MATH.PV2MATH, desc_stats_pv\)PV3MATH.PV3MATH, desc_stats_pv\(PV4MATH.PV4MATH, desc_stats_pv\)PV5MATH.PV5MATH) )

Set up the plotting layout

par(mfrow=c(3, 3)) # Adjust the layout as needed par(mar=c(4, 4, 2, 1))

Loop through variables of interest

for (var in variables_of_interest) { if (is.numeric(pisa[[var]])) { hist(pisa[[var]], main=var, xlab=“Value”, col=“skyblue”, breaks = 20) } else { # For non-numeric variables, create bar plots if (length(unique(pisa[[var]])) <= 10) { barplot(table(pisa[[var]]), main=var, xlab=“Value”, col=“lightgreen”) } else { print(paste(“Skipping”, var, “as it has too many unique values for a bar plot.”)) } } }

##———————————————————————————————–#Boxplots——-#

Filter numeric variables of interest

numeric_vars_of_interest <- c(“ST04Q01”, “ST08Q01”, “ST09Q01”, “ST42Q01”, “ST86Q04”, “ESCS”, “ANXMAT”, “INTMAT”, “ST42Q05”, “PARED”, “SCHOOLID”, “SC01Q01”, “CLSIZE”, “SC40Q02”)

Set up the plotting layout

par(mfrow=c(4, 4)) # Adjusted to accommodate 16 plots par(mar=c(4, 4, 2, 1))

Define colors for the box plots

boxplot_colors <- rainbow(length(numeric_vars_of_interest))

Visualize the distribution of numeric variables using box plots

for (i in 1:length(numeric_vars_of_interest)) { var <- numeric_vars_of_interest[i] pisa[[var]] <- as.numeric(pisa[[var]]) # Convert to numeric if not already numeric

# Plot box plot boxplot(pisa[[var]], main=var, col=boxplot_colors[i]) } #——————————————————————————–# # CALCULATION OF PERCENTAGE OF MISSING VALUES# # Define the variables of interest variables_of_interest <- c(“PV1MATH”, “PV2MATH”, “PV3MATH”, “PV4MATH”, “PV5MATH”, “ST01Q01”, “ST04Q01”, “ST08Q01”, “ST09Q01”, “ST42Q01”, “ST86Q04”, “ESCS”, “ANXMAT”, “INTMAT”, “ST42Q05”, “PARED”, “SCHOOLID”, “SC01Q01”, “CLSIZE”, “SC40Q02”)

Calculate the percentage of missing values for each variable

missing_percentage <- sapply(pisa[, variables_of_interest], function(x) { mean(is.na(x)) * 100 })

Create a data frame to display the results

missing_data_summary <- data.frame(Variable = names(missing_percentage), Percent_Missing = missing_percentage)