ADOBAS_CAPSTONE

##1

# Restore the saved objects from the .Rda file; the rest of the script expects
# this to provide the `phil_pisa2018` data frame.
# NOTE(review): absolute, machine-specific path -- the script only runs where
# this exact folder exists.
load("C:/Users/User/Desktop/EDA_STAT 137 - CAPSTONE FINAL PROJECT/phil_pisa2018.Rda")
# Open the data in the interactive viewer.
View(phil_pisa2018)

library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0      ✔ purrr   0.3.5 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

##2

# Lookup table: sampling-stratum code -> readable region label.
stratum_to_region <- c(
  PHL0001 = "Region 1",
  PHL0002 = "Region 2",
  PHL0003 = "Region 3",
  PHL0004 = "Region 4A",
  PHL0005 = "Region 4B",
  PHL0006 = "Region 5",
  PHL0007 = "Region 6",
  PHL0008 = "Region 7",
  PHL0009 = "Region 8",
  PHL0010 = "Region 9",
  PHL0011 = "Region 10",
  PHL0012 = "Region 11",
  PHL0013 = "Region 12",
  PHL0014 = "NCR",
  PHL0015 = "CAR",
  PHL0016 = "CARAGA",
  PHL0017 = "NIR"
)

# Recode STRATUM into REGION_New and keep only that column. Codes that are
# missing or not in the lookup become NA.
M <- phil_pisa2018 %>%
  transmute(REGION_New = unname(stratum_to_region[as.character(STRATUM)]))

# Inspect the first and last rows of the recoded column.
head(M)

tail(M)

##3,4,5,6 :Creating new variables MATH,READING,SCIENCE and GLCM, respectively

# Average the ten plausible values (PV1..PV10) of one subject for each student.
#
# data:   data frame holding the plausible-value columns.
# suffix: subject suffix of the column names, e.g. "MATH" for PV1MATH..PV10MATH.
# name:   name of the single output column.
# Returns a data frame with exactly one column called `name`. Missing values
# are not removed, so a student with any missing plausible value gets NA.
mean_plausible_values <- function(data, suffix, name) {
  pv_cols <- paste0("PV", 1:10, suffix)
  pv <- select(data, all_of(pv_cols))
  pv %>%
    mutate("{name}" := rowMeans(pv)) %>%
    select(all_of(name))
}

# One-column data frames with the per-student score of each subject area.
A <- mean_plausible_values(phil_pisa2018, "MATH", "MATH")
B <- mean_plausible_values(phil_pisa2018, "READ", "READING")
C <- mean_plausible_values(phil_pisa2018, "SCIE", "SCIENCE")
D <- mean_plausible_values(phil_pisa2018, "GLCM", "GLCM")

# Combine the four subject scores side by side (rows stay in student order).
MRSG1 <- cbind(A, B, C, D)

head(MRSG1)

tail(MRSG1)

# Bar chart: number of participants per region, split by sex.

# Treat region and sex as categorical. These assignments modify the global
# data frame, which later sections read.
phil_pisa2018$REGION <- as.factor(phil_pisa2018$REGION)
phil_pisa2018$ST004D01T <- as.factor(phil_pisa2018$ST004D01T)

# Bars and count labels share one dodge so each label sits on its own bar.
# (A width-less position_dodge() on the labels left them undodged.)
dodge_region_sex <- position_dodge(width = 0.5)

phil_pisa2018 %>%
  group_by(REGION, ST004D01T) %>%
  summarise(Count = n(), .groups = "drop") %>%
  ggplot(aes(x = reorder(REGION, Count), y = Count, fill = ST004D01T)) +
  geom_col(position = dodge_region_sex, colour = "black") +
  geom_text(aes(label = Count), hjust = -0.3, colour = "black",
            position = dodge_region_sex, size = 2) +
  coord_flip() +
  # Derive the short axis labels from the region names themselves instead of a
  # hand-ordered vector, so they cannot drift out of sync with the bar order.
  # NOTE(review): assumes REGION values look like "Region 4A", "NCR", ...
  scale_x_discrete(expand = c(0, 0),
                   labels = function(x) sub("^Region ", "", x)) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 630)) +
  # Single fill scale (a second one would only replace the first).
  scale_fill_manual(values = c("yellowgreen", "brown"),
                    name = "Sex", labels = c("Female", "Male")) +
  labs(x = "Region", y = "Number of students",
       title = "Number of Participants per Region (By Sex)") +
  # Complete theme first, tweaks after: theme_gray() would otherwise reset the
  # title alignment.
  theme_gray() +
  theme(plot.title = element_text(hjust = 0.4))

The bar graph above indicates that Region 4A had the highest number of participants, with a total of 606 students, while CAR had the fewest, with about 121 students. The graph also shows that there were more female participants than male participants.

##8

# Bar chart: number of participants per grade level, split by sex.

# Treat grade level and sex as categorical (global side effect used later).
phil_pisa2018$ST001D01T <- as.factor(phil_pisa2018$ST001D01T)
phil_pisa2018$ST004D01T <- as.factor(phil_pisa2018$ST004D01T)

# Same dodge for bars and labels so each count is centred over its bar.
dodge_grade_sex <- position_dodge(width = 0.5)

phil_pisa2018 %>%
  group_by(ST001D01T, ST004D01T) %>%
  summarise(count = n(), .groups = "drop") %>%
  # Grades ordered from most to fewest participants.
  ggplot(aes(x = reorder(ST001D01T, -count), y = count, fill = ST004D01T)) +
  geom_col(position = dodge_grade_sex, colour = "blue") +
  geom_text(aes(label = count), vjust = -0.3, colour = "black",
            position = dodge_grade_sex, size = 3) +
  # Single fill scale (a second one would only replace the first).
  scale_fill_manual(values = c("pink", "lightblue"),
                    name = "Sex", labels = c("Female", "Male")) +
  labs(x = "Grade_Level", y = "Number",
       title = "Number of students by Year level") +
  theme(plot.title = element_text(hjust = 0.5))

The bar graph above shows that most of the participants were Grade 9 students, with a total of 3693 participants, while the fewest were Grade 12 students, with only about 3 participants. Among the participants, females outnumber males.

##9

# Bar chart: number of students per region who answered "1" on ST011Q06TA
# (reported in the chart title as having internet access).

# Treat the item and the region as categorical (global side effect).
phil_pisa2018$ST011Q06TA <- as.factor(phil_pisa2018$ST011Q06TA)
phil_pisa2018$REGION <- as.factor(phil_pisa2018$REGION)

phil_pisa2018 %>%
  # filter() already discards rows where ST011Q06TA is NA.
  filter(ST011Q06TA == "1") %>%
  drop_na(REGION) %>%
  count(REGION, name = "count") %>%
  ggplot(aes(x = reorder(REGION, count), y = count)) +
  # One bar per region, so no dodging is needed.
  geom_col(colour = "blue", fill = "pink") +
  geom_text(aes(label = count), hjust = -0.3, colour = "black", size = 2) +
  coord_flip() +
  # Title kept on one line; the earlier literal embedded a line break followed
  # by source-code indentation.
  labs(x = "Region", y = "Number",
       title = "Number of Students with Internet Access")

The bar graph above tells us that the largest number of students with internet access is found in Region 4A (about 626 students), while the smallest is in the CARAGA Region (about 60 students).

##10

library(ggpubr)

# Density-scaled histogram of one score column with a red kernel-density curve.
#
# data:       data frame containing the score column.
# score:      name of the score column (string).
# axis_label: x-axis label.
# outline:    outline colour of the histogram bars.
# fill:       fill colour of the histogram bars.
# Returns a ggplot object. Rows with a missing score are dropped first.
plot_score_density <- function(data, score, axis_label, outline, fill) {
  data %>%
    drop_na(all_of(score)) %>%
    ggplot(aes(x = .data[[score]])) +
    geom_histogram(aes(y = after_stat(density)),
                   colour = outline, bins = 30, fill = fill) +
    geom_density(alpha = 1, colour = "red") +
    labs(x = axis_label, y = "Density")
}

M9 <- plot_score_density(A, "MATH", "Math",
                         outline = "blue", fill = "lightblue")
R9 <- plot_score_density(B, "READING", "Reading",
                         outline = "red", fill = "pink")
S9 <- plot_score_density(C, "SCIENCE", "Science",
                         outline = "black", fill = "yellowgreen")
G9 <- plot_score_density(D, "GLCM", "GLCM",
                         outline = "black", fill = "gray")

# Arrange the four panels in one figure and add a common title.
CH <- ggarrange(M9, S9, G9, R9)

annotate_figure(
  CH,
  top = text_grob("Empirical Probability distributions of Subject Areas",
                  face = "bold", size = 14, color = "blue")
) + theme_light()

The histograms above show that Mathematics has an approximately normal distribution, while Reading, Science, and GLCM have positively skewed distributions. No outliers are observed in these distributions.

##11

# Create  functions for computing standard error per subjects
# Region and sex of every student, as a plain data frame.
E <- data.frame(select(phil_pisa2018, REGION, ST004D01T))

# Subject scores (MATH, READING, SCIENCE, GLCM) side by side ...
MRSG1 <- cbind(A, B, C, D)
# ... with region and sex appended; rows are matched by position.
MRSG <- cbind(MRSG1, E)

# Standard error of the mean: sd(x) / sqrt(n).
#
# x:     numeric vector.
# na.rm: if TRUE, missing values are removed before computing sd and n.
#        The default FALSE keeps the original behaviour (any NA gives NA).
# Returns a single numeric value.
std_error <- function(x, na.rm = FALSE) {
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  sd(x) / sqrt(length(x))
}

# Per-subject wrappers kept under their original names and argument names;
# all four share the single implementation above.
SEm <- function(MATH, na.rm = FALSE) std_error(MATH, na.rm = na.rm)

SEr <- function(READING, na.rm = FALSE) std_error(READING, na.rm = na.rm)

SEs <- function(SCIENCE, na.rm = FALSE) std_error(SCIENCE, na.rm = na.rm)

SEg <- function(GLCM, na.rm = FALSE) std_error(GLCM, na.rm = na.rm)

library(knitr)

# Mean score and standard error of each subject, per region (rounded to 2 dp).
# Stored as `region_summary` rather than `T`, because assigning to `T` masks
# R's shorthand for TRUE for the rest of the session.
region_summary <- MRSG %>%
  group_by(REGION) %>%
  summarise(
    Mean_Math = round(mean(MATH), 2),
    SEM = round(SEm(MATH), 2),
    Mean_Reading = round(mean(READING), 2),
    SER = round(SEr(READING), 2),
    Mean_Science = round(mean(SCIENCE), 2),
    SES = round(SEs(SCIENCE), 2),
    Mean_GLCM = round(mean(GLCM), 2),
    SEG = round(SEg(GLCM), 2),
    .groups = "drop"
  )

kable(region_summary)

REGION	Mean_Math	SEM	Mean_Reading	SER	Mean_Science	SES	Mean_GLCM	SEG
Region 1	341.89	3.19	321.00	3.31	340.79	3.03	353.36	3.28
Region 2	344.65	4.17	326.65	4.10	346.87	3.83	363.06	4.17
Region 3	355.46	2.34	339.04	2.44	350.31	2.23	367.95	2.35
Region 4A	365.44	1.92	349.82	2.15	365.88	2.01	380.04	2.12
Region 4B	337.20	4.35	326.54	4.29	339.27	3.96	354.98	4.30
Region 5	338.22	3.36	327.47	3.38	345.08	3.09	363.74	3.32
Region 6	345.21	3.96	333.86	4.10	354.29	3.68	367.23	3.84
Region 7	364.03	3.27	353.46	3.67	375.31	3.15	386.83	3.64
Region 8	341.16	4.00	343.50	4.26	351.39	3.97	370.22	4.16
Region 9	329.98	4.32	309.24	4.04	331.43	3.77	345.76	4.04
Region 10	342.00	4.24	330.77	4.59	343.07	3.96	353.60	4.77
Region 11	342.13	3.45	331.78	3.94	349.39	3.56	362.89	3.95
Region 12	313.70	3.69	297.41	3.47	326.32	3.09	339.41	3.30
NCR	384.93	2.39	371.51	2.79	390.47	2.45	401.89	2.74
CAR	369.81	6.81	354.32	6.87	361.32	6.52	383.18	7.00
CARAGA	305.29	4.45	299.52	4.04	317.17	3.64	333.19	3.99
NIR	342.31	4.36	335.84	4.66	359.89	4.43	369.55	4.57

#gt(T)
# Hand-prepared table of the highest and lowest regional mean per subject.
N <- read.csv("DATAFINAL.csv", header = TRUE, sep = ",")

# Drop rows in which every field is empty or NA (the CSV ends with a blank
# line that otherwise prints as an extra row containing only NA).
is_blank_field <- function(col) is.na(col) | trimws(as.character(col)) == ""
N <- N[!Reduce(`&`, lapply(N, is_blank_field)), , drop = FALSE]

library(knitr)
kable(N, row.names = FALSE)

Subject	Level	Mean	Region
MATH	High	384.93	NCR
	Low	305.29	CARAGA
READING	High	371.51	NCR
	Low	297.41	Region 12
SCIENCE	High	390.47	NCR
	Low	317.17	CARAGA
GLCM	High	401.89	NCR
	Low	333.19	CARAGA
		NA

#library(knitr)
#kable(T)

The table above shows that the highest mean score in every subject comes from the National Capital Region (NCR), which means that this region performs best in these subjects. The CARAGA region, on the other hand, has the lowest mean scores in Math, Science, and GLCM, which indicates the poorest performance in those subjects, while Region 12 has the lowest mean score in Reading. In general, the data indicate that students from NCR are the highest achievers based on these results.

##12:

# Mean and standard deviation of each subject score by sex (rounded to 2 dp).
# Suffixes: M = Math, R = Reading, S = Science, G = GLCM.
# The R columns are computed from READING and the S columns from SCIENCE;
# these two were previously swapped.
# NOTE(review): any hand-made file copied from the earlier output (such as the
# CSV used for the bar plot) should be re-checked against these values.
Barplot_MRSG <- MRSG %>%
  group_by(ST004D01T) %>%
  summarise(
    meanM = round(mean(MATH), 2), sdM = round(sd(MATH), 2),
    meanR = round(mean(READING), 2), sdR = round(sd(READING), 2),
    meanS = round(mean(SCIENCE), 2), sdS = round(sd(SCIENCE), 2),
    meanG = round(mean(GLCM), 2), sdG = round(sd(GLCM), 2)
  )
Barplot_MRSG

# Bar plot of the mean score per subject and sex, with +/- 1 SD error bars.
# The input CSV is expected to have the columns SUBJECTS, MEAN, SD and SEX.
Data_MRSG <- read.csv("Data_MRSG.csv")
Data_MRSG

# One dodge shared by bars, error bars and labels keeps all three aligned.
dodge_sex <- position_dodge(width = 0.8)

Data_MRSG %>%
  # `group = SEX` lets the error bars and labels dodge by sex. Mapping `fill`
  # on the error bar is not a valid aesthetic, and without a group the labels
  # of both sexes were drawn on top of each other.
  ggplot(aes(x = SUBJECTS, y = MEAN,
             ymin = MEAN - SD, ymax = MEAN + SD, group = SEX)) +
  geom_col(aes(fill = SEX), colour = "black",
           position = dodge_sex, width = 0.8) +
  geom_errorbar(width = 0.1, colour = "darkblue", position = dodge_sex) +
  geom_text(aes(label = MEAN), vjust = 8, colour = "black",
            position = dodge_sex, size = 2) +
  scale_fill_manual(values = c("yellowgreen", "lightblue")) +
  labs(title = "Barplot for the Mean of each Subject",
       x = "Subjects", y = "Mean") +
  theme_bw()

This bar graph shows that there is only a slight difference between the sexes in the mean scores in these subjects. For most subjects, the mean scores of female students differ only slightly from those of males. Overall, performance is close, with only small gaps in mean scores observed in the bar plot above.

##13

library(tidyverse)

# Reading proficiency level of every student and its distribution per region.
RR13 <- MRSG %>% select(READING, REGION)

# Lower bounds of the reading levels; intervals are [lower, upper).
# Scores below 262 are all labelled "1c".
reading_breaks <- c(-Inf, 262, 335, 407, 480, 553, 626, 698, Inf)
reading_levels <- c("1c", "1b", "1a", "2", "3", "4", "5", "6")

RP <- RR13 %>%
  # cut() gives contiguous, non-overlapping intervals and an ordered factor,
  # so the legend runs from the lowest level (1c) to the highest (6).
  mutate(RL = cut(READING, breaks = reading_breaks,
                  labels = reading_levels, right = FALSE)) %>%
  group_by(REGION, RL) %>%
  # Keep the REGION grouping so that `prop` is the percentage within a region.
  summarise(count = n(), .groups = "drop_last") %>%
  mutate(prop = round(count * 100 / sum(count), 2)) %>%
  ungroup()

head(RP)

RP %>%
  ggplot(aes(x = REGION, y = prop, fill = RL)) +
  geom_col(position = "dodge") +
  labs(x = "Region", y = "Proportion",
       title = "Percentage of Reading Level proficiency of students by region") +
  # Short labels derived from the region names, not from a positional vector.
  # NOTE(review): assumes REGION values look like "Region 4A", "NCR", ...
  scale_x_discrete(expand = c(0, 0),
                   labels = function(x) sub("^Region ", "", x)) +
  theme_bw()

The graph above shows that the CARAGA region has the highest proportion of students at Level 1c, which indicates that most of the students in this region have a low level of reading proficiency. Meanwhile, Region 4A (CALABARZON) reaches Level 5, although only one student achieved it, while NIR has the largest number of students at Level 4. This shows that Region 4A and NIR perform best in attaining a high level of reading proficiency.

##14:

# Mathematics proficiency level of every student and its distribution per region.
ML14 <- MRSG %>% select(MATH, REGION)

# Lower bounds of the mathematics levels; intervals are [lower, upper).
# Scores below 420 are all labelled "1". The top boundary is 669 on both
# sides; the earlier "< 668" / ">= 669" pair left scores in [668, 669)
# without a level.
math_breaks <- c(-Inf, 420, 482, 545, 607, 669, Inf)
math_levels <- c("1", "2", "3", "4", "5", "6")

MP <- ML14 %>%
  mutate(Level = cut(MATH, breaks = math_breaks,
                     labels = math_levels, right = FALSE)) %>%
  group_by(REGION, Level) %>%
  # Keep the REGION grouping so that `prop` is the percentage within a region.
  summarise(count = n(), .groups = "drop_last") %>%
  mutate(prop = round(count * 100 / sum(count), 2)) %>%
  ungroup()

head(MP)

MP %>%
  ggplot(aes(x = REGION, y = prop, fill = Level)) +
  geom_col(position = "dodge") +
  labs(x = "Region", y = "proportion",
       title = "Percentage of Mathematics Level proficiency of students by region") +
  # Short labels derived from the region names, not from a positional vector.
  # NOTE(review): assumes REGION values look like "Region 4A", "NCR", ...
  scale_x_discrete(expand = c(0, 0),
                   labels = function(x) sub("^Region ", "", x)) +
  theme_bw()

The data visualization above indicates that the CARAGA region has a high percentage of students at Level 1, which means a low level of mathematics proficiency. A few students in some regions reached Levels 4 and 5; these regions are the Negros Island Region, the National Capital Region, Region 7, and Region 6. This shows that some of the students in these regions reached a high level of mathematics proficiency.

##15

# Reading proficiency by grade level and sex.

# Append the grade level to the score table (rows are matched by position).
GL <- phil_pisa2018 %>% select(ST001D01T)

MRSG2 <- cbind(MRSG, GL)

# Lower bounds of the reading levels; intervals are [lower, upper).
reading_level_breaks <- c(-Inf, 262, 335, 407, 480, 553, 626, 698, Inf)
reading_level_names <- c("1c", "1b", "1a", "2", "3", "4", "5", "6")

W <- MRSG2 %>%
  select(READING, ST001D01T, ST004D01T) %>%
  mutate(
    # Code 1 = Female, 2 = Male, matching the Female/Male legend labels used
    # for this column in the other charts of this report (it was inverted
    # here). NOTE(review): confirm against the PISA codebook, and re-check any
    # narrative text that interprets the Female/Male panels.
    Sex = case_when(ST004D01T == 1 ~ "Female",
                    ST004D01T == 2 ~ "Male"),
    Level = cut(READING, breaks = reading_level_breaks,
                labels = reading_level_names, right = FALSE)
  ) %>%
  # Level is the last grouping variable, so `prop` below is the percentage of
  # each level within one grade and sex (bars in a grade/panel sum to 100).
  # Previously Sex was last, which produced the share of each sex within a
  # level and grade instead.
  group_by(ST001D01T, Sex, Level) %>%
  summarise(count = n(), .groups = "drop_last") %>%
  mutate(prop = round(count * 100 / sum(count), 2)) %>%
  ungroup()

head(W)

W %>%
  ggplot(aes(x = ST001D01T, y = prop)) +
  geom_col(aes(fill = Level), position = position_dodge(0.6),
           colour = "blue") +
  facet_wrap(~Sex) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 100)) +
  theme_bw() +
  labs(x = "Grade Level", y = "Proportion",
       title = "Percentage of Reading Proficiency of students by grade level")

The bar graph above indicates that Grade 10 female students obtained the highest percentage in reading proficiency, followed by Grade 12 students, while Grade 7 female students had the lowest percentage. Furthermore, Grade 10 male students reached the highest percentage in reading proficiency and, at the same time, also had the highest percentage of students with a low level of reading proficiency.

#MATHEMATICS

# Mathematics proficiency by grade level and sex.

# Lower bounds of the mathematics levels; intervals are [lower, upper).
# The top boundary is 669 on both sides; the earlier "< 668" / ">= 669" pair
# left scores in [668, 669) without a level.
math_level_breaks <- c(-Inf, 420, 482, 545, 607, 669, Inf)
math_level_names <- c("1", "2", "3", "4", "5", "6")

Z <- MRSG2 %>%
  select(MATH, ST001D01T, ST004D01T) %>%
  mutate(
    # Code 1 = Female, 2 = Male, matching the Female/Male legend labels used
    # for this column in the other charts of this report (it was inverted
    # here). NOTE(review): confirm against the PISA codebook, and re-check any
    # narrative text that interprets the Female/Male panels.
    Sex = case_when(ST004D01T == 1 ~ "Female",
                    ST004D01T == 2 ~ "Male"),
    Level = cut(MATH, breaks = math_level_breaks,
                labels = math_level_names, right = FALSE)
  ) %>%
  # Level is the last grouping variable, so `prop` below is the percentage of
  # each level within one grade and sex (bars in a grade/panel sum to 100).
  # Previously Sex was last, which produced the share of each sex within a
  # level and grade instead.
  group_by(ST001D01T, Sex, Level) %>%
  summarise(count = n(), .groups = "drop_last") %>%
  mutate(prop = round(count * 100 / sum(count), 2)) %>%
  ungroup()

head(Z)

Z %>%
  ggplot(aes(x = ST001D01T, y = prop)) +
  geom_col(aes(fill = Level), position = position_dodge(0.8),
           colour = "blue") +
  facet_wrap(~Sex) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 100)) +
  theme_bw() +
  labs(x = "Grade Level", y = "Proportion",
       title = "Percentage of Mathematics Proficiency of students by grade level")

The bar graph above indicates that Grade 9 and Grade 10 female students reached the highest percentage in mathematics proficiency, while Grade 7 students obtained the lowest. Moreover, Grade 11 male students had the lowest level of proficiency in mathematics, while Grade 9 male students had the highest.

##16:

# Scatter plot of one subject score against another for selected grade levels.
#
# data:      data frame with the two score columns, ST001D01T (grade level)
#            and ST004D01T (sex).
# x_subject: name of the column on the x-axis (string).
# y_subject: name of the column on the y-axis (string).
# grades:    grade levels to keep; defaults to grades 9 and 10.
# Returns a ggplot object; colour encodes grade level, point shape encodes sex.
plot_subject_pair <- function(data, x_subject, y_subject,
                              grades = c("9", "10")) {
  data %>%
    filter(ST001D01T %in% .env$grades) %>%
    ggplot(aes(x = .data[[x_subject]], y = .data[[y_subject]],
               colour = as.factor(ST001D01T),
               shape = as.factor(ST004D01T))) +
    geom_point(size = 2) +
    scale_shape_discrete(name = "Sex", labels = c("Female", "Male")) +
    scale_colour_discrete(name = "Grade_Level") +
    labs(x = x_subject, y = y_subject) +
    theme_bw()
}

# Add the common title to an arranged figure and apply the dark outer theme.
# `0.5` is the theme's base font size (it was passed positionally before).
annotate_relationship <- function(figure) {
  annotate_figure(
    figure,
    top = text_grob("Relationship between Subject Areas",
                    face = "bold", size = 13, color = "yellow")
  ) + theme_dark(base_size = 0.5)
}

# One plot per pair of subject areas.
K1 <- plot_subject_pair(MRSG2, "MATH", "SCIENCE")
K2 <- plot_subject_pair(MRSG2, "READING", "GLCM")
K3 <- plot_subject_pair(MRSG2, "MATH", "READING")
K4 <- plot_subject_pair(MRSG2, "MATH", "GLCM")
K5 <- plot_subject_pair(MRSG2, "SCIENCE", "GLCM")
K6 <- plot_subject_pair(MRSG2, "SCIENCE", "READING")

# Three figures of two panels each.
CS <- ggarrange(K1, K2)

annotate_relationship(CS)

CF <- ggarrange(K3, K4)

annotate_relationship(CF)

CM <- ggarrange(K5, K6)

annotate_relationship(CM)

Based on the scatter plots shown above, there are linear relationships between the subject areas: in each plot the points follow a line running diagonally upward, which shows a positive relationship among Math, Science, Reading, and GLCM. These relationships are similar for Grade 9 and Grade 10 students and for male and female students in general. Furthermore, this suggests a strong correlation among these subject areas, regardless of grade level and gender.

No.17:CONCLUSION

Generally, the Phil PISA 2018 data show that most of the participants are students from Region 4A (CALABARZON), NCR, and Region 3, while the rest are from the other regions. Most of these students are junior high school students in Grade 9 and Grade 10, while there were fewer senior high school participants.

Students from the National Capital Region (NCR) performed best, as they obtained the highest mean scores in all the subjects (Math, Science, GLCM, and Reading). This signifies that most of the participants from this region have higher academic abilities or potential. Meanwhile, students from the CARAGA region have the lowest mean scores in most of these subjects, except for Reading.

Moreover, for most subjects the mean scores of female students differ only slightly from those of males; performance is close, with only small gaps in mean scores observed in the bar plot above. Furthermore, Grade 10 female and male students had the highest percentage in reading proficiency. Meanwhile, Grade 9 male and female students, as well as Grade 10 female students, garnered the highest results in mathematics proficiency. This indicates that participants from junior high school generally performed better.

ADOBAS_CAPSTONE_PROJECT

Leonessa T. Adobas

2023-02-01