library(dplyr)##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(ggtext)
library(tidyverse)## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ✔ readr 2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
- We imported the dataset from a CSV file called “data.csv”.
- We handled missing values, ensuring they were correctly identified as NA.
- We converted character variables into factors for efficient data processing.
data <- read.csv("data.csv", stringsAsFactors = TRUE, na.strings = c("", "NA"))
- We improved the clarity and interpretability of column names by renaming them.
- Columns were renamed to reflect the data they contain, facilitating easier analysis in subsequent steps.
# Replace the long, auto-generated survey-question column names with short
# codes. Every pair is `new_name = old_name`; all pairs are applied in a single
# rename() call, in the same order as the questionnaire.
data <- data %>%
  rename(
    ConsertForm = In.order.to.proceed..please.fill.out.this.consent.form.,
    # Q1: awareness of social media
    X1 = X1..Below.is.a.list.of.some.of.the.social.media.that.are.available..Please.indicate.which.you.are.aware.of.,
    X1.1 = X1.1.Are.there.any.other.social.media.available.that.you.are.aware.of.but.didn.t.mention.in.question.1.,
    X1.2 = X1.2.Please.mention.them.separating.them.by.using.commas.,
    # Q2: use of social media for learning
    X2 = X2..Do.you.use.social.media.for.learning.purposes.,
    X2.1 = X2.1.Please.give.the.reason.s..why.you.do.not.use.social.media.for.learning.purposes.,
    X2.2 = X2.2.Would.you.be.interested.in.using.social.media.for.learning.purposes.if.the.reason.s..you.mentioned.are.resolved.,
    # Q3: which social media are used for learning
    X3 = X3..Please.indicate.what.social.media.you.use.for.learning.purposes.,
    X3.1 = X3.1.Are.there.any.other.social.media..that.you.are.using.for.learning.purposes.but.didn.t.mention.in.question.3.,
    X3.2 = X3.2.Please.mention.them.separating.them.by.using.commas..,
    X3.3 = X3.3.If.you.use.more.than.one.social.media.please.specify.which.you.tend.to.use.the.most.,
    # Q4: access devices
    X4 = X4..How.do.you.access.the.social.media.that.you.use.for.learning.purposes.,
    X4.1 = X4.1..Are.there.any.other.devices.that.you.use.to.access.social.media.but.not.mentioned.in.question.4.,
    X4.2 = X4.2.Please.mention.them.separating.each.by.using.commas..,
    X4.3 = X4.3.If.you.ticked.more.than.one.access.method.please.specify.which.one.you.tend.to.use.the.most.,
    # Q5: access locations (the source column for X5.3 really is prefixed "X5.2")
    X5 = X5..Where.do.you.access.social.media.for.learning.purposes.,
    X5.1 = X5.1.Are.there.any.other.places.where.you.access.social.media.but.not.mentioned.in.question.5.,
    X5.2 = X5.2.Please.mention.them.separating.them.by.using.commas..,
    X5.3 = X5.2.If.you.ticked.more.than.one.access.point..please.specify.where.you.tend.to.access.the.most.,
    # Q6: learning activities
    X6 = X6..What.are.the.Learning.activities.that.you.use.social.media.for.,
    X6.1 = X6.1.Are.there.any.other.learning.activities.that.you.have.used.social.media.for.but.not.mentioned.in.question.6.,
    X6.2 = X6.2.Please.mention.them.separating.them.by.using.commas..,
    # Q7: weekly frequency of use
    X7 = X7..How.often.do.you.use.the.social.media.for.learning.purposes..per.week..,
    # Q8: suitability per academic subject
    X8ForeignLanguage = X8..Which.academic.subjects.do.you.think.are.better.suited.for.using.social.media.as.a.learning.tool...Foreign.language.,
    X8SocialSciences = X8..Which.academic.subjects.do.you.think.are.better.suited.for.using.social.media.as.a.learning.tool...Social.sciences.,
    X8ComputerScience = X8..Which.academic.subjects.do.you.think.are.better.suited.for.using.social.media.as.a.learning.tool...Computer.science.,
    X8NaturalSciences = X8..Which.academic.subjects.do.you.think.are.better.suited.for.using.social.media.as.a.learning.tool...Natural.sciences.,
    X8Humanities = X8..Which.academic.subjects.do.you.think.are.better.suited.for.using.social.media.as.a.learning.tool...Humanities.,
    X8Business = X8..Which.academic.subjects.do.you.think.are.better.suited.for.using.social.media.as.a.learning.tool...Business.,
    # Q9: constraints on using social media for learning
    X9Databundles = X9..Below.are.the.possible.constraints.that.could.limit.your.use.of.social.media.for.learning.purposes...Please.indicate.how.much.of.a.constraint.they.are.to.your.use.of.social.media...High.cost.of.subscription.to.internet.data.bundles.,
    X9Smartphones = X9..Below.are.the.possible.constraints.that.could.limit.your.use.of.social.media.for.learning.purposes...Please.indicate.how.much.of.a.constraint.they.are.to.your.use.of.social.media...High.cost.of.social.media.enabled.phones..smartphones..,
    X9Connectivity = X9..Below.are.the.possible.constraints.that.could.limit.your.use.of.social.media.for.learning.purposes...Please.indicate.how.much.of.a.constraint.they.are.to.your.use.of.social.media...Poor.internet.connectivity.,
    X9LowLiteracy = X9..Below.are.the.possible.constraints.that.could.limit.your.use.of.social.media.for.learning.purposes...Please.indicate.how.much.of.a.constraint.they.are.to.your.use.of.social.media...Low.ICT.literacy.,
    X9LowAwareness = X9..Below.are.the.possible.constraints.that.could.limit.your.use.of.social.media.for.learning.purposes...Please.indicate.how.much.of.a.constraint.they.are.to.your.use.of.social.media...Low.awareness.of.the.social.media.forlearning.purposes.,
    X9Unwillingness = X9..Below.are.the.possible.constraints.that.could.limit.your.use.of.social.media.for.learning.purposes...Please.indicate.how.much.of.a.constraint.they.are.to.your.use.of.social.media...A.friend.s.unwillingness.to.communicate.learning.issues.through.social.media.,
    X9Poorknowledge = X9..Below.are.the.possible.constraints.that.could.limit.your.use.of.social.media.for.learning.purposes...Please.indicate.how.much.of.a.constraint.they.are.to.your.use.of.social.media...Poor.knowledge.of.social.media.application.software.and.sites.,
    # Q10 and demographics
    Comments = X10..If.you.have.any.further.comments.relating.to.the.use.of.social.media.for.learning.purposes..please.specify.them.below.,
    Program = Please.specify.the.program.that.best.describes.you.,
    SubjectWithArts = Please.select.the.two.subject.areas.in.which.you.specialize,
    SubjectWithSciences = Please.select.the.two.subject.areas.in.which.you.specialize.1,
    SubjectBed = Please.select.the.subject.area.in.which.you.specialize,
    YearofStudy = Please.indicate.the.year.of.study.you.are.currently.in.,
    Background = Before.enrolling.at.DUCE..please.select.the.option.that.best.describes.your.background.,
    WorkExperience = Please.indicate.the.number.of.years.of.teaching.experience.you.have.,
    Gender = Please.specify.your.gender.
  )
data <- rename(data, Age = Please.indicate.your.age.within.the.specified.range.)
This section presents a comprehensive analysis of the demographic information of the dataset’s participants, covering age, gender, year of study, educational background, working experience, program of study, and subject combinations. The analysis entails generating frequency tables, calculating percentages, and crafting visualizations to gain a deeper understanding of the distributions.
Methodology:
Step 1: Calculate Frequency Tables
Our analysis commences by calculating frequency tables using the
table function. These tables enumerate the occurrences of
each variable group within the dataset, shedding light on the
distribution patterns.
Step 2: Convert Frequency Tables to Data Frames
To facilitate data manipulation, we convert these frequency tables into data frames. These data frames comprise two vital columns, such as “Age” and “Frequency” or “Gender” and “Frequency,” depending on the specific demographic factor under consideration.
Step 3: Calculate Percentages
To offer a more holistic perspective on the distributions, we compute the percentage of participants in each variable group. This entails dividing the frequency of each group by the total number of participants and rounding the result to ensure clarity.
Step 4: Calculate Total Rows
Incorporating a “Total” row becomes essential at this juncture. This row effectively summarizes the overall count and percentage of all participants, serving as a valuable point of reference.
Step 5: Combine Data Frames with Total Rows
The “Total” row is appended to the previously created
data frame using the rbind function. This step
consolidates the key insights into a single, cohesive table.
Step 6: Print the Resulting Table
To make these insights easily accessible, we print the final table, featuring variable groups (e.g., age, gender), their frequencies, and respective percentages to the console.
Step 7: Creating a Bar Plot
In addition to tabular representations, we employ the versatile
ggplot2 library to generate informative bar plots. These
visualizations vividly illustrate the distribution of variable groups,
with each bar representing a group’s frequency. Labels are thoughtfully
incorporated to display both the count and percentage for each
group.
Step 8: Save Visualization as PNG
The resulting visualizations are saved as PNG image files for future reference, each aptly named for the corresponding variable being analyzed (e.g., “age.png,” “gender.png”). These image files are ideally suited for inclusion in dissertation reports or presentations.
# ---- Age: frequency table ----
# Tally respondents per age bracket.
freq_tableAge <- table(data$Age)

# Two-column data frame: bracket label and its count.
df_freq <- data.frame(
  Age = as.character(names(freq_tableAge)),
  Frequency = as.numeric(freq_tableAge)
)

# Whole-number percentage of each bracket among all rows of `data`.
total_people <- nrow(data)
df_freq[["Percentage"]] <- round((df_freq[["Frequency"]] / total_people) * 100)

# Summary row: the count is summed, the percentage is fixed at 100 (it is not
# re-derived from the rounded per-bracket percentages).
total_row <- data.frame(
  Age = "Total",
  Frequency = sum(df_freq[["Frequency"]]),
  Percentage = 100
)
df_freq <- rbind(df_freq, total_row)

print(df_freq)
## Age Frequency Percentage
## 1 18-20 3 4
## 2 21-25 60 81
## 3 26-30 9 12
## 4 Above 30 2 3
## 5 Total 74 100

# Persist the table as CSV.
write.csv(df_freq, file = "age.csv")

# ---- Age: bar chart ----
# Per-bracket counts and rounded percentages used for the bar labels.
counts_age <- data %>%
  group_by(Age) %>%
  summarise(count = n()) %>%
  arrange(desc(count)) %>%
  mutate(percentage = round(count / sum(count) * 100))

# One bar per bracket, labelled "count (percentage%)" just above the bar.
plotage <- ggplot(counts_age, aes(x = Age, y = count)) +
  geom_col(fill = "steelblue") +
  geom_text(
    aes(label = paste0(count, " (", percentage, "%)")),
    vjust = -0.3,
    color = "black",
    fontface = "bold"
  ) +
  labs(x = "Age", y = "Frequency") +
  theme_classic() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    axis.title.x = element_text(face = "bold")
  )

# Write the chart to disk as an 8 x 4 inch PNG.
ggsave(
  filename = "age.png",
  plot = plotage,
  width = 8,
  height = 4,
  units = "in"
)
# Calculate the frequency table
# ---- Gender: frequency table ----
freq_tableSex <- table(data$Gender)

# Two-column data frame: gender label and its count.
df_freqSex <- data.frame(
  Sex = as.character(names(freq_tableSex)),
  Frequency = as.numeric(freq_tableSex)
)

# Whole-number percentage of each gender among all rows of `data`.
total_people <- nrow(data)
df_freqSex[["Percentage"]] <- round((df_freqSex[["Frequency"]] / total_people) * 100)

# Summary row: summed count, percentage fixed at 100.
total_row <- data.frame(
  Sex = "Total",
  Frequency = sum(df_freqSex[["Frequency"]]),
  Percentage = 100
)
df_freqSex <- rbind(df_freqSex, total_row)

print(df_freqSex)
## Sex Frequency Percentage
## 1 Female 18 24
## 2 Male 56 76
## 3 Total 74 100

# Persist the table as CSV.
write.csv(df_freqSex, file = "gender.csv")

# ---- Gender: bar chart ----
# Per-gender counts and rounded percentages used for the bar labels.
counts_gender <- data %>%
  group_by(Gender) %>%
  summarise(count = n()) %>%
  arrange(desc(count)) %>%
  mutate(percentage = round(count / sum(count) * 100))

# One bar per gender, labelled "count (percentage%)" just above the bar.
plotsex <- ggplot(counts_gender, aes(x = Gender, y = count)) +
  geom_col(fill = "steelblue") +
  geom_text(
    aes(label = paste0(count, " (", percentage, "%)")),
    vjust = -0.3,
    color = "black",
    fontface = "bold"
  ) +
  labs(x = "Sex", y = "Frequency") +
  theme_classic() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    axis.title.x = element_text(face = "bold")
  )

# Write the chart to disk as an 8 x 4 inch PNG.
ggsave(
  filename = "gender.png",
  plot = plotsex,
  width = 8,
  height = 4,
  units = "in"
)
# Calculate the frequency table
# ---- Year of study: frequency table ----
freq_tableYearOfStudy <- table(data$YearofStudy)

# Two-column data frame: year label and its count.
df_freqYearOfStudy <- data.frame(
  Study_Year = as.character(names(freq_tableYearOfStudy)),
  Frequency = as.numeric(freq_tableYearOfStudy)
)

# Whole-number percentage of each study year among all rows of `data`.
total_people <- nrow(data)
df_freqYearOfStudy[["Percentage"]] <-
  round((df_freqYearOfStudy[["Frequency"]] / total_people) * 100)

# Summary row: summed count, percentage fixed at 100.
total_row <- data.frame(
  Study_Year = "Total",
  Frequency = sum(df_freqYearOfStudy[["Frequency"]]),
  Percentage = 100
)
df_freqYearOfStudy <- rbind(df_freqYearOfStudy, total_row)

print(df_freqYearOfStudy)
## Study_Year Frequency Percentage
## 1 First 25 34
## 2 Second 24 32
## 3 Third 25 34
## 4 Total 74 100

# Persist the table as CSV.
write.csv(df_freqYearOfStudy, file = "yearofstudy.csv")

# ---- Year of study: bar chart ----
# Per-year counts and rounded percentages used for the bar labels.
counts_yrofstudy <- data %>%
  group_by(YearofStudy) %>%
  summarise(count = n()) %>%
  arrange(desc(count)) %>%
  mutate(percentage = round(count / sum(count) * 100))

# One bar per study year, labelled "count (percentage%)" just above the bar.
plotyearofstudy <- ggplot(counts_yrofstudy, aes(x = YearofStudy, y = count)) +
  geom_col(fill = "steelblue") +
  geom_text(
    aes(label = paste0(count, " (", percentage, "%)")),
    vjust = -0.3,
    color = "black",
    fontface = "bold"
  ) +
  labs(x = "Year Of Study", y = "Frequency") +
  theme_classic() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    axis.title.x = element_text(face = "bold")
  )
# Write the chart to disk as a PNG image.
ggsave(filename = "yearofstudy.png", plot = plotyearofstudy, width = 8, height = 4, units = "in")
Before proceeding with the standard analysis steps described earlier, a specific transformation is applied to the ‘Background’ variable. This transformation uses conditional logic to modify certain values. Specifically, if the ‘Background’ value corresponds to “Fresher (I entered directly from a lower level of education),” it is replaced with “Fresher.” Similarly, if the value represents “In-service (I have previous teaching experience),” it is converted to “In-service.” All other values within the ‘Background’ variable remain unaltered, preserving the integrity of the original data. This recoding is implemented to enhance the clarity and consistency of the ‘Background’ variable, thereby facilitating subsequent analysis.
# Shorten the two verbose 'Background' answers to "Fresher" / "In-service".
#
# The column is read with stringsAsFactors = TRUE, so it starts out as a
# factor. It is converted to character first: passing a factor as the fallback
# of ifelse() would turn every unmatched answer into its integer level code
# instead of keeping the original label.
data$Background <- as.character(data$Background)
data$Background <- ifelse(
  data$Background == "Fresher (I entered directly from a lower level of education).",
  "Fresher",
  ifelse(
    data$Background == "In-service(I have previous teaching experience).",
    "In-service",
    data$Background # any other answer (and NA) is left as it is
  )
)
# ---- Background: frequency table ----
freq_tableBackground <- table(data$Background)

# Two-column data frame: background label and its count.
df_freqBackground <- data.frame(
  Background = as.character(names(freq_tableBackground)),
  Frequency = as.numeric(freq_tableBackground)
)

# Whole-number percentage of each background among all rows of `data`.
total_people <- nrow(data)
df_freqBackground[["Percentage"]] <-
  round((df_freqBackground[["Frequency"]] / total_people) * 100)

# Summary row: summed count, percentage fixed at 100.
total_row <- data.frame(
  Background = "Total",
  Frequency = sum(df_freqBackground[["Frequency"]]),
  Percentage = 100
)
df_freqBackground <- rbind(df_freqBackground, total_row)

print(df_freqBackground)
## Background Frequency Percentage
## 1 Fresher 72 97
## 2 In-service 2 3
## 3 Total 74 100

# Persist the table as CSV.
write.csv(df_freqBackground, file = "background.csv")

# ---- Background: bar chart ----
# Per-background counts and rounded percentages used for the bar labels.
counts_background <- data %>%
  group_by(Background) %>%
  summarise(count = n()) %>%
  arrange(desc(count)) %>%
  mutate(percentage = round(count / sum(count) * 100))

# One bar per background, labelled "count (percentage%)" just above the bar.
plotbackground <- ggplot(counts_background, aes(x = Background, y = count)) +
  geom_col(fill = "steelblue") +
  geom_text(
    aes(label = paste0(count, " (", percentage, "%)")),
    vjust = -0.3,
    color = "black",
    fontface = "bold"
  ) +
  labs(x = "Background", y = "Frequency") +
  theme_classic() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    axis.title.x = element_text(face = "bold")
  )

# Write the chart to disk as an 8 x 4 inch PNG.
ggsave(
  filename = "background.png",
  plot = plotbackground,
  width = 8,
  height = 4,
  units = "in"
)
# Calculate the frequency table
# ---- Work experience: frequency table (counts only, no percentages) ----
freq_tableWE <- table(data$WorkExperience)

# Two-column data frame: years of experience (as text) and its count.
df_freqWE <- data.frame(
  Work_Experience = as.character(names(freq_tableWE)),
  Frequency = as.numeric(freq_tableWE)
)

# Number of respondents who answered this question. It is assigned here but
# not used by the statements of this section.
total_people <- nrow(filter(data, !is.na(WorkExperience)))

# Summary row holding the summed count.
total_row <- data.frame(
  Work_Experience = "Total",
  Frequency = sum(df_freqWE[["Frequency"]])
)
df_freqWE <- rbind(df_freqWE, total_row)

print(df_freqWE)
## Work_Experience Frequency
## 1 1 1
## 2 3 1
## 3 Total 2
# Persist the table as CSV.
write.csv(df_freqWE, file = "workingExperience.csv")
Prior to the standard analysis procedures outlined earlier, a specific transformation is applied to the ‘Program’ variable. This transformation uses conditional logic to modify certain values within the variable. Specifically, if the ‘Program’ value corresponds to “Bachelor of Science with Education,” it is replaced with the abbreviation “B.Sc. Ed.” Similarly, if the value represents “Bachelor of Arts with Education,” it is transformed into “B.A. Ed.” Where the value is “Bachelor of Education (Science),” it is recoded as “B.Ed. Science,” and if it is “Bachelor of Education (Arts),” it is converted to “B.Ed. Arts.” All other values within the ‘Program’ variable remain unaltered, preserving the integrity of the original data. This recoding enhances the clarity and consistency of the ‘Program’ variable for subsequent analysis.
# Abbreviate the four degree-programme names.
#
# The column is converted to character first (it is read as a factor), so the
# final fallback keeps the programme's own label for any value that matches
# none of the four names. The fallback must be `Program` itself: using another
# column there would overwrite unmatched programmes with unrelated data.
data$Program <- as.character(data$Program)
data$Program <- ifelse(
  data$Program == "Bachelor of Science with Education",
  "B.Sc. Ed.",
  ifelse(
    data$Program == "Bachelor of Arts with Education",
    "B.A. Ed.",
    ifelse(
      data$Program == "Bachelor of Education (Science)",
      "B.Ed. Science",
      ifelse(
        data$Program == "Bachelor of Education(Arts)",
        "B.Ed. Arts",
        data$Program # any other programme (and NA) is left as it is
      )
    )
  )
)
# ---- Program: frequency table ----
freq_tableProgram <- table(data$Program)

# Two-column data frame: programme label and its count.
df_freqProgram <- data.frame(
  Program = as.character(names(freq_tableProgram)),
  Frequency = as.numeric(freq_tableProgram)
)

# Whole-number percentage of each programme among all rows of `data`.
total_people <- nrow(data)
df_freqProgram[["Percentage"]] <-
  round((df_freqProgram[["Frequency"]] / total_people) * 100)

# Summary row: summed count, percentage fixed at 100.
total_row <- data.frame(
  Program = "Total",
  Frequency = sum(df_freqProgram[["Frequency"]]),
  Percentage = 100
)
df_freqProgram <- rbind(df_freqProgram, total_row)

print(df_freqProgram)
## Program Frequency Percentage
## 1 B.A. Ed. 8 11
## 2 B.Ed. Arts 5 7
## 3 B.Ed. Science 4 5
## 4 B.Sc. Ed. 57 77
## 5 Total 74 100

# Persist the table as CSV.
write.csv(df_freqProgram, file = "program.csv")

# ---- Program: bar chart ----
# Per-programme counts and rounded percentages used for the bar labels.
counts_program <- data %>%
  group_by(Program) %>%
  summarise(count = n()) %>%
  arrange(desc(count)) %>%
  mutate(percentage = round(count / sum(count) * 100))

# One bar per programme, labelled "count (percentage%)" just above the bar.
plotprogram <- ggplot(counts_program, aes(x = Program, y = count)) +
  geom_col(fill = "steelblue") +
  geom_text(
    aes(label = paste0(count, " (", percentage, "%)")),
    vjust = -0.3,
    color = "black",
    fontface = "bold"
  ) +
  labs(x = "Program", y = "Frequency") +
  theme_classic() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    axis.title.x = element_text(face = "bold")
  )
# Write the chart to disk as a PNG image.
ggsave(filename = "program.png", plot = plotprogram, width = 8, height = 4, units = "in")
Before proceeding with the standard analysis steps detailed previously, a specialized transformation is performed on the ‘SubjectWithSciences’ variable. First, rows containing missing values (NAs) are filtered out of the dataset to ensure data completeness. Then, to improve graph readability, the term “Information Science” within the ‘SubjectWithSciences’ variable is replaced with its acronym “IS.” This transformation simplifies the representation of subject combinations, yielding a clearer and more informative graphical presentation of the data.
# ---- B.Sc. Ed. subject combinations: data preparation ----
# Keep only respondents who reported a science subject combination.
data_bsc <- filter(data, !is.na(SubjectWithSciences))

# Abbreviate "Information Science" so the axis labels stay short.
data_bsc$SubjectWithSciences <- gsub(
  "Information Science",
  "IS",
  data_bsc$SubjectWithSciences
)

# ---- B.Sc. Ed. subject combinations: frequency table ----
freq_tableScience <- table(data_bsc$SubjectWithSciences)

# Two-column data frame: subject combination and its count.
df_freqScience <- data.frame(
  B.Sc.Ed = as.character(names(freq_tableScience)),
  Frequency = as.numeric(freq_tableScience)
)

# Whole-number percentage of each combination among the rows of `data_bsc`.
total_people <- nrow(data_bsc)
df_freqScience[["Percentage"]] <-
  round((df_freqScience[["Frequency"]] / total_people) * 100)

# Summary row: summed count, percentage fixed at 100.
total_row <- data.frame(
  B.Sc.Ed = "Total",
  Frequency = sum(df_freqScience[["Frequency"]]),
  Percentage = 100
)
df_freqScience <- rbind(df_freqScience, total_row)

print(df_freqScience)
## B.Sc.Ed Frequency Percentage
## 1 Biology;Chemistry 32 56
## 2 Chemistry;Geography 3 5
## 3 Chemistry;Physics 5 9
## 4 Mathematics;Economics 1 2
## 5 Mathematics;IS 12 21
## 6 Mathematics;Physics 4 7
## 7 Total 57 100

# Persist the table as CSV.
write.csv(df_freqScience, file = "subjectcombinationScience.csv")

# ---- B.Sc. Ed. subject combinations: bar chart ----
# Per-combination counts and rounded percentages used for the bar labels.
counts_bsc <- data_bsc %>%
  group_by(SubjectWithSciences) %>%
  summarise(count = n()) %>%
  arrange(desc(count)) %>%
  mutate(percentage = round(count / sum(count) * 100))

# One bar per combination, labelled "count (percentage%)" just above the bar.
plotteachingsc <- ggplot(counts_bsc, aes(x = SubjectWithSciences, y = count)) +
  geom_col(fill = "steelblue") +
  geom_text(
    aes(label = paste0(count, " (", percentage, "%)")),
    vjust = -0.3,
    color = "black",
    fontface = "bold"
  ) +
  labs(x = "Teaching Subjects", y = "Frequency") +
  theme_classic() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    axis.title.x = element_text(face = "bold")
  )
# Write the chart to disk as a PNG image.
ggsave(filename = "teachingsciences.png", plot = plotteachingsc, width = 8, height = 4, units = "in")
Here too, a preliminary step filters out rows with missing values (NAs) in the ‘SubjectWithArts’ variable. This ensures that the analysis is based on complete data, enhancing its reliability.
Furthermore, to improve the visual clarity of the subsequent graphical representation, certain subject names within the ‘SubjectWithArts’ variable are replaced with their respective acronyms. For example, “Geography” becomes “GE,” “History” becomes “HI,” “Linguistics” becomes “LL,” “Kiswahili” becomes “KI,” “French” becomes “FR,” and “Literature” becomes “LT.” This transformation simplifies the presentation of subject combinations, making the resulting plot more reader-friendly and interpretable.
# ---- B.A. Ed. subject combinations: data preparation ----
# Keep only respondents who reported an arts subject combination.
data_arts <- filter(data, !is.na(SubjectWithArts))

# ---- B.A. Ed. subject combinations: frequency table ----
# The table is built from the full subject names; acronyms are applied later,
# for the chart only.
freq_tableArts <- table(data_arts$SubjectWithArts)

# Two-column data frame: subject combination and its count.
df_freqArts <- data.frame(
  B.A.Ed = as.character(names(freq_tableArts)),
  Frequency = as.numeric(freq_tableArts)
)

# Whole-number percentage of each combination among the rows of `data_arts`.
total_people <- nrow(data_arts)
df_freqArts[["Percentage"]] <-
  round((df_freqArts[["Frequency"]] / total_people) * 100)

# Summary row: summed count, percentage fixed at 100 (the rounded
# per-combination percentages need not add up to exactly 100).
total_row <- data.frame(
  B.A.Ed = "Total",
  Frequency = sum(df_freqArts[["Frequency"]]),
  Percentage = 100
)
df_freqArts <- rbind(df_freqArts, total_row)

print(df_freqArts)
## B.A.Ed Frequency Percentage
## 1 Geography;History 1 12
## 2 Geography;Kiswahili 1 12
## 3 Geography;Literature 1 12
## 4 Kiswahili;French 1 12
## 5 Kiswahili;History 1 12
## 6 Linguistics;French 1 12
## 7 Linguistics;History 1 12
## 8 Linguistics;Literature 1 12
## 9 Total 8 100

# Persist the table as CSV.
write.csv(df_freqArts, file = "subjectcombinationArts.csv")

# ---- B.A. Ed. subject combinations: bar chart ----
# Swap each subject name for its two-letter acronym so the axis labels fit.
data_arts$SubjectWithArts <- data_arts$SubjectWithArts %>%
  gsub("Geography", "GE", x = .) %>%
  gsub("History", "HI", x = .) %>%
  gsub("Linguistics", "LL", x = .) %>%
  gsub("Kiswahili", "KI", x = .) %>%
  gsub("French", "FR", x = .) %>%
  gsub("Literature", "LT", x = .)

# Per-combination counts and rounded percentages used for the bar labels.
counts_arts <- data_arts %>%
  group_by(SubjectWithArts) %>%
  summarise(count = n()) %>%
  arrange(desc(count)) %>%
  mutate(percentage = round(count / sum(count) * 100))

# One bar per combination, labelled "count (percentage%)" just above the bar.
plotteachingARTS <- ggplot(counts_arts, aes(x = SubjectWithArts, y = count)) +
  geom_col(fill = "steelblue") +
  geom_text(
    aes(label = paste0(count, " (", percentage, "%)")),
    vjust = -0.3,
    color = "black",
    fontface = "bold"
  ) +
  labs(x = "Teaching Subjects", y = "Frequency") +
  theme_classic() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    axis.title.x = element_text(face = "bold")
  )

# Write the chart to disk as an 8 x 4 inch PNG.
ggsave(
  filename = "teachingARTS.png",
  plot = plotteachingARTS,
  width = 8,
  height = 4,
  units = "in"
)
# Filter out rows with NA values in 'SubjectBed'
# ---- B.Ed. subject: data preparation ----
# Keep only respondents who reported a B.Ed. subject.
data_bed <- filter(data, !is.na(SubjectBed))

# ---- B.Ed. subject: frequency table ----
freq_tableBed <- table(data_bed$SubjectBed)

# Two-column data frame: subject and its count.
df_freqBed <- data.frame(
  BED = as.character(names(freq_tableBed)),
  Frequency = as.numeric(freq_tableBed)
)

# Whole-number percentage of each subject among the rows of `data_bed`.
total_people <- nrow(data_bed)
df_freqBed[["Percentage"]] <-
  round((df_freqBed[["Frequency"]] / total_people) * 100)

# Summary row: summed count, percentage fixed at 100.
total_row <- data.frame(
  BED = "Total",
  Frequency = sum(df_freqBed[["Frequency"]]),
  Percentage = 100
)
df_freqBed <- rbind(df_freqBed, total_row)

print(df_freqBed)
## BED Frequency Percentage
## 1 Chemistry 1 11
## 2 Geography 1 11
## 3 History 1 11
## 4 Kiswahili 1 11
## 5 Linguistics 1 11
## 6 Literature 1 11
## 7 Physics 3 33
## 8 Total 9 100

# Persist the table as CSV.
write.csv(df_freqBed, file = "subjectcombinationBed.csv")

# ---- B.Ed. subject: bar chart ----
# Per-subject counts and rounded percentages used for the bar labels.
counts_bed <- data_bed %>%
  group_by(SubjectBed) %>%
  summarise(count = n()) %>%
  arrange(desc(count)) %>%
  mutate(percentage = round(count / sum(count) * 100))

# One bar per subject, labelled "count (percentage%)" just above the bar.
plotteachingBED <- ggplot(counts_bed, aes(x = SubjectBed, y = count)) +
  geom_col(fill = "steelblue") +
  geom_text(
    aes(label = paste0(count, " (", percentage, "%)")),
    vjust = -0.3,
    color = "black",
    fontface = "bold"
  ) +
  labs(x = "Teaching Subject", y = "Frequency") +
  theme_classic() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    axis.title.x = element_text(face = "bold")
  )

# Write the chart to disk as an 8 x 4 inch PNG.
ggsave(
  filename = "teachingBED.png",
  plot = plotteachingBED,
  width = 8,
  height = 4,
  units = "in"
)
5.8.2 SocialSciences