# Install (if needed) and load the necessary libraries
# Load libraries
library(ggplot2) ##for data visualization
library(tidyverse) # For data manipulation
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.4 ✔ tibble 3.2.1
## ✔ purrr 1.0.4 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(psych) #For description of data
##
## Attaching package: 'psych'
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(gganimate) #adds dynamic animations for your visualization
library(ggdist) #Enhances visual representation of distribution
#library(ggstatsplot) #Integrates statistical tests directly
library(patchwork)
# Set the working directory. It is the folder that relative input and output
# paths in this script are resolved against.
setwd("~/R TRAINING")
# Import the GSS subset into a data frame
gss <- read.csv(file = "GSSsubset.csv")
#head(gss)
#View(gss)
#--------------------------
# Exploratory data analysis (EDA)
#--------------------------
# Before we start visualizing our data, we need to understand the characteristics of our data.
# Here are some functions:
#names(gss) # for features / variable names in the data set
#dim(gss) # for dimensions of the data set
#str(gss) # structure of the data set
#summary(gss) # summary of the descriptive statistics
#head(gss) # for the first few rows in the data set
#describe(gss) # for descriptive statistics
#--------------------------
# Data visualization with ggplot2
#--------------------------
#THE DATA
#ggplot(data=gss)
# The aesthetics
#ggplot(data=gss, aes(x=age, y=income))
#The geometrics
#ggplot(data=gss, aes(x=age, y=income))+ geom_point()
# Scatter plot of income against age; every point is drawn in one fixed
# colour (set outside aes(), so it is not mapped to a variable).
ggplot(gss, mapping = aes(x = age, y = income)) +
  geom_point(colour = "maroon")
#color point in the plot by sex
#ggplot(data=gss, aes(x=age, y=income, color=degree))+ geom_point() #color by degree
#ggplot(data=gss, aes(x=age, y=income, color=sex))+ geom_point() #color by sex
#set the theme at the end of the plot
#ggplot(data=gss, aes(x=age, y=income, color=sex))+ geom_point() +theme_light()
# Labels: scatter plot of income against age, points coloured by sex.
# The title and axis labels describe what is actually mapped (x = age,
# y = income); the previous labels were copied from the gender bar chart.
ggplot(data = gss, aes(x = age, y = income, color = sex)) +
  geom_point() +
  labs(
    title = "Income by age and gender",
    x = "Age",
    y = "Income",
    color = "Gender",
    caption = "Chep, 2025"
  ) +
  theme_linedraw() +
  # centre the plot title
  theme(plot.title = element_text(hjust = 0.5))
# Facets (make use of the ~ operator): one panel per level of sex.
# The title and axis labels describe what is actually mapped (x = age,
# y = income); the previous labels were copied from the gender bar chart.
ggplot(data = gss, aes(x = age, y = income, color = sex)) +
  geom_point() +
  facet_wrap(~sex) +
  labs(
    title = "Income by age, faceted by gender",
    x = "Age",
    y = "Income",
    color = "Gender",
    caption = "Chep, 2025"
  ) +
  theme_linedraw() +
  # centre the plot title
  theme(plot.title = element_text(hjust = 0.5))
# Bar plot: display of categorical data.
# Example: number of respondents in each sex category (geom_bar counts rows).
ggplot(gss, mapping = aes(x = sex)) +
  geom_bar(fill = "maroon") +
  ggtitle("Gender Distribution") +
  xlab("Gender") +
  ylab("Number of respondents") +
  labs(caption = "Chep, 2025")
# Histogram of income, bin size given as a width on the income scale
ggplot(data = gss, mapping = aes(x = income)) +
  geom_histogram(binwidth = 10000, fill = "orange", color = "black") +
  labs(title = "Income distribution", x = "income", y = "frequency") +
  theme_light()
#--------------------------
# Same histogram, bin size given as a number of bins instead
ggplot(data = gss, mapping = aes(x = income)) +
  geom_histogram(bins = 20, fill = "orange", color = "black") +
  labs(title = "Income distribution", x = "income", y = "frequency") +
  theme_light()
# Boxplot: summarises the distribution of a continuous variable (income)
# within each level of a categorical variable (sex).
# x is mapped to sex so there is one box per gender, matching the title and
# the x-axis label; the previous mapping used the continuous variable age.
ggplot(data = gss, aes(x = sex, y = income, color = sex)) +
  geom_boxplot(fill = "purple") +
  labs(
    title = "Income by gender",
    x = "Gender",
    y = "Income",
    caption = "Chep, 2025"
  )
#combining the plots
library(patchwork)
# Build three plots and lay them out side by side with patchwork.

# p1: income against age, coloured by sex. Labels describe the mapped
# variables (they were previously copied from the gender bar chart).
p1 <- ggplot(data = gss, aes(x = age, y = income, color = sex)) +
  geom_point() +
  labs(
    title = "Income by age and gender",
    x = "Age",
    y = "Income",
    color = "Gender",
    caption = "Chep, 2025"
  ) +
  theme_linedraw() +
  theme(plot.title = element_text(hjust = 0.5))

# p2: histogram of income with bins 10000 wide
p2 <- ggplot(gss, aes(x = income)) +
  geom_histogram(binwidth = 10000, fill = "orange", color = "black") +
  labs(title = "Income distribution", x = "income", y = "frequency") +
  theme_light()

# p3: income by gender. x is mapped to sex (one box per gender) so the plot
# matches its title and x-axis label; it was previously mapped to age.
p3 <- ggplot(data = gss, aes(x = sex, y = income, color = sex)) +
  geom_boxplot(fill = "purple") +
  labs(
    title = "Income by gender",
    x = "Gender",
    y = "Income",
    caption = "Chep, 2025"
  )

# `|` places plots next to each other; plot_annotation() adds an overall title
combine <- (p1 | p2 | p3) + plot_annotation(title = "Combined plots example")
print(combine)
#Exporting plots
#plot=ggsave("plot.png", width = 10,height=6,dpi=300)
#Example: scatter plot customizations
#Load data set
library(ggplot2)
# Read the second data set from the current working directory
SMdata <- read.csv(file = "SMdata2.csv")
#Exploratory Data Analysis
#names(SMdata)
#str(SMdata)
#head(SMdata)
#Data VISualization with ggplot2
#The data
#ggplot(data = SMdata)
#The aesthetics
#ggplot(data = SMdata, aes(x=Total, y=Positive))
#The geometrics
# Positive against Total, all points in one fixed colour
ggplot(SMdata, mapping = aes(x = Total, y = Positive)) +
  geom_point(colour = "maroon")
# Same scatter, with point colour mapped to Region
ggplot(SMdata, mapping = aes(x = Total, y = Positive, colour = Region)) +
  geom_point()
# Histogram of Incidence_Rate with geom_histogram().
# No bin setting is given, so ggplot2 uses its default and reports:
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(SMdata, mapping = aes(x = Incidence_Rate)) +
  geom_histogram()
# Same histogram with 10 bins; `fill` colours the bars, `color` their borders
ggplot(SMdata, mapping = aes(x = Incidence_Rate)) +
  geom_histogram(bins = 10, fill = "maroon", color = "green")
# Bar chart with geom_bar(): number of rows per Month
ggplot(SMdata, mapping = aes(x = Month)) +
  geom_bar(fill = "purple", color = "green")
# Boxplot of Total by Intervention_Type, boxes filled by Region
ggplot(SMdata, mapping = aes(x = Intervention_Type, y = Total, fill = Region)) +
  geom_boxplot() +
  ggtitle("Number of malaria case by Intervention type") +
  xlab("Intervention type") +
  ylab("Total Cases") +
  labs(caption = "Source:Chep,2025") +
  theme_light() +
  # legend above the panel, title centred
  theme(
    legend.position = "top",
    plot.title = element_text(hjust = 0.5)
  )
# Facet: the same boxplot, split into one panel per Year
ggplot(SMdata, mapping = aes(x = Intervention_Type, y = Total, fill = Region)) +
  geom_boxplot() +
  facet_wrap(~Year) +
  ggtitle("Number of malaria case by Intervention type") +
  xlab("Intervention type") +
  ylab("Total Cases") +
  labs(caption = "Source:Chep,2025") +
  theme_light() +
  theme(
    legend.position = "top",
    plot.title = element_text(hjust = 0.5) # align the title to the centre
  )
#plot_2=ggsave("plot_2.png", dpi=350)
# Colour gradient: point colour runs from blue (low) to red (high) according
# to Incidence_Rate, with one panel per Intervention_Type
ggplot(SMdata, mapping = aes(x = Total, y = Positive, color = Incidence_Rate)) +
  geom_point() +
  facet_wrap(~Intervention_Type) +
  scale_color_gradient(low = "blue", high = "red") +
  ggtitle("Number of malaria case by Total case") +
  xlab("Total") +
  ylab("Positive Cases") +
  labs(caption = "Source:Chep,2025") +
  theme_bw()
# Violin plots are similar to box plots, except that they also show the
# kernel probability density of the data.
# (This comment previously wrapped onto a line of its own without a leading
# `#`, leaving a bare symbol that stopped the script with "object not found".)
# NOTE(review): assumes Age is a categorical column (e.g. age groups) so that
# one violin is drawn per group - confirm against the data.
ggplot(data = SMdata, aes(x = Age, y = Positive)) +
  geom_violin(fill = "maroon")
library(gganimate)
library(dplyr)
# Histogram of Incidence_Rate with bars filled by Region, stored as plot3.
# The `+` after labs() is required: without it theme(legend.position = ...)
# is evaluated as a separate statement (its theme object is just printed to
# the console) and the legend stays in its default position.
plot3 <- ggplot(data = SMdata, aes(x = Incidence_Rate, fill = Region)) +
  geom_histogram(color = "maroon") +
  theme_classic() +
  labs(caption = "Source:Chep,2025") +
  theme(legend.position = "bottom")
# No bin setting is given, so printing reports:
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
print(plot3)
# Add animation.
# The animation spec is assembled as a single expression. Previously the
# first line was commented out while ease_aes("linear") was still live, so
# the script only printed a stray EaseAes object and `animate_plot` (used by
# anim_save below) was never defined.
# Nothing is rendered here: frames are only drawn when animate_plot is
# printed or passed to animate()/anim_save().
# NOTE(review): assumes Year is a numeric or date-like column suitable for
# transition_time() - confirm against the data.
animate_plot <- plot3 +
  transition_time(Year) + # animate over the time variable
  ease_aes("linear") # smooth linear transitions between frames
# save or display
#anim_save("Plot_3.gif", animation = animate_plot) # save as GIF