#Install and load necessary libraries

#Load libraries

library(ggplot2)  ##for data visualization
library(tidyverse) # For data manipulation
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.4     ✔ tibble    3.2.1
## ✔ purrr     1.0.4     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(psych)   #For description of data 
## 
## Attaching package: 'psych'
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(gganimate) #adds dynamic animations for your visualization

library(ggdist) #Enhances visual representation of distribution
#library(ggstatsplot) #Integrates statistical tests directly into plots
library(patchwork)

# Working directory ----
# Point R at the training folder so that the relative file paths used for
# inputs and outputs in this script resolve against it.
setwd(dir = "~/R TRAINING")

# Import the data set ----
# Read GSSsubset.csv into the data frame `gss`.
gss <- read.csv(file = "GSSsubset.csv")
# head(gss)   # first few rows
# View(gss)   # spreadsheet-style viewer

#---------------------------------------------------------------------------
# Exploratory data analysis (EDA)
#---------------------------------------------------------------------------
# Before we start visualizing our data, we need to understand the
# characteristics of our data.
# Here are some functions:

#names(gss)     #for features / variable names in the data set
#dim(gss)       #for dimensions of the data set
#str(gss)       #structure of the data set
#summary(gss)   #summary of the descriptive statistics
#head(gss)      #for the first few rows in the data set
#describe(gss)  #for descriptive statistics

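#---------------------------------------------------------------------------
# Data visualization with ggplot2
#---------------------------------------------------------------------------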

# Building a plot layer by layer ----

# Step 1 - the data: bind the plot to `gss`
# ggplot(data = gss)

# Step 2 - the aesthetics: map age to x and income to y
# ggplot(data = gss, aes(x = age, y = income))

# Step 3 - the geometry: draw the observations as points
# ggplot(data = gss, aes(x = age, y = income)) + geom_point()

# Same scatter plot with all points drawn in one fixed colour of my choice
ggplot(gss, aes(age, income)) +
  geom_point(colour = "maroon")

# Colour the points by a grouping variable
# ggplot(data = gss, aes(x = age, y = income, color = degree)) + geom_point() # colour by degree

# ggplot(data = gss, aes(x = age, y = income, color = sex)) + geom_point()    # colour by sex

# Set the theme at the end of the plot
# ggplot(data = gss, aes(x = age, y = income, color = sex)) + geom_point() + theme_light()

# Labels ----
# Scatter plot of income against age, coloured by sex.
# The title and axis labels name the variables that are actually mapped
# (age on x, income on y) so the plot is not mislabelled.
ggplot(data = gss, aes(x = age, y = income, color = sex)) +
  geom_point() +
  labs(
    title = "Income by Age and Gender",
    x = "Age",
    y = "Income",
    caption = "Chep, 2025"
  ) +
  theme_linedraw() +
  theme(plot.title = element_text(hjust = 0.5)) # centre the title

# Facets (make use of the `~` operator) ----
# Same income-against-age scatter plot, split into one panel per level of
# `sex`. Labels name the mapped variables (age on x, income on y).
ggplot(data = gss, aes(x = age, y = income, color = sex)) +
  geom_point() +
  facet_wrap(~sex) +
  labs(
    title = "Income by Age and Gender",
    x = "Age",
    y = "Income",
    caption = "Chep, 2025"
  ) +
  theme_linedraw() +
  theme(plot.title = element_text(hjust = 0.5)) # centre the title

# Bar plot ----
# Bar plots display categorical data; this one counts the rows per `sex`.
ggplot(gss, aes(x = sex)) +
  geom_bar(fill = "maroon") +
  labs(
    title = "Gender Distribution",
    x = "Gender",
    y = "Number of respondents",
    caption = "Chep, 2025"
  )

# Histogram of income, bin size given as a width (binwidth = 10000)
ggplot(data = gss, mapping = aes(x = income)) +
  geom_histogram(binwidth = 10000, fill = "orange", colour = "black") +
  labs(title = "Income distribution", x = "income", y = "frequency") +
  theme_light()

#----------------------------------------------------------------------------

# Same histogram, bin size given as a number of bins (bins = 20)
ggplot(data = gss, mapping = aes(x = income)) +
  geom_histogram(bins = 20, fill = "orange", colour = "black") +
  labs(title = "Income distribution", x = "income", y = "frequency") +
  theme_light()

# Box plot ----
# Box plots summarise the distribution of a continuous variable (income)
# within each level of a categorical variable. The x aesthetic is `sex`,
# matching the "Gender" axis label; a continuous x such as `age` does not
# give one box per gender.
ggplot(data = gss, aes(x = sex, y = income, color = sex)) +
  geom_boxplot(fill = "purple") +
  labs(
    title = "Income by gender",
    x = "Gender",
    y = "Income",
    caption = "Chep, 2025"
  )

# Combining plots with patchwork ----
# Build three plots, store them in p1/p2/p3, and place them side by side
# with the `|` operator under a shared title.

library(patchwork)

# p1: income against age, coloured by sex; labels name the mapped variables
p1 <- ggplot(data = gss, aes(x = age, y = income, color = sex)) +
  geom_point() +
  labs(
    title = "Income by Age and Gender",
    x = "Age",
    y = "Income",
    caption = "Chep, 2025"
  ) +
  theme_linedraw() +
  theme(plot.title = element_text(hjust = 0.5)) # centre the title

# p2: histogram of income with bins 10000 wide
p2 <- ggplot(gss, aes(x = income)) +
  geom_histogram(binwidth = 10000, fill = "orange", color = "black") +
  labs(title = "Income distribution", x = "income", y = "frequency") +
  theme_light()

# p3: income by gender; x must be the categorical `sex` (not `age`) so that
# there is one box per gender, matching the "Gender" axis label
p3 <- ggplot(data = gss, aes(x = sex, y = income, color = sex)) +
  geom_boxplot(fill = "purple") +
  labs(
    title = "Income by gender",
    x = "Gender",
    y = "Income",
    caption = "Chep, 2025"
  )

# Lay the three plots out in one row and add an overall title
combine <- (p1 | p2 | p3) + plot_annotation(title = "Combined plots example")
print(combine)

#Exporting plots

#plot=ggsave("plot.png", width = 10,height=6,dpi=300)

#Example: scatter plot customizations

# Load ggplot2 and read SMdata2.csv into the data frame `SMdata`
library(ggplot2)

SMdata <- read.csv(file = "SMdata2.csv")
# Exploratory data analysis (uncomment to inspect the data)

# names(SMdata)   # variable names
# str(SMdata)     # structure
# head(SMdata)    # first few rows

# Data visualization with ggplot2 ----

# Step 1 - the data
# ggplot(data = SMdata)

# Step 2 - the aesthetics: Total on x, Positive on y
# ggplot(data = SMdata, aes(x = Total, y = Positive))

# Step 3 - the geometry: points in a single fixed colour
ggplot(SMdata, aes(Total, Positive)) +
  geom_point(colour = "maroon")

# Colour the points by Region instead of using one fixed colour
ggplot(SMdata, aes(Total, Positive, colour = Region)) +
  geom_point()

# Histogram with geom_histogram(); no `bins`/`binwidth` is given here, and
# the message recorded below is what ggplot2 reported for this call
ggplot(SMdata, aes(x = Incidence_Rate)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Change the fill colour and the colour of the bar boundaries; use 10 bins
ggplot(SMdata, aes(x = Incidence_Rate)) +
  geom_histogram(bins = 10, fill = "maroon", colour = "green")

# Bar chart with geom_bar(): counts the rows per Month
ggplot(SMdata, aes(x = Month)) +
  geom_bar(fill = "purple", colour = "green")

# Box plot ----
# Total by Intervention_Type, with boxes filled by Region. The legend sits
# above the panel and the title is centred.

ggplot(SMdata, aes(x = Intervention_Type, y = Total, fill = Region)) +
  geom_boxplot() +
  labs(
    title = "Number of malaria case by Intervention type",
    x = "Intervention type",
    y = "Total Cases",
    caption = "Source:Chep,2025"
  ) +
  theme_light() +
  theme(
    legend.position = "top",
    plot.title = element_text(hjust = 0.5)
  )

# Facet ----
# The same box plot, drawn in a separate panel for each value of Year.

ggplot(SMdata, aes(x = Intervention_Type, y = Total, fill = Region)) +
  geom_boxplot() +
  facet_wrap(~Year) +
  labs(
    title = "Number of malaria case by Intervention type",
    x = "Intervention type",
    y = "Total Cases",
    caption = "Source:Chep,2025"
  ) +
  theme_light() +
  theme(
    legend.position = "top",
    plot.title = element_text(hjust = 0.5) # align the title to the centre
  )

#plot_2=ggsave("plot_2.png", dpi=350)

# Colour gradient ----
# Point colour follows Incidence_Rate on a blue (low) to red (high) scale,
# with one panel per Intervention_Type.

ggplot(SMdata, aes(x = Total, y = Positive, colour = Incidence_Rate)) +
  geom_point() +
  facet_wrap(~Intervention_Type) +
  scale_colour_gradient(low = "blue", high = "red") +
  labs(
    title = "Number of malaria case by Total case",
    x = "Total",
    y = "Positive Cases",
    caption = "Source:Chep,2025"
  ) +
  theme_bw()

# Violin plots are similar to box plots, except that they also show the
# kernel probability density of the data at different values.

ggplot(SMdata, aes(Age, Positive)) +
  geom_violin(fill = "maroon")

library(gganimate)
library(dplyr)

# Histogram of Incidence_Rate with bars filled by Region, stored as `plot3`.
# Every component, including the final theme(), must be chained with `+`:
# without it, theme(legend.position = "bottom") is evaluated as a separate
# statement (and merely printed), so the legend is never moved.
plot3 <- ggplot(data = SMdata, aes(x = Incidence_Rate, fill = Region)) +
  geom_histogram(color = "maroon") +
  theme_classic() +
  labs(caption = "Source:Chep,2025") +
  theme(legend.position = "bottom")
print(plot3)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Add animation (currently disabled)
# The whole chain is commented out together; leaving only its last line
# active would evaluate ease_aes() on its own and just print the object.
# animate_plot <- plot3 +
#   transition_time(Year) +  # animate over the time variable
#   ease_aes("linear")       # smooth linear transitions

# Save or display
# anim_save("Plot_3.gif", animation = animate_plot)  # save as GIF