##1.Install and load ggplot2
library(ggplot2)
# Point R at the training folder so the relative file names below resolve.
# NOTE(review): setwd() ties the script to one machine's folder layout; an
# RStudio project or file.path() would be more portable. It is kept here
# because the plot exports further down also write to the working directory.
setwd("~/R TRAINING")
# Read the GSS subset into a data frame
gss <- read.csv(file = "GSSsubset.csv")
##the data
#ggplot(data=gss)
## Labels: titles, axis labels, legend titles and captions are added to a
## plot with labs().
# Scatter plot of income against age, coloured by sex. The labels describe
# the variables that are actually mapped (age on x, income on y).
ggplot(data = gss, aes(x = age, y = income, colour = sex)) +
  geom_point() +
  labs(
    title = "Income by age and gender",
    x = "Age",
    y = "Income",
    colour = "Gender",
    caption = "Mambo 2025"
  ) +
  theme_classic()
# Moving the scatter plot title to the centre
# Same income-vs-age scatter plot; hjust = 0.5 centres the title horizontally.
ggplot(data = gss, aes(x = age, y = income, colour = sex)) +
  geom_point() +
  labs(
    title = "Income by age and gender",
    x = "Age",
    y = "Income",
    colour = "Gender",
    caption = "Mambo 2025"
  ) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))
# Facets: facet_wrap() takes a one-sided formula (the ~ operator) naming the
# variable to split on; here the scatter plot gets one panel per value of sex.
ggplot(data = gss, aes(x = age, y = income, colour = sex)) +
  geom_point() +
  labs(
    title = "Income by age and gender",
    x = "Age",
    y = "Income",
    colour = "Gender",
    caption = "Mambo 2025"
  ) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5)) +
  facet_wrap(~sex)
# Bar plots: display the count or a summary of categorical data
# geom_bar() counts the rows in each category of sex, so the y axis is the
# number of respondents.
ggplot(data = gss, aes(x = sex)) +
  geom_bar(fill = "green") +
  labs(
    title = "Gender distribution",
    x = "Gender",
    y = "No. of respondents"
  ) +
  theme_classic()
# Histogram: shows the distribution of a single variable
# Example: histogram of income, with each bar covering a 10000-wide range
ggplot(data = gss, aes(x = income)) +
  geom_histogram(binwidth = 10000, fill = "green", colour = "black") +
  labs(
    title = "Income distribution",
    x = "Income",
    y = "No. of respondents"
  ) +
  theme_light()
# Reducing the number of bars: fix the bar count with bins instead of
# setting the bar width with binwidth
ggplot(data = gss, aes(x = income)) +
  geom_histogram(bins = 20, fill = "green", colour = "black") +
  labs(
    title = "Income distribution",
    x = "Income",
    y = "No. of respondents"
  ) +
  theme_light()
# Boxplot of income by gender
# One box per value of sex; the y axis is income, so it is labelled as such.
ggplot(data = gss, aes(x = sex, y = income)) +
  geom_boxplot(fill = "green") +
  labs(
    title = "Income by gender",
    x = "Gender",
    y = "Income"
  ) +
  theme_classic()
## Combine the plots (storytelling with the data)
library(patchwork)

# p1: scatter plot of income against age, coloured by sex, centred title
p1 <- ggplot(data = gss, aes(x = age, y = income, colour = sex)) +
  geom_point() +
  labs(
    title = "Income by age and gender",
    x = "Age",
    y = "Income",
    colour = "Gender",
    caption = "Mambo 2025"
  ) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))

# p2: bar plot counting respondents in each category of sex
p2 <- ggplot(data = gss, aes(x = sex)) +
  geom_bar(fill = "green") +
  labs(
    title = "Gender distribution",
    x = "Gender",
    y = "No. of respondents"
  ) +
  theme_classic()

# p3: boxplot of income for each category of sex
p3 <- ggplot(data = gss, aes(x = sex, y = income)) +
  geom_boxplot(fill = "green") +
  labs(
    title = "Income by gender",
    x = "Gender",
    y = "Income"
  ) +
  theme_classic()

# patchwork's `|` operator places the plots side by side;
# plot_annotation() adds an overall title to the combined figure.
combine <- (p1 | p2 | p3) +
  plot_annotation(title = "Combined plots Example")
print(combine)
# Exporting plots
# Pass the combined figure explicitly instead of relying on ggsave()'s default
# of last_plot(), so the saved file cannot silently change if another plot is
# created between building `combine` and this call.
# ggsave() returns the file name invisibly; it is still stored in `plot`.
plot <- ggsave(
  "Plot.png",
  plot = combine,
  width = 10,
  height = 6,
  dpi = 300
)
#SESSION 4 LESSON 2 #Install and Load the following packages
library("tm")
## Loading required package: NLP
##
## Attaching package: 'NLP'
## The following object is masked from 'package:ggplot2':
##
## annotate
library("wordcloud")
## Loading required package: RColorBrewer
library("RColorBrewer")
library(ggplot2)
# Work from the training folder so the relative file name below resolves
setwd("~/R TRAINING")
# Read the dataset used in this lesson into a data frame
SMdata1 <- read.csv(file = "SMdata1.csv")
# List the column names of the data frame
colnames(SMdata1)
## [1] "Region" "Year" "Month"
## [4] "Age" "Intervention_Type" "Total"
## [7] "Positive" "Incidence_Rate" "Latitude"
## [10] "Longitude" "period"
# Data visualization with ggplot2
# Scatter plot of Positive against Total, one colour per Region
ggplot(SMdata1, aes(Total, Positive, colour = Region)) +
  geom_point()
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
# Histogram of Incidence_Rate with red bars; no bin count is given here
ggplot(SMdata1, aes(Incidence_Rate)) +
  geom_histogram(fill = "red")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_bin()`).
# Outlining the bars: `colour` sets the bar border, `fill` the interior;
# the histogram is drawn with 10 bins
ggplot(SMdata1, aes(Incidence_Rate)) +
  geom_histogram(bins = 10, fill = "red", colour = "green")
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_bin()`).
## Box plot of Total by Intervention_Type, filled by Region, one panel per Year
# The plot is stored in a variable so that the export below can name it
# explicitly rather than depending on whichever plot was built last.
malaria_boxplot <- ggplot(
  data = SMdata1,
  aes(x = Intervention_Type, y = Total, fill = Region)
) +
  geom_boxplot() +
  facet_wrap(~Year) +
  labs(
    x = "intervention type",
    caption = "Source:Mambo J.K.,2025",
    y = "Total cases",
    title = "number of malaria case by intervention type"
  ) +
  theme_light() +
  theme(
    legend.position = "bottom",
    plot.title = element_text(hjust = 0.5)
  )
print(malaria_boxplot)
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).
# Exporting the plot
# ggsave() returns the file name invisibly; it is still stored in `plot_1`.
plot_1 <- ggsave(
  "plot_2.png",
  plot = malaria_boxplot,
  width = 10,
  height = 6,
  dpi = 300
)
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).
# Colour gradient: point colour follows Incidence_Rate on a continuous scale
# running from blue (low) to red (high), with one panel per Intervention_Type
ggplot(SMdata1, aes(Total, Positive, colour = Incidence_Rate)) +
  geom_point() +
  labs(
    title = "number of malaria case by Total cases",
    x = "Total",
    y = "positive cases"
  ) +
  scale_colour_gradient(low = "blue", high = "red") +
  facet_wrap(~Intervention_Type) +
  theme_bw()
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).