##1.Install and load ggplot2

library(ggplot2)

##set a working directory

# Make the training folder the working directory so that the relative
# file name below is resolved inside it.
setwd(dir = "~/R TRAINING")

# Read the CSV file into the data frame used by the plots that follow.
gss <- read.csv(file = "GSSsubset.csv")

##the data

#ggplot(data=gss)

##Labels: a title, axis labels and a caption can be added to a plot using the labs() function

# Scatter plot of income against age, with points coloured by sex.
# The title and axis labels name the variables actually mapped in aes():
# age on the x axis and income on the y axis.
ggplot(data = gss, aes(x = age, y = income, colour = sex)) +
  geom_point() +
  labs(
    title = "Income by age and gender",
    x = "Age",
    y = "Income",
    caption = "Mambo 2025"
  ) +
  theme_classic()

#Moving the scatter plot title to the center

# Same scatter plot of income against age (coloured by sex), with the
# title centred: hjust = 0.5 places the title text in the middle.
# Labels name the variables actually mapped in aes().
ggplot(data = gss, aes(x = age, y = income, colour = sex)) +
  geom_point() +
  labs(
    title = "Income by age and gender",
    x = "Age",
    y = "Income",
    caption = "Mambo 2025"
  ) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))

#Facets: split the plot into one panel per level of a variable, named after the ~ operator

# Scatter plot of income against age, drawn as one panel per value of sex
# (facet_wrap(~sex)), with a centred title.
# Labels name the variables actually mapped in aes().
ggplot(data = gss, aes(x = age, y = income, colour = sex)) +
  geom_point() +
  labs(
    title = "Income by age and gender",
    x = "Age",
    y = "Income",
    caption = "Mambo 2025"
  ) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5)) +
  facet_wrap(~sex)

#Bar plots *** display the count or summary of the categorical data

# Bar plot counting the rows of gss for each value of sex.
# The y label spelling is corrected ("respondents").
ggplot(data = gss, aes(x = sex)) +
  geom_bar(fill = "green") +
  labs(
    title = "Gender distribution",
    x = "Gender",
    y = "No. of respondents"
  ) +
  theme_classic()

#Histogram *** show the distribution of a single variable #Example: Histogram of income

# Histogram of income using bins that are 10000 units wide.
# The title and x label describe income, the variable being binned;
# the y axis is the count of rows falling in each bin.
ggplot(data = gss, aes(x = income)) +
  geom_histogram(binwidth = 10000, fill = "green", colour = "black") +
  labs(
    title = "Income distribution",
    x = "Income",
    y = "No. of respondents"
  ) +
  theme_light()

#Reducing the number of bars

# Histogram of income with the number of bars fixed at 20 (bins = 20)
# instead of specifying a bin width.
# The title and x label describe income, the variable being binned.
ggplot(data = gss, aes(x = income)) +
  geom_histogram(bins = 20, fill = "green", colour = "black") +
  labs(
    title = "Income distribution",
    x = "Income",
    y = "No. of respondents"
  ) +
  theme_light()

#Boxplot #Boxplot of income by gender

# Box plot of income for each value of sex.
# The y axis shows income (not a count), so it is labelled accordingly.
ggplot(data = gss, aes(x = sex, y = income)) +
  geom_boxplot(fill = "green") +
  labs(
    title = "Income by gender",
    x = "Gender",
    y = "Income"
  ) +
  theme_classic()

##Combine the plots (story telling using the data)

library(patchwork)
# p1: scatter plot of income against age, coloured by sex, centred title.
# Labels name the variables actually mapped in aes().
p1 <- ggplot(data = gss, aes(x = age, y = income, colour = sex)) +
  geom_point() +
  labs(
    title = "Income by age and gender",
    x = "Age",
    y = "Income",
    caption = "Mambo 2025"
  ) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))

# p2: bar plot counting the rows of gss for each value of sex.
p2 <- ggplot(data = gss, aes(x = sex)) +
  geom_bar(fill = "green") +
  labs(
    title = "Gender distribution",
    x = "Gender",
    y = "No. of respondents"
  ) +
  theme_classic()

# p3: box plot of income for each value of sex.
p3 <- ggplot(data = gss, aes(x = sex, y = income)) +
  geom_boxplot(fill = "green") +
  labs(
    title = "Income by gender",
    x = "Gender",
    y = "Income"
  ) +
  theme_classic()

# Place the three plots side by side (patchwork's | operator) under one
# overall title, then draw the combined figure.
combine <- (p1 | p2 | p3) +
  plot_annotation(title = "Combined plots Example")
print(combine)

#Exporting plots

# Save the combined figure to Plot.png (10 x 6 at 300 dpi). The plot is
# passed explicitly so the export does not depend on whichever plot was
# created most recently. ggsave() returns the file name invisibly, so
# `plot` ends up holding the file name rather than a plot object.
plot <- ggsave("Plot.png", plot = combine, width = 10, height = 6, dpi = 300)

#SESSION 4 LESSON 2 #Install and Load the following packages

library("tm")
## Loading required package: NLP
## 
## Attaching package: 'NLP'
## The following object is masked from 'package:ggplot2':
## 
##     annotate
library("wordcloud")
## Loading required package: RColorBrewer
library("RColorBrewer")
library(ggplot2)

# Work from the training folder so the relative file name below is
# resolved inside it.
setwd(dir = "~/R TRAINING")

# Read the second dataset and list its column names.
SMdata1 <- read.csv(file = "SMdata1.csv")
names(SMdata1)
##  [1] "Region"            "Year"              "Month"            
##  [4] "Age"               "Intervention_Type" "Total"            
##  [7] "Positive"          "Incidence_Rate"    "Latitude"         
## [10] "Longitude"         "period"

#Data visualization with ggplot2

# Scatter plot of Positive against Total, one colour per Region.
ggplot(SMdata1, aes(x = Total, y = Positive, colour = Region)) +
  geom_point()
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

#Use a histogram

# Histogram of Incidence_Rate with red bars; no bin count is given, so
# ggplot2 falls back to its default binning.
ggplot(SMdata1, aes(x = Incidence_Rate)) +
  geom_histogram(fill = "red")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_bin()`).

#Putting a boundary to the bars

# Histogram of Incidence_Rate in 10 bins: red bars with a green outline.
ggplot(SMdata1, aes(x = Incidence_Rate)) +
  geom_histogram(
    bins = 10,
    colour = "green",
    fill = "red"
  )
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_bin()`).

##box plot

# Box plot of Total by Intervention_Type, filled by Region, with one panel
# per Year. The plot is stored in a variable so that exactly this plot can
# be handed to ggsave() below, rather than relying on whichever plot was
# created most recently.
malaria_boxplot <- ggplot(
  data = SMdata1,
  aes(x = Intervention_Type, y = Total, fill = Region)
) +
  geom_boxplot() +
  facet_wrap(~Year) +
  labs(
    x = "intervention type",
    caption = "Source:Mambo J.K.,2025",
    y = "Total cases",
    title = "number of malaria case by intervention type"
  ) +
  theme_light() +
  theme(legend.position = "bottom") +
  theme(plot.title = element_text(hjust = 0.5))
print(malaria_boxplot)
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).

#Exporting the plot

# Write the box plot to plot_2.png (10 x 6 at 300 dpi). ggsave() returns
# the file name invisibly, so plot_1 holds the file name, not a plot.
plot_1 <- ggsave(
  "plot_2.png",
  plot = malaria_boxplot,
  width = 10,
  height = 6,
  dpi = 300
)
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).

#gradient of color changing depending on intensity?

# Scatter plot of Positive against Total, one panel per Intervention_Type.
# Point colour follows Incidence_Rate on a continuous blue-to-red scale.
ggplot(
  SMdata1,
  aes(x = Total, y = Positive, colour = Incidence_Rate)
) +
  geom_point() +
  facet_wrap(~Intervention_Type) +
  scale_colour_gradient(low = "blue", high = "red") +
  labs(
    x = "Total",
    y = "positive cases",
    title = "number of malaria case by Total cases"
  ) +
  theme_bw()
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).