R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

#we need to load the required libraries
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# ggplot2 and dplyr are core tidyverse packages, so they are already attached
# by library(tidyverse); these two calls are harmless no-ops kept for clarity.
library(ggplot2)
library(dplyr)
# Do not install packages from inside the script: it would run on every
# execution, and R refuses to install a package that is currently loaded
# ("package 'tidyverse' is in use and will not be installed").
# Install once, interactively, BEFORE the library() calls:
#   install.packages("tidyverse")
# Sanity check: an empty ggplot() call confirms that ggplot2 is available.
ggplot()

# Scatter plot of the mpg data: displ on the x axis, hwy (highway miles) on
# the y axis. If this fails, attach ggplot2 first and then rerun the line.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy))

# Displacement against city mileage (cty).
ggplot(mpg) +
  geom_point(aes(x = displ, y = cty))

# Displacement against number of cylinders (cyl).
ggplot(mpg) +
  geom_point(aes(x = displ, y = cyl))

# geom_smooth() adds a smoothed line or curve to a plot; here it is fitted to
# displacement against number of cylinders.
ggplot(mpg) +
  geom_smooth(aes(x = displ, y = cyl))
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Displacement against hwy, with the point colour mapped to class
# (colour is inside aes(), so it varies with the data).
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy, color = class))

# Displacement against hwy with every point drawn green
# (colour is outside aes(), so it is a fixed setting, not a mapping).
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy), color = "green")

# Displacement against hwy, with the point shape mapped to class.
# The default shape palette supplies at most 6 shapes, but class has 7
# values; left to the default, the seventh class gets no shape and its 62
# rows are silently dropped from the plot. Supplying one shape code per class
# keeps every observation visible.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy, shape = class)) +
  scale_shape_manual(values = seq_along(unique(mpg$class)))

# Displacement against hwy, with the point size mapped to class: points from
# different classes are drawn at different sizes.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy, size = class))
## Warning: Using size for a discrete variable is not advised.

# Displacement against hwy, with the point transparency mapped to class:
# points from different classes are drawn at different alpha levels.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy, alpha = class))
## Warning: Using alpha for a discrete variable is not advised.

### Facets and subplots
# Faceting splits the data into subsets and draws each subset in its own
# panel (small multiples) within one plot. The split is usually made on one
# or more categorical variables, which makes groups easy to compare.

# One panel per value of class, laid out in two rows.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy)) +
  facet_wrap(vars(class), nrow = 2)

# Same panels laid out in three rows. class is the mpg variable that
# categorises vehicles ("subcompact", "compact", "midsize", ...); faceting on
# it produces one panel for each of its levels.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy)) +
  facet_wrap(vars(class), nrow = 3)

# facet_grid() with class on the right-hand side of the formula: one column
# of the grid per level of class, all in a single row.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy)) +
  facet_grid(. ~ class)

# Two-way grid: rows are drv (drive train), columns are cyl (number of
# cylinders), so each panel holds one drv/cyl combination.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy)) +
  facet_grid(rows = vars(drv), cols = vars(cyl))

#### geom_smooth ####
# Smoothed curve of cyl against displ.
ggplot(mpg) +
  geom_smooth(aes(x = displ, y = cyl))
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# drv is mapped to linetype, so each drive train type gets its own curve
# drawn with a different line type.
ggplot(mpg) +
  geom_smooth(aes(x = displ, y = hwy, linetype = drv))
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# group = drv fits a separate curve for each value of drv. Each curve shows
# the trend between engine displacement (displ) and highway miles per gallon
# (hwy) for one drive train type.
ggplot(mpg) +
  geom_smooth(aes(x = displ, y = hwy, group = drv))
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# The + operator adds layers to a plot. Here the raw observations
# (geom_point) are overlaid with a smoothed trend (geom_smooth), both showing
# engine displacement (displ) against highway miles per gallon (hwy).
# Previously the same geom_smooth layer was added twice, which only fitted
# and drew an identical curve a second time.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy)) +
  geom_smooth(aes(x = displ, y = hwy))
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# (1) The x (displ) and y (hwy) mappings are declared once in ggplot(), so
#     every layer inherits them.
# (2) geom_point() adds color = class on top of the inherited mappings, so
#     each point is coloured by the vehicle class it belongs to.
# (3) geom_smooth() is given no mappings of its own; it uses the inherited
#     ones and the default smoothing method and parameters.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(color = class)) +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# A layer can be given its own data. Here the points use all of mpg, while
# the smoothed line is fitted only to the rows where class is "subcompact".
# se = FALSE turns off the confidence band.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(color = class)) +
  geom_smooth(data = dplyr::filter(mpg, class == "subcompact"), se = FALSE)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Same plot, but the smoothed line is fitted only to the rows where class is
# "minivan".
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(color = class)) +
  geom_smooth(data = dplyr::filter(mpg, class == "minivan"), se = FALSE)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : pseudoinverse used at 4.008
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : neighborhood radius 0.708
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : There are other near singularities as well. 0.25

# Two quick ways to get to know the diamonds data set:
#   ?diamonds       opens its help page (description, variables, ...).
#   head(diamonds)  prints its first 6 rows in the console, for a quick look
#                   at the structure and contents.
head(diamonds, n = 6L)
## # A tibble: 6 × 10
##   carat cut       color clarity depth table price     x     y     z
##   <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.23 Ideal     E     SI2      61.5    55   326  3.95  3.98  2.43
## 2  0.21 Premium   E     SI1      59.8    61   326  3.89  3.84  2.31
## 3  0.23 Good      E     VS1      56.9    65   327  4.05  4.07  2.31
## 4  0.29 Premium   I     VS2      62.4    58   334  4.2   4.23  2.63
## 5  0.31 Good      J     SI2      63.3    58   335  4.34  4.35  2.75
## 6  0.24 Very Good J     VVS2     62.8    57   336  3.94  3.96  2.48
# Open the help page for the data set: description, variables and usage
# examples.
help("diamonds")
## starting httpd help server ... done
# str() shows the structure (type and first values) of every column.
str(object = diamonds)
## tibble [53,940 × 10] (S3: tbl_df/tbl/data.frame)
##  $ carat  : num [1:53940] 0.23 0.21 0.23 0.29 0.31 0.24 0.24 0.26 0.22 0.23 ...
##  $ cut    : Ord.factor w/ 5 levels "Fair"<"Good"<..: 5 4 2 4 2 3 3 3 1 3 ...
##  $ color  : Ord.factor w/ 7 levels "D"<"E"<"F"<"G"<..: 2 2 2 6 7 7 6 5 2 5 ...
##  $ clarity: Ord.factor w/ 8 levels "I1"<"SI2"<"SI1"<..: 2 3 5 4 2 6 7 3 4 5 ...
##  $ depth  : num [1:53940] 61.5 59.8 56.9 62.4 63.3 62.8 62.3 61.9 65.1 59.4 ...
##  $ table  : num [1:53940] 55 61 65 58 58 57 57 55 61 61 ...
##  $ price  : int [1:53940] 326 326 327 334 335 336 336 337 337 338 ...
##  $ x      : num [1:53940] 3.95 3.89 4.05 4.2 4.34 3.94 3.95 4.07 3.87 4 ...
##  $ y      : num [1:53940] 3.98 3.84 4.07 4.23 4.35 3.96 3.98 4.11 3.78 4.05 ...
##  $ z      : num [1:53940] 2.43 2.31 2.31 2.63 2.75 2.48 2.47 2.53 2.49 2.39 ...
# Per-column summary of the whole data set. Numeric columns (carat, depth,
# table, price, x, y, z) get minimum, 1st quartile, median, mean, 3rd
# quartile and maximum.
summary(object = diamonds)
##      carat               cut        color        clarity          depth      
##  Min.   :0.2000   Fair     : 1610   D: 6775   SI1    :13065   Min.   :43.00  
##  1st Qu.:0.4000   Good     : 4906   E: 9797   VS2    :12258   1st Qu.:61.00  
##  Median :0.7000   Very Good:12082   F: 9542   SI2    : 9194   Median :61.80  
##  Mean   :0.7979   Premium  :13791   G:11292   VS1    : 8171   Mean   :61.75  
##  3rd Qu.:1.0400   Ideal    :21551   H: 8304   VVS2   : 5066   3rd Qu.:62.50  
##  Max.   :5.0100                     I: 5422   VVS1   : 3655   Max.   :79.00  
##                                     J: 2808   (Other): 2531                  
##      table           price             x                y         
##  Min.   :43.00   Min.   :  326   Min.   : 0.000   Min.   : 0.000  
##  1st Qu.:56.00   1st Qu.:  950   1st Qu.: 4.710   1st Qu.: 4.720  
##  Median :57.00   Median : 2401   Median : 5.700   Median : 5.710  
##  Mean   :57.46   Mean   : 3933   Mean   : 5.731   Mean   : 5.735  
##  3rd Qu.:59.00   3rd Qu.: 5324   3rd Qu.: 6.540   3rd Qu.: 6.540  
##  Max.   :95.00   Max.   :18823   Max.   :10.740   Max.   :58.900  
##                                                                   
##        z         
##  Min.   : 0.000  
##  1st Qu.: 2.910  
##  Median : 3.530  
##  Mean   : 3.539  
##  3rd Qu.: 4.040  
##  Max.   :31.800  
## 
# Summary of the cut column only. cut is a categorical variable describing
# the quality of the cut (Fair, Good, Very Good, Premium, Ideal), so the
# summary is the number of diamonds at each level.
summary(diamonds[["cut"]])
##      Fair      Good Very Good   Premium     Ideal 
##      1610      4906     12082     13791     21551
# Summary of the carat column only. carat is numeric, so the summary is the
# minimum, 1st quartile, median, mean, 3rd quartile and maximum.
summary(diamonds[["carat"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.2000  0.4000  0.7000  0.7979  1.0400  5.0100
# Open the data set in the tabular data viewer.
view(x = diamonds)

# mean() returns the arithmetic mean (average) of a numeric vector.
val <- c(46, 34, 87, 22, 91)
mean(x = val)
## [1] 56
# Average price of the diamonds in the data set.
mean(diamonds[["price"]])
## [1] 3932.8
# Summary statistics for price: minimum, 1st quartile, median, mean, 3rd
# quartile and maximum.
summary(diamonds[["price"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     326     950    2401    3933    5324   18823
#
# Scatter plot of carat (x) against price (y).
ggplot(diamonds) +
  geom_point(aes(x = carat, y = price))

# Same scatter plot of diamond weight (carat) against price, with each point
# coloured by the quality of the diamond's cut.
ggplot(diamonds, aes(x = carat, y = price)) +
  geom_point(aes(color = cut))

#### Histogram ####

# hist() is the base R function for drawing histograms; open its help page.
help("hist")

# Distribution of diamond carat weights.
hist(
  x = diamonds[["carat"]],
  xlab = "Carat",
  main = "Histogram of Diamond Carat Weight"
)

# Distribution of diamond prices; useful for spotting patterns or outliers.
hist(
  x = diamonds[["price"]],
  xlab = "Price",
  main = "Histogram of Diamond Price"
)

# Standard deviation of carat.
sd(diamonds[["carat"]])
## [1] 0.4740112
# Standard deviation of price.
sd(diamonds[["price"]])
## [1] 3989.44
# Variance of carat.
var(diamonds[["carat"]])
## [1] 0.2246867
# Variance of price.
var(diamonds[["price"]])
## [1] 15915629
# Bar plot in which each bar is the number of diamonds at one level of cut.
ggplot(diamonds) +
  stat_count(aes(x = cut))

# Bar plot of the PROPORTION of diamonds at each level of cut.
# - x = cut puts one bar per cut level on the x axis.
# - y = after_stat(prop) uses the proportion computed by stat_count().
#   after_stat() replaces the `..prop..` dot-dot notation, which was
#   deprecated in ggplot2 3.4.0.
# - group = 1 places all cut levels in one group, so each bar is a share of
#   the whole data set rather than 100% of its own level.
ggplot(diamonds) +
  stat_count(aes(x = cut, y = after_stat(prop), group = 1))

# Summary of depth for each level of cut. For every cut level, stat_summary()
# computes three statistics of depth:
#   fun     = median  the median depth
#   fun.min = min     the minimum depth
#   fun.max = max     the maximum depth
ggplot(diamonds) +
  stat_summary(
    aes(x = cut, y = depth),
    fun = median,
    fun.min = min,
    fun.max = max
  )

# Colour the bar outlines only

# Count of diamonds per cut level via stat_count(); color = cut inside aes()
# gives each cut level its own colour.
ggplot(diamonds) +
  stat_count(aes(x = cut, color = cut))

# The same chart written with geom_bar(): one bar per cut level showing the
# count of observations, coloured by cut level.
ggplot(diamonds) +
  geom_bar(aes(x = cut, color = cut))

# Colour the whole bar (fill)

# Fill mapped to cut itself, first with stat_count() and then with geom_bar().
ggplot(diamonds) +
  stat_count(aes(x = cut, fill = cut))

ggplot(diamonds) +
  geom_bar(aes(x = cut, fill = cut))

# Fill mapped to clarity, first with stat_count() and then with geom_bar().
ggplot(diamonds) +
  stat_count(aes(x = cut, fill = clarity))

ggplot(diamonds) +
  geom_bar(aes(x = cut, fill = clarity))

# Fill mapped to color.
ggplot(diamonds) +
  geom_bar(aes(x = cut, fill = color))

# position = "dodge" applied to the clarity and color fills.
ggplot(diamonds) +
  geom_bar(aes(x = cut, fill = clarity), position = "dodge")

ggplot(diamonds) +
  geom_bar(aes(x = cut, fill = color), position = "dodge")

# map_data("nz") loads New Zealand map data from the maps package as a data
# frame of longitude/latitude coordinates describing region boundaries.
# In the plot, long goes on the x axis, lat on the y axis, and group = group
# tells geom_polygon() which points belong to the same polygon.
nz <- map_data(map = "nz")
ggplot(data = nz, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(color = "black", fill = "white")

# The same recipe for the "usa" map.
usa <- map_data(map = "usa")
ggplot(data = usa, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(color = "black", fill = "white")

# Open the help page for map_data().
help("map_data")
library(naijR)
## Warning: package 'naijR' was built under R version 4.3.3
# Draw the default naijR map (no arguments).
map_ng()

# Draw the map for the regions returned by lgas().
map_ng(lgas())

# Draw the regions returned by states(gpz = "sw"), with show.text = TRUE and
# col = 4. Presumably "sw" selects the south-west zone and show.text labels
# each region -- TODO confirm against the naijR documentation.
map_ng(states(gpz = "sw"), show.text = TRUE, col = 4)

# kk is only used by the commented-out Kebbi variant below.
kk <- "Kebbi"



##map_ng(kk, col = 6, title = paste(kk, "State"))
# Draw Lagos. The title previously came from kk ("Kebbi State") while the map
# showed Lagos; it now names the state that is actually drawn.
map_ng("Lagos", col = 6, title = "Lagos State")

# Keep the value returned by map_ng() so it can be inspected.
ng <- map_ng()

#ggplot(ng, aes(long, lat, group=group)) + geom_polygon(fill="white", color="black")
# Print the stored object.
print(ng)
## Simple feature collection with 37 features and 12 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 2.668534 ymin: 4.273007 xmax: 14.67882 ymax: 13.89442
## Geodetic CRS:  WGS 84
## First 10 features:
##     admin1Name admin1Pcod  admin1RefN admin1AltN admin1Al_1 admin0Name
## 1         Abia      NG001        Abia       <NA>       <NA>    Nigeria
## 2      Adamawa      NG002     Adamawa       <NA>       <NA>    Nigeria
## 3    Akwa Ibom      NG003   Akwa Ibom       <NA>       <NA>    Nigeria
## 4      Anambra      NG004     Anambra       <NA>       <NA>    Nigeria
## 5       Bauchi      NG005      Bauchi       <NA>       <NA>    Nigeria
## 6      Bayelsa      NG006     Bayelsa       <NA>       <NA>    Nigeria
## 7        Benue      NG007       Benue       <NA>       <NA>    Nigeria
## 8        Borno      NG008       Borno       <NA>       <NA>    Nigeria
## 9  Cross River      NG009 Cross River       <NA>       <NA>    Nigeria
## 10       Delta      NG010       Delta       <NA>       <NA>    Nigeria
##    admin0Pcod       date    validOn validTo Shape_Leng Shape_Area
## 1          NG 2016-11-29 2016-12-15    <NA>   4.695135  0.3965429
## 2          NG 2016-11-29 2016-12-15    <NA>  11.525443  3.1130065
## 3          NG 2016-11-29 2016-12-15    <NA>   5.263830  0.5494762
## 4          NG 2016-11-29 2016-12-15    <NA>   3.595960  0.3926608
## 5          NG 2016-11-29 2016-12-15    <NA>  13.952005  4.0110175
## 6          NG 2016-11-29 2016-12-15    <NA>   5.046708  0.7767679
## 7          NG 2016-11-29 2016-12-15    <NA>   9.408080  2.5783633
## 8          NG 2016-11-29 2016-12-15    <NA>  13.714364  5.9878487
## 9          NG 2016-11-29 2016-12-15    <NA>   8.779796  1.7112182
## 10         NG 2016-11-29 2016-12-15    <NA>   7.372526  1.3940820
##                          geometry
## 1  MULTIPOLYGON (((7.38681 6.0...
## 2  MULTIPOLYGON (((13.62129 10...
## 3  MULTIPOLYGON (((8.344815 4....
## 4  MULTIPOLYGON (((6.932539 6....
## 5  MULTIPOLYGON (((10.75125 12...
## 6  MULTIPOLYGON (((6.552828 5....
## 7  MULTIPOLYGON (((8.524425 8....
## 8  MULTIPOLYGON (((13.35885 13...
## 9  MULTIPOLYGON (((8.56068 4.7...
## 10 MULTIPOLYGON (((6.668921 6....