This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Print summary statistics for each column of the cars dataset.
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.
# Load the tidyverse, which attaches ggplot2, dplyr and the other core
# packages listed in the startup message below.
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# ggplot2 and dplyr are already attached by library(tidyverse); the explicit
# calls are redundant but harmless, and are kept so that this part still works
# when it is run on its own.
library(ggplot2)
library(dplyr)
# Packages should be installed once from the console, not from the document:
# calling install.packages() here would reach for the network on every knit,
# and it cannot replace tidyverse while the package is in use.
# install.packages("tidyverse")
# An empty ggplot() call draws a blank canvas; it only succeeds when ggplot2
# is attached, so it doubles as a quick availability check.
ggplot()
# Scatterplot of the mpg data: engine displacement (displ) on the x axis and
# highway miles per gallon (hwy) on the y axis.
# If this errors, attach ggplot2 first and then re-run the line.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy))

# Displacement against city mileage (cty).
ggplot(mpg) +
  geom_point(aes(x = displ, y = cty))

# Displacement against number of cylinders (cyl).
ggplot(mpg) +
  geom_point(aes(x = displ, y = cyl))

# The same two variables drawn with geom_smooth(), which adds a smoothed line
# or curve to the plot instead of individual points.
ggplot(mpg) +
  geom_smooth(aes(x = displ, y = cyl))
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# Displacement against highway mileage, with point colour mapped to class.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy, color = class))

# The same plot with every point drawn green: the colour is a constant set
# outside aes(), so it is not tied to any variable.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy), color = "green")

# The same plot with point shape mapped to class. class has 7 values but the
# shape palette handles at most 6, which is what the warnings below report.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy, shape = class))
## Warning: The shape palette can deal with a maximum of 6 discrete values because more
## than 6 becomes difficult to discriminate
## ℹ you have requested 7 values. Consider specifying shapes manually if you need
## that many have them.
## Warning: Removed 62 rows containing missing values (`geom_point()`).
# Displacement against highway mileage, with point size mapped to class, so
# points from different classes are drawn at different sizes.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy, size = class))
## Warning: Using size for a discrete variable is not advised.
# Displacement against highway mileage, with transparency (alpha) mapped to
# class, so different classes get different transparency levels.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy, alpha = class))
## Warning: Using alpha for a discrete variable is not advised.
### Faceting (subplots)
# Faceting splits the data into subsets and draws each subset in its own
# panel (small multiples) within one figure. The split is normally driven by
# one or more categorical variables, which makes groups easy to compare.

# facet_wrap(~ class, nrow = 2): one panel per vehicle class, laid out in two
# rows.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy)) +
  facet_wrap(~ class, nrow = 2)

# The same panels laid out in three rows. class is the mpg variable that
# categorises vehicles ("subcompact", "compact", "midsize", ...); each of its
# levels gets a separate panel.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy)) +
  facet_wrap(~ class, nrow = 3)

# facet_grid() with class on the column side only: one panel per class, all
# placed side by side in a single row.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy)) +
  facet_grid(. ~ class)

# facet_grid(drv ~ cyl): drv (drive train) defines the rows and cyl (number
# of cylinders) the columns, giving one panel per drv/cyl combination.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy)) +
  facet_grid(drv ~ cyl)
#### geom_smooth ----
# Smoothed trend of number of cylinders against displacement.
ggplot(mpg) +
  geom_smooth(aes(x = displ, y = cyl))
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# linetype is mapped to drv, so each drive train type gets its own smoothed
# line, drawn with a different line type.
ggplot(mpg) +
  geom_smooth(aes(x = displ, y = hwy, linetype = drv))
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# group = drv also fits one smoothed line per value of drv; each line shows
# the trend between displacement (displ) and highway mileage (hwy) for one
# type of drive train.
ggplot(mpg) +
  geom_smooth(aes(x = displ, y = hwy, group = drv))
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# The + operator adds further layers to a plot. Here a point layer and a
# smoothed line are built from the same displ/hwy mapping, so the trend is
# drawn on top of the raw observations. Adding geom_smooth() twice with an
# identical mapping would only draw the same line a second time.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy)) +
  geom_smooth(mapping = aes(x = displ, y = hwy))
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# Mappings given to ggplot() are shared by the layers added to it:
# (1) ggplot(mpg, aes(x = displ, y = hwy)) sets the data source and the x/y
#     aesthetics once for the whole plot;
# (2) geom_point(aes(color = class)) adds points coloured by vehicle class;
# (3) geom_smooth() adds a smoothed line; with no mapping of its own it uses
#     the default smoothing method and parameters.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(color = class)) +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# A layer can be given its own data: filter(mpg, class == "subcompact") keeps
# only the subcompact rows, and that subset is what the smoothed line is
# fitted to, while the points still show every class.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(color = class)) +
  geom_smooth(data = filter(mpg, class == "subcompact"), se = FALSE)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# The same plot with the smoothed line fitted to the "minivan" rows only.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(color = class)) +
  geom_smooth(data = filter(mpg, class == "minivan"), se = FALSE)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : pseudoinverse used at 4.008
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : neighborhood radius 0.708
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : There are other near singularities as well. 0.25
# ?diamonds (run further down) opens the help page for the diamonds dataset,
# with its description and variables.
# head(diamonds) prints the first 6 rows of the dataset, which gives a quick
# look at its structure and contents.
head(diamonds)
## # A tibble: 6 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 4 0.29 Premium I VS2 62.4 58 334 4.2 4.23 2.63
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48
# Open the help page for the diamonds dataset: description, variables and
# usage examples.
?diamonds
## starting httpd help server ... done
# str(diamonds) shows the structure of the dataset: the type of each column
# and its first few values.
str(diamonds)
## tibble [53,940 × 10] (S3: tbl_df/tbl/data.frame)
## $ carat : num [1:53940] 0.23 0.21 0.23 0.29 0.31 0.24 0.24 0.26 0.22 0.23 ...
## $ cut : Ord.factor w/ 5 levels "Fair"<"Good"<..: 5 4 2 4 2 3 3 3 1 3 ...
## $ color : Ord.factor w/ 7 levels "D"<"E"<"F"<"G"<..: 2 2 2 6 7 7 6 5 2 5 ...
## $ clarity: Ord.factor w/ 8 levels "I1"<"SI2"<"SI1"<..: 2 3 5 4 2 6 7 3 4 5 ...
## $ depth : num [1:53940] 61.5 59.8 56.9 62.4 63.3 62.8 62.3 61.9 65.1 59.4 ...
## $ table : num [1:53940] 55 61 65 58 58 57 57 55 61 61 ...
## $ price : int [1:53940] 326 326 327 334 335 336 336 337 337 338 ...
## $ x : num [1:53940] 3.95 3.89 4.05 4.2 4.34 3.94 3.95 4.07 3.87 4 ...
## $ y : num [1:53940] 3.98 3.84 4.07 4.23 4.35 3.96 3.98 4.11 3.78 4.05 ...
## $ z : num [1:53940] 2.43 2.31 2.31 2.63 2.75 2.48 2.47 2.53 2.49 2.39 ...
# summary(diamonds) summarises every column: minimum, 1st quartile, median,
# mean, 3rd quartile and maximum for the numeric columns (carat, depth, table,
# price, x, y, z), and counts per level for cut, color and clarity.
summary(diamonds)
## carat cut color clarity depth
## Min. :0.2000 Fair : 1610 D: 6775 SI1 :13065 Min. :43.00
## 1st Qu.:0.4000 Good : 4906 E: 9797 VS2 :12258 1st Qu.:61.00
## Median :0.7000 Very Good:12082 F: 9542 SI2 : 9194 Median :61.80
## Mean :0.7979 Premium :13791 G:11292 VS1 : 8171 Mean :61.75
## 3rd Qu.:1.0400 Ideal :21551 H: 8304 VVS2 : 5066 3rd Qu.:62.50
## Max. :5.0100 I: 5422 VVS1 : 3655 Max. :79.00
## J: 2808 (Other): 2531
## table price x y
## Min. :43.00 Min. : 326 Min. : 0.000 Min. : 0.000
## 1st Qu.:56.00 1st Qu.: 950 1st Qu.: 4.710 1st Qu.: 4.720
## Median :57.00 Median : 2401 Median : 5.700 Median : 5.710
## Mean :57.46 Mean : 3933 Mean : 5.731 Mean : 5.735
## 3rd Qu.:59.00 3rd Qu.: 5324 3rd Qu.: 6.540 3rd Qu.: 6.540
## Max. :95.00 Max. :18823 Max. :10.740 Max. :58.900
##
## z
## Min. : 0.000
## 1st Qu.: 2.910
## Median : 3.530
## Mean : 3.539
## 3rd Qu.: 4.040
## Max. :31.800
##
# cut is a categorical variable describing the quality of the cut
# (Fair, Good, Very Good, Premium, Ideal), so summary() reports how many
# diamonds fall in each level.
summary(diamonds$cut)
## Fair Good Very Good Premium Ideal
## 1610 4906 12082 13791 21551
# carat is numeric, so summary() reports the minimum, 1st quartile, median,
# mean, 3rd quartile and maximum instead of level counts.
summary(diamonds$carat)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.2000 0.4000 0.7000 0.7979 1.0400 5.0100
# View the dataset in table form.
view(diamonds)
# mean() returns the arithmetic mean (average) of a numeric vector.
val <- c(46, 34, 87, 22, 91)
mean(val)
## [1] 56
# Mean of the price variable, i.e. the average price of the diamonds in the
# dataset.
mean(diamonds$price)
## [1] 3932.8
# Minimum, 1st quartile, median, mean, 3rd quartile and maximum of price.
# No missing-value count appears in the output below.
summary(diamonds$price)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 326 950 2401 3933 5324 18823
#
# Scatterplot of diamond weight (carat, x axis) against price (y axis).
ggplot(diamonds) +
  geom_point(aes(x = carat, y = price))

# The same scatterplot with the x/y mapping set once in ggplot() and each
# point coloured by the quality of the diamond's cut.
ggplot(diamonds, aes(x = carat, y = price)) +
  geom_point(aes(color = cut))
#### Histograms
?hist
# hist() draws a histogram; this one shows how diamond carat weights are
# distributed in the dataset.
hist(
  diamonds$carat,
  main = "Histogram of Diamond Carat Weight",
  xlab = "Carat"
)
# Histogram of diamond prices, useful for seeing how prices are distributed
# and for spotting patterns or outliers.
hist(
  diamonds$price,
  main = "Histogram of Diamond Price",
  xlab = "Price"
)
# Standard deviation of the carat variable in the diamonds dataset.
sd(diamonds$carat)
## [1] 0.4740112
# Standard deviation of the price variable in the diamonds dataset.
sd(diamonds$price)
## [1] 3989.44
# Variance of the carat variable in the diamonds dataset.
var(diamonds$carat)
## [1] 0.2246867
# Variance of the price variable in the diamonds dataset.
var(diamonds$price)
## [1] 15915629
# Bar plot of counts: one bar per level of cut, whose height is the number of
# diamonds at that level.
ggplot(data = diamonds) +
  stat_count(mapping = aes(x = cut))
# The same bars expressed as proportions instead of counts.
# x = cut puts the levels of cut on the x axis.
# y = after_stat(prop) plots the proportion computed by stat_count(); it
# replaces the `..prop..` dot-dot notation, deprecated in ggplot2 3.4.0.
# group = 1 places every bar in one group so the proportions are taken over
# the whole dataset rather than within each bar.
ggplot(data = diamonds) +
  stat_count(mapping = aes(x = cut, y = after_stat(prop), group = 1))
# Summary statistics of depth for each level of cut.
# stat_summary() reduces depth to three numbers per level: fun = median gives
# the median depth, while fun.min = min and fun.max = max give the minimum
# and maximum depth.
ggplot(diamonds) +
  stat_summary(
    aes(x = cut, y = depth),
    fun = median,
    fun.min = min,
    fun.max = max
  )
# Colour the bar borders only.
# Each bar is the count of observations for one level of cut; color = cut
# inside aes() assigns the border colour from the level of cut.
ggplot(diamonds) +
  stat_count(aes(x = cut, color = cut))

# geom_bar() adds a bar layer with the same mapping: one bar per level of
# cut, each level given a different colour.
ggplot(diamonds) +
  geom_bar(aes(x = cut, color = cut))

# Colour the whole bar: fill = cut fills each bar according to its level.
ggplot(diamonds) +
  stat_count(aes(x = cut, fill = cut))
ggplot(diamonds) +
  geom_bar(aes(x = cut, fill = cut))

# Fill by a second variable (clarity, then color) so each cut bar is divided
# by that variable's levels.
ggplot(diamonds) +
  stat_count(aes(x = cut, fill = clarity))
ggplot(diamonds) +
  geom_bar(aes(x = cut, fill = clarity))
ggplot(diamonds) +
  geom_bar(aes(x = cut, fill = color))

# position = "dodge" draws the sub-bars next to each other.
ggplot(diamonds) +
  geom_bar(aes(x = cut, fill = clarity), position = "dodge")
ggplot(diamonds) +
  geom_bar(aes(x = cut, fill = color), position = "dodge")
# map_data("nz") loads map data for New Zealand into the nz data frame:
# longitude and latitude coordinates that define the region boundaries.
# In the plot, long goes on the x axis and lat on the y axis, and
# group = group gives the grouping structure of the polygons.
nz <- map_data("nz")
ggplot(nz, aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "white", color = "black")

# The same outline plot for the "usa" map data.
usa <- map_data("usa")
ggplot(usa, aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "white", color = "black")
?map_data
library(naijR)
## Warning: package 'naijR' was built under R version 4.3.3
# Draw maps of Nigeria with naijR.
# Default map.
map_ng()
# Map built from lgas() (presumably the local government areas -- confirm in
# the naijR documentation).
map_ng(lgas())
# States selected with gpz = "sw" (presumably the South-West geopolitical
# zone -- confirm in the naijR documentation), with text labels shown and
# col = 4.
map_ng(states(gpz = "sw"), show.text = TRUE, col = 4)
kk <- "Kebbi"
##map_ng(kk, col = 6, title = paste(kk, "State"))
# Map a single state. The title is written out for the state being drawn:
# building it from kk would label this map of Lagos "Kebbi State".
map_ng("Lagos", col = 6, title = "Lagos State")
ng <- map_ng()
# ng prints as a simple feature collection (see the output below) and has no
# long/lat columns, so the polygon plot below is left disabled.
#ggplot(ng, aes(long, lat, group=group)) + geom_polygon(fill="white", color="black")
ng
## Simple feature collection with 37 features and 12 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 2.668534 ymin: 4.273007 xmax: 14.67882 ymax: 13.89442
## Geodetic CRS: WGS 84
## First 10 features:
## admin1Name admin1Pcod admin1RefN admin1AltN admin1Al_1 admin0Name
## 1 Abia NG001 Abia <NA> <NA> Nigeria
## 2 Adamawa NG002 Adamawa <NA> <NA> Nigeria
## 3 Akwa Ibom NG003 Akwa Ibom <NA> <NA> Nigeria
## 4 Anambra NG004 Anambra <NA> <NA> Nigeria
## 5 Bauchi NG005 Bauchi <NA> <NA> Nigeria
## 6 Bayelsa NG006 Bayelsa <NA> <NA> Nigeria
## 7 Benue NG007 Benue <NA> <NA> Nigeria
## 8 Borno NG008 Borno <NA> <NA> Nigeria
## 9 Cross River NG009 Cross River <NA> <NA> Nigeria
## 10 Delta NG010 Delta <NA> <NA> Nigeria
## admin0Pcod date validOn validTo Shape_Leng Shape_Area
## 1 NG 2016-11-29 2016-12-15 <NA> 4.695135 0.3965429
## 2 NG 2016-11-29 2016-12-15 <NA> 11.525443 3.1130065
## 3 NG 2016-11-29 2016-12-15 <NA> 5.263830 0.5494762
## 4 NG 2016-11-29 2016-12-15 <NA> 3.595960 0.3926608
## 5 NG 2016-11-29 2016-12-15 <NA> 13.952005 4.0110175
## 6 NG 2016-11-29 2016-12-15 <NA> 5.046708 0.7767679
## 7 NG 2016-11-29 2016-12-15 <NA> 9.408080 2.5783633
## 8 NG 2016-11-29 2016-12-15 <NA> 13.714364 5.9878487
## 9 NG 2016-11-29 2016-12-15 <NA> 8.779796 1.7112182
## 10 NG 2016-11-29 2016-12-15 <NA> 7.372526 1.3940820
## geometry
## 1 MULTIPOLYGON (((7.38681 6.0...
## 2 MULTIPOLYGON (((13.62129 10...
## 3 MULTIPOLYGON (((8.344815 4....
## 4 MULTIPOLYGON (((6.932539 6....
## 5 MULTIPOLYGON (((10.75125 12...
## 6 MULTIPOLYGON (((6.552828 5....
## 7 MULTIPOLYGON (((8.524425 8....
## 8 MULTIPOLYGON (((13.35885 13...
## 9 MULTIPOLYGON (((8.56068 4.7...
## 10 MULTIPOLYGON (((6.668921 6....