R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Print summary statistics (min, quartiles, median, mean, max) for each column of `cars`.
summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

library(ggplot2)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)

# One smoothed hwy ~ displ line per `drv` value, distinguished by line type.
ggplot(mpg) +
  geom_smooth(aes(x = displ, y = hwy, linetype = drv))
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# A single smoothed line through all observations.
ggplot(mpg) +
  geom_smooth(aes(x = displ, y = hwy))
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# One smoothed line per `drv` value; `group` splits the data without
# mapping it to a visual property.
ggplot(mpg) +
  geom_smooth(aes(x = displ, y = hwy, group = drv))
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# One coloured line per `drv` value, with the legend switched off.
ggplot(mpg) +
  geom_smooth(aes(x = displ, y = hwy, color = drv), show.legend = FALSE)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Scatter plot plus smoothed line; each layer repeats the same x/y mapping.
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy)) +
  geom_smooth(aes(x = displ, y = hwy))
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Same layers, with the mapping declared once in ggplot() and inherited
# by both geoms.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# The colour mapping is local to the points; the smooth only sees the
# inherited x/y mapping, so a single line is drawn.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(color = class)) +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Full scatter plot coloured by class, with the smooth fitted to the
# subcompact cars only.
# filter() is namespace-qualified because dplyr::filter() and stats::filter()
# share a name (see the conflicts reported when tidyverse is attached), so the
# call no longer depends on package load order.
# se = FALSE draws the smoothed line without its standard-error band.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(color = class)) +
  geom_smooth(
    data = dplyr::filter(mpg, class == "subcompact"),
    se = FALSE
  )
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Same plot with the smooth restricted to minivans; loess reports
# near-singular fits for this subset (see the warnings below).
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(color = class)) +
  geom_smooth(
    data = dplyr::filter(mpg, class == "minivan"),
    se = FALSE
  )
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : pseudoinverse used at 4.008
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : neighborhood radius 0.708
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : There are other near singularities as well. 0.25

# Same plot with the smooth restricted to pickups.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(color = class)) +
  geom_smooth(
    data = dplyr::filter(mpg, class == "pickup"),
    se = FALSE
  )
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Most used functions
# str() prints the structure of an object: its class, dimensions and, for
# each column, the type and first few values. diamonds is a data set.
str(diamonds)
## tibble [53,940 × 10] (S3: tbl_df/tbl/data.frame)
##  $ carat  : num [1:53940] 0.23 0.21 0.23 0.29 0.31 0.24 0.24 0.26 0.22 0.23 ...
##  $ cut    : Ord.factor w/ 5 levels "Fair"<"Good"<..: 5 4 2 4 2 3 3 3 1 3 ...
##  $ color  : Ord.factor w/ 7 levels "D"<"E"<"F"<"G"<..: 2 2 2 6 7 7 6 5 2 5 ...
##  $ clarity: Ord.factor w/ 8 levels "I1"<"SI2"<"SI1"<..: 2 3 5 4 2 6 7 3 4 5 ...
##  $ depth  : num [1:53940] 61.5 59.8 56.9 62.4 63.3 62.8 62.3 61.9 65.1 59.4 ...
##  $ table  : num [1:53940] 55 61 65 58 58 57 57 55 61 61 ...
##  $ price  : int [1:53940] 326 326 327 334 335 336 336 337 337 338 ...
##  $ x      : num [1:53940] 3.95 3.89 4.05 4.2 4.34 3.94 3.95 4.07 3.87 4 ...
##  $ y      : num [1:53940] 3.98 3.84 4.07 4.23 4.35 3.96 3.98 4.11 3.78 4.05 ...
##  $ z      : num [1:53940] 2.43 2.31 2.31 2.63 2.75 2.48 2.47 2.53 2.49 2.39 ...
# summary() of a numeric column: min, quartiles, median, mean and max
summary(diamonds[["carat"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.2000  0.4000  0.7000  0.7979  1.0400  5.0100
# Create a vector Val with the values (46, 34, 87, 22, 91) and find its average
Val <- c(46, 34, 87, 22, 91)
average <- mean(Val)
print(average)
## [1] 56
# Mean price of the diamonds in the data set
Cost <- mean(diamonds$price)
print(Cost)
## [1] 3932.8
# Summary statistics (min, quartiles, median, mean, max) of the diamond prices
Cost1 <- summary(diamonds$price)
print(Cost1)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     326     950    2401    3933    5324   18823
# Smoothed price against carat for the whole data set.
ggplot(diamonds) +
  geom_smooth(aes(x = carat, y = price))
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

# One smoothed line per cut, distinguished by line type.
ggplot(diamonds) +
  geom_smooth(aes(x = carat, y = price, linetype = cut))
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

# Scatter plot of price against carat.
# `linetype` is not an aesthetic of geom_point(): it was ignored with an
# "Ignoring unknown aesthetics" warning, so it is omitted here. The plot is
# unchanged; map `color` or `shape` to distinguish cuts on points.
ggplot(data = diamonds) +
  geom_point(mapping = aes(x = carat, y = price))

# Same scatter plot with a smoothed trend line drawn on top.
ggplot(data = diamonds) +
  geom_point(mapping = aes(x = carat, y = price)) +
  geom_smooth(mapping = aes(x = carat, y = price))
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

# Points coloured by cut, with a single smoothed line over all of them.
ggplot(diamonds) +
  geom_point(aes(x = carat, y = price, color = cut)) +
  geom_smooth(aes(x = carat, y = price))
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

# Points coloured by cut, no smooth.
ggplot(diamonds) +
  geom_point(aes(x = carat, y = price, color = cut))

# One coloured smoothed line per cut.
ggplot(diamonds) +
  geom_smooth(aes(x = carat, y = price, color = cut))
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

# One smoothed line per cut; `group` splits the data without mapping cut
# to a visual property.
ggplot(diamonds) +
  geom_smooth(aes(x = carat, y = price, group = cut))
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

# Mapping declared once in ggplot(); colour is local to the points, so the
# smooth draws a single line.
ggplot(diamonds, aes(x = carat, y = price)) +
  geom_point(aes(color = cut)) +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

# Quantiles of carat weight at 0%, 25%, 50%, 75% and 100%
quantile(diamonds[["carat"]])
##   0%  25%  50%  75% 100% 
## 0.20 0.40 0.70 1.04 5.01
# Histogram of diamond carat weight
hist(
  diamonds[["carat"]],
  main = "Histogram of Diamonds carat weight",
  xlab = "Carat"
)

# Histogram of diamond prices
hist(
  diamonds[["price"]],
  main = "Histogram of Diamonds Price",
  xlab = "Price"
)

# var() returns the variance of a numeric vector (here: carat weight)
var(diamonds$carat)
## [1] 0.2246867
# sd() returns the standard deviation of a numeric vector (here: price)
sd(diamonds$price)
## [1] 3989.44


# Number of diamonds in each cut category
table(diamonds[["cut"]])
## 
##      Fair      Good Very Good   Premium     Ideal 
##      1610      4906     12082     13791     21551
# BAR CHART: one bar per cut, height = number of rows
ggplot(diamonds, aes(x = cut)) +
  geom_bar()

# Colouring the bar chart using fill (interior of the bars)
ggplot(diamonds, aes(x = cut, fill = cut)) +
  geom_bar()

# Colouring the bar chart using color (outline of the bars)
ggplot(diamonds, aes(x = cut, color = cut)) +
  geom_bar()

# Stacking variables: each bar is split into segments by clarity
ggplot(diamonds, aes(x = cut, fill = clarity)) +
  geom_bar()

# position = "fill": stacked bars rescaled to the same height
ggplot(diamonds, aes(x = cut, fill = clarity)) +
  geom_bar(position = "fill")

# position = "dodge": clarity segments placed side by side
ggplot(diamonds, aes(x = cut, fill = clarity)) +
  geom_bar(position = "dodge")

# Load the "nz" map as a data frame that ggplot2 can draw
nz <- map_data("nz")

# MAP of NEW ZEALAND: one polygon per `group`, white fill and black outline
ggplot(nz, aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "white", colour = "black")

# Same map drawn with the coord_quickmap() coordinate system
ggplot(nz, aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "white", colour = "black") +
  coord_quickmap()

# Same polygon plot for the "usa" map
usa <- map_data("usa")
ggplot(usa, aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "white", colour = "black")

# stat_count() can be used in place of geom_bar() to draw the count per cut
ggplot(diamonds, aes(x = cut)) +
  stat_count()

# To display proportions instead of counts.
# after_stat(prop) replaces the `..prop..` dot-dot notation, which was
# deprecated in ggplot2 3.4.0 and triggered a lifecycle warning.
# group = 1 puts all rows in one group so that each bar's proportion is
# computed over the whole data set rather than within its own cut.
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut, y = after_stat(prop), group = 1))

# stat_summary(): for each cut, summarise depth with its median (fun),
# minimum (fun.min) and maximum (fun.max)
ggplot(diamonds) +
  stat_summary(
    aes(x = cut, y = depth),
    fun = median,
    fun.min = min,
    fun.max = max
  )