This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Per-column summary statistics of the built-in cars dataset
print(summary(cars))
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ✔ readr 2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Smoothed hwy vs displ curves, one line type per level of drv
ggplot(mpg) +
  geom_smooth(aes(x = displ, y = hwy, linetype = drv))
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# Same curves, separated by drv through the group aesthetic only
ggplot(mpg) +
  geom_smooth(aes(x = displ, y = hwy, group = drv))
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# Local mappings: x and y are set globally in ggplot(); colour is mapped
# inside geom_point() only, so the smoother is not split by class
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(color = class)) +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# Same scatter plot, with the smoother fitted to the subcompact rows only
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(color = class)) +
  geom_smooth(
    data = dplyr::filter(mpg, class == "subcompact"),
    se = TRUE
  )
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# Same scatter plot, with the smoother fitted to the minivan rows only
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point(aes(color = class)) +
  geom_smooth(
    data = dplyr::filter(mpg, class == "minivan"),
    se = TRUE
  )
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : pseudoinverse used at 4.008
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : neighborhood radius 0.708
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : There are other near singularities as well. 0.25
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at
## 4.008
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 0.708
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
## number 0
## Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
## else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other near
## singularities as well. 0.25
# Open the help page for the diamonds dataset
help("diamonds")
# Compact overview of its columns and their types
str(diamonds)
## tibble [53,940 × 10] (S3: tbl_df/tbl/data.frame)
## $ carat : num [1:53940] 0.23 0.21 0.23 0.29 0.31 0.24 0.24 0.26 0.22 0.23 ...
## $ cut : Ord.factor w/ 5 levels "Fair"<"Good"<..: 5 4 2 4 2 3 3 3 1 3 ...
## $ color : Ord.factor w/ 7 levels "D"<"E"<"F"<"G"<..: 2 2 2 6 7 7 6 5 2 5 ...
## $ clarity: Ord.factor w/ 8 levels "I1"<"SI2"<"SI1"<..: 2 3 5 4 2 6 7 3 4 5 ...
## $ depth : num [1:53940] 61.5 59.8 56.9 62.4 63.3 62.8 62.3 61.9 65.1 59.4 ...
## $ table : num [1:53940] 55 61 65 58 58 57 57 55 61 61 ...
## $ price : int [1:53940] 326 326 327 334 335 336 336 337 337 338 ...
## $ x : num [1:53940] 3.95 3.89 4.05 4.2 4.34 3.94 3.95 4.07 3.87 4 ...
## $ y : num [1:53940] 3.98 3.84 4.07 4.23 4.35 3.96 3.98 4.11 3.78 4.05 ...
## $ z : num [1:53940] 2.43 2.31 2.31 2.63 2.75 2.48 2.47 2.53 2.49 2.39 ...
# Inspect the full dataset
view(diamonds)
# Summary statistics of one column:
# min, 1st quartile, median, mean, 3rd quartile and max
summary(diamonds[["carat"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.2000 0.4000 0.7000 0.7979 1.0400 5.0100
# Average of a small hand-written sample
val <- c(46, 34, 87, 22, 91)
mean(x = val)
## [1] 56
# Mean of the price column in the diamonds dataset
mean(diamonds[["price"]])
## [1] 3932.8
# Scatter plot of carat vs price
ggplot(diamonds) +
  geom_point(aes(x = carat, y = price))
# Scatter plot of carat vs price, with points coloured by cut
ggplot(diamonds) +
  geom_point(aes(x = carat, y = price, color = cut))
# Histogram of carat weight
hist(
  diamonds$carat,
  main = "Histogram of diamonds carat weight",
  xlab = "Carat"
)
# Histogram of price: plot the price column so the data matches the
# title and the x-axis label
hist(
  diamonds$price,
  main = "Histogram of diamonds price",
  xlab = "Price"
)
# Spread of carat and price: variance first, then standard deviation
var(diamonds[["carat"]])
## [1] 0.2246867
var(diamonds[["price"]])
## [1] 15915629
sd(diamonds[["carat"]])
## [1] 0.4740112
sd(diamonds[["price"]])
## [1] 3989.44
# Categorical data: number of diamonds per cut
table(diamonds[["cut"]])
##
## Fair Good Very Good Premium Ideal
## 1610 4906 12082 13791 21551
# Bar chart: number of diamonds per cut
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut))
# Bar chart of proportions instead of counts. group = 1 puts all bars in a
# single group so that prop is computed across all cuts. after_stat(prop)
# replaces the `..prop..` dot-dot notation deprecated in ggplot2 3.4.0.
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut, y = after_stat(prop), group = 1))
# stat_summary: median depth per cut, with a range from min to max
ggplot(diamonds) +
  stat_summary(
    aes(x = cut, y = depth),
    fun.min = min,
    fun.max = max,
    fun = median
  )
# Bar chart with the border colour mapped to cut
ggplot(diamonds) +
  geom_bar(aes(x = cut, color = cut))
# Bar chart with the fill colour mapped to cut
ggplot(diamonds) +
  geom_bar(aes(x = cut, fill = cut))
# Stacked bars: each coloured rectangle is one combination of cut and clarity
ggplot(diamonds) +
  geom_bar(aes(x = cut, fill = clarity))
# position = "dodge" places the clarity bars side by side instead of stacking
ggplot(diamonds) +
  geom_bar(aes(x = cut, fill = clarity), position = "dodge")
# coord_quickmap() sets the aspect ratio for maps; draw the same outline
# without it first, then with it, for comparison
nz <- map_data("nz")
ggplot(nz, aes(long, lat, group = group)) +
  geom_polygon(fill = "white", colour = "black")
ggplot(nz, aes(long, lat, group = group)) +
  geom_polygon(fill = "white", colour = "black") +
  coord_quickmap()