SETTING UP MY ENVIRONMENT FOR DATA VISUALIZATION

Here we load all the packages needed for data visualization in RStudio with ggplot2.

library(palmerpenguins)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.1.2     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Load the `diamonds` dataset into the workspace so the bar-chart and
# histogram examples below can refer to it by name.
# NOTE(review): `diamonds` is presumably the dataset provided by ggplot2
# (attached above via tidyverse) -- confirm.
data("diamonds")

Loading ggplot2 and tidyr explicitly (both were already attached by tidyverse above)

library(ggplot2)
library(tidyr)

Scatter Plot for Body Mass and Flipper Length

# Scatter plot of penguin body mass (g) against flipper length (mm),
# overlaid with a linear-model smoother.
ggplot(penguins, mapping = aes(x = flipper_length_mm, y = body_mass_g)) +
  geom_point(alpha = 0.5) + # semi-transparent so overlapping points stay visible
  geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_point).

Change the colour of our scatter plot

# Same scatter plot, with the points coloured purple.
# The colour is set on the single existing point layer instead of stacking a
# second geom_point() on top: a second layer would draw every point twice,
# hide the semi-transparent layer and the regression line underneath it, and
# emit the missing-value warning twice.
ggplot(data = penguins, aes(x = flipper_length_mm, y = body_mass_g)) +
  geom_point(color = "purple", alpha = 0.5) + # constant colour: set outside aes()
  geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_point).

# Purple scatter plot with a linear-model smoother and a descriptive title.
# The colour is set on the single point layer (rather than adding a second
# geom_point()), so each point is drawn once, keeps its transparency, and the
# regression line is not covered by an opaque duplicate layer.
ggplot(data = penguins, aes(x = flipper_length_mm, y = body_mass_g)) +
  geom_point(color = "purple", alpha = 0.5) + # constant colour: set outside aes()
  geom_smooth(method = "lm") +
  labs(title = "Palmer Penguins: Body Mass vs Flipper Length")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_point).

Attach Additional dataset

# Put the columns of `diamonds` on the search path so they can be referenced
# by bare name (e.g. `depth`) outside of aes().
# NOTE(review): attach() is generally discouraged because attached columns can
# be silently masked by, or mask, other objects; prefer `diamonds$depth` or
# `with(diamonds, ...)` where a column is needed outside aes().
attach(diamonds)

Explore bar graphs using ggplot2

# Peek at the first and last ten rows of the dataset.
head(diamonds, n = 10)
tail(diamonds, n = 10)

# Bar chart of diamond counts per cut; the bar outlines are coloured by cut.
ggplot(diamonds, aes(x = cut, colour = cut)) +
  geom_bar()

## Add color to the bar graphs

# Bar chart of counts per cut with a constant blue outline on every bar
# (the colour is set outside aes(), so it is a fixed value, not a mapping).
ggplot(diamonds, aes(x = cut)) +
  geom_bar(colour = "blue")

# Bar chart of counts per cut, with each bar filled by its own cut category.
ggplot(diamonds, aes(x = cut, fill = cut)) +
  geom_bar()

# Bar chart of counts per cut, with each bar split into segments filled by
# clarity.
ggplot(diamonds, aes(x = cut, fill = clarity)) +
  geom_bar()

Histogram

# Histogram of diamond prices using the default binning.
ggplot(diamonds, aes(x = price)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Add color and title

# Histogram of diamond depth with red bar outlines and a title.
# The outline colour is a constant, so it is set outside aes(). Inside aes()
# the string "red" would be treated as a one-level variable, giving a
# default-palette colour and a spurious legend instead of red outlines.
ggplot(data = diamonds) +
  geom_histogram(mapping = aes(x = depth), color = "red") +
  scale_x_continuous(limits = c(55, 69)) + # rows with depth outside 55-69 are dropped
  labs(title = "Histogram Showing Depth")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 64 rows containing non-finite values (stat_bin).
## Warning: Removed 2 rows containing missing values (geom_bar).

Add a normal curve to the histogram

# Density-scaled histogram of diamond depth with a normal curve overlaid.
# The curve's mean and sd are taken explicitly from diamonds$depth, so the
# block does not depend on an earlier attach(diamonds). They are computed
# from the full column, before the x-axis limits drop out-of-range rows.
ggplot(data = diamonds, aes(depth)) +
  # after_stat(density) rescales bar heights to a density so the histogram
  # and the normal curve share the same y scale.
  geom_histogram(aes(y = after_stat(density)), color = "red") +
  scale_x_continuous(limits = c(55, 70)) +
  stat_function(fun = dnorm,
                args = list(mean = mean(diamonds$depth),
                            sd = sd(diamonds$depth)),
                col = "#1b98e0",
                size = 1) +
  labs(title = "Histogram Showing Depth")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 45 rows containing non-finite values (stat_bin).
## Warning: Removed 2 rows containing missing values (geom_bar).

Facet the histogram to show a separate panel for each category of cut

# Density-scaled histogram of diamond depth, one panel per cut, with a normal
# curve overlaid. The curve's mean and sd are taken explicitly from
# diamonds$depth, so the block does not depend on an earlier attach(diamonds).
# They are computed over the whole dataset, so every panel shows the same
# overall reference curve rather than a per-cut fit.
ggplot(data = diamonds, aes(depth)) +
  # after_stat(density) rescales bar heights to a density so the histogram
  # and the normal curve share the same y scale.
  geom_histogram(aes(y = after_stat(density)), color = "red") +
  scale_x_continuous(limits = c(55, 70)) +
  stat_function(fun = dnorm,
                args = list(mean = mean(diamonds$depth),
                            sd = sd(diamonds$depth)),
                col = "#1b98e0",
                size = 1) +
  labs(title = "Histogram Showing Depth") +
  facet_wrap(~cut)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 45 rows containing non-finite values (stat_bin).
## Warning: Removed 10 rows containing missing values (geom_bar).