About mpg dataset

The mpg dataset in R comes from the ggplot2 package and is commonly used for data visualization and statistical analysis. It contains data about different car models, including information on fuel economy, car manufacturer, engine size, and other specifications.


You can perform various analyses on the mpg dataset, such as summarizing the data, filtering specific conditions, or plotting different graphs.


The mpg dataset is often used to practice data visualization with ggplot2, such as creating scatter plots, bar charts, and box plots. It is also useful for exploring statistical relationships, like comparing fuel efficiency across different car manufacturers or analyzing how engine size impacts fuel economy.


Loading the package and the dataset

# Attach ggplot2, which provides both the plotting functions and the data.
library(ggplot2)

# List the datasets that ship with ggplot2.
data(package = "ggplot2")

# Keep a working copy of mpg under a shorter name and inspect its structure.
milgal <- ggplot2::mpg
str(milgal)
## tibble [234 × 11] (S3: tbl_df/tbl/data.frame)
##  $ manufacturer: chr [1:234] "audi" "audi" "audi" "audi" ...
##  $ model       : chr [1:234] "a4" "a4" "a4" "a4" ...
##  $ displ       : num [1:234] 1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
##  $ year        : int [1:234] 1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ...
##  $ cyl         : int [1:234] 4 4 4 4 6 6 6 4 4 4 ...
##  $ trans       : chr [1:234] "auto(l5)" "manual(m5)" "manual(m6)" "auto(av)" ...
##  $ drv         : chr [1:234] "f" "f" "f" "f" ...
##  $ cty         : int [1:234] 18 21 20 21 16 18 18 18 16 20 ...
##  $ hwy         : int [1:234] 29 29 31 30 26 26 27 26 25 28 ...
##  $ fl          : chr [1:234] "p" "p" "p" "p" ...
##  $ class       : chr [1:234] "compact" "compact" "compact" "compact" ...

Get the number of rows and build a row index

# Number of observations in the dataset.
n <- nrow(milgal)

# Row index 1..n, used as the x variable in the scatter plots.
# seq_len() returns an empty vector when n is 0, whereas seq(1, n) would
# count backwards and return c(1, 0).
rowind <- seq_len(n)


Scatter plot

# Default scatter plot of hwy against the row index.
ggplot(data = milgal, mapping = aes(x = rowind, y = hwy)) +
  geom_point()

# Same scatter plot, drawn with small red points of shape 5, and stored so
# that later chunks can build on it.
point_plot1 <- ggplot(data = milgal, mapping = aes(x = rowind, y = hwy)) +
  geom_point(colour = "red", size = 1, shape = 5)
point_plot1

# Add a title, a subtitle and axis labels to the stored scatter plot.
point_plot2 <- point_plot1 +
  ggtitle(
    label = "Highway vs data series number",
    subtitle = "From mpg dataset"
  ) +
  xlab("Data Series Number") +
  ylab("hwy")
point_plot2

# Restyle the text elements: italic title and subtitle, bold axis titles,
# each with its own colour and size.
point_plot3 <- point_plot2 +
  theme(
    axis.title.x = element_text(face = "bold", size = 7, colour = "pink"),
    axis.title.y = element_text(face = "bold", size = 7, colour = "red"),
    plot.title = element_text(face = "italic", size = 10, colour = "blue"),
    plot.subtitle = element_text(face = "italic", size = 5, colour = "green")
  )

point_plot3


Box plot

# Single box plot of hwy with a blue outline and yellow fill.
ggplot(data = milgal, mapping = aes(y = hwy)) +
  geom_boxplot(fill = "yellow", colour = "blue")


Histogram

# Histogram of hwy using the default binning (no bins/binwidth supplied).
ggplot(data = milgal, mapping = aes(x = hwy)) +
  geom_histogram(fill = "yellow", colour = "green")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.


Dot plot

# Dot plot of hwy: white dots with a pink outline, default bin width.
ggplot(data = milgal, mapping = aes(x = hwy)) +
  geom_dotplot(fill = "white", colour = "pink", dotsize = 1)
## Bin width defaults to 1/30 of the range of the data. Pick better value with
## `binwidth`.


Area plot

# Area plot of binned hwy counts (stat = "bin"), blue fill with brown outline.
ggplot(data = milgal, mapping = aes(x = hwy)) +
  geom_area(fill = "blue", colour = "brown", stat = "bin")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.


Pie chart

# Pie chart of row counts per model: one stacked bar at a single dummy x
# level, drawn in polar coordinates with the bar height mapped to the angle.
ggplot(data = milgal, mapping = aes(x = factor(""), fill = model)) +
  geom_bar() +
  coord_polar(theta = "y") +
  scale_x_discrete(name = "") +
  labs(caption = "ggplot2", title = "MODEL DISTRIBUTION") +
  # Remove axis decorations and grid lines; put the legend above the plot.
  theme(
    legend.position = "top",
    panel.grid = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.title = element_blank(),
    axis.ticks = element_blank()
  )


Donut plot

# Donut chart of row counts per manufacturer: one stacked bar at x = 2,
# drawn in polar coordinates with the bar height mapped to the angle.
ggplot(milgal, aes(x = 2, fill = manufacturer)) +
  geom_bar() +
  coord_polar(theta = "y") +
  # Starting the x range at 0 leaves empty space inside the ring.
  # xlim() is the only x scale: a preceding scale_x_discrete("") would be
  # replaced by it and only trigger a "Scale for x is already present" message.
  xlim(0, 2.5) +
  labs(title = "MANUFACTURER DISTRIBUTION",
       caption = "ggplot2") +
  # theme_void() is a complete theme and replaces all earlier theme settings,
  # so it is applied first and the legend position is set afterwards.
  theme_void() +
  theme(legend.position = "bottom")