# Global knitr chunk option: echo = TRUE shows each chunk's source code in the rendered output
knitr::opts_chunk$set(echo = TRUE)
# R Notebook: Exploring and Understanding
## Example 1: Loading and Summarizing Data
``` r
# Load the necessary library
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Bind the built-in mtcars data set to a working name
mtcars_data <- datasets::mtcars
# Preview the first six rows
head(mtcars_data, n = 6L)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
# Per-column descriptive statistics (min, quartiles, median, mean, max)
summary(mtcars_data)
## mpg cyl disp hp
## Min. :10.40 Min. :4.000 Min. : 71.1 Min. : 52.0
## 1st Qu.:15.43 1st Qu.:4.000 1st Qu.:120.8 1st Qu.: 96.5
## Median :19.20 Median :6.000 Median :196.3 Median :123.0
## Mean :20.09 Mean :6.188 Mean :230.7 Mean :146.7
## 3rd Qu.:22.80 3rd Qu.:8.000 3rd Qu.:326.0 3rd Qu.:180.0
## Max. :33.90 Max. :8.000 Max. :472.0 Max. :335.0
## drat wt qsec vs
## Min. :2.760 Min. :1.513 Min. :14.50 Min. :0.0000
## 1st Qu.:3.080 1st Qu.:2.581 1st Qu.:16.89 1st Qu.:0.0000
## Median :3.695 Median :3.325 Median :17.71 Median :0.0000
## Mean :3.597 Mean :3.217 Mean :17.85 Mean :0.4375
## 3rd Qu.:3.920 3rd Qu.:3.610 3rd Qu.:18.90 3rd Qu.:1.0000
## Max. :4.930 Max. :5.424 Max. :22.90 Max. :1.0000
## am gear carb
## Min. :0.0000 Min. :3.000 Min. :1.000
## 1st Qu.:0.0000 1st Qu.:3.000 1st Qu.:2.000
## Median :0.0000 Median :4.000 Median :2.000
## Mean :0.4062 Mean :3.688 Mean :2.812
## 3rd Qu.:1.0000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :1.0000 Max. :5.000 Max. :8.000
What does the summary function tell us about
the dataset? The summary function provides descriptive statistics for
each column in the dataset, including the minimum, first quartile,
median, mean, third quartile, and maximum for numeric columns.
# Keep only the cars whose fuel economy exceeds 20 mpg
high_mpg_cars <- filter(mtcars_data, mpg > 20)
# Preview the first rows of the filtered subset
head(high_mpg_cars)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
To find the count of cars with MPG greater than 20, we can use the
nrow function:
# Count the rows of the filtered subset, i.e. the cars with mpg > 20
nrow(high_mpg_cars)
## [1] 14
# Derive a power-to-weight column: horsepower (hp) divided by weight (wt)
mtcars_data <- mutate(mtcars_data, power_to_weight = hp / wt)
# Show the first rows, now including power_to_weight
head(mtcars_data)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
## power_to_weight
## Mazda RX4 41.98473
## Mazda RX4 Wag 38.26087
## Datsun 710 40.08621
## Hornet 4 Drive 34.21462
## Hornet Sportabout 50.87209
## Valiant 30.34682
The power_to_weight column represents the ratio of
horsepower to the weight of each car. It can help in understanding the
performance potential of a car relative to its weight.
# Load ggplot2 library
library(ggplot2)
# Build a scatter plot with horsepower on the x-axis and MPG on the y-axis
scatter_plot <- ggplot(data = mtcars_data, mapping = aes(x = hp, y = mpg)) +
  geom_point() +
  labs(
    title = "Scatter Plot of MPG vs Horsepower",
    x = "Horsepower",
    y = "Miles Per Gallon (MPG)"
  )
# Render the plot
scatter_plot
The scatter plot shows a negative relationship between horsepower and MPG. Cars with higher horsepower tend to have lower MPG, indicating less fuel efficiency.
# Average MPG within each cylinder-count group
cyl_mpg_summary <- summarize(
  group_by(mtcars_data, cyl),
  avg_mpg = mean(mpg, na.rm = TRUE)
)
# Print the per-cylinder summary table
cyl_mpg_summary
## # A tibble: 3 × 2
## cyl avg_mpg
## <dbl> <dbl>
## 1 4 26.7
## 2 6 19.7
## 3 8 15.1
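The cylinder group with the highest average MPG can be identified by
inspecting the cyl_mpg_summary table: 4-cylinder cars have the highest
average MPG (26.7), followed by 6-cylinder (19.7) and 8-cylinder (15.1) cars.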