# Global knitr chunk option: echo = TRUE makes every chunk's source code
# appear in the rendered document alongside its output.
knitr::opts_chunk$set(echo = TRUE)


# R Notebook: Exploring and Understanding the mtcars Dataset

## Example 1: Loading and Summarizing Data



``` r
# Load the necessary library
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Bind the built-in mtcars data frame to a working name
mtcars_data <- mtcars

# Preview the first six rows
head(mtcars_data, n = 6L)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
# Per-column summary statistics; for these numeric columns the printout below
# lists Min., 1st Qu., Median, Mean, 3rd Qu. and Max.
summary(mtcars_data)
##       mpg             cyl             disp             hp       
##  Min.   :10.40   Min.   :4.000   Min.   : 71.1   Min.   : 52.0  
##  1st Qu.:15.43   1st Qu.:4.000   1st Qu.:120.8   1st Qu.: 96.5  
##  Median :19.20   Median :6.000   Median :196.3   Median :123.0  
##  Mean   :20.09   Mean   :6.188   Mean   :230.7   Mean   :146.7  
##  3rd Qu.:22.80   3rd Qu.:8.000   3rd Qu.:326.0   3rd Qu.:180.0  
##  Max.   :33.90   Max.   :8.000   Max.   :472.0   Max.   :335.0  
##       drat             wt             qsec             vs        
##  Min.   :2.760   Min.   :1.513   Min.   :14.50   Min.   :0.0000  
##  1st Qu.:3.080   1st Qu.:2.581   1st Qu.:16.89   1st Qu.:0.0000  
##  Median :3.695   Median :3.325   Median :17.71   Median :0.0000  
##  Mean   :3.597   Mean   :3.217   Mean   :17.85   Mean   :0.4375  
##  3rd Qu.:3.920   3rd Qu.:3.610   3rd Qu.:18.90   3rd Qu.:1.0000  
##  Max.   :4.930   Max.   :5.424   Max.   :22.90   Max.   :1.0000  
##        am              gear            carb      
##  Min.   :0.0000   Min.   :3.000   Min.   :1.000  
##  1st Qu.:0.0000   1st Qu.:3.000   1st Qu.:2.000  
##  Median :0.0000   Median :4.000   Median :2.000  
##  Mean   :0.4062   Mean   :3.688   Mean   :2.812  
##  3rd Qu.:1.0000   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :1.0000   Max.   :5.000   Max.   :8.000

```

Question: What does the summary function tell us about the dataset?

Answer:

The summary function provides descriptive statistics for each column in the dataset, including the minimum, first quartile, median, mean, third quartile, and maximum for numeric columns.


## Example 2: Filtering Data

# Keep only the cars whose fuel economy exceeds 20 mpg.
# filter() here is dplyr's verb (it masks stats::filter once dplyr is attached).
high_mpg_cars <- filter(mtcars_data, mpg > 20)

# Preview the first six rows of the filtered data
head(high_mpg_cars, n = 6L)
##                 mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4      21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag  21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710     22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive 21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
## Merc 240D      24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
## Merc 230       22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2

Question: How many cars have an MPG greater than 20?

Answer:

To find the count of cars with MPG greater than 20, we can use the nrow function:

# nrow() returns the number of rows, i.e. the number of cars kept by the filter
nrow(high_mpg_cars)
## [1] 14

## Example 3: Creating New Variables

# Derive a power-to-weight ratio column: horsepower divided by weight
mtcars_data <- mutate(mtcars_data, power_to_weight = hp / wt)

# Preview the first six rows, now including the new column
head(mtcars_data, n = 6L)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
##                   power_to_weight
## Mazda RX4                41.98473
## Mazda RX4 Wag            38.26087
## Datsun 710               40.08621
## Hornet 4 Drive           34.21462
## Hornet Sportabout        50.87209
## Valiant                  30.34682

Question: What does the new column represent, and why might it be useful?

Answer:

The power_to_weight column represents the ratio of each car's horsepower (hp) to its weight (wt, which mtcars records in units of 1000 lbs), i.e. horsepower per 1000 lbs. It can help in understanding the performance potential of a car relative to its weight.


## Example 4: Visualization

# Load ggplot2 library
library(ggplot2)

# Build a scatter plot with horsepower on the x-axis and MPG on the y-axis
scatter_plot <- ggplot(data = mtcars_data, mapping = aes(x = hp, y = mpg)) +
  geom_point() +
  labs(
    title = "Scatter Plot of MPG vs Horsepower",
    x = "Horsepower",
    y = "Miles Per Gallon (MPG)"
  )

# Evaluating the plot object at top level renders it
scatter_plot

Question: What insights can you gain from the scatter plot?

Answer:

The scatter plot shows a negative relationship between horsepower and MPG. Cars with higher horsepower tend to have lower MPG, indicating less fuel efficiency.


## Example 5: Grouping and Summarizing Data

# Average MPG per cylinder count; .groups = "drop" spells out that the
# result is returned ungrouped
cyl_mpg_summary <- mtcars_data %>%
  group_by(cyl) %>%
  summarize(avg_mpg = mean(mpg, na.rm = TRUE), .groups = "drop")

# Print the per-cylinder summary
cyl_mpg_summary
## # A tibble: 3 × 2
##     cyl avg_mpg
##   <dbl>   <dbl>
## 1     4    26.7
## 2     6    19.7
## 3     8    15.1

Question: Which cylinder group has the highest average MPG?

Answer:

The 4-cylinder group has the highest average MPG (26.7), followed by the 6-cylinder (19.7) and 8-cylinder (15.1) groups, as shown in the cyl_mpg_summary table.