This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Cmd+Option+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Cmd+Shift+K to preview the HTML file).
The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.
#Data Understanding:
# Load the mtcars data set from a CSV file in the working directory.
# stringsAsFactors is pinned to FALSE so that text columns are read as
# character on every R version (the default was TRUE before R 4.0.0).
df <- read.csv("mtcars.csv", stringsAsFactors = FALSE)
# Preview the first rows (head() shows six by default)
head(df)
# Print the dimensions as (rows, columns)
dim(df)
## [1] 32 12
# Show the class of the loaded object
class(df)
## [1] "data.frame"
# Show the storage type of a few selected columns
cat("Data types of columns:\n")
## Data types of columns:
str(df[, c("model", "mpg", "hp", "am"), drop = FALSE])
## 'data.frame': 32 obs. of 4 variables:
## $ model: chr "Mazda RX4" "Mazda RX4 Wag" "Datsun 710" "Hornet 4 Drive" ...
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ hp : int 110 110 93 110 175 105 245 62 95 123 ...
## $ am : int 1 1 1 0 0 0 0 0 0 0 ...
# Per-column summary: quartiles and mean for numeric columns,
# length/class/mode for character columns
summary(object = df)
## model mpg cyl disp
## Length:32 Min. :10.40 Min. :4.000 Min. : 71.1
## Class :character 1st Qu.:15.43 1st Qu.:4.000 1st Qu.:120.8
## Mode :character Median :19.20 Median :6.000 Median :196.3
## Mean :20.09 Mean :6.188 Mean :230.7
## 3rd Qu.:22.80 3rd Qu.:8.000 3rd Qu.:326.0
## Max. :33.90 Max. :8.000 Max. :472.0
## hp drat wt qsec
## Min. : 52.0 Min. :2.760 Min. :1.513 Min. :14.50
## 1st Qu.: 96.5 1st Qu.:3.080 1st Qu.:2.581 1st Qu.:16.89
## Median :123.0 Median :3.695 Median :3.325 Median :17.71
## Mean :146.7 Mean :3.597 Mean :3.217 Mean :17.85
## 3rd Qu.:180.0 3rd Qu.:3.920 3rd Qu.:3.610 3rd Qu.:18.90
## Max. :335.0 Max. :4.930 Max. :5.424 Max. :22.90
## vs am gear carb
## Min. :0.0000 Min. :0.0000 Min. :3.000 Min. :1.000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:3.000 1st Qu.:2.000
## Median :0.0000 Median :0.0000 Median :4.000 Median :2.000
## Mean :0.4375 Mean :0.4062 Mean :3.688 Mean :2.812
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :1.0000 Max. :1.0000 Max. :5.000 Max. :8.000
# Recode the integer 0/1 flag 'am' as a logical vector (0 -> FALSE, 1 -> TRUE)
df[["am"]] <- as.logical(df[["am"]])
# Confirm the column's new type
str(df[["am"]])
## logi [1:32] TRUE TRUE TRUE FALSE FALSE FALSE ...
# Scatter plot: horsepower on the x-axis, miles per gallon on the y-axis
plot(
  x = df$hp,
  y = df$mpg,
  main = "Scatter Plot of hp vs. mpg",
  xlab = "Horsepower (hp)",
  ylab = "Miles per Gallon (mpg)"
)
# Tally how many cars fall under each distinct cylinder count
cylinder_counts <- table(df$cyl)
# Bar plot of those tallies, one bar per distinct cylinder count
barplot(
  height = cylinder_counts,
  col = "skyblue",
  border = "black",
  main = "Distribution of Cars by Number of Cylinders",
  xlab = "Number of Cylinders",
  ylab = "Count"
)
# Histogram of the miles-per-gallon distribution
hist(
  x = df$mpg,
  # A single number for 'breaks' is only a suggestion: hist() picks
  # "pretty" break points, so the final bin count may differ from 10.
  breaks = 10,
  col = "skyblue",
  border = "black",
  main = "Histogram of Miles per Gallon (mpg)",
  xlab = "Miles per Gallon (mpg)",
  ylab = "Frequency"
)