This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Cmd+Option+I.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Cmd+Shift+K to preview the HTML file).

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, it displays each chunk's output from the last time that chunk was run in the editor.

# Data understanding ----
# Load the dataset from "mtcars.csv" (resolved relative to the working
# directory) and preview its first six rows.
# Lines beginning with `##` are output recorded from an earlier run.
df <- read.csv(file = "mtcars.csv")
head(df, n = 6L)

# Dimensions of the data: number of rows, then number of columns
dim(df)
## [1] 32 12

# Class of the object returned by read.csv()
class(df)
## [1] "data.frame"

# Storage types of a few selected columns
cat("Data types of columns:\n")
## Data types of columns:
str(df[, c("model", "mpg", "hp", "am"), drop = FALSE])
## 'data.frame':    32 obs. of  4 variables:
##  $ model: chr  "Mazda RX4" "Mazda RX4 Wag" "Datsun 710" "Hornet 4 Drive" ...
##  $ mpg  : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
##  $ hp   : int  110 110 93 110 175 105 245 62 95 123 ...
##  $ am   : int  1 1 1 0 0 0 0 0 0 0 ...

# Per-column summary statistics for the whole data frame
summary(df)
##     model                mpg             cyl             disp      
##  Length:32          Min.   :10.40   Min.   :4.000   Min.   : 71.1  
##  Class :character   1st Qu.:15.43   1st Qu.:4.000   1st Qu.:120.8  
##  Mode  :character   Median :19.20   Median :6.000   Median :196.3  
##                     Mean   :20.09   Mean   :6.188   Mean   :230.7  
##                     3rd Qu.:22.80   3rd Qu.:8.000   3rd Qu.:326.0  
##                     Max.   :33.90   Max.   :8.000   Max.   :472.0  
##        hp             drat             wt             qsec      
##  Min.   : 52.0   Min.   :2.760   Min.   :1.513   Min.   :14.50  
##  1st Qu.: 96.5   1st Qu.:3.080   1st Qu.:2.581   1st Qu.:16.89  
##  Median :123.0   Median :3.695   Median :3.325   Median :17.71  
##  Mean   :146.7   Mean   :3.597   Mean   :3.217   Mean   :17.85  
##  3rd Qu.:180.0   3rd Qu.:3.920   3rd Qu.:3.610   3rd Qu.:18.90  
##  Max.   :335.0   Max.   :4.930   Max.   :5.424   Max.   :22.90  
##        vs               am              gear            carb      
##  Min.   :0.0000   Min.   :0.0000   Min.   :3.000   Min.   :1.000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:3.000   1st Qu.:2.000  
##  Median :0.0000   Median :0.0000   Median :4.000   Median :2.000  
##  Mean   :0.4375   Mean   :0.4062   Mean   :3.688   Mean   :2.812  
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :5.000   Max.   :8.000
# Recode the integer 0/1 `am` column as logical (0 -> FALSE, 1 -> TRUE),
# then show the structure of the converted column to confirm the new type.
df[["am"]] <- as.logical(df[["am"]])
str(df[["am"]])
##  logi [1:32] TRUE TRUE TRUE FALSE FALSE FALSE ...
# Scatter plot: horsepower on the x-axis, miles per gallon on the y-axis
plot(
  x = df[["hp"]],
  y = df[["mpg"]],
  main = "Scatter Plot of hp vs. mpg",
  xlab = "Horsepower (hp)",
  ylab = "Miles per Gallon (mpg)"
)

# Count how many cars fall under each distinct value of `cyl`
cylinder_counts <- table(df[["cyl"]])

# Bar plot: one bar per cylinder count, bar height = number of cars
barplot(
  height = cylinder_counts,
  col = "skyblue",
  border = "black",
  main = "Distribution of Cars by Number of Cylinders",
  xlab = "Number of Cylinders",
  ylab = "Count"
)

# Histogram of the `mpg` column.
# A single number for `breaks` is only a suggested cell count: hist() places
# the breakpoints at "pretty" values, so the number of bars drawn may differ
# from 10. Change the value to request coarser or finer binning.
hist(
  x = df[["mpg"]],
  breaks = 10,
  col = "skyblue",
  border = "black",
  main = "Histogram of Miles per Gallon (mpg)",
  xlab = "Miles per Gallon (mpg)",
  ylab = "Frequency"
)