library('tidyverse')
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 1.0.0
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.5.0
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
# Read the mtcars data set from a CSV file addressed by a path relative to
# the working directory; read.csv() returns it as a data frame.
carData <- read.csv("mtcars.csv")
# Preview the first five rows of the data frame.
head(carData, n = 5)
## X mpg cyl disp hp drat wt qsec vs am gear carb
## 1 Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## 2 Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## 3 Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## 4 Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## 5 Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
# Show the structure of the data frame: its dimensions plus the name, type
# and first few values of every column.
str(object = carData)
## 'data.frame': 32 obs. of 12 variables:
## $ X : chr "Mazda RX4" "Mazda RX4 Wag" "Datsun 710" "Hornet 4 Drive" ...
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ cyl : int 6 6 4 6 8 6 8 4 4 6 ...
## $ disp: num 160 160 108 258 360 ...
## $ hp : int 110 110 93 110 175 105 245 62 95 123 ...
## $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
## $ qsec: num 16.5 17 18.6 19.4 17 ...
## $ vs : int 0 0 1 1 0 1 0 1 1 1 ...
## $ am : int 1 1 1 0 0 0 0 0 0 0 ...
## $ gear: int 4 4 4 3 3 3 3 4 4 4 ...
## $ carb: int 4 4 1 1 2 1 4 2 2 4 ...
# List the column names of the data frame.
colnames(x = carData)
## [1] "X" "mpg" "cyl" "disp" "hp" "drat" "wt" "qsec" "vs" "am"
## [11] "gear" "carb"
# Report the size of the data frame: number of rows first, then number of
# columns.
nrow(x = carData)
## [1] 32
ncol(x = carData)
## [1] 12
# new code block
# mutate() adds new columns (features/variables) or modifies existing ones.
# Here a new column named cyltype is added; the single string is recycled,
# so every row receives the value "High".
mutate(carData, cyltype = "High")
## X mpg cyl disp hp drat wt qsec vs am gear carb
## 1 Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## 2 Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## 3 Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## 4 Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## 5 Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## 6 Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## 7 Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## 8 Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## 9 Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## 10 Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
## 11 Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
## 12 Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
## 13 Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
## 14 Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
## 15 Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
## 16 Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
## 17 Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
## 18 Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
## 19 Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
## 20 Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
## 21 Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
## 22 Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
## 23 AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
## 24 Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
## 25 Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
## 26 Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
## 27 Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
## 28 Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
## 29 Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
## 30 Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
## 31 Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
## 32 Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
## cyltype
## 1 High
## 2 High
## 3 High
## 4 High
## 5 High
## 6 High
## 7 High
## 8 High
## 9 High
## 10 High
## 11 High
## 12 High
## 13 High
## 14 High
## 15 High
## 16 High
## 17 High
## 18 High
## 19 High
## 20 High
## 21 High
## 22 High
## 23 High
## 24 High
## 25 High
## 26 High
## 27 High
## 28 High
## 29 High
## 30 High
## 31 High
## 32 High
# Derive cyltype from cyl: rows with more than 6 cylinders are labelled
# "High", all other rows "Low".
carData %>%
  mutate(cyltype = ifelse(cyl > 6, "High", "Low"))
## X mpg cyl disp hp drat wt qsec vs am gear carb
## 1 Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## 2 Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## 3 Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## 4 Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## 5 Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## 6 Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## 7 Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## 8 Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## 9 Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## 10 Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
## 11 Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
## 12 Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
## 13 Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
## 14 Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
## 15 Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
## 16 Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
## 17 Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
## 18 Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
## 19 Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
## 20 Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
## 21 Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
## 22 Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
## 23 AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
## 24 Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
## 25 Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
## 26 Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
## 27 Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
## 28 Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
## 29 Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
## 30 Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
## 31 Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
## 32 Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
## cyltype
## 1 Low
## 2 Low
## 3 Low
## 4 Low
## 5 High
## 6 Low
## 7 High
## 8 Low
## 9 Low
## 10 Low
## 11 Low
## 12 High
## 13 High
## 14 High
## 15 High
## 16 High
## 17 High
## 18 Low
## 19 Low
## 20 Low
## 21 Low
## 22 High
## 23 High
## 24 High
## 25 High
## 26 Low
## 27 Low
## 28 Low
## 29 High
## 30 Low
## 31 High
## 32 Low
# mutate() returns a modified copy. Because the results of the calls so far
# were only printed and never assigned, carData itself is unchanged and does
# not contain the newly added column; the change is only temporary.
# Add a new column called wtton, computed as 0.45 times wt.
# NOTE(review): 0.45 presumably approximates the conversion from 1000 lbs to
# tons (the plot label in this file describes wt as "Weight (1000 lbs)") --
# confirm the intended unit.
mutate(carData, wtton = 0.45 * wt)
## X mpg cyl disp hp drat wt qsec vs am gear carb
## 1 Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## 2 Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## 3 Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## 4 Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## 5 Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## 6 Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## 7 Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## 8 Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## 9 Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## 10 Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
## 11 Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
## 12 Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
## 13 Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
## 14 Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
## 15 Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
## 16 Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
## 17 Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
## 18 Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
## 19 Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
## 20 Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
## 21 Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
## 22 Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
## 23 AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
## 24 Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
## 25 Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
## 26 Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
## 27 Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
## 28 Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
## 29 Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
## 30 Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
## 31 Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
## 32 Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
## wtton
## 1 1.17900
## 2 1.29375
## 3 1.04400
## 4 1.44675
## 5 1.54800
## 6 1.55700
## 7 1.60650
## 8 1.43550
## 9 1.41750
## 10 1.54800
## 11 1.54800
## 12 1.83150
## 13 1.67850
## 14 1.70100
## 15 2.36250
## 16 2.44080
## 17 2.40525
## 18 0.99000
## 19 0.72675
## 20 0.82575
## 21 1.10925
## 22 1.58400
## 23 1.54575
## 24 1.72800
## 25 1.73025
## 26 0.87075
## 27 0.96300
## 28 0.68085
## 29 1.42650
## 30 1.24650
## 31 1.60650
## 32 1.25100
# Keep the derived columns by assigning the result of mutate() to a new data
# frame, carData.new; carData itself stays untouched.
carData.new <- carData %>%
  mutate(
    cyltype = ifelse(cyl > 6, "High", "Low"),
    wtton = 0.45 * wt
  )
# Inspect the structure of the result, which now includes cyltype and wtton.
str(object = carData.new)
## 'data.frame': 32 obs. of 14 variables:
## $ X : chr "Mazda RX4" "Mazda RX4 Wag" "Datsun 710" "Hornet 4 Drive" ...
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ cyl : int 6 6 4 6 8 6 8 4 4 6 ...
## $ disp : num 160 160 108 258 360 ...
## $ hp : int 110 110 93 110 175 105 245 62 95 123 ...
## $ drat : num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
## $ qsec : num 16.5 17 18.6 19.4 17 ...
## $ vs : int 0 0 1 1 0 1 0 1 1 1 ...
## $ am : int 1 1 1 0 0 0 0 0 0 0 ...
## $ gear : int 4 4 4 3 3 3 3 4 4 4 ...
## $ carb : int 4 4 1 1 2 1 4 2 2 4 ...
## $ cyltype: chr "Low" "Low" "Low" "Low" ...
## $ wtton : num 1.18 1.29 1.04 1.45 1.55 ...
# summarise() collapses all rows of the data into a single summary row.
# Example: the mean car weight. meanweight is the name given to the output
# column that holds mean(wtton).
carData.new %>%
  summarise(meanweight = mean(wtton))
## meanweight
## 1 1.447763
# Several summaries can be computed in one call. The second expression is
# left unnamed, so its output column is labelled with the expression text,
# `mean(disp)`.
summarise(carData.new, meanweight = mean(wtton), mean(disp))
## meanweight mean(disp)
## 1 1.447763 230.7219
# group_by() splits the rows into groups according to a feature, so that the
# verbs that follow operate on each group separately.
# Group the cars by cyltype, then compute the mean weight and the mean
# displacement within each group.
carData.new %>%
  group_by(cyltype) %>%
  summarise(meanweight = mean(wtton), mean(disp))
## # A tibble: 2 × 3
## cyltype meanweight `mean(disp)`
## <chr> <dbl> <dbl>
## 1 High 1.80 353.
## 2 Low 1.17 136.
# filter() keeps only the rows that satisfy the given condition(s).
# Keep cars whose wtton exceeds 2 and that have more than 4 cylinders;
# conditions separated by commas are combined with a logical AND.
carData.new %>%
  filter(wtton > 2, cyl > 4)
## X mpg cyl disp hp drat wt qsec vs am gear carb
## 1 Cadillac Fleetwood 10.4 8 472 205 2.93 5.250 17.98 0 0 3 4
## 2 Lincoln Continental 10.4 8 460 215 3.00 5.424 17.82 0 0 3 4
## 3 Chrysler Imperial 14.7 8 440 230 3.23 5.345 17.42 0 0 3 4
## cyltype wtton
## 1 High 2.36250
## 2 High 2.44080
## 3 High 2.40525
# select() keeps only the named columns (features).
# Keep just the wtton column.
select(carData.new, wtton)
## wtton
## 1 1.17900
## 2 1.29375
## 3 1.04400
## 4 1.44675
## 5 1.54800
## 6 1.55700
## 7 1.60650
## 8 1.43550
## 9 1.41750
## 10 1.54800
## 11 1.54800
## 12 1.83150
## 13 1.67850
## 14 1.70100
## 15 2.36250
## 16 2.44080
## 17 2.40525
## 18 0.99000
## 19 0.72675
## 20 0.82575
## 21 1.10925
## 22 1.58400
## 23 1.54575
## 24 1.72800
## 25 1.73025
## 26 0.87075
## 27 0.96300
## 28 0.68085
## 29 1.42650
## 30 1.24650
## 31 1.60650
## 32 1.25100
# A leading minus sign deselects: keep every column except wtton.
select(carData.new, -wtton)
## X mpg cyl disp hp drat wt qsec vs am gear carb
## 1 Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## 2 Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## 3 Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## 4 Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## 5 Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## 6 Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## 7 Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## 8 Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## 9 Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## 10 Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
## 11 Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
## 12 Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
## 13 Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
## 14 Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
## 15 Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
## 16 Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
## 17 Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
## 18 Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
## 19 Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
## 20 Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
## 21 Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
## 22 Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
## 23 AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
## 24 Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
## 25 Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
## 26 Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
## 27 Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
## 28 Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
## 29 Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
## 30 Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
## 31 Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
## 32 Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
## cyltype
## 1 Low
## 2 Low
## 3 Low
## 4 Low
## 5 High
## 6 Low
## 7 High
## 8 Low
## 9 Low
## 10 Low
## 11 Low
## 12 High
## 13 High
## 14 High
## 15 High
## 16 High
## 17 High
## 18 Low
## 19 Low
## 20 Low
## 21 Low
## 22 High
## 23 High
## 24 High
## 25 High
## 26 Low
## 27 Low
## 28 Low
## 29 High
## 30 Low
## 31 High
## 32 Low
# Select multiple columns at once: cyltype and wtton, in that order.
carData.new %>%
  select(c(cyltype, wtton))
## cyltype wtton
## 1 Low 1.17900
## 2 Low 1.29375
## 3 Low 1.04400
## 4 Low 1.44675
## 5 High 1.54800
## 6 Low 1.55700
## 7 High 1.60650
## 8 Low 1.43550
## 9 Low 1.41750
## 10 Low 1.54800
## 11 Low 1.54800
## 12 High 1.83150
## 13 High 1.67850
## 14 High 1.70100
## 15 High 2.36250
## 16 High 2.44080
## 17 High 2.40525
## 18 Low 0.99000
## 19 Low 0.72675
## 20 Low 0.82575
## 21 Low 1.10925
## 22 High 1.58400
## 23 High 1.54575
## 24 High 1.72800
## 25 High 1.73025
## 26 Low 0.87075
## 27 Low 0.96300
## 28 Low 0.68085
## 29 High 1.42650
## 30 Low 1.24650
## 31 High 1.60650
## 32 Low 1.25100
# new code block for visualizations
# Calling ggplot() with only a data frame binds the plot to that data. When
# printed it draws an empty canvas, because no aesthetics or layers have been
# specified yet.
ggplot(data = carData)
# Store the same empty plot in an object, carplot, so that components can be
# added to it step by step.
carplot <- ggplot(data = carData)
# aes() specifies the aesthetic mapping, i.e. which variables are plotted.
# Adding it to the stored plot sets the default mapping (wt on x, disp on y)
# instead of rebuilding the plot object from scratch.
carplot <- carplot + aes(x = wt, y = disp)
# geom_*() functions add geometric elements; geom_point() adds a scatter
# layer of points.
carplot <- carplot + geom_point()
# Add axis labels and a title.
carplot <- carplot + labs(
  x = "Weight (1000 lbs)",
  y = "Displacement (cu. in)",
  title = "Weight vs Displacement"
)
# Print the finished plot.
carplot