Choose two methods from the topic "Multidimensional data: direct methods".
Create visualizations using the chosen methods.
Prepare a presentation and upload it.
library(reshape2)
library(readr)
library(tidyverse)
library(DataExplorer)
library(treemap)
library(scales)
library(dplyr)
library(ggplot2)
library(philentropy)
library(hrbrthemes)
library(GGally)
library(viridis)
library(psych)
# Load the cars data set.
# The CSV location can be overridden with the CARS_CSV environment variable so
# the script is not tied to a single machine; when the variable is unset the
# original hard-coded location is used.
cars_csv <- Sys.getenv(
  "CARS_CSV",
  unset = "C:/Users/Tautvydas/Desktop/datavisualization/train.csv"
)
cars <- read_csv(cars_csv)

# Work on a plain data.frame and keep only the rows that have no missing
# value in any column.
data <- as.data.frame(cars)
data <- data[complete.cases(data), ]

# Preview the first rows of the cleaned data.
head(data)
## ID Price Levy Manufacturer Model Prod.year Category Leatherinterior
## 1 45654403 13328 1399 LEXUS RX450 2010 Jeep Yes
## 2 44731507 16621 1018 CHEVROLET Equinox 2011 Jeep No
## 3 45774419 8467 0 HONDA FIT 2006 Hatchback No
## 4 45769185 3607 862 FORD Escape 2011 Jeep Yes
## 5 45809263 11726 446 HONDA FIT 2014 Hatchback Yes
## 6 45802912 39493 891 HYUNDAI SantaFE 2016 Jeep Yes
## Fueltype Enginevolume Mileage Cylinders Gearboxtype Drivewheels Doors
## 1 Hybrid 3.5 186005 6 Automatic 4x4 040May
## 2 Petrol 3 192000 6 Tiptronic 4x4 040May
## 3 Petrol 1.3 200000 4 Variator Front 040May
## 4 Hybrid 2.5 168966 4 Automatic 4x4 040May
## 5 Petrol 1.3 91901 4 Automatic Front 040May
## 6 Diesel 2 160931 4 Automatic Front 040May
## Wheel Color Airbags
## 1 Leftwheel Silver 12
## 2 Leftwheel Black 8
## 3 Right0handdrive Black 2
## 4 Leftwheel White 0
## 5 Leftwheel Silver 4
## 6 Leftwheel White 4
# Keep cars produced in 2000 or later whose price is strictly between 300 and
# 50000. filter() combines comma-separated conditions with a logical AND.
newdata <- filter(data, Prod.year >= 2000, Price > 300, Price < 50000)

# One data frame per selected manufacturer. which() keeps only the rows where
# the comparison is TRUE, and row names of `newdata` are carried over.
hyundai <- newdata[which(newdata$Manufacturer == "HYUNDAI"), ]
toyota <- newdata[which(newdata$Manufacturer == "TOYOTA"), ]
mercedez <- newdata[which(newdata$Manufacturer == "MERCEDES0BENZ"), ]
chevrolet <- newdata[which(newdata$Manufacturer == "CHEVROLET"), ]
ford <- newdata[which(newdata$Manufacturer == "FORD"), ]
bmw <- newdata[which(newdata$Manufacturer == "BMW"), ]
honda <- newdata[which(newdata$Manufacturer == "HONDA"), ]
lexus <- newdata[which(newdata$Manufacturer == "LEXUS"), ]
nissan <- newdata[which(newdata$Manufacturer == "NISSAN"), ]
volkswagen <- newdata[which(newdata$Manufacturer == "VOLKSWAGEN"), ]

# Stack the ten per-manufacturer frames in this fixed order, then keep only
# the columns used by the plots further down.
top10 <- rbind(
  hyundai, toyota, mercedez, chevrolet, ford,
  bmw, honda, lexus, nissan, volkswagen
) %>%
  select(
    Price, Levy, Manufacturer, Prod.year, Category, Leatherinterior,
    Fueltype, Mileage, Cylinders, Gearboxtype, Color, Airbags
  )

# Preview the first rows of the combined data.
head(top10)
## Price Levy Manufacturer Prod.year Category Leatherinterior Fueltype Mileage
## 6 39493 891 HYUNDAI 2016 Jeep Yes Diesel 160931
## 8 549 751 HYUNDAI 2013 Sedan Yes Petrol 216118
## 14 7683 810 HYUNDAI 2016 Sedan Yes Petrol 121840
## 15 28382 810 HYUNDAI 2016 Sedan Yes Petrol 54317
## 16 549 2386 HYUNDAI 2006 Sedan Yes Petrol 295059
## 18 18826 531 HYUNDAI 2012 Sedan Yes Petrol 112645
## Cylinders Gearboxtype Color Airbags
## 6 4 Automatic White 4
## 8 4 Automatic Grey 12
## 14 4 Automatic Blue 12
## 15 4 Automatic White 4
## 16 6 Automatic Blue 12
## 18 4 Automatic Silver 4
# Count the rows of `data` per manufacturer. The resulting frame has the
# columns Var1 (manufacturer) and Freq (number of rows).
mosaic1 <- as.data.frame(table(data[["Manufacturer"]]))

# Treemap with one tile per manufacturer; tile area follows the row count.
treemap(
  dtf = mosaic1,
  index = "Var1",
  vSize = "Freq",
  type = "index",
  title = "Mosaic of cars manufacturers by popularity"
)
# Count the rows of `data` per body category. The resulting frame has the
# columns Var1 (category) and Freq (number of rows).
mosaic2 <- as.data.frame(table(data[["Category"]]))

# Treemap with one tile per category, coloured with the "Set1" palette;
# tile area follows the row count.
treemap(
  dtf = mosaic2,
  index = "Var1",
  vSize = "Freq",
  type = "index",
  palette = "Set1",
  title = "Mosaic of cars category by popularity"
)
# Columns for the parallel-coordinate plot: four numeric axes followed by the
# gearbox type, which is used only for grouping (colouring) the lines.
coord <- top10 %>%
  select(Price, Levy, Prod.year, Airbags, Gearboxtype)

# Columns 1-4 become the axes; column 5 (Gearboxtype) defines the groups.
ggparcoord(
  data = coord,
  columns = 1:4,
  groupColumn = 5,
  alphaLines = 0.6,
  title = "Parallel Coordinate plot for cars dataset"
)
# Numeric columns shown in the pairs plot.
# Note: this object shares its name with graphics::pairs(); function calls
# still resolve to the function because R skips non-function objects when
# looking up a name in call position.
pairs <- top10 %>%
  select(Price, Levy, Prod.year, Cylinders, Airbags)

# Scatterplot matrix with Pearson correlations, density overlays on the
# histograms and correlation ellipses.
pairs.panels(
  x = pairs,
  method = "pearson",
  density = TRUE,
  ellipses = TRUE,
  hist.col = "#00AFBB",
  main = "Pairs plot of cars dataset"
)