Homework no. 6

  1. Choose two methods from Multidimensional data: direct methods.

  2. Make visualizations.

  3. Prepare a presentation and upload it.

library(reshape2)
library(readr)
library(tidyverse)
library(DataExplorer)
library(treemap)
library(scales)
library(dplyr)
library(ggplot2)
library(philentropy)
library(hrbrthemes)
library(GGally)
library(viridis)
library(psych)
# Read the raw CSV; `cars` holds whatever read_csv() returns for this file.
cars <- read_csv("C:/Users/Tautvydas/Desktop/datavisualization/train.csv")

# Convert to a plain data.frame and keep only rows with no missing values.
data <- as.data.frame(cars)
is_complete_row <- complete.cases(data)
data <- data[is_complete_row, ]

# Preview the first rows of the cleaned data.
head(data)
##         ID Price Levy Manufacturer   Model Prod.year  Category Leatherinterior
## 1 45654403 13328 1399        LEXUS   RX450      2010      Jeep             Yes
## 2 44731507 16621 1018    CHEVROLET Equinox      2011      Jeep              No
## 3 45774419  8467    0        HONDA     FIT      2006 Hatchback              No
## 4 45769185  3607  862         FORD  Escape      2011      Jeep             Yes
## 5 45809263 11726  446        HONDA     FIT      2014 Hatchback             Yes
## 6 45802912 39493  891      HYUNDAI SantaFE      2016      Jeep             Yes
##   Fueltype Enginevolume Mileage Cylinders Gearboxtype Drivewheels  Doors
## 1   Hybrid          3.5  186005         6   Automatic         4x4 040May
## 2   Petrol            3  192000         6   Tiptronic         4x4 040May
## 3   Petrol          1.3  200000         4    Variator       Front 040May
## 4   Hybrid          2.5  168966         4   Automatic         4x4 040May
## 5   Petrol          1.3   91901         4   Automatic       Front 040May
## 6   Diesel            2  160931         4   Automatic       Front 040May
##             Wheel  Color Airbags
## 1       Leftwheel Silver      12
## 2       Leftwheel  Black       8
## 3 Right0handdrive  Black       2
## 4       Leftwheel  White       0
## 5       Leftwheel Silver       4
## 6       Leftwheel  White       4
# Keep cars produced in 2000 or later whose price lies strictly between
# 300 and 50000 (comma-separated filter() conditions are combined with AND).
newdata <- filter(data, Prod.year >= 2000, Price > 300, Price < 50000)

# Rows of `newdata` whose Manufacturer equals the given label.
rows_for_maker <- function(maker_label) {
  subset(newdata, Manufacturer == maker_label)
}

# One data frame per manufacturer of interest.
hyundai    <- rows_for_maker("HYUNDAI")
toyota     <- rows_for_maker("TOYOTA")
mercedez   <- rows_for_maker("MERCEDES0BENZ")
chevrolet  <- rows_for_maker("CHEVROLET")
ford       <- rows_for_maker("FORD")
bmw        <- rows_for_maker("BMW")
honda      <- rows_for_maker("HONDA")
lexus      <- rows_for_maker("LEXUS")
nissan     <- rows_for_maker("NISSAN")
volkswagen <- rows_for_maker("VOLKSWAGEN")

# Stack the per-manufacturer frames (in this order) and keep only the
# columns used by the plots below.
top10 <- rbind(
  hyundai, toyota, mercedez, chevrolet, ford,
  bmw, honda, lexus, nissan, volkswagen
) %>%
  select(
    Price, Levy, Manufacturer, Prod.year, Category, Leatherinterior,
    Fueltype, Mileage, Cylinders, Gearboxtype, Color, Airbags
  )
head(top10)
##    Price Levy Manufacturer Prod.year Category Leatherinterior Fueltype Mileage
## 6  39493  891      HYUNDAI      2016     Jeep             Yes   Diesel  160931
## 8    549  751      HYUNDAI      2013    Sedan             Yes   Petrol  216118
## 14  7683  810      HYUNDAI      2016    Sedan             Yes   Petrol  121840
## 15 28382  810      HYUNDAI      2016    Sedan             Yes   Petrol   54317
## 16   549 2386      HYUNDAI      2006    Sedan             Yes   Petrol  295059
## 18 18826  531      HYUNDAI      2012    Sedan             Yes   Petrol  112645
##    Cylinders Gearboxtype  Color Airbags
## 6          4   Automatic  White       4
## 8          4   Automatic   Grey      12
## 14         4   Automatic   Blue      12
## 15         4   Automatic  White       4
## 16         6   Automatic   Blue      12
## 18         4   Automatic Silver       4

MOSAIC PLOTS (TREEMAPS)

# Count rows per manufacturer in the cleaned data; as.data.frame() on the
# one-way table yields the columns `Var1` (label) and `Freq` (count).
mosaic1 <- as.data.frame(table(data[["Manufacturer"]]))

# Treemap with one rectangle per manufacturer, sized by its count.
treemap(
  mosaic1,
  title = "Mosaic of cars manufacturers by popularity",
  type = "index",
  index = "Var1",
  vSize = "Freq"
)

# Count rows per body category in the cleaned data; as.data.frame() on the
# one-way table yields the columns `Var1` (label) and `Freq` (count).
mosaic2 <- as.data.frame(table(data[["Category"]]))

# Treemap with one rectangle per category, sized by its count and coloured
# with the "Set1" palette.
treemap(
  mosaic2,
  title = "Mosaic of cars category by popularity",
  palette = "Set1",
  type = "index",
  index = "Var1",
  vSize = "Freq"
)

PARALLEL COORDINATES PLOT

# Four numeric axes (columns 1-4) plus Gearboxtype (column 5) for grouping.
coord <- top10 %>%
  select(Price, Levy, Prod.year, Airbags, Gearboxtype)

# Parallel coordinates plot: one line per car, grouped by gearbox type.
ggparcoord(
  coord,
  columns = 1:4,
  groupColumn = 5,
  alphaLines = 0.6,
  title = "Parallel Coordinate plot for cars dataset"
)

Matrix of pair plots

# Numeric columns to compare pairwise.
pairs <- top10 %>%
  select(Price, Levy, Prod.year, Cylinders, Airbags)

# Matrix of pairwise panels for the selected columns.
pairs.panels(
  pairs,
  main = "Pairs plot of cars dataset",
  method = "pearson",   # correlation method
  density = TRUE,       # show density plots
  ellipses = TRUE,      # show correlation ellipses
  hist.col = "#00AFBB"
)