1/13/2022

1.Load the library

library(tidyverse)
  ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
  ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
  ✓ tibble  3.1.3     ✓ dplyr   1.0.7
  ✓ tidyr   1.1.3     ✓ stringr 1.4.0
  ✓ readr   2.0.0     ✓ forcats 0.5.1
  ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
  x dplyr::filter() masks stats::filter()
  x dplyr::lag()    masks stats::lag()

2.Load the dataset

# Read the smartphone CSV (column-type messages suppressed) and print a
# compact, column-by-column overview of the resulting tibble.
file <- "5G Smartphone Available in India.csv"
smartphone5g <- read_csv(file = file, show_col_types = FALSE)
glimpse(smartphone5g)
  Rows: 183
  Columns: 15
  $ `product name`                <chr> "Vivo V23 5G", "Vivo V23 Pro 5G", "OnePl…
  $ `processor price`             <chr> "MediaTek Dimensity 920 MT6877T Processo…
  $ `camera specs rear`           <chr> "64 + 8 + 2 MP Triple Rear Camera", "108…
  $ `camera specs front`          <chr> "50+8 MP Dual Front Camera", "50+8 MP Du…
  $ `display size`                <chr> "6.44 inches AMOLED Display", "6.56 inch…
  $ `ram of phone`                <chr> "8 GB RAM", "8 GB RAM", "6 GB RAM", "6 G…
  $ storage                       <chr> "128 GB Storage", "128 GB Storage", "128…
  $ battery                       <chr> "4200 mAh Battery", "4300 mAh Battery", …
  $ `android version`             <chr> "Android v12 OS", "Android v12 OS", "And…
  $ `score by smartprice`         <dbl> NA, NA, 8.0, 7.8, NA, 7.5, 8.0, 7.3, 8.3…
  $ `first site`                  <chr> NA, NA, NA, "Amazon", NA, "Amazon", "Ama…
  $ `price in first site`         <dbl> NA, NA, NA, 16999, NA, 29390, 24999, 179…
  $ `second site`                 <chr> NA, NA, NA, "Flipkart", NA, "Flipkart", …
  $ `price in second site`        <dbl> NA, NA, NA, 18885, NA, 29990, 25999, NA,…
  $ `Price of product(available)` <dbl> 29990, 38990, 27999, 16999, 26999, 29390…

3.Check for missing values in the dataset

# Count the missing values in every column; vapply() pins the result to one
# integer per column.
vapply(smartphone5g, function(column) sum(is.na(column)), integer(1))
                 product name             processor price 
                            0                           0 
            camera specs rear          camera specs front 
                            0                           0 
                 display size                ram of phone 
                            0                           0 
                      storage                     battery 
                            0                           0 
              android version         score by smartprice 
                            0                          29 
                   first site         price in first site 
                           56                          56 
                  second site        price in second site 
                           92                          92 
  Price of product(available) 
                            0

4.Summarize the score column to check the average score

# Five-number summary, mean, and NA count of the score column.
smartphone5g %>%
  pull(`score by smartprice`) %>%
  summary()
     Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
    7.000   7.800   8.000   7.973   8.200   8.800      29

The data is sourced from the websites below

# List the distinct (non-missing) values of the `first site` column.
names(table(smartphone5g$`first site`))
  [1] "Amazon"   "Flipkart" "Tatacliq"

5.Replace missing values in the score column with the average score

# Fill missing scores with the mean of the observed scores. The mean is
# computed from the data (ignoring NAs) rather than hard-coded, so the
# imputed value is not rounded and stays correct if the dataset changes.
smartphone5g <- smartphone5g %>%
  mutate(
    `score by smartprice` = coalesce(
      `score by smartprice`,
      mean(`score by smartprice`, na.rm = TRUE)
    )
  )

6.Convert columns with character to factors

# Convert every character column to a factor. across(where(...)) replaces the
# superseded mutate_if() and performs the same column-wise conversion.
smartphone5g <- smartphone5g %>%
  mutate(across(where(is.character), as.factor))

7.Cut prices into 10,000 INR intervals

# Bin the available price into 10,000-wide intervals between 10,000 and
# 200,000. cut() defaults apply: intervals are right-closed, so a price of
# exactly 10,000 or below, or above 200,000, is assigned NA.
smartphone5g[["price_range"]] <- cut(
  x = smartphone5g[["Price of product(available)"]],
  breaks = seq(from = 1e4, to = 2e5, by = 1e4)
)

8.Plot01_Does the higher price mean the better smartphone?

9.Plot02_5G Smartphone Price Overview by Storage and RAM in India

10.Plot03_5G Smartphone Average Price by OS/RAM/Storage in India

  • Extract brand from product name
# Show the first rows of the product-name column.
head(select(smartphone5g, `product name`))
  # A tibble: 6 × 1
    `product name`          
    <fct>                   
  1 Vivo V23 5G             
  2 Vivo V23 Pro 5G         
  3 OnePlus Nord 2          
  4 Xiaomi Redmi Note 11T 5G
  5 Xiaomi 11i HyperCharge  
  6 OPPO Reno 6
# Take the first space-delimited token of each product name as the brand.
# vapply() replaces sapply() so the result is always a character vector with
# one element per row; the split uses a literal space (fixed = TRUE).
smartphone5g$brand <- vapply(
  strsplit(as.character(smartphone5g$`product name`), " ", fixed = TRUE),
  `[[`,
  character(1),
  1
)
  • Lowercase the brands
# Convert every brand name to lower case, then show the first five values.
smartphone5g <- smartphone5g %>%
  mutate(brand = tolower(brand))
smartphone5g$brand[seq_len(5)]
  [1] "vivo"    "vivo"    "oneplus" "xiaomi"  "xiaomi"

11.Plot04_5G Smartphone Brand Overview in India

12.Extract brand information from processor column

# Show the first values of the processor column.
head(smartphone5g$`processor price`)
  [1] MediaTek Dimensity 920 MT6877T Processor
  [2] MediaTek Dimensity 1200 MT6893 Processor
  [3] MediaTek Dimensity 1200 MT6893 Processor
  [4] MediaTek Dimensity 810 MT6833 Processor 
  [5] MediaTek Dimensity 920 MT6877T Processor
  [6] MediaTek Dimensity 900 MT6877 Processor 
  30 Levels: Apple A14 Bionic Processor ... Samsung Exynos 9 Octa 990 Processor
# Take the first space-delimited token of each processor description as the
# processor brand. vapply() replaces sapply() so the result is always a
# character vector with one element per row; the split uses a literal space.
smartphone5g$processor_brand <- vapply(
  strsplit(as.character(smartphone5g$`processor price`), " ", fixed = TRUE),
  `[[`,
  character(1),
  1
)
head(smartphone5g$processor_brand)
  [1] "MediaTek" "MediaTek" "MediaTek" "MediaTek" "MediaTek" "MediaTek"

13.Plot05_5G Smartphone Processor Brand Overview in India