## Univariate Analysis/Descriptive Analysis
##Univariate/Multivariate
##Univariate: When we use one variable to describe a person, place, or thing. (e.g. Heights of individuals)
#Multivariate: When we use two or more variables to measure a person, place or thing. Variables may or may not be dependent on each other. (Bivariate, e.g. name and marks; multivariate, e.g. name, caste and marks)
#Cross-sectional data/Time-ordered data (business, social sciences)
#Cross-Sectional: Measurements taken at one time period
#E.g. (caste and per capita incomes)
#Time-Ordered: Measurements taken over time in chronological sequence. e.g. years and per capita income
##The type of data will dictate (in part) the appropriate data-analysis method.
## Show the current working directory
getwd()
## [1] "D:/R Working Directory"
## Switch the working directory to the folder the CSV is read from
## NOTE(review): the absolute path ties the script to one machine -- confirm it exists before running
setwd("D:/R Working Directory")
## Load the CSV: the first row holds the column names, text columns stay character
sampledata <- read.csv("used cars.csv", header = TRUE, stringsAsFactors = FALSE)
## Print the complete data set
print(sampledata)
## year model price mileage color transmission
## 1 2011 SEL 21992 7413 Yellow AUTO
## 2 2011 SEL 20995 10926 Gray AUTO
## 3 2011 SEL 19995 7351 Silver AUTO
## 4 2011 SEL 17809 11613 Gray AUTO
## 5 2012 SE 17500 8367 White AUTO
## 6 2010 SEL 17495 25125 Silver AUTO
## 7 2011 SEL 17000 27393 Blue AUTO
## 8 2010 SEL 16995 21026 Silver AUTO
## 9 2011 SES 16995 32655 Silver AUTO
## 10 2010 SES 16995 36116 Silver AUTO
## 11 2010 SES 16995 40539 Black AUTO
## 12 2011 SES 16992 9199 Silver AUTO
## 13 2011 SEL 16950 9388 Green AUTO
## 14 2010 SES 16950 32058 Red AUTO
## 15 2011 SE 16000 15367 White AUTO
## 16 2011 SES 15999 16368 Blue AUTO
## 17 2010 SEL 15999 19926 Silver AUTO
## 18 2010 SES 15995 36049 Silver AUTO
## 19 2011 SEL 15992 11662 Blue AUTO
## 20 2011 SEL 15992 32069 Silver AUTO
## 21 2010 SES 15988 16035 Silver MANUAL
## 22 2010 SEL 15980 39943 White AUTO
## 23 2011 SE 15899 36685 Silver AUTO
## 24 2010 SEL 15889 24920 Black AUTO
## 25 2009 SEL 15688 20019 Blue AUTO
## 26 2010 SE 15500 29338 Blue AUTO
## 27 2010 SE 15499 7784 Black AUTO
## 28 2010 SE 15499 35636 Black AUTO
## 29 2010 SES 15298 22029 Gray AUTO
## 30 2009 SEL 14999 33107 Silver AUTO
## 31 2010 SES 14999 36306 Red AUTO
## 32 2009 SE 14995 34419 Black MANUAL
## 33 2011 SE 14992 4867 Black AUTO
## 34 2011 SEL 14992 18948 Black AUTO
## 35 2009 SES 14992 24030 Red AUTO
## 36 2010 SEL 14990 33036 Black AUTO
## 37 2011 SE 14989 23967 White AUTO
## 38 2010 SE 14906 37905 Silver AUTO
## 39 2010 SE 14900 28955 White AUTO
## 40 2010 SE 14893 11165 White AUTO
## 41 2010 SES 14761 44813 Black AUTO
## 42 2010 SES 14699 36469 Gray AUTO
## 43 2010 SES 14677 22143 Black MANUAL
## 44 2009 SES 14549 34046 Silver AUTO
## 45 2010 SE 14499 32703 Red AUTO
## 46 2010 SES 14495 35894 Silver AUTO
## 47 2010 SE 14495 38275 Black AUTO
## 48 2010 SE 14480 24855 Blue AUTO
## 49 2009 SEL 14477 29501 Gray MANUAL
## 50 2009 SEL 14355 35394 Red AUTO
## 51 2010 SE 14299 36447 Black AUTO
## 52 2010 SES 14275 35318 Black AUTO
## 53 2010 SES 14000 24929 Silver AUTO
## 54 2009 SE 13999 23785 Red AUTO
## 55 2010 SE 13997 15167 Black MANUAL
## 56 2010 SE 13995 13541 Silver AUTO
## 57 2010 SE 13995 20278 Black MANUAL
## 58 2009 SES 13995 46126 Black AUTO
## 59 2009 SES 13995 53733 Silver AUTO
## 60 2009 SES 13992 21108 Silver AUTO
## 61 2010 SE 13992 21721 Green AUTO
## 62 2010 SES 13992 26716 Gray MANUAL
## 63 2009 SES 13992 26887 Black AUTO
## 64 2009 SE 13991 36252 Silver MANUAL
## 65 2009 SE 13950 9450 Black AUTO
## 66 2010 SE 13950 31414 Black AUTO
## 67 2010 SE 13950 37185 Blue AUTO
## 68 2010 SE 13895 48174 Gray AUTO
## 69 2009 SE 13888 50533 White AUTO
## 70 2009 SE 13845 36713 Blue AUTO
## 71 2009 SES 13799 34888 Black AUTO
## 72 2009 SES 13742 38380 Black AUTO
## 73 2010 SEL 13687 35574 Gray AUTO
## 74 2009 SEL 13663 27528 Silver AUTO
## 75 2010 SES 13599 33302 Red AUTO
## 76 2009 SEL 13584 43369 Red AUTO
## 77 2009 SES 13425 64055 Black AUTO
## 78 2010 SE 13384 41342 Gray AUTO
## 79 2010 SE 13383 34503 Black AUTO
## 80 2010 SE 13350 16573 Blue AUTO
## 81 2009 SES 12999 32403 Blue AUTO
## 82 2009 SE 12998 34846 Blue AUTO
## 83 2007 SE 12997 39665 Red AUTO
## 84 2010 SE 12995 21325 Black AUTO
## 85 2010 SE 12995 32743 Black MANUAL
## 86 2010 SE 12995 40058 White MANUAL
## 87 2009 SE 12995 42325 Blue AUTO
## 88 2009 SE 12995 44518 Red AUTO
## 89 2009 SE 12995 53902 Gray AUTO
## 90 2008 SE 12995 127327 Red AUTO
## 91 2009 SE 12992 27136 Gray AUTO
## 92 2009 SES 12990 45813 Silver AUTO
## 93 2009 SE 12988 31538 Gray AUTO
## 94 2010 SE 12849 29517 Silver AUTO
## 95 2010 SE 12780 35871 Black AUTO
## 96 2008 SE 12777 49787 Black MANUAL
## 97 2008 SES 12704 36323 Blue AUTO
## 98 2009 SES 12595 39211 Blue AUTO
## 99 2009 SE 12507 44789 Gray AUTO
## 100 2008 SE 12500 45996 White MANUAL
## 101 2009 SE 12500 54988 White MANUAL
## 102 2009 SE 12280 29288 Red AUTO
## 103 2009 SE 11999 36124 Blue AUTO
## 104 2009 SE 11992 32559 Black MANUAL
## 105 2009 SES 11984 59048 Black AUTO
## 106 2009 SE 11980 55170 Red AUTO
## 107 2010 SE 11792 39722 Green AUTO
## 108 2008 SE 11754 38286 Black AUTO
## 109 2008 SES 11749 57341 Red AUTO
## 110 2008 SES 11495 82221 Silver AUTO
## 111 2008 SE 11450 85229 Red MANUAL
## 112 2009 SES 10995 42834 Red AUTO
## 113 2005 SES 10995 69415 Blue AUTO
## 114 2009 SEL 10995 78264 Gray AUTO
## 115 2009 SE 10979 60709 Red AUTO
## 116 2008 SE 10955 39643 Gray AUTO
## 117 2009 SE 10955 40180 Gold AUTO
## 118 2008 SE 10836 40330 Green MANUAL
## 119 2007 SES 10815 77231 Red AUTO
## 120 2007 SE 10770 72937 Silver MANUAL
## 121 2010 SE 10717 64199 Black AUTO
## 122 2007 SES 10000 63926 Red AUTO
## 123 2007 SES 9999 74427 Silver AUTO
## 124 2007 SES 9999 78948 Black MANUAL
## 125 2006 SE 9995 51311 Silver AUTO
## 126 2008 SE 9995 95364 White AUTO
## 127 2008 SE 9992 74109 White AUTO
## 128 2007 SE 9651 63296 Blue AUTO
## 129 2007 SES 9000 80605 Red AUTO
## 130 2006 SE 8999 49656 Silver AUTO
## 131 2007 SE 8996 48652 Silver MANUAL
## 132 2006 SE 8800 71331 White AUTO
## 133 2008 SE 8495 106171 Black AUTO
## 134 2008 SE 8494 68901 Silver AUTO
## 135 2009 SE 8480 70036 White MANUAL
## 136 2007 SES 7999 81596 Yellow MANUAL
## 137 2006 SES 7995 35000 Black MANUAL
## 138 2006 SES 7995 97987 Red AUTO
## 139 2003 SES 7900 96000 White AUTO
## 140 2005 SES 7488 59013 Red AUTO
## 141 2004 SE 6999 105714 Silver AUTO
## 142 2007 SE 6995 86862 White AUTO
## 143 2000 SE 6980 60161 Green AUTO
## 144 2004 SES 6980 101130 Gray AUTO
## 145 2004 SES 6950 119720 Black AUTO
## 146 2006 SES 6200 95000 Silver AUTO
## 147 2002 SE 5995 87003 Red AUTO
## 148 2000 SE 5980 96841 Red AUTO
## 149 2001 SE 4899 151479 Yellow AUTO
## 150 2000 SE 3800 109259 Red AUTO
## Compact overview of the data frame: one line per column with its type
str(sampledata)
## 'data.frame': 150 obs. of 6 variables:
## $ year : int 2011 2011 2011 2011 2012 2010 2011 2010 2011 2010 ...
## $ model : chr "SEL" "SEL" "SEL" "SEL" ...
## $ price : int 21992 20995 19995 17809 17500 17495 17000 16995 16995 16995 ...
## $ mileage : int 7413 10926 7351 11613 8367 25125 27393 21026 32655 36116 ...
## $ color : chr "Yellow" "Gray" "Silver" "Gray" ...
## $ transmission: chr "AUTO" "AUTO" "AUTO" "AUTO" ...
## First six rows of the data (six is the default for head())
head(sampledata, n = 6L)
## year model price mileage color transmission
## 1 2011 SEL 21992 7413 Yellow AUTO
## 2 2011 SEL 20995 10926 Gray AUTO
## 3 2011 SEL 19995 7351 Silver AUTO
## 4 2011 SEL 17809 11613 Gray AUTO
## 5 2012 SE 17500 8367 White AUTO
## 6 2010 SEL 17495 25125 Silver AUTO
## Pull out a single column to check whether it is stored as a factor or as character
sampledata[["transmission"]]
## [1] "AUTO" "AUTO" "AUTO" "AUTO" "AUTO" "AUTO" "AUTO"
## [8] "AUTO" "AUTO" "AUTO" "AUTO" "AUTO" "AUTO" "AUTO"
## [15] "AUTO" "AUTO" "AUTO" "AUTO" "AUTO" "AUTO" "MANUAL"
## [22] "AUTO" "AUTO" "AUTO" "AUTO" "AUTO" "AUTO" "AUTO"
## [29] "AUTO" "AUTO" "AUTO" "MANUAL" "AUTO" "AUTO" "AUTO"
## [36] "AUTO" "AUTO" "AUTO" "AUTO" "AUTO" "AUTO" "AUTO"
## [43] "MANUAL" "AUTO" "AUTO" "AUTO" "AUTO" "AUTO" "MANUAL"
## [50] "AUTO" "AUTO" "AUTO" "AUTO" "AUTO" "MANUAL" "AUTO"
## [57] "MANUAL" "AUTO" "AUTO" "AUTO" "AUTO" "MANUAL" "AUTO"
## [64] "MANUAL" "AUTO" "AUTO" "AUTO" "AUTO" "AUTO" "AUTO"
## [71] "AUTO" "AUTO" "AUTO" "AUTO" "AUTO" "AUTO" "AUTO"
## [78] "AUTO" "AUTO" "AUTO" "AUTO" "AUTO" "AUTO" "AUTO"
## [85] "MANUAL" "MANUAL" "AUTO" "AUTO" "AUTO" "AUTO" "AUTO"
## [92] "AUTO" "AUTO" "AUTO" "AUTO" "MANUAL" "AUTO" "AUTO"
## [99] "AUTO" "MANUAL" "MANUAL" "AUTO" "AUTO" "MANUAL" "AUTO"
## [106] "AUTO" "AUTO" "AUTO" "AUTO" "AUTO" "MANUAL" "AUTO"
## [113] "AUTO" "AUTO" "AUTO" "AUTO" "AUTO" "MANUAL" "AUTO"
## [120] "MANUAL" "AUTO" "AUTO" "AUTO" "MANUAL" "AUTO" "AUTO"
## [127] "AUTO" "AUTO" "AUTO" "AUTO" "MANUAL" "AUTO" "AUTO"
## [134] "AUTO" "MANUAL" "MANUAL" "MANUAL" "AUTO" "AUTO" "AUTO"
## [141] "AUTO" "AUTO" "AUTO" "AUTO" "AUTO" "AUTO" "AUTO"
## [148] "AUTO" "AUTO" "AUTO"
## Convert the transmission column from character to factor with factor()
sampledata[["transmission"]] <- factor(sampledata[["transmission"]])
head(sampledata, n = 6L)
## year model price mileage color transmission
## 1 2011 SEL 21992 7413 Yellow AUTO
## 2 2011 SEL 20995 10926 Gray AUTO
## 3 2011 SEL 19995 7351 Silver AUTO
## 4 2011 SEL 17809 11613 Gray AUTO
## 5 2012 SE 17500 8367 White AUTO
## 6 2010 SEL 17495 25125 Silver AUTO
str(sampledata)
## 'data.frame': 150 obs. of 6 variables:
## $ year : int 2011 2011 2011 2011 2012 2010 2011 2010 2011 2010 ...
## $ model : chr "SEL" "SEL" "SEL" "SEL" ...
## $ price : int 21992 20995 19995 17809 17500 17495 17000 16995 16995 16995 ...
## $ mileage : int 7413 10926 7351 11613 8367 25125 27393 21026 32655 36116 ...
## $ color : chr "Yellow" "Gray" "Silver" "Gray" ...
## $ transmission: Factor w/ 2 levels "AUTO","MANUAL": 1 1 1 1 1 1 1 1 1 1 ...
## Show the first rows and the structure of the data frame once more
head(sampledata, n = 6L)
## year model price mileage color transmission
## 1 2011 SEL 21992 7413 Yellow AUTO
## 2 2011 SEL 20995 10926 Gray AUTO
## 3 2011 SEL 19995 7351 Silver AUTO
## 4 2011 SEL 17809 11613 Gray AUTO
## 5 2012 SE 17500 8367 White AUTO
## 6 2010 SEL 17495 25125 Silver AUTO
str(sampledata)
## 'data.frame': 150 obs. of 6 variables:
## $ year : int 2011 2011 2011 2011 2012 2010 2011 2010 2011 2010 ...
## $ model : chr "SEL" "SEL" "SEL" "SEL" ...
## $ price : int 21992 20995 19995 17809 17500 17495 17000 16995 16995 16995 ...
## $ mileage : int 7413 10926 7351 11613 8367 25125 27393 21026 32655 36116 ...
## $ color : chr "Yellow" "Gray" "Silver" "Gray" ...
## $ transmission: Factor w/ 2 levels "AUTO","MANUAL": 1 1 1 1 1 1 1 1 1 1 ...
## Frequency of each transmission type (AUTO vs MANUAL). The table is built
## once and reused for the proportions and both charts below.
transmission_counts <- table(sampledata$transmission)
transmission_counts
##
## AUTO MANUAL
## 128 22
## Number of observations in the column
length(sampledata$transmission)
## [1] 150
## Relative frequencies. prop.table() divides by the total of the table, so the
## shares sum to 1 even if the column contained missing values (table() drops
## NAs by default, whereas length() would still count them).
prop.table(transmission_counts)
##
## AUTO MANUAL
## 0.8533333 0.1466667
## Pie chart of the transmission counts
pie(transmission_counts)

## Bar plot of the transmission counts
barplot(transmission_counts)

## Univariate / descriptive statistics for mileage: mean, median, mode,
## variance, standard deviation, range, minimum and maximum
head(sampledata$mileage)
## [1] 7413 10926 7351 11613 8367 25125
mean(sampledata$mileage)
## [1] 44260.65
median(sampledata$mileage)
## [1] 36385
## Statistical mode: the most frequent value(s) of x.
## base::mode() is NOT the statistical mode -- it reports the storage mode of
## an object (it returned "numeric" here), so a small helper is used instead.
##   x: an atomic vector
##   returns: the value(s) of x with the highest count, in order of first
##            appearance; all tied values are returned; x itself if it is empty
stat_mode <- function(x) {
  if (length(x) == 0L) {
    return(x)
  }
  distinct_values <- unique(x)
  # match() maps every element to the index of its distinct value;
  # tabulate() then counts how often each index occurs.
  occurrences <- tabulate(match(x, distinct_values),
                          nbins = length(distinct_values))
  distinct_values[occurrences == max(occurrences)]
}
stat_mode(sampledata$mileage)
var(sampledata$mileage)
## [1] 728033954
sd(sampledata$mileage)
## [1] 26982.1
range(sampledata$mileage)
## [1] 4867 151479
min(sampledata$mileage)
## [1] 4867
max(sampledata$mileage)
## [1] 151479
## Distribution of mileage as a histogram
hist(sampledata$mileage)

## Box plot of mileage (vertical)
boxplot(sampledata$mileage)

## Same box plot drawn horizontally
boxplot(sampledata$mileage, horizontal = TRUE)
