library(Hmisc)
## Loading required package: grid
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
## 
## Attaching package: 'Hmisc'
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, round.POSIXt, trunc.POSIXt, units
# Load the built-in mtcars data set and preview its first ten rows.
data("mtcars")
head(mtcars, n = 10L)
##                    mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
## Duster 360        14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
## Merc 240D         24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
## Merc 230          22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
## Merc 280          19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
# Show the last five rows of the data set.
tail(mtcars, n = 5L)
##                 mpg cyl  disp  hp drat    wt qsec vs am gear carb
## Lotus Europa   30.4   4  95.1 113 3.77 1.513 16.9  1  1    5    2
## Ford Pantera L 15.8   8 351.0 264 4.22 3.170 14.5  0  1    5    4
## Ferrari Dino   19.7   6 145.0 175 3.62 2.770 15.5  0  1    5    6
## Maserati Bora  15.0   8 301.0 335 3.54 3.570 14.6  0  1    5    8
## Volvo 142E     21.4   4 121.0 109 4.11 2.780 18.6  1  1    4    2
# Column names first, then a compact per-column overview (type + leading values).
colnames(mtcars)
##  [1] "mpg"  "cyl"  "disp" "hp"   "drat" "wt"   "qsec" "vs"   "am"   "gear"
## [11] "carb"
utils::str(mtcars)
## 'data.frame':    32 obs. of  11 variables:
##  $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
##  $ cyl : num  6 6 4 6 8 6 8 4 4 6 ...
##  $ disp: num  160 160 108 258 360 ...
##  $ hp  : num  110 110 93 110 175 105 245 62 95 123 ...
##  $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
##  $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
##  $ qsec: num  16.5 17 18.6 19.4 17 ...
##  $ vs  : num  0 0 1 1 0 1 0 1 1 1 ...
##  $ am  : num  1 1 1 0 0 0 0 0 0 0 ...
##  $ gear: num  4 4 4 3 3 3 3 4 4 4 ...
##  $ carb: num  4 4 1 1 2 1 4 2 2 4 ...
# Data-frame indexing is [row, column]; an empty slot selects everything.
# First row, every column:
mtcars[1, ]
##           mpg cyl disp  hp drat   wt  qsec vs am gear carb
## Mazda RX4  21   6  160 110  3.9 2.62 16.46  0  1    4    4
# Every row of the second column (returned as a plain vector):
mtcars[, 2]
##  [1] 6 6 4 6 8 6 8 4 4 6 6 8 8 8 8 8 8 4 4 4 4 8 8 8 8 4 4 4 8 6 8 4
# Single cell: row 2, column 3.
mtcars[2, 3]
## [1] 160
# The same second column, this time addressed by name.
mtcars[["cyl"]]
##  [1] 6 6 4 6 8 6 8 4 4 6 6 8 8 8 8 8 8 4 4 4 4 8 8 8 8 4 4 4 8 6 8 4
# mtcars stores the car model as its row names.
rownames(mtcars)
##  [1] "Mazda RX4"           "Mazda RX4 Wag"       "Datsun 710"         
##  [4] "Hornet 4 Drive"      "Hornet Sportabout"   "Valiant"            
##  [7] "Duster 360"          "Merc 240D"           "Merc 230"           
## [10] "Merc 280"            "Merc 280C"           "Merc 450SE"         
## [13] "Merc 450SL"          "Merc 450SLC"         "Cadillac Fleetwood" 
## [16] "Lincoln Continental" "Chrysler Imperial"   "Fiat 128"           
## [19] "Honda Civic"         "Toyota Corolla"      "Toyota Corona"      
## [22] "Dodge Challenger"    "AMC Javelin"         "Camaro Z28"         
## [25] "Pontiac Firebird"    "Fiat X1-9"           "Porsche 914-2"      
## [28] "Lotus Europa"        "Ford Pantera L"      "Ferrari Dino"       
## [31] "Maserati Bora"       "Volvo 142E"
# For contrast: iris has only the automatic row names "1", "2", ...
rownames(iris)
##   [1] "1"   "2"   "3"   "4"   "5"   "6"   "7"   "8"   "9"   "10"  "11" 
##  [12] "12"  "13"  "14"  "15"  "16"  "17"  "18"  "19"  "20"  "21"  "22" 
##  [23] "23"  "24"  "25"  "26"  "27"  "28"  "29"  "30"  "31"  "32"  "33" 
##  [34] "34"  "35"  "36"  "37"  "38"  "39"  "40"  "41"  "42"  "43"  "44" 
##  [45] "45"  "46"  "47"  "48"  "49"  "50"  "51"  "52"  "53"  "54"  "55" 
##  [56] "56"  "57"  "58"  "59"  "60"  "61"  "62"  "63"  "64"  "65"  "66" 
##  [67] "67"  "68"  "69"  "70"  "71"  "72"  "73"  "74"  "75"  "76"  "77" 
##  [78] "78"  "79"  "80"  "81"  "82"  "83"  "84"  "85"  "86"  "87"  "88" 
##  [89] "89"  "90"  "91"  "92"  "93"  "94"  "95"  "96"  "97"  "98"  "99" 
## [100] "100" "101" "102" "103" "104" "105" "106" "107" "108" "109" "110"
## [111] "111" "112" "113" "114" "115" "116" "117" "118" "119" "120" "121"
## [122] "122" "123" "124" "125" "126" "127" "128" "129" "130" "131" "132"
## [133] "133" "134" "135" "136" "137" "138" "139" "140" "141" "142" "143"
## [144] "144" "145" "146" "147" "148" "149" "150"
# Copy the row names into a regular character column called brand, then print
# the per-column summaries (the character column only reports length/class/mode).
mtcars[["brand"]] <- rownames(mtcars)

summary(mtcars)
##       mpg             cyl             disp             hp       
##  Min.   :10.40   Min.   :4.000   Min.   : 71.1   Min.   : 52.0  
##  1st Qu.:15.43   1st Qu.:4.000   1st Qu.:120.8   1st Qu.: 96.5  
##  Median :19.20   Median :6.000   Median :196.3   Median :123.0  
##  Mean   :20.09   Mean   :6.188   Mean   :230.7   Mean   :146.7  
##  3rd Qu.:22.80   3rd Qu.:8.000   3rd Qu.:326.0   3rd Qu.:180.0  
##  Max.   :33.90   Max.   :8.000   Max.   :472.0   Max.   :335.0  
##       drat             wt             qsec             vs        
##  Min.   :2.760   Min.   :1.513   Min.   :14.50   Min.   :0.0000  
##  1st Qu.:3.080   1st Qu.:2.581   1st Qu.:16.89   1st Qu.:0.0000  
##  Median :3.695   Median :3.325   Median :17.71   Median :0.0000  
##  Mean   :3.597   Mean   :3.217   Mean   :17.85   Mean   :0.4375  
##  3rd Qu.:3.920   3rd Qu.:3.610   3rd Qu.:18.90   3rd Qu.:1.0000  
##  Max.   :4.930   Max.   :5.424   Max.   :22.90   Max.   :1.0000  
##        am              gear            carb          brand          
##  Min.   :0.0000   Min.   :3.000   Min.   :1.000   Length:32         
##  1st Qu.:0.0000   1st Qu.:3.000   1st Qu.:2.000   Class :character  
##  Median :0.0000   Median :4.000   Median :2.000   Mode  :character  
##  Mean   :0.4062   Mean   :3.688   Mean   :2.812                     
##  3rd Qu.:1.0000   3rd Qu.:4.000   3rd Qu.:4.000                     
##  Max.   :1.0000   Max.   :5.000   Max.   :8.000
# Hmisc's describe(): per-variable n / missing / unique counts plus quantiles or
# frequency tables, as shown in the recorded output.
Hmisc::describe(mtcars)
## mtcars 
## 
##  12  Variables      32  Observations
## ---------------------------------------------------------------------------
## mpg 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##      32       0      25       1   20.09   12.00   14.34   15.43   19.20 
##     .75     .90     .95 
##   22.80   30.09   31.30 
## 
## lowest : 10.4 13.3 14.3 14.7 15.0, highest: 26.0 27.3 30.4 32.4 33.9 
## ---------------------------------------------------------------------------
## cyl 
##       n missing  unique    Info    Mean 
##      32       0       3    0.87   6.188 
## 
## 4 (11, 34%), 6 (7, 22%), 8 (14, 44%) 
## ---------------------------------------------------------------------------
## disp 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##      32       0      27       1   230.7   77.35   80.61  120.83  196.30 
##     .75     .90     .95 
##  326.00  396.00  449.00 
## 
## lowest :  71.1  75.7  78.7  79.0  95.1
## highest: 360.0 400.0 440.0 460.0 472.0 
## ---------------------------------------------------------------------------
## hp 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##      32       0      22       1   146.7   63.65   66.00   96.50  123.00 
##     .75     .90     .95 
##  180.00  243.50  253.55 
## 
## lowest :  52  62  65  66  91, highest: 215 230 245 264 335 
## ---------------------------------------------------------------------------
## drat 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##      32       0      22       1   3.597   2.853   3.007   3.080   3.695 
##     .75     .90     .95 
##   3.920   4.209   4.314 
## 
## lowest : 2.76 2.93 3.00 3.07 3.08, highest: 4.08 4.11 4.22 4.43 4.93 
## ---------------------------------------------------------------------------
## wt 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##      32       0      29       1   3.217   1.736   1.956   2.581   3.325 
##     .75     .90     .95 
##   3.610   4.048   5.293 
## 
## lowest : 1.513 1.615 1.835 1.935 2.140
## highest: 3.845 4.070 5.250 5.345 5.424 
## ---------------------------------------------------------------------------
## qsec 
##       n missing  unique    Info    Mean     .05     .10     .25     .50 
##      32       0      30       1   17.85   15.05   15.53   16.89   17.71 
##     .75     .90     .95 
##   18.90   19.99   20.10 
## 
## lowest : 14.50 14.60 15.41 15.50 15.84
## highest: 19.90 20.00 20.01 20.22 22.90 
## ---------------------------------------------------------------------------
## vs 
##       n missing  unique    Info     Sum    Mean 
##      32       0       2    0.74      14  0.4375 
## ---------------------------------------------------------------------------
## am 
##       n missing  unique    Info     Sum    Mean 
##      32       0       2    0.72      13  0.4062 
## ---------------------------------------------------------------------------
## gear 
##       n missing  unique    Info    Mean 
##      32       0       3    0.84   3.688 
## 
## 3 (15, 47%), 4 (12, 38%), 5 (5, 16%) 
## ---------------------------------------------------------------------------
## carb 
##       n missing  unique    Info    Mean 
##      32       0       6    0.93   2.812 
## 
##            1  2 3  4 6 8
## Frequency  7 10 3 10 1 1
## %         22 31 9 31 3 3
## ---------------------------------------------------------------------------
## brand 
##       n missing  unique 
##      32       0      32 
## 
## lowest : AMC Javelin        Cadillac Fleetwood Camaro Z28         Chrysler Imperial  Datsun 710        
## highest: Porsche 914-2      Toyota Corolla     Toyota Corona      Valiant            Volvo 142E         
## ---------------------------------------------------------------------------
# Q: What does describe() report that summary() does not?
#editDataset(mtcars)
 
# Work on a copy so mtcars itself stays untouched, then inject missing values
# to practise NA handling.
mtcars2 <- mtcars
mtcars2[3, 1:6] <- NA  # row 3: columns 1-6 (mpg through wt) become NA
mtcars2[3:9, 2] <- NA  # rows 3-9: column 2 (cyl) becomes NA


# na.rm = TRUE drops the missing values before the mean is taken. TRUE is
# spelled out because `T` is an ordinary variable that can be reassigned.
mean(mtcars2$cyl, na.rm = TRUE)
## [1] 6.32

# is.na() is TRUE exactly at the positions that hold a missing value.
is.na(mtcars2$cyl)
##  [1] FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE
## [12] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [23] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# The original data set is unchanged by the edits made to the copy.
str(mtcars)
## 'data.frame':    32 obs. of  12 variables:
##  $ mpg  : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
##  $ cyl  : num  6 6 4 6 8 6 8 4 4 6 ...
##  $ disp : num  160 160 108 258 360 ...
##  $ hp   : num  110 110 93 110 175 105 245 62 95 123 ...
##  $ drat : num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
##  $ wt   : num  2.62 2.88 2.32 3.21 3.44 ...
##  $ qsec : num  16.5 17 18.6 19.4 17 ...
##  $ vs   : num  0 0 1 1 0 1 0 1 1 1 ...
##  $ am   : num  1 1 1 0 0 0 0 0 0 0 ...
##  $ gear : num  4 4 4 3 3 3 3 4 4 4 ...
##  $ carb : num  4 4 1 1 2 1 4 2 2 4 ...
##  $ brand: chr  "Mazda RX4" "Mazda RX4 Wag" "Datsun 710" "Hornet 4 Drive" ...
mtcars$cyl
##  [1] 6 6 4 6 8 6 8 4 4 6 6 8 8 8 8 8 8 4 4 4 4 8 8 8 8 4 4 4 8 6 8 4
# Task: replace every missing value in mtcars2 with the column mean or median
# and store the result in a new dataset -- using code only, no manual editing.
# Hint: combine ifelse(), is.na() and the na.rm argument. Need more hints?

# mtcars has no missing values, so na.omit() keeps every row here; mtcars3 is
# rebuilt from mtcars2 later in the script.
mtcars3 <- na.omit(mtcars)

# Mean-impute cyl into the new column cyl21. Both the NA test and the fallback
# read from mtcars2 (the copy that holds the NAs): testing mtcars$cyl never
# finds an NA, and a " " replacement would coerce the column to character.
# NOTE: the head()/str() output recorded further down in this transcript was
# produced before this fix and still shows the un-imputed cyl21 values.
mtcars2$cyl21 <- ifelse(
  is.na(mtcars2$cyl),
  mean(mtcars2$cyl, na.rm = TRUE),
  mtcars2$cyl
)

#?ifelse
# First nine rows of mtcars2 -- these cover the rows where NAs were injected.
head(mtcars2, n = 9L)
##                    mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710          NA  NA    NA  NA   NA    NA 18.61  1  1    4    1
## Hornet 4 Drive    21.4  NA 258.0 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7  NA 360.0 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1  NA 225.0 105 2.76 3.460 20.22  1  0    3    1
## Duster 360        14.3  NA 360.0 245 3.21 3.570 15.84  0  0    3    4
## Merc 240D         24.4  NA 146.7  62 3.69 3.190 20.00  1  0    4    2
## Merc 230          22.8  NA 140.8  95 3.92 3.150 22.90  1  0    4    2
##                               brand cyl21
## Mazda RX4                 Mazda RX4     6
## Mazda RX4 Wag         Mazda RX4 Wag     6
## Datsun 710               Datsun 710     4
## Hornet 4 Drive       Hornet 4 Drive     6
## Hornet Sportabout Hornet Sportabout     8
## Valiant                     Valiant     6
## Duster 360               Duster 360     8
## Merc 240D                 Merc 240D     4
## Merc 230                   Merc 230     4
# Structure of mtcars2, including the columns added so far.
utils::str(mtcars2)
## 'data.frame':    32 obs. of  13 variables:
##  $ mpg  : num  21 21 NA 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
##  $ cyl  : num  6 6 NA NA NA NA NA NA NA 6 ...
##  $ disp : num  160 160 NA 258 360 ...
##  $ hp   : num  110 110 NA 110 175 105 245 62 95 123 ...
##  $ drat : num  3.9 3.9 NA 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
##  $ wt   : num  2.62 2.88 NA 3.21 3.44 ...
##  $ qsec : num  16.5 17 18.6 19.4 17 ...
##  $ vs   : num  0 0 1 1 0 1 0 1 1 1 ...
##  $ am   : num  1 1 1 0 0 0 0 0 0 0 ...
##  $ gear : num  4 4 4 3 3 3 3 4 4 4 ...
##  $ carb : num  4 4 1 1 2 1 4 2 2 4 ...
##  $ brand: chr  "Mazda RX4" "Mazda RX4 Wag" "Datsun 710" "Hornet 4 Drive" ...
##  $ cyl21: num  6 6 4 6 8 6 8 4 4 6 ...
# Median-impute cyl into the new column cyl2: observed values are kept and each
# NA is filled with the median of the observed ones. TRUE is spelled out
# because `T` is an ordinary variable that can be reassigned.
mtcars2$cyl2 <- ifelse(
  is.na(mtcars2$cyl),
  median(mtcars2$cyl, na.rm = TRUE),
  mtcars2$cyl
)

# Tabulate the imputed column. cyl2 was added to mtcars2, not mtcars, so
# table(mtcars$cyl2) could only ever yield an empty table.
table(mtcars2$cyl2)
##
##  4  6  8
##  8 12 12
# Keep only the rows of mtcars2 without any NA, then print the correlation
# matrix of the first eleven (numeric) columns.
mtcars3 <- na.omit(mtcars2)
cor(mtcars3[, 1:11])
##             mpg        cyl       disp          hp       drat         wt
## mpg   1.0000000 -0.8758400 -0.8556910 -0.77961838  0.7127164 -0.8819429
## cyl  -0.8758400  1.0000000  0.8985035  0.81853826 -0.7665601  0.8324075
## disp -0.8556910  0.8985035  1.0000000  0.78859201 -0.7365131  0.9164049
## hp   -0.7796184  0.8185383  0.7885920  1.00000000 -0.5203642  0.6966594
## drat  0.7127164 -0.7665601 -0.7365131 -0.52036417  1.0000000 -0.7533969
## wt   -0.8819429  0.8324075  0.9164049  0.69665941 -0.7533969  1.0000000
## qsec  0.4441360 -0.5350648 -0.4116137 -0.71443266  0.1481272 -0.2072452
## vs    0.6958696 -0.7992424 -0.7185176 -0.67406989  0.5892788 -0.6070036
## am    0.6679774 -0.6295948 -0.6355251 -0.31649083  0.7291590 -0.7232991
## gear  0.4679080 -0.4788756 -0.5271718 -0.08737484  0.6541172 -0.5972689
## carb -0.5724860  0.5232139  0.3982365  0.74859820 -0.2007564  0.4467421
##            qsec         vs           am        gear         carb
## mpg   0.4441360  0.6958696  0.667977355  0.46790795 -0.572486018
## cyl  -0.5350648 -0.7992424 -0.629594787 -0.47887557  0.523213913
## disp -0.4116137 -0.7185176 -0.635525058 -0.52717181  0.398236524
## hp   -0.7144327 -0.6740699 -0.316490831 -0.08737484  0.748598205
## drat  0.1481272  0.5892788  0.729159008  0.65411720 -0.200756408
## wt   -0.2072452 -0.6070036 -0.723299122 -0.59726894  0.446742146
## qsec  1.0000000  0.7005019 -0.152673790 -0.35401014 -0.685801376
## vs    0.7005019  1.0000000  0.280224269  0.23586409 -0.499125907
## am   -0.1526738  0.2802243  1.000000000  0.82671008  0.001973435
## gear -0.3540101  0.2358641  0.826710084  1.00000000  0.273886265
## carb -0.6858014 -0.4991259  0.001973435  0.27388627  1.000000000
# Mean mpg over the non-missing values. TRUE is spelled out because `T` is an
# ordinary variable that can be reassigned, which would silently change na.rm.
mean(mtcars2$mpg, na.rm = TRUE)
## [1] 20.00323
# Hmisc's summarize(): mean mpg within each cyl group. No na.rm is passed, so
# the group containing a missing mpg reports NA (see the recorded output).
Hmisc::summarize(X = mtcars2$mpg, by = mtcars2$cyl, FUN = mean)
##   mtcars2$cyl mtcars2$mpg
## 1           4    27.91250
## 2           6    19.74000
## 3           8    14.86667
## 4          NA          NA
# First six rows (head()'s default), now including the imputed columns.
head(mtcars2, n = 6L)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710          NA  NA   NA  NA   NA    NA 18.61  1  1    4    1
## Hornet 4 Drive    21.4  NA  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7  NA  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1  NA  225 105 2.76 3.460 20.22  1  0    3    1
##                               brand cyl21 cyl2
## Mazda RX4                 Mazda RX4     6    6
## Mazda RX4 Wag         Mazda RX4 Wag     6    6
## Datsun 710               Datsun 710     4    6
## Hornet 4 Drive       Hornet 4 Drive     6    6
## Hornet Sportabout Hornet Sportabout     8    6
## Valiant                     Valiant     6    6
# Original data: first three columns, first nine rows.
head(mtcars[, 1:3], n = 9L)
##                    mpg cyl  disp
## Mazda RX4         21.0   6 160.0
## Mazda RX4 Wag     21.0   6 160.0
## Datsun 710        22.8   4 108.0
## Hornet 4 Drive    21.4   6 258.0
## Hornet Sportabout 18.7   8 360.0
## Valiant           18.1   6 225.0
## Duster 360        14.3   8 360.0
## Merc 240D         24.4   4 146.7
## Merc 230          22.8   4 140.8
# Same slice of the copy, where the injected NAs are visible.
head(mtcars2[, 1:3], n = 9L)
##                    mpg cyl  disp
## Mazda RX4         21.0   6 160.0
## Mazda RX4 Wag     21.0   6 160.0
## Datsun 710          NA  NA    NA
## Hornet 4 Drive    21.4  NA 258.0
## Hornet Sportabout 18.7  NA 360.0
## Valiant           18.1  NA 225.0
## Duster 360        14.3  NA 360.0
## Merc 240D         24.4  NA 146.7
## Merc 230          22.8  NA 140.8
# Drop every row of mtcars2 that still contains an NA, then report the mean
# disp within each cyl group of the remaining rows (Hmisc's summarize()).
mtcars3 <- na.omit(mtcars2)
Hmisc::summarize(X = mtcars3$disp, by = mtcars3$cyl, FUN = mean)
##   mtcars3$cyl mtcars3$disp
## 1           4       95.125
## 2           6      160.040
## 3           8      351.950
# Load the Boston data set shipped with the MASS package and list its columns.
data("Boston", package = "MASS")
colnames(Boston)
##  [1] "crim"    "zn"      "indus"   "chas"    "nox"     "rm"      "age"    
##  [8] "dis"     "rad"     "tax"     "ptratio" "black"   "lstat"   "medv"
# Blank out medv (the 14th column) for rows 2-40, then confirm the NA count
# via summary().
Boston[2:40, "medv"] <- NA
summary(Boston[["medv"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    5.00   17.20   21.50   22.75   25.15   50.00      39
#write.csv(Boston,file="Boston.csv")
# Exercise: impute the missing medv values in Boston and show the code you used.
#getwd()