From Data Smart by John Foreman:
http://www.wiley.com/WileyCDA/WileyTitle/productCd-111866146X.html
# a well-known rational approximation of pi
almostpi <- 355 / 113
almostpi
## [1] 3.141593
# a small numeric vector built with c()
# (1 is included here even though it is not a prime by the usual definition)
someprimes <- c(1, 2, 3, 5, 7, 11)
someprimes
## [1] 1 2 3 5 7 11
length(someprimes)
## [1] 6
# indexing is 1-based: fetch the fourth element
someprimes[4]
## [1] 5
# select several elements with an explicit index vector ...
someprimes[c(4, 5, 6)]
## [1] 5 7 11
# ... or with the equivalent range 4:6
someprimes[4:6]
## [1] 5 7 11
# which() reports the positions of the elements that satisfy the condition
which(someprimes < 7)
## [1] 1 2 3 4
# arithmetic is vectorized; someprimes itself is left unchanged
primestimes2 <- someprimes * 2
someprimes
## [1] 1 2 3 5 7 11
primestimes2
## [1] 2 4 6 10 14 22
summary(someprimes)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 2.250 4.000 4.833 6.500 11.000
# Build a 2 x 6 matrix holding one vector per row. matrix() fills column by
# column by default, which would interleave the two vectors; byrow = TRUE
# makes row 1 someprimes and row 2 primestimes2.
amatrix <- matrix(
  data = c(someprimes, primestimes2),
  nrow = 2,
  ncol = 6,
  byrow = TRUE
)
amatrix
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 1 2 3 5 7 11
## [2,] 2 4 6 10 14 22
# number of rows/cols
nrow(amatrix)
## [1] 2
ncol(amatrix)
## [1] 6
# transpose: rows become columns
t(amatrix)
## [,1] [,2]
## [1,] 1 2
## [2,] 2 4
## [3,] 3 6
## [4,] 5 10
## [5,] 7 14
## [6,] 11 22
# third column, naming both rows explicitly ...
amatrix[1:2, 3]
## [1] 3 6
# ... or leaving the row index empty to take every row
amatrix[, 3]
## [1] 3 6
# smush new rows/cols of data into the matrix
amatrix
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 1 2 3 5 7 11
## [2,] 2 4 6 10 14 22
primestimes3 <- someprimes * 3
# rbind() appends the vector as a new row and labels it with the variable name
amatrix <- rbind(amatrix, primestimes3)
amatrix
## [,1] [,2] [,3] [,4] [,5] [,6]
## 1 2 3 5 7 11
## 2 4 6 10 14 22
## primestimes3 3 6 9 15 21 33
# a named list can hold elements of different types and lengths
# NOTE(review): spawn is 3 but spawn_ages lists only two values
John <- list(
  gender = "male",
  age = "ancient",
  height = 72,
  spawn = 3,
  spawn_ages = c(0.5, 2.5)
)
John
## $gender
## [1] "male"
##
## $age
## [1] "ancient"
##
## $height
## [1] 72
##
## $spawn
## [1] 3
##
## $spawn_ages
## [1] 0.5 2.5
# pull out a single element by name
John$age
## [1] "ancient"
# let's set up a dataframe here: one vector per column, all of length 6
# NOTE(review): "laezenby" looks like a misspelling of "lazenby"; it is kept
# as-is so the recorded output in this document stays consistent.
bondnames <- c("connery", "laezenby", "moore", "dalton", "brosnan", "craig")
firstyear <- c(1962, 1969, 1973, 1987, 1995, 2006)
eyecolor <- c("brown", "brown", "blue", "green", "blue", "blue")
womenkissed <- c(17, 3, 20, 4, 12, 4)
countofbondjamesbonds <- c(3, 2, 10, 2, 5, 1)
# stringsAsFactors = TRUE is spelled out because R >= 4.0.0 no longer converts
# strings to factors by default, and the str()/summary() output below relies
# on bondnames and eyecolor being factors.
bonddata <- data.frame(
  bondnames,
  firstyear,
  eyecolor,
  womenkissed,
  countofbondjamesbonds,
  stringsAsFactors = TRUE
)
bonddata
## bondnames firstyear eyecolor womenkissed countofbondjamesbonds
## 1 connery 1962 brown 17 3
## 2 laezenby 1969 brown 3 2
## 3 moore 1973 blue 20 10
## 4 dalton 1987 green 4 2
## 5 brosnan 1995 blue 12 5
## 6 craig 2006 blue 4 1
# structure: column types and a preview of each column's values
str(bonddata)
## 'data.frame': 6 obs. of 5 variables:
## $ bondnames : Factor w/ 6 levels "brosnan","connery",..: 2 5 6 4 1 3
## $ firstyear : num 1962 1969 1973 1987 1995 ...
## $ eyecolor : Factor w/ 3 levels "blue","brown",..: 2 2 1 3 1 1
## $ womenkissed : num 17 3 20 4 12 4
## $ countofbondjamesbonds: num 3 2 10 2 5 1
# per-column summary: level counts for factors, quantiles for numerics
summary(bonddata)
## bondnames firstyear eyecolor womenkissed
## brosnan :1 Min. :1962 blue :3 Min. : 3.00
## connery :1 1st Qu.:1970 brown:2 1st Qu.: 4.00
## craig :1 Median :1980 green:1 Median : 8.00
## dalton :1 Mean :1982 Mean :10.00
## laezenby:1 3rd Qu.:1993 3rd Qu.:15.75
## moore :1 Max. :2006 Max. :20.00
## countofbondjamesbonds
## Min. : 1.000
## 1st Qu.: 2.000
## Median : 2.500
## Mean : 3.833
## 3rd Qu.: 4.500
## Max. :10.000
The year data is treated as a number here, but if we want to treat it categorically, we can convert it to a factor.
# re-encode the year column as a factor so summary() counts it per level
bonddata[["firstyear"]] <- factor(bonddata[["firstyear"]])
# the printed table looks the same; only the column's type has changed
bonddata
## bondnames firstyear eyecolor womenkissed countofbondjamesbonds
## 1 connery 1962 brown 17 3
## 2 laezenby 1969 brown 3 2
## 3 moore 1973 blue 20 10
## 4 dalton 1987 green 4 2
## 5 brosnan 1995 blue 12 5
## 6 craig 2006 blue 4 1
# firstyear is now reported as level counts instead of quantiles
summary(bonddata)
## bondnames firstyear eyecolor womenkissed countofbondjamesbonds
## brosnan :1 1962:1 blue :3 Min. : 3.00 Min. : 1.000
## connery :1 1969:1 brown:2 1st Qu.: 4.00 1st Qu.: 2.000
## craig :1 1973:1 green:1 Median : 8.00 Median : 2.500
## dalton :1 1987:1 Mean :10.00 Mean : 3.833
## laezenby:1 1995:1 3rd Qu.:15.75 3rd Qu.: 4.500
## moore :1 2006:1 Max. :20.00 Max. :10.000
# getwd()
# (uncomment the line above to check which directory the relative path
# below is resolved against)
winedata <- read.csv(file = "data/WineKMC.csv")
# show only the first nine columns
winedata[, 1:9]
## Offer Mth Varietal MinQty Disc Origin PastPeak Adams Allen
## 1 1 Jan Malbec 72 56 France FALSE NA NA
## 2 2 Jan Pinot Noir 72 17 France FALSE NA NA
## 3 3 Feb Espumante 144 32 Oregon TRUE NA NA
## 4 4 Feb Champagne 72 48 France TRUE NA NA
## 5 5 Feb Cab. Sauv. 144 44 NZ TRUE NA NA
## 6 6 Mar Prosecco 144 86 Chile FALSE NA NA
## 7 7 Mar Prosecco 6 40 Australia TRUE NA NA
## 8 8 Mar Espumante 6 45 S. Africa FALSE NA NA
## 9 9 Apr Chardonnay 144 57 Chile FALSE NA 1
## 10 10 Apr Prosecco 72 52 CA FALSE NA NA
## 11 11 May Champagne 72 85 France FALSE NA NA
## 12 12 May Prosecco 72 83 Australia FALSE NA NA
## 13 13 May Merlot 6 43 Chile FALSE NA NA
## 14 14 Jun Merlot 72 64 Chile FALSE NA NA
## 15 15 Jun Cab. Sauv. 144 19 Italy FALSE NA NA
## 16 16 Jun Merlot 72 88 CA FALSE NA NA
## 17 17 Jul Pinot Noir 12 47 Germany FALSE NA NA
## 18 18 Jul Espumante 6 50 Oregon FALSE 1 NA
## 19 19 Jul Champagne 12 66 Germany FALSE NA NA
## 20 20 Aug Cab. Sauv. 72 82 Italy FALSE NA NA
## 21 21 Aug Champagne 12 50 CA FALSE NA NA
## 22 22 Aug Champagne 72 63 France FALSE NA NA
## 23 23 Sept Chardonnay 144 39 S. Africa FALSE NA NA
## 24 24 Sept Pinot Noir 6 34 Italy FALSE NA NA
## 25 25 Oct Cab. Sauv. 72 59 Oregon TRUE NA NA
## 26 26 Oct Pinot Noir 144 83 Australia FALSE NA NA
## 27 27 Oct Champagne 72 88 NZ FALSE NA 1
## 28 28 Nov Cab. Sauv. 12 56 France TRUE NA NA
## 29 29 Nov P. Grigio 6 87 France FALSE 1 NA
## 30 30 Dec Malbec 6 54 France FALSE 1 NA
## 31 31 Dec Champagne 72 89 France FALSE NA NA
## 32 32 Dec Cab. Sauv. 72 45 Germany TRUE NA NA
Notice here that all the blank cells became NAs. Let's convert those to 0.
# is.na() on a data frame yields a logical matrix marking every NA cell;
# indexing with it overwrites all of those cells with 0 in one assignment
winedata[is.na(winedata)] <- 0
# re-print the first nine columns to confirm the NAs are now 0
winedata[,1:9]
## Offer Mth Varietal MinQty Disc Origin PastPeak Adams Allen
## 1 1 Jan Malbec 72 56 France FALSE 0 0
## 2 2 Jan Pinot Noir 72 17 France FALSE 0 0
## 3 3 Feb Espumante 144 32 Oregon TRUE 0 0
## 4 4 Feb Champagne 72 48 France TRUE 0 0
## 5 5 Feb Cab. Sauv. 144 44 NZ TRUE 0 0
## 6 6 Mar Prosecco 144 86 Chile FALSE 0 0
## 7 7 Mar Prosecco 6 40 Australia TRUE 0 0
## 8 8 Mar Espumante 6 45 S. Africa FALSE 0 0
## 9 9 Apr Chardonnay 144 57 Chile FALSE 0 1
## 10 10 Apr Prosecco 72 52 CA FALSE 0 0
## 11 11 May Champagne 72 85 France FALSE 0 0
## 12 12 May Prosecco 72 83 Australia FALSE 0 0
## 13 13 May Merlot 6 43 Chile FALSE 0 0
## 14 14 Jun Merlot 72 64 Chile FALSE 0 0
## 15 15 Jun Cab. Sauv. 144 19 Italy FALSE 0 0
## 16 16 Jun Merlot 72 88 CA FALSE 0 0
## 17 17 Jul Pinot Noir 12 47 Germany FALSE 0 0
## 18 18 Jul Espumante 6 50 Oregon FALSE 1 0
## 19 19 Jul Champagne 12 66 Germany FALSE 0 0
## 20 20 Aug Cab. Sauv. 72 82 Italy FALSE 0 0
## 21 21 Aug Champagne 12 50 CA FALSE 0 0
## 22 22 Aug Champagne 72 63 France FALSE 0 0
## 23 23 Sept Chardonnay 144 39 S. Africa FALSE 0 0
## 24 24 Sept Pinot Noir 6 34 Italy FALSE 0 0
## 25 25 Oct Cab. Sauv. 72 59 Oregon TRUE 0 0
## 26 26 Oct Pinot Noir 144 83 Australia FALSE 0 0
## 27 27 Oct Champagne 72 88 NZ FALSE 0 1
## 28 28 Nov Cab. Sauv. 12 56 France TRUE 0 0
## 29 29 Nov P. Grigio 6 87 France FALSE 1 0
## 30 30 Dec Malbec 6 54 France FALSE 1 0
## 31 31 Dec Champagne 72 89 France FALSE 0 0
## 32 32 Dec Cab. Sauv. 72 45 Germany TRUE 0 0