# Evaluate the cubic polynomial x^3 + 3x.
#
# x: a numeric vector; the arithmetic is applied elementwise.
# Returns a numeric vector the same length as x.
#
# `^` is the documented power operator (`**` is only a parser alias for it),
# and `<-` is the conventional assignment operator.
pfunction <- function(x) x^3 + 3 * x
pfunction(10)
## [1] 1030
pfunction(20)
## [1] 8060
# Evaluating the bare name prints the function's source.
pfunction
## function(x) x^3 + 3 * x
# Show the source of kmeans (from the stats namespace); an explicit print()
# produces the same listing as evaluating the bare name.
print(kmeans)
## function (x, centers, iter.max = 10L, nstart = 1L, algorithm = c("Hartigan-Wong", 
##     "Lloyd", "Forgy", "MacQueen"), trace = FALSE) 
## {
##     .Mimax <- .Machine$integer.max
##     do_one <- function(nmeth) {
##         switch(nmeth, {
##             isteps.Qtran <- as.integer(min(.Mimax, 50 * m))
##             iTran <- c(isteps.Qtran, integer(max(0, k - 1)))
##             Z <- .Fortran(C_kmns, x, m, p, centers = centers, 
##                 as.integer(k), c1 = integer(m), c2 = integer(m), 
##                 nc = integer(k), double(k), double(k), ncp = integer(k), 
##                 D = double(m), iTran = iTran, live = integer(k), 
##                 iter = iter.max, wss = double(k), ifault = as.integer(trace))
##             switch(Z$ifault, stop("empty cluster: try a better set of initial centers", 
##                 call. = FALSE), Z$iter <- max(Z$iter, iter.max + 
##                 1L), stop("number of cluster centres must lie between 1 and nrow(x)", 
##                 call. = FALSE), warning(gettextf("Quick-TRANSfer stage steps exceeded maximum (= %d)", 
##                 isteps.Qtran), call. = FALSE))
##         }, {
##             Z <- .C(C_kmeans_Lloyd, x, m, p, centers = centers, 
##                 k, c1 = integer(m), iter = iter.max, nc = integer(k), 
##                 wss = double(k))
##         }, {
##             Z <- .C(C_kmeans_MacQueen, x, m, p, centers = as.double(centers), 
##                 k, c1 = integer(m), iter = iter.max, nc = integer(k), 
##                 wss = double(k))
##         })
##         if (m23 <- any(nmeth == c(2L, 3L))) {
##             if (any(Z$nc == 0)) 
##                 warning("empty cluster: try a better set of initial centers", 
##                   call. = FALSE)
##         }
##         if (Z$iter > iter.max) {
##             warning(sprintf(ngettext(iter.max, "did not converge in %d iteration", 
##                 "did not converge in %d iterations"), iter.max), 
##                 call. = FALSE, domain = NA)
##             if (m23) 
##                 Z$ifault <- 2L
##         }
##         if (nmeth %in% c(2L, 3L)) {
##             if (any(Z$nc == 0)) 
##                 warning("empty cluster: try a better set of initial centers", 
##                   call. = FALSE)
##         }
##         Z
##     }
##     x <- as.matrix(x)
##     m <- as.integer(nrow(x))
##     if (is.na(m)) 
##         stop("invalid nrow(x)")
##     p <- as.integer(ncol(x))
##     if (is.na(p)) 
##         stop("invalid ncol(x)")
##     if (missing(centers)) 
##         stop("'centers' must be a number or a matrix")
##     nmeth <- switch(match.arg(algorithm), `Hartigan-Wong` = 1L, 
##         Lloyd = 2L, Forgy = 2L, MacQueen = 3L)
##     storage.mode(x) <- "double"
##     if (length(centers) == 1L) {
##         k <- centers
##         if (nstart == 1L) 
##             centers <- x[sample.int(m, k), , drop = FALSE]
##         if (nstart >= 2L || any(duplicated(centers))) {
##             cn <- unique(x)
##             mm <- nrow(cn)
##             if (mm < k) 
##                 stop("more cluster centers than distinct data points.")
##             centers <- cn[sample.int(mm, k), , drop = FALSE]
##         }
##     }
##     else {
##         centers <- as.matrix(centers)
##         if (any(duplicated(centers))) 
##             stop("initial centers are not distinct")
##         cn <- NULL
##         k <- nrow(centers)
##         if (m < k) 
##             stop("more cluster centers than data points")
##     }
##     k <- as.integer(k)
##     if (is.na(k)) 
##         stop("'invalid value of 'k'")
##     if (k == 1L) 
##         nmeth <- 3L
##     iter.max <- as.integer(iter.max)
##     if (is.na(iter.max) || iter.max < 1L) 
##         stop("'iter.max' must be positive")
##     if (ncol(x) != ncol(centers)) 
##         stop("must have same number of columns in 'x' and 'centers'")
##     storage.mode(centers) <- "double"
##     Z <- do_one(nmeth)
##     best <- sum(Z$wss)
##     if (nstart >= 2L && !is.null(cn)) 
##         for (i in 2:nstart) {
##             centers <- cn[sample.int(mm, k), , drop = FALSE]
##             ZZ <- do_one(nmeth)
##             if ((z <- sum(ZZ$wss)) < best) {
##                 Z <- ZZ
##                 best <- z
##             }
##         }
##     centers <- matrix(Z$centers, k)
##     dimnames(centers) <- list(1L:k, dimnames(x)[[2L]])
##     cluster <- Z$c1
##     if (!is.null(rn <- rownames(x))) 
##         names(cluster) <- rn
##     totss <- sum(scale(x, scale = FALSE)^2)
##     structure(list(cluster = cluster, centers = centers, totss = totss, 
##         withinss = Z$wss, tot.withinss = best, betweenss = totss - 
##             best, size = Z$nc, iter = Z$iter, ifault = Z$ifault), 
##         class = "kmeans")
## }
## <bytecode: 0x0000000007439420>
## <environment: namespace:stats>
# Load the two example datasets explored in this script.
data("iris", package = "datasets")

data("mtcars")

# Preview the first ten rows of mtcars.
head(mtcars, n = 10L)
##                    mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
## Duster 360        14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
## Merc 240D         24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
## Merc 230          22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
## Merc 280          19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
# Compact overview of the column types.
str(mtcars)
## 'data.frame':    32 obs. of  11 variables:
##  $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
##  $ cyl : num  6 6 4 6 8 6 8 4 4 6 ...
##  $ disp: num  160 160 108 258 360 ...
##  $ hp  : num  110 110 93 110 175 105 245 62 95 123 ...
##  $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
##  $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
##  $ qsec: num  16.5 17 18.6 19.4 17 ...
##  $ vs  : num  0 0 1 1 0 1 0 1 1 1 ...
##  $ am  : num  1 1 1 0 0 0 0 0 0 0 ...
##  $ gear: num  4 4 4 3 3 3 3 4 4 4 ...
##  $ carb: num  4 4 1 1 2 1 4 2 2 4 ...
# Copy the row names (the car names) into a regular column.
mtcars[["model"]] <- rownames(mtcars)
class(mtcars)
## [1] "data.frame"
class(mtcars[["mpg"]])
## [1] "numeric"
# Per-column summary; the character column `model` is reported last.
summary(mtcars)
##       mpg             cyl             disp             hp       
##  Min.   :10.40   Min.   :4.000   Min.   : 71.1   Min.   : 52.0  
##  1st Qu.:15.43   1st Qu.:4.000   1st Qu.:120.8   1st Qu.: 96.5  
##  Median :19.20   Median :6.000   Median :196.3   Median :123.0  
##  Mean   :20.09   Mean   :6.188   Mean   :230.7   Mean   :146.7  
##  3rd Qu.:22.80   3rd Qu.:8.000   3rd Qu.:326.0   3rd Qu.:180.0  
##  Max.   :33.90   Max.   :8.000   Max.   :472.0   Max.   :335.0  
##       drat             wt             qsec             vs        
##  Min.   :2.760   Min.   :1.513   Min.   :14.50   Min.   :0.0000  
##  1st Qu.:3.080   1st Qu.:2.581   1st Qu.:16.89   1st Qu.:0.0000  
##  Median :3.695   Median :3.325   Median :17.71   Median :0.0000  
##  Mean   :3.597   Mean   :3.217   Mean   :17.85   Mean   :0.4375  
##  3rd Qu.:3.920   3rd Qu.:3.610   3rd Qu.:18.90   3rd Qu.:1.0000  
##  Max.   :4.930   Max.   :5.424   Max.   :22.90   Max.   :1.0000  
##        am              gear            carb          model          
##  Min.   :0.0000   Min.   :3.000   Min.   :1.000   Length:32         
##  1st Qu.:0.0000   1st Qu.:3.000   1st Qu.:2.000   Class :character  
##  Median :0.0000   Median :4.000   Median :2.000   Mode  :character  
##  Mean   :0.4062   Mean   :3.688   Mean   :2.812                     
##  3rd Qu.:1.0000   3rd Qu.:4.000   3rd Qu.:4.000                     
##  Max.   :1.0000   Max.   :5.000   Max.   :8.000
# cyl is already stored as numeric (see the str() output), so this coercion
# leaves the values unchanged.
mtcars[["cyl"]] <- as.numeric(mtcars[["cyl"]])
summary(mtcars[["cyl"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   4.000   4.000   6.000   6.188   8.000   8.000
table(mtcars[["cyl"]])
## 
##  4  6  8 
## 11  7 14
# Reload the packaged dataset, discarding the modifications made so far.
data("mtcars")
# Frequency counts for the discrete columns, then for mpg.
table(mtcars[["cyl"]])
## 
##  4  6  8 
## 11  7 14
table(mtcars[["gear"]])
## 
##  3  4  5 
## 15 12  5
table(mtcars[["am"]])
## 
##  0  1 
## 19 13
table(mtcars[["carb"]])
## 
##  1  2  3  4  6  8 
##  7 10  3 10  1  1
table(mtcars[["mpg"]])
## 
## 10.4 13.3 14.3 14.7   15 15.2 15.5 15.8 16.4 17.3 17.8 18.1 18.7 19.2 19.7 
##    2    1    1    1    1    2    1    1    1    1    1    1    1    2    1 
##   21 21.4 21.5 22.8 24.4   26 27.3 30.4 32.4 33.9 
##    2    2    1    2    1    1    1    2    1    1
# Plot straight from the data frame instead of attach()-ing it. Attached
# columns live on the search path, where a package loaded later can mask
# them: ggplot2 (pulled in by Hmisc below) exports its own `mpg` object.
boxplot(mtcars$mpg)

boxplot(mpg ~ am, data = mtcars)

boxplot(mpg ~ cyl, data = mtcars)
# with() evaluates the call inside mtcars, so the printed table keeps the
# `cyl` / `am` dimension labels.
with(mtcars, table(cyl, am))
##    am
## cyl  0  1
##   4  3  8
##   6  4  3
##   8 12  2
library(sqldf)
## Warning: package 'sqldf' was built under R version 3.3.3
## Loading required package: gsubfn
## Warning: package 'gsubfn' was built under R version 3.3.3
## Loading required package: proto
## Warning: package 'proto' was built under R version 3.3.3
## Loading required package: RSQLite
## Warning: package 'RSQLite' was built under R version 3.3.3
# First ten rows, fetched through SQL.
a1 <- sqldf("select * from mtcars limit 10")
## Loading required package: tcltk
## Warning: Quoted identifiers should have class SQL, use DBI::SQL() if the
## caller performs the quoting.
a1
##     mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## 1  21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
## 2  21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
## 3  22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
## 4  21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
## 5  18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
## 6  18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
## 7  14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
## 8  24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
## 9  22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
## 10 19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
# Mean mpg per cylinder count.
a2 <- sqldf("select avg(mpg),cyl from mtcars group by cyl")
a2
##   avg(mpg) cyl
## 1 26.66364   4
## 2 19.74286   6
## 3 15.10000   8
# Mean mpg per gear count.
a3 <- sqldf("select avg(mpg),gear from mtcars group by gear")
a3
##   avg(mpg) gear
## 1 16.10667    3
## 2 24.53333    4
## 3 21.38000    5
library("Hmisc")
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, round.POSIXt, trunc.POSIXt, units

# Distribution of fuel economy: base summary and boxplot, then the more
# detailed Hmisc describe() report.
summary(mtcars$mpg)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   10.40   15.42   19.20   20.09   22.80   33.90
boxplot(mtcars$mpg)

Hmisc::describe(mtcars$mpg)
## mtcars$mpg 
##        n  missing distinct     Info     Mean      Gmd      .05      .10 
##       32        0       25    0.999    20.09    6.796    12.00    14.34 
##      .25      .50      .75      .90      .95 
##    15.43    19.20    22.80    30.09    31.30 
## 
## lowest : 10.4 13.3 14.3 14.7 15.0, highest: 26.0 27.3 30.4 32.4 33.9
# Mean mpg for each cylinder count.
Hmisc::summarize(mtcars$mpg, by = mtcars$cyl, FUN = mean)
##   mtcars$cyl mtcars$mpg
## 1          4   26.66364
## 2          6   19.74286
## 3          8   15.10000
# Mean mpg for each (cylinder count, gear count) combination.
Hmisc::summarize(mtcars$mpg, by = llist(mtcars$cyl, mtcars$gear), FUN = mean)
##   mtcars$cyl mtcars$gear mtcars$mpg
## 1          4           3     21.500
## 2          4           4     26.925
## 3          4           5     28.200
## 4          6           3     19.750
## 5          6           4     19.750
## 6          6           5     19.700
## 7          8           3     15.050
## 8          8           5     15.400
# Load iris and preview its first rows.
data("iris")
head(iris)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
# Resolve the formula's variables through `data = iris` rather than
# attach(iris): an attached copy sits on the search path, can be masked by
# other objects, and is not updated when `iris` itself is modified later
# (e.g. when the PetalLength column is added).
boxplot(Sepal.Length ~ Species, data = iris)

# Expose Petal.Length under a dot-free name (presumably so the column can be
# referenced in SQL without quoting -- confirm).
iris$PetalLength <- iris$Petal.Length
# Aggregate per species, matching the avg() queries on mtcars. Selecting the
# bare column with GROUP BY is not an aggregate: SQLite then returns the value
# from an arbitrary row of each group.
sqldf("select avg(PetalLength),Species from iris group by Species")
##   avg(PetalLength)    Species
## 1            1.462     setosa
## 2            4.260 versicolor
## 3            5.552  virginica