RScript_1.R

ruhil — Nov 2, 2013, 1:41 PM

## Introduction to Basic Operations in R ##

# Assignment: `=` and `<-` both bind the value 10 to the name x, so the two
# lines below are equivalent. `<-` is the conventional R assignment operator.
x = 10 
x <- 10

# c() combines its arguments into a vector; this replaces the scalar x above.
x <- c(1,2,3,4,5,6)

y <- c(6,5,4,3,2,1)
# Arithmetic on vectors is element-wise: z[i] is x[i] * y[i].
z <- x * y
# cbind() binds the three vectors as columns (6 rows x 3 columns named x, y, z).
V <- cbind(x,y,z)
# rbind() binds them as rows instead (3 rows x 6 columns).
Vr <- rbind(x,y,z)

# Typing an expression at the prompt prints its value; pi is a built-in constant.
pi
[1] 3.142
# exp(1) is Euler's number e.
exp(1)
[1] 2.718
# log10() is the base-10 logarithm, so log10(10) is 1.
log10(10)
[1] 1

# seq(10) generates the integers 1 through 10.
s <- seq(10)
# print(s) and typing the bare name s display the same thing.
print(s)
 [1]  1  2  3  4  5  6  7  8  9 10
s
 [1]  1  2  3  4  5  6  7  8  9 10

# Count down from 20 to 0 in steps of 2; the trailing `; s` prints the new
# value on the same line as the assignment.
s <- seq(20, 0, by=-2); s
 [1] 20 18 16 14 12 10  8  6  4  2  0

# getwd() reports the current working directory.
getwd()
[1] "/Users/ruhil/Library/Teaching/PBIO 3150_5150/R Scripts"

# Change the working directory; the import calls further down use bare file
# names, so they are resolved relative to this folder.
# NOTE(review): this absolute path is specific to the author's machine --
# adjust it before running the script anywhere else.
setwd("/Users/ruhil/Library/Teaching/PBIO 3150_5150/Data/")

# objects() lists the names currently defined in the workspace.
objects()
[1] "s"  "V"  "Vr" "x"  "y"  "z" 
# Remove s, x and y from the workspace in a single call.
rm(s, x, y)


# Tab-delimited text file whose first row holds the column names.
ImportDataTAB <- read.table("ImportDataTab.txt", sep = "\t", header = TRUE)

# Comma-separated file; read.csv() already defaults to header = TRUE, sep = ",".
ImportDataCSV <- read.csv("ImportDataCSV.csv")

library(xlsx)
Loading required package: xlsxjars
Loading required package: rJava
# Read the first worksheet of the Excel workbook (read.xlsx is from the xlsx package).
ImportDataXLSX <- read.xlsx("ImportDataXLSX.xlsx", sheetIndex = 1)

library(sas7bdat)
Loading required package: chron
# Read a SAS .sas7bdat data set using the sas7bdat package.
ImportDataSAS <- read.sas7bdat("ImportDataSAS.sas7bdat")

library(Hmisc)
Loading required package: survival
Loading required package: splines
Loading required package: Formula
Hmisc library by Frank E Harrell Jr

Type library(help='Hmisc'), ?Overview, or ?Hmisc.Overview')
to see overall documentation.


Attaching package: 'Hmisc'

The following object is masked from 'package:survival':

    untangle.specials

The following objects are masked from 'package:base':

    format.pval, round.POSIXt, trunc.POSIXt, units
# Read an SPSS .sav file with spss.get() from the Hmisc package.
ImportDataSPSS <- spss.get("ImportDataSPSS.sav")
Loading required package: foreign
re-encoding from UTF-8

library(foreign)
# Read a Stata .dta file with read.dta() from the foreign package.
ImportDataStata <- read.dta("ImportDataStata.dta")


# Load the built-in trees data set and display its structure.
data("trees")
str(object = trees)
'data.frame':   31 obs. of  3 variables:
 $ Girth : num  8.3 8.6 8.8 10.5 10.7 10.8 11 11 11.1 11.2 ...
 $ Height: num  70 65 63 72 81 83 66 75 80 75 ...
 $ Volume: num  10.3 10.3 10.2 16.4 18.8 19.7 15.6 18.2 22.6 19.9 ...
# Per-column summary statistics (min, quartiles, mean, max) for trees.
summary(trees)
     Girth          Height       Volume    
 Min.   : 8.3   Min.   :63   Min.   :10.2  
 1st Qu.:11.1   1st Qu.:72   1st Qu.:19.4  
 Median :12.9   Median :76   Median :24.2  
 Mean   :13.2   Mean   :76   Mean   :30.2  
 3rd Qu.:15.2   3rd Qu.:80   3rd Qu.:37.3  
 Max.   :20.6   Max.   :87   Max.   :77.0  


## Two Examples of Data Scraped from the Web ##
library(XML)

# Example 1
# Scrape the 2010 Senate results tables from the NYT "big board" page.
# NOTE(review): the page is fetched live over the network -- confirm the URL
# is still being served before relying on this example.
theurl <- "http://elections.nytimes.com/2010/results/senate/big-board"
# readHTMLTable() (XML package) parses the HTML tables found at the URL.
tables <- readHTMLTable(theurl)
# Row count of each table, kept for inspection. The parameter is named tbl
# rather than t so it does not shadow base::t().
n.rows <- unlist(lapply(tables, function(tbl) dim(tbl)[1]))
# Stack every table except the first into a single data frame.
Elections <- do.call(rbind, tables[-1])
# Keep the first column as-is; in every other column strip all non-digit
# characters and convert what is left to numeric.
# NOTE(review): the pattern also removes "." and "-", so decimals and negative
# values would be altered -- confirm the scraped columns are plain counts.
cleanElections <- cbind(
  Elections[1],
  sapply(Elections[-1], function(xx) as.numeric(gsub("[^0-9]", "", xx)))
)
# Renumber the rows 1..n. seq_len() is used instead of 1:nrow() so that a
# zero-row result does not fail on the backwards sequence 1:0.
rownames(cleanElections) <- seq_len(nrow(cleanElections))

# Example 2
tables <- readHTMLTable("http://www.disastercenter.com/crime/iacrime.htm")
# The table of interest is the third one on the page.
x <- tables[[3]]
# The column headings are spread over the first two rows. MARGIN = 2 makes
# apply() work down each column (MARGIN = 1 would work across rows), pasting
# the two header cells of every column into one name.
nms <- as.vector(apply(x[1:2, ], MARGIN = 2, FUN = paste, collapse = ""))
# Drop those two header rows so only data rows remain.
x <- x[-c(1, 2), ]
# Attach the reconstructed headings as column names.
names(x) <- nms
# Strip every character that is not a digit (0-9) and convert to numeric.
x <- sapply(x, function(column) as.numeric(gsub("[^0-9]", "", column)))
# sapply() returned a matrix, so wrap it back into a data frame.
crimetable <- data.frame(x)


# Reload the built-in trees data set and re-check its structure before plotting.
data("trees")
str(object = trees)
'data.frame':   31 obs. of  3 variables:
 $ Girth : num  8.3 8.6 8.8 10.5 10.7 10.8 11 11 11.1 11.2 ...
 $ Height: num  70 65 63 72 81 83 66 75 80 75 ...
 $ Volume: num  10.3 10.3 10.2 16.4 18.8 19.7 15.6 18.2 22.6 19.9 ...


## What else can R do? ##
# Example 1 -- Simple yet Elegant Plots
# Default histogram; with no main/xlab given, the title and x-axis label are
# derived from the argument expression trees$Girth.
hist(trees$Girth)

plot of chunk unnamed-chunk-1

# Same histogram with a fill colour and an explicit axis label and title.
hist(trees$Girth, col="lightblue", xlab="Girth", main="Histogram of Girth")

plot of chunk unnamed-chunk-1


library(ggplot2)
# ggplot2 version: map Girth to the x axis and bin it in steps of 2.
ggplot(data = trees, mapping = aes(x = Girth)) +
  geom_histogram(binwidth = 2, fill = "lightblue")

plot of chunk unnamed-chunk-1




# help.start() # Help on Basic R stuff
# vignette() # Show available package vignettes
# vignette("MatchIt") # Show the vignette for the MatchIt package
# data() # List datasets in the packages currently on the search path
# data(package = .packages(all.available = TRUE)) # List datasets in every installed package



## END ##