by(iris$Sepal.Width, iris$Species, summary)
## iris$Species: setosa
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.300   3.200   3.400   3.428   3.675   4.400 
## --------------------------------------------------------
## iris$Species: versicolor
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.000   2.525   2.800   2.770   3.000   3.400 
## --------------------------------------------------------
## iris$Species: virginica
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.200   2.800   3.000   2.974   3.175   3.800
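For reference, tapply() produces the same per-group summaries as a plain list (an equivalent alternative, not used in the original):

tapply(iris$Sepal.Width, iris$Species, summary)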
# Treat cylinder count as a categorical variable so the boxplot groups by it
cyl2 <- factor(mtcars$cyl)
boxplot(mpg ~ cyl2, data = mtcars, main = "Car MPG", xlab = "Number of Cylinders", ylab = "MPG")
set.seed(101)
# Draw 1000 independent samples of size 100 from N(mean = 10, sd = 5)
rSamples <- replicate(1000, rnorm(100, 10, 5), simplify = FALSE)
# Compute the mean of each sample
rSamples.means <- sapply(rSamples, mean)
mean(rSamples.means)
## [1] 10.0144
sd(rSamples.means)
## [1] 0.4776565
The results are consistent with the Central Limit Theorem: the mean of the 1000 sample means is right around 10 (the population mean), and their standard deviation is about 0.5, which matches the theoretical standard error \(\sigma/\sqrt{n} = 5/\sqrt{100} = 0.5\). The distribution of the sample means is therefore approximately normal.
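As a quick check (a sketch, not part of the original output), the observed standard deviation can be compared against the theoretical standard error, and the sample means can be plotted to confirm they are roughly bell-shaped:

5/sqrt(100)  # theoretical standard error of the mean: sigma / sqrt(n) = 0.5
hist(rSamples.means, main = "Sampling Distribution of the Mean", xlab = "Sample Mean")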
geese <- read.csv(file="/Users/Alex/Dropbox/College/4-Senior/Machine Learning/Project1/geese.txt", header=TRUE, sep=",")
par(mfrow=c(1,2))
hist(geese$Aestimate, main="A Estimate")
hist(geese$Bestimate, main="B Estimate")
cs1979 <- read.csv(file="/Users/Alex/Dropbox/College/4-Senior/Machine Learning/Project1/CerealSugar1979.txt", header=FALSE, sep="\t")
names(cs1979) <- c("Cereal", "SugarContent")
cs2006 <- read.csv(file="/Users/Alex/Dropbox/College/4-Senior/Machine Learning/Project1/CerealSugar2006.txt", header=FALSE, sep="\t")
names(cs2006) <- c("Cereal", "SugarContent")
t.test(cs1979$SugarContent, cs2006$SugarContent)
##
## Welch Two Sample t-test
##
## data: cs1979$SugarContent and cs2006$SugarContent
## t = -0.42044, df = 108.99, p-value = 0.675
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -6.861317 4.459737
## sample estimates:
## mean of x mean of y 
##  26.76452  27.96531
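Since the p-value (0.675) is far above 0.05 and the confidence interval for the difference contains zero, we fail to reject the null hypothesis: there is no evidence that mean sugar content changed between 1979 and 2006. If the p-value is needed programmatically (a minor convenience, not in the original analysis), it can be pulled from the test object:

t.test(cs1979$SugarContent, cs2006$SugarContent)$p.value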
retail = read.table(file="/Users/Alex/Dropbox/College/4-Senior/Machine Learning/Project1/retail.txt", header=TRUE, sep="\t")
# Linear model: predict gross sales from cash sales, cash item count, and check sales
grossSalesPredictor <- lm(Gross.Sales ~ Gross.Cash + Cash.Items + Gross.Check, data = retail)
grossSalesPredictor
##
## Call:
## lm(formula = Gross.Sales ~ Gross.Cash + Cash.Items + Gross.Check,
## data = retail)
##
## Coefficients:
## (Intercept)   Gross.Cash   Cash.Items  Gross.Check  
##   -12.92979      0.08669      7.62916      1.16480
From this linear model, the following prediction equation can be written: \[\widehat{\text{Gross Sales}} = -12.93 + 0.087(\text{Gross Cash}) + 7.629(\text{Cash Items}) + 1.165(\text{Gross Check})\]
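As a sketch of how the fitted model could be used for prediction (the input values below are hypothetical, chosen purely for illustration):

# Hypothetical day of sales; predict() applies the fitted coefficients
newDay <- data.frame(Gross.Cash = 500, Cash.Items = 40, Gross.Check = 200)
predict(grossSalesPredictor, newdata = newDay)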
kudzu = read.table(file="/Users/Alex/Dropbox/College/4-Senior/Machine Learning/Project1/kudzu.txt", header=TRUE, sep="\t")
by(kudzu$BMD, kudzu$Treatment, sd)
## kudzu$Treatment: Control
## [1] 0.01158735
## --------------------------------------------------------
## kudzu$Treatment: HighDose
## [1] 0.01877105
## --------------------------------------------------------
## kudzu$Treatment: LowDose
## [1] 0.01151066
kudzuModel <- lm(BMD ~ Treatment, data=kudzu)
anova(kudzuModel)
## Analysis of Variance Table
##
## Response: BMD
##            Df    Sum Sq    Mean Sq F value   Pr(>F)   
## Treatment   2 0.0031856 0.00159282  7.7182 0.001397 **
## Residuals  42 0.0086676 0.00020637
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
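The p-value of about 0.0014 indicates that mean BMD differs across the kudzu treatments. A common follow-up (not part of the original analysis) is a pairwise comparison with Tukey's HSD on an aov() fit:

# Pairwise treatment comparisons with family-wise error control
TukeyHSD(aov(BMD ~ Treatment, data = kudzu))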
ss06hid = read.csv(file="/Users/Alex/Dropbox/College/4-Senior/Machine Learning/Project1/ss06hid.csv", header=TRUE, sep=",")
length(which(ss06hid$VAL==24))
## [1] 53
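Equivalently (a sketch): summing a logical vector counts its TRUE values, with na.rm guarding against missing entries.

sum(ss06hid$VAL == 24, na.rm = TRUE)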
country = read.csv(file="/Users/Alex/Dropbox/College/4-Senior/Machine Learning/Project1/Country.txt", header=TRUE, sep=",")
# Population density (people per unit area), on the log scale to reduce skew
popDensity <- log(country$Population/country$Area)
hist(popDensity, main="Histogram of Logarithm of Population Density", xlab="Log of Population Density")
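Population density across countries is typically heavily right-skewed, which is why the logarithm is taken; a side-by-side comparison (illustrative, not part of the original figure set) makes this visible:

par(mfrow = c(1, 2))
hist(country$Population/country$Area, main = "Raw Density", xlab = "Population Density")
hist(popDensity, main = "Log Density", xlab = "Log of Population Density")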
library(maps)
city = read.csv(file="/Users/Alex/Dropbox/College/4-Senior/Machine Learning/Project1/City.csv", header=TRUE, sep=",")
map()  # Draw the world map outline
points(city$Longitude, city$Latitude, col = "red", pch = 19, cex = 0.5)  # Overlay city locations