# Summary statistics of sepal width, computed separately for each species.
by(data = iris$Sepal.Width, INDICES = iris$Species, FUN = summary)
## iris$Species: setosa
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.300 3.200 3.400 3.428 3.675 4.400
## --------------------------------------------------------
## iris$Species: versicolor
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.000 2.525 2.800 2.770 3.000 3.400
## --------------------------------------------------------
## iris$Species: virginica
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.200 2.800 3.000 2.974 3.175 3.800
# Summary statistics of sepal width for each iris species.
# (The `$` operators had been mangled into `\(` / `\)`, which does not parse.)
by(iris$Sepal.Width, iris$Species, summary)

# Cylinder count as a factor; not used by the boxplot below.
cy12 <- as.factor(mtcars$cyl)

# Miles per gallon grouped by number of cylinders. Bare column names are
# resolved through `data`, instead of bypassing it with `mtcars$`.
boxplot(
  mpg ~ cyl,
  data = mtcars,
  xlab = "Number of Cylinders",
  ylab = "Miles per Gallon"
)
# Simulation: distribution of the mean of 75 draws from Uniform(0, 6),
# repeated 1000 times.
set.seed(987)
# Not used in the visible code, but this draw advances the random number
# stream, so it stays in place to keep the simulated values unchanged.
small <- sample(0:6)
# One replicate per column (75 rows x 1000 columns); a single runif() call
# filled column by column consumes the random stream in the same order as
# drawing each replicate separately.
large <- matrix(runif(75 * 1000, min = 0, max = 6), nrow = 75, ncol = 1000)
# Mean of each replicate (column).
sampleMeans <- vapply(
  seq_len(ncol(large)),
  function(j) mean(large[, j]),
  numeric(1)
)
hist(sampleMeans, main = "Sample Mean Distribution", xlab = "Sample Means")
sd(sampleMeans)
## [1] 0.1976011
mean(sampleMeans)
## [1] 3.002946
The Central Limit Theorem is illustrated here: the mean of the sample means (about 3.00) is close to the expected mean of 3, and their standard deviation (about 0.198) is close to the expected standard error of 0.2, i.e. (6/sqrt(12))/sqrt(75).
# Load the geese data and compare the two estimate columns on a log scale.
# NOTE(review): the path is absolute and machine-specific.
geese <- read.csv(file ='C:/Users/Shahid/Desktop/R/Data/geese.txt')
AG <- log(geese$Aestimate)
BG <- log(geese$Bestimate)

# Draw the two histograms side by side. par() returns the previous settings,
# which are restored afterwards so later plots are not squeezed into the
# same 1 x 2 layout.
old_par <- par(mfrow = c(1, 2))
hist(AG, main = "Aestimate", xlab = "Log of Aestimate")
hist(BG, main = "Bestimate", xlab = "Log of Bestimate")
par(old_par)
# Cereal sugar content in 1979 versus 2006. Both files are read without a
# header row, so the two columns are labelled by hand.
CerealSugar1979 <- read.csv(
  "C:/Users/Shahid/Desktop/R/Data/CerealSugar1979.txt",
  header = FALSE
)
CerealSugar2006 <- read.csv(
  "C:/Users/Shahid/Desktop/R/Data/CerealSugar2006.txt",
  header = FALSE
)
colnames(CerealSugar1979) <- c("Cereal", "SugarContent")
colnames(CerealSugar2006) <- c("Cereal", "SugarContent")

# Second column of each table: the sugar measurements.
Sugar1979 <- CerealSugar1979[["SugarContent"]]
Sugar2006 <- CerealSugar2006[["SugarContent"]]

# Pooled-variance two-sample t-test. The alternative ("two.sided"), mu (0),
# paired (FALSE) and conf.level (0.95) are left at t.test()'s defaults.
t.test(Sugar1979, Sugar2006, var.equal = TRUE)
##
## Two Sample t-test
##
## data: Sugar1979 and Sugar2006
## t = -0.40946, df = 109, p-value = 0.683
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -7.013194 4.611614
## sample estimates:
## mean of x mean of y
## 26.76452 27.96531
The p-value is 0.683, which is well above 0.05, so we fail to reject the null hypothesis. There is no significant evidence that the mean sugar content differs between 1979 and 2006.
# Multiple linear regression of gross sales on gross cash, number of cash
# items and gross check amounts.
retail <- read.delim("C:/Users/Shahid/Desktop/R/Data/retail.txt")
# Bare column names are looked up in `data`. Writing retail$... inside the
# formula bypasses `data`, gives unwieldy coefficient names and prevents
# predict() from using `newdata`.
lm(Gross.Sales ~ Gross.Cash + Cash.Items + Gross.Check, data = retail)
##
## Call:
## lm(formula = Gross.Sales ~ Gross.Cash + Cash.Items + Gross.Check,
## data = retail)
##
## Coefficients:
## (Intercept) Gross.Cash Cash.Items Gross.Check
## -12.92979 0.08669 7.62916 1.16480
Gross.Sales = -12.92979 + 0.08669 * Gross.Cash + 7.62916 * Cash.Items + 1.16480 * Gross.Check
# Standard deviation of BMD within each treatment group.
kudzu <- read.csv("C:/Users/Shahid/Desktop/R/Data/kudzu.txt", sep="")
# Store the result under its own name so the `kudzu` data frame is not
# overwritten by the per-group summary.
kudzu_sd <- by(kudzu$BMD, kudzu$Treatment, sd)
kudzu_sd
## kudzu$Treatment: Control
## [1] 0.01158735
## --------------------------------------------------------
## kudzu$Treatment: HighDose
## [1] 0.01877105
## --------------------------------------------------------
## kudzu$Treatment: LowDose
## [1] 0.01151066
# One-way ANOVA of BMD across treatment groups.
# Load the data fresh so the model is fitted on the raw data frame.
kudzu <- read.csv("C:/Users/Shahid/Desktop/R/Data/kudzu.txt", sep="")
# Bare column names are resolved through `data`; writing kudzu$... inside
# the formula would bypass it.
kudzu1 <- lm(BMD ~ Treatment, data = kudzu)
anova(kudzu1)
## Analysis of Variance Table
##
## Response: BMD
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 0.0031856 0.00159282 7.7182 0.001397 **
## Residuals 42 0.0086676 0.00020637
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
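The ratio of the largest to the smallest group standard deviation (0.0188 / 0.0115, about 1.63) is less than 2, so the equal-variance condition for a one-way ANOVA is reasonable. The p-value is 0.001397, so we reject the null hypothesis. There is significant evidence that the treatment level has an effect on BMD.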
# Count how many entries in column V37 of the headerless file equal 24.
Random <- read.csv(
  "C:/Users/Shahid/Desktop/R/Data/Random.csv",
  header = FALSE
)
sum(Random[["V37"]] == 24)
## [1] 53
library(readxl)
# Histogram of log population density (population divided by area).
MONDIAL <- read_excel("C:/Users/Shahid/Desktop/R/Data/exporteda.xlsx")
Data <- with(MONDIAL, log(Population / Area))
hist(Data, main = "Population Density", xlab = "Log of Population Density")
# Plot each city as a small blue point on a world map.
City <- read_excel("C:/Users/Shahid/Desktop/R/Data/City.xlsx")
library(maps)
# Draw the base map first; points() then adds to the existing plot.
map("world")
longitude <- City$Longitude
latitude <- City$Latitude
points(x = longitude, y = latitude, col = "blue", cex = 0.4)