getwd() #see the directory
## [1] "I:/Varsity DOCU/WM ASDS/Semester 3/WMASDS03 Statistical Infarence/Code"
#setwd() #set the directory
#######putting values for a variable/ in a array
x<-3
class (x); str(x)
## [1] "numeric"
## num 3
x=3
str(x) ; class (x)
## num 3
## [1] "numeric"
######two types of observations: string/factor OR integers/numbers
x<-"factors"
x
## [1] "factors"
class(x); str(x)
## [1] "character"
## chr "factors"
x1<-1:10; x2<-c(1:10); x3<-c(1,10)
x1; x2; x3
## [1] 1 2 3 4 5 6 7 8 9 10
## [1] 1 2 3 4 5 6 7 8 9 10
## [1] 1 10
class(x1); str(x1)
## [1] "integer"
## int [1:10] 1 2 3 4 5 6 7 8 9 10
as.numeric(x1)
## [1] 1 2 3 4 5 6 7 8 9 10
class(x1); str(x1)
## [1] "integer"
## int [1:10] 1 2 3 4 5 6 7 8 9 10
#######4 arithmatic operations+; -; *; /
######square/cube etcsqrt; x^2; x^3
Let’s make a 2 X 3 matrix:
xx<-1:6
mat<-matrix(xx, 2,3)
mat1<-matrix(xx, 2,3,byrow=TRUE)
class(mat);str(mat)
## [1] "matrix" "array"
## int [1:2, 1:3] 1 2 3 4 5 6
Let’s make a 2 X 3 matrix:
x1<-1:3;x2<-4:6
mat<-rbind(x1,x2)
class(mat);str(mat)
## [1] "matrix" "array"
## int [1:2, 1:3] 1 4 2 5 3 6
## - attr(*, "dimnames")=List of 2
## ..$ : chr [1:2] "x1" "x2"
## ..$ : NULL
x<-1:3
as.matrix(x)
## [,1]
## [1,] 1
## [2,] 2
## [3,] 3
class(x);str(x)
## [1] "integer"
## int [1:3] 1 2 3
Let’s consider a buit in data of R: iris
data<-iris
class(data); str(data); dim(data)
## [1] "data.frame"
## 'data.frame': 150 obs. of 5 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
## [1] 150 5
#write.table(data,”data.txt”)
summary(data)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
## Median :5.800 Median :3.000 Median :4.350 Median :1.300
## Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
## 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
## Species
## setosa :50
## versicolor:50
## virginica :50
##
##
##
#making histogram for 4 numeric variables separately in separate window
hist(data$Sepal.Length)
hist(data$Sepal.Width)
hist(data$Petal.Length)
hist(data$Petal.Width)
#making histogram for 4 numeric variables separately in same window
par(mfrow=c(2,2))
hist(data$Sepal.Length)
hist(data$Sepal.Width)
hist(data$Petal.Length)
hist(data$Petal.Width)
par(mfrow=c(1,1))
#making histogram for 4 numeric variables separately in same window by LOOPING
ndata <- data[,-5]
par(mfrow=c(2,2))
for(i in 1:ncol(ndata)){
hist(ndata[,i])
}
par(mfrow=c(1,1))
#making pie chart for the categorical variable
table<-table(iris[,5])
table
##
## setosa versicolor virginica
## 50 50 50
pie(table,lmain="Pie Chart of Species")
## Warning in text.default(1.1 * P$x, 1.1 * P$y, labels[i], xpd = TRUE, adj =
## ifelse(P$x < : "lmain" is not a graphical parameter
## Warning in text.default(1.1 * P$x, 1.1 * P$y, labels[i], xpd = TRUE, adj =
## ifelse(P$x < : "lmain" is not a graphical parameter
## Warning in text.default(1.1 * P$x, 1.1 * P$y, labels[i], xpd = TRUE, adj =
## ifelse(P$x < : "lmain" is not a graphical parameter
## Warning in title(main = main, ...): "lmain" is not a graphical parameter
####################3D pie install.packages("plotrix")
#library(plotrix)
#pie3D(table,main="Pie Chart of Species")
plot(table)
prop.table(table)
##
## setosa versicolor virginica
## 0.3333333 0.3333333 0.3333333
===========================================================================================
Lecture 1
Mu (µ) -> N (Population) [We need to analysis it with systematic way to select the sample size] x_bar -> n (Sample) [Must be represent of N]
µ_hat = x_bar µ_hat - x_bar -> minimum [(R1,R2)-> Interval estimate ]
H0: µ_hat = 3.25 H1: µ_hat != 3.25
Hypothesis is an unproven statement.
NUll means their are no different between population mean and sample mean
Data and objective direct what should we do
| Categorical | Count Data |
|---|---|
| Gender | Phone call per day |
Property of point estimator: * Unbiased * Consistency * Efficiency
@ If data comes from normal distribution it’s have a mean @ When we find the distribution then we go to the next step
Unbiased Condition µ_hat - x_bar -> minimum µ_hat - E(x_bar) = 0; Unbiased [if 0 increase that’s means bias high,if 0 goes negative its means low bias ]
Consistency
Efficiency Variation minimum consistency; at the time of comparison which have low variation
Important Distribution Normal,Binomial,Poision, Chi-Square,T-Distribution, F-Distribution
============================================================================================
Lecture 2
xx<-1:4
mat<-matrix(xx, 2,2)
mat
## [,1] [,2]
## [1,] 1 3
## [2,] 2 4
dmat <- as.data.frame(mat)
dmat
## V1 V2
## 1 1 3
## 2 2 4
** If it’s a data frame than we can directly call it, if it’s a matrix then we call data from it’s location. Basically matrix used in Decomposition
=> hist ; Check the distribution of the data
data <- iris
head(data)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
hist(data$Sepal.Length, main="Figure",xlab="Sepal Length", ylab="")
par(mfrow=c(2,2)) #printing 2 row and 2 columns
for (i in 1:4){
hist(data[,i])
}
par(mfrow=c(1,1))
m <- c()
for (i in 1:4){
m[i]<-mean(data[,i])
print(m[i])
}
## [1] 5.843333
## [1] 3.057333
## [1] 3.758
## [1] 1.199333
m <- list()
for (i in 1:4){
m[i]<-mean(data[,i])
print(m[i])
}
## [[1]]
## [1] 5.843333
##
## [[1]]
## [1] 3.057333
##
## [[1]]
## [1] 3.758
##
## [[1]]
## [1] 1.199333
@ For non numeric data
tab <- table(data$Species)
print(tab)
##
## setosa versicolor virginica
## 50 50 50
prop.table(tab) #proportion of the table
##
## setosa versicolor virginica
## 0.3333333 0.3333333 0.3333333
When there are so many category then we skip the pie chart
Moments works with fact
For wide application we need to drive equation
For well judgment we need to understand data set very well
For distribution moments give different types of output
MLE Model What is the chance to happen a event = Probability. How this condition full fill under this condition and how frequently it happen is called MLE
f(x) = 1/Π √(2)σ e^-1/2*((x-µ)/σ)^2 ; -α <= x <= +α
L(x)/L(x;µ,σ) [attempts to find the unknown parameters µ,σ function goes maximum]
Example of MLE:
#install.packages("mle.tools")
library(mle.tools)
## Warning: package 'mle.tools' was built under R version 4.2.3
x <- rnorm(n = 100, mean = 0.0, sd = 1.0)
hist(x)
d<-density(x)
plot(d$x,d$y,col=2)
pdf <- quote(1 / (sqrt(2 * pi) * sigma) * exp(-0.5 / sigma ^ 2 * (x - mu) ^ 2))
lpdf <- quote(- log(sigma) - 0.5 / sigma ^ 2 * (x - mu) ^ 2)
mu.hat <- mean(x)
sigma.hat<-sqrt(var(x))
r<-coxsnell.bc(density = pdf,
logdensity = lpdf,
n = length(x),
parms = c("mu", "sigma"),
mle = c(mu.hat, sigma.hat),
lower = '-Inf',
upper = 'Inf')
print(mu.hat)
## [1] 0.02422493
print(r$mle[1])
## mu
## 0.02422493
#abline(v=mu.hat,col=4)
#abline(v=r$mle[1],col=1)
=============================================================================================
Third Class 26.05.23
set.seed(100) #simple random sampling without replacement
rnorm(5)
## [1] -0.50219235 0.13153117 -0.07891709 0.88678481 0.11697127
set.seed(100) #fixed variable
rnorm(5)
## [1] -0.50219235 0.13153117 -0.07891709 0.88678481 0.11697127
pop <- rnorm(10000,10,1) #population distribution
hist(pop) #Less expensive visualization, Very informal judgmental representation
est1 <- replicate(expr = mean(sample(x=pop,size=5)),n=25000)
est2 <- replicate(expr = mean(sample(x=pop,size=25)),n=25000)
par(mfrow=c(2,2))
hist(est1);hist(est2);hist(pop)
ss <- sample(x=pop,size = 1000) #sample distribution #only taking a sample of size 1000
Checking the properties of a good estimator in R: An Example
# plot density estimate
plot(density(ss),col="green",lwd = 2,ylim = c(0, 2),xlab ="estimates",main = "Sampling Distributions of Unbiased Estimators") #here there are no length for x because population range same
# add density estimate for the distribution of the sample
#mean with n=5 to the plot
lines(density(est1),col="steelblue",lwd = 2)
# add density estimate for the distribution of the sample mean with n=25 to the plot
lines(density(est2),col="red2",lwd = 2)
# add a vertical line at the true parameter
abline(v = 10, lty = 2) #Mu 10 for this that's why we take 10
# add N(10,1)/pop. density to the plot
curve(dnorm(x, mean = 10),lwd = 2,lty = 2,add = T)
############################moments install.packages('moments')
library(moments)
x<-sample(pop,size=100,replace=T)
# Raw Moments/Central Moments
all.moments(x,order.max=4,central=F)
## [1] 1.00000 10.12434 103.38861 1064.75082 11056.53480
hist(x);qqnorm(x);qqline(x,col="red")
###Interval Estimator
pnorm(0.05)
## [1] 0.5199388
#z.test(x=sample,sigma.x = sqrt(var(sample)),conf.level = 0.95)