# Q#1: Relationship between student population and quarterly pizza sales
# Q1 data: ten paired observations of student population (x) and quarterly
# pizza sales (y). The names x and y are kept so the model call still reads
# lm(y ~ x), matching the recorded output.
x <- c(2, 6, 8, 8, 12, 16, 20, 20, 22, 26)
y <- c(58, 105, 88, 118, 117, 137, 157, 169, 149, 202)

# One labelled scatter plot of sales against student population; a bare
# plot(x, y) beforehand would only draw the same points without labels.
plot(x, y, xlab = "student pop", ylab = "sales", main = "Simple Regression")

# Fit the simple linear regression once and reuse the fitted object for both
# the short coefficient print-out and the full summary, instead of refitting.
pizza_fit <- lm(y ~ x)
pizza_fit
##
## Call:
## lm(formula = y ~ x)
##
## Coefficients:
## (Intercept) x
## 60 5
summary(pizza_fit)
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -21.00 -9.75 -3.00 11.25 18.00
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 60.0000 9.2260 6.503 0.000187 ***
## x 5.0000 0.5803 8.617 2.55e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.83 on 8 degrees of freedom
## Multiple R-squared: 0.9027, Adjusted R-squared: 0.8906
## F-statistic: 74.25 on 1 and 8 DF, p-value: 2.549e-05
# Q#2:
# Read the orion data set from CSV, taking column names from the first row.
# The path is machine-specific; adjust it to wherever the file is stored.
orion <- read.csv(
  file = "C:/Users/Abdul Qudoos/Downloads/orion1.csv",
  header = TRUE
)
# NOTE(review): attach() puts the columns of `orion` on the search path. The
# model fitted below passes data = orion explicitly, so nothing visible here
# needs it; it is kept in case later code refers to the bare column names.
attach(orion)
# Preview the first rows of the data frame.
head(orion)
## car age miles price
## 1 1 5 57000 8500
## 2 2 4 40000 10300
## 3 3 6 77000 7000
## 4 4 5 60000 8200
## 5 5 5 49000 8900
## 6 6 5 47000 9800
# Multiple linear regression of price on age and miles, using the columns of
# `orion` explicitly via the data argument.
model.1 <- lm(formula = price ~ age + miles, data = orion)
# Full model summary: residuals, coefficient table, R-squared and F-test.
summary(model.1)
##
## Call:
## lm(formula = price ~ age + miles, data = orion)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1236.4 -524.3 102.8 592.6 1154.5
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.830e+04 1.135e+03 16.130 2.19e-07 ***
## age -9.504e+02 3.874e+02 -2.453 0.0397 *
## miles -8.215e-02 2.552e-02 -3.219 0.0123 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 880.5 on 8 degrees of freedom
## Multiple R-squared: 0.9361, Adjusted R-squared: 0.9201
## F-statistic: 58.61 on 2 and 8 DF, p-value: 1.666e-05
# Present the coefficient table of model.1 rounded to 5 decimal places.
round(coef(summary(model.1)), digits = 5)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18303.52076 1134.76186 16.12983 0.00000
## age -950.42704 387.41888 -2.45323 0.03974
## miles -0.08215 0.02552 -3.21889 0.01226
# Q#3:
# Assemble the 20-observation data set with columns Risk, Age, Pressure and
# Smoker (recorded as "Yes"/"No" strings at this point).
data <- data.frame(
  Risk = c(
    12, 24, 13, 56, 28, 51, 18, 31, 37, 15,
    22, 36, 15, 48, 15, 36, 8, 34, 3, 37
  ),
  Age = c(
    57, 67, 58, 86, 59, 76, 56, 78, 80, 78,
    71, 70, 67, 77, 60, 82, 66, 80, 62, 59
  ),
  Pressure = c(
    152, 163, 155, 177, 196, 189, 155, 120, 135, 98,
    152, 173, 135, 209, 199, 119, 166, 125, 117, 207
  ),
  Smoker = c(
    "No", "No", "No", "Yes", "No", "Yes", "Yes", "No", "Yes", "No",
    "No", "Yes", "Yes", "Yes", "No", "Yes", "No", "Yes", "No", "Yes"
  )
)
# Recode Smoker as a numeric 0/1 dummy: 1 for "Yes", 0 otherwise.
data[["Smoker"]] <- as.numeric(data[["Smoker"]] == "Yes")
# Show the full data frame.
print(data)
## Risk Age Pressure Smoker
## 1 12 57 152 0
## 2 24 67 163 0
## 3 13 58 155 0
## 4 56 86 177 1
## 5 28 59 196 0
## 6 51 76 189 1
## 7 18 56 155 1
## 8 31 78 120 0
## 9 37 80 135 1
## 10 15 78 98 0
## 11 22 71 152 0
## 12 36 70 173 1
## 13 15 67 135 1
## 14 48 77 209 1
## 15 15 60 199 0
## 16 36 82 119 1
## 17 8 66 166 0
## 18 34 80 125 1
## 19 3 62 117 0
## 20 37 59 207 1
# Regress Risk on Age, Pressure and the 0/1 Smoker dummy, taking all
# variables from the `data` data frame.
model <- lm(formula = Risk ~ Age + Pressure + Smoker, data = data)
# Report the fitted model: residuals, coefficient table and fit statistics.
summary(model)
##
## Call:
## lm(formula = Risk ~ Age + Pressure + Smoker, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.1064 -1.5715 0.4225 3.4855 8.5561
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -91.75950 15.22276 -6.028 1.76e-05 ***
## Age 1.07674 0.16596 6.488 7.49e-06 ***
## Pressure 0.25181 0.04523 5.568 4.24e-05 ***
## Smoker 8.73987 3.00082 2.912 0.0102 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.757 on 16 degrees of freedom
## Multiple R-squared: 0.8735, Adjusted R-squared: 0.8498
## F-statistic: 36.82 on 3 and 16 DF, p-value: 2.064e-07
# Q#4:
# Read the hiring data set from CSV (machine-specific path).
hiring <- read.csv(file = "C:/Users/Abdul Qudoos/Downloads/hire (1).csv")
# NOTE(review): attach() makes the columns reachable by bare name. The model
# fitted below passes data = hiring explicitly, so nothing visible here needs
# it; it is kept in case later code refers to the bare column names.
attach(hiring)
# Preview the first rows of the data frame.
head(hiring)
## hire educ exp genderM
## 1 0 6 2 0
## 2 0 4 0 1
## 3 1 6 6 1
## 4 1 6 3 1
## 5 0 4 1 0
## 6 1 8 3 0
# Logistic regression (binomial family) of hire on educ, exp and genderM,
# using the columns of `hiring` explicitly.
model.2 <- glm(
  formula = hire ~ educ + exp + genderM,
  family = binomial(),
  data = hiring
)
# Show the fitted model: coefficient table, deviances and AIC.
summary(model.2)
##
## Call:
## glm(formula = hire ~ educ + exp + genderM, family = binomial(),
## data = hiring)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -14.2483 6.0805 -2.343 0.0191 *
## educ 1.1549 0.6023 1.917 0.0552 .
## exp 0.9098 0.4293 2.119 0.0341 *
## genderM 5.6037 2.6028 2.153 0.0313 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 35.165 on 27 degrees of freedom
## Residual deviance: 14.735 on 24 degrees of freedom
## AIC: 22.735
##
## Number of Fisher Scoring iterations: 7
# Q#5:
# Read the permanent-job data set from CSV (machine-specific path).
permanent <- read.csv(
  file = "C:/Users/Abdul Qudoos/Downloads/permanentJob (1).csv"
)
# Put the columns of `permanent` on the search path. As the messages below
# record, bare `y` still resolves to the global `y`, and `age` now hides the
# `age` column attached from `orion`, so bare names are ambiguous here. The
# model fitted below passes data = permanent explicitly.
attach(permanent)
## The following object is masked _by_ .GlobalEnv:
##
## y
## The following object is masked from orion:
##
## age
# Preview the first rows of the data frame.
head(permanent)
## age edupost edupri edusec w y
## 1 57 0 0 0 0 0
## 2 48 1 0 0 0 1
## 3 38 0 0 0 0 0
## 4 27 0 0 1 0 0
## 5 23 0 0 1 1 0
## 6 22 0 0 1 0 0
# Logistic regression (binomial family) of y on age, the three education
# dummies and w. data = permanent makes the formula use the data frame's own
# columns rather than any same-named global objects.
model.3 <- glm(
  formula = y ~ age + edupri + edupost + edusec + w,
  family = binomial(),
  data = permanent
)
# Show the fitted model: coefficient table, deviances and AIC.
summary(model.3)
##
## Call:
## glm(formula = y ~ age + edupri + edupost + edusec + w, family = binomial(),
## data = permanent)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.60823 0.49164 -5.305 1.13e-07 ***
## age 0.05491 0.01224 4.486 7.26e-06 ***
## edupri 0.20809 0.38280 0.544 0.586718
## edupost 3.56272 1.05540 3.376 0.000736 ***
## edusec 1.12428 0.35429 3.173 0.001507 **
## w 0.14253 0.34831 0.409 0.682392
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 354.71 on 260 degrees of freedom
## Residual deviance: 301.80 on 255 degrees of freedom
## AIC: 313.8
##
## Number of Fisher Scoring iterations: 5
# Q#6:
# Classification tree on the hiring data.
library(tree)
## Warning: package 'tree' was built under R version 4.3.2
# Re-read the CSV so `hire` starts from its raw form before it is converted
# to a factor further down.
hiring <- read.csv(file = "C:/Users/Abdul Qudoos/Downloads/hire (1).csv")
# No second attach(hiring): the same columns are already on the search path
# from the earlier attach, so repeating it only stacks a duplicate entry and
# triggers "masked from hiring" messages. The tree below is fitted with
# data = hiring, so it does not depend on attached columns.
# Preview the first rows of the data frame.
head(hiring)
## hire educ exp genderM
## 1 0 6 2 0
## 2 0 4 0 1
## 3 1 6 6 1
## 4 1 6 3 1
## 5 0 4 1 0
## 6 1 8 3 0
# The response must be a factor for the tree fit (per the original note).
hiring$hire <- as.factor(hiring$hire)
# Fix the RNG seed, as in the original script.
set.seed(100)
# Fit the tree of hire on educ, exp and genderM.
# Note: deviance (cross-entropy) is the default impurity measure.
hiring_model <- tree(hire ~ educ + exp + genderM, data = hiring)
# Draw the full tree and label its nodes.
plot(hiring_model)
text(hiring_model, pretty = 0)

# Pruning the tree: pruning gives a more manageable and interpretable tree.
# The result gets its own name instead of `prune.tree`, so the tree package's
# prune.tree() function is not shadowed by a fitted object (which would break
# any later use of the name as a value, e.g. FUN = prune.tree).
pruned_hiring_tree <- prune.tree(hiring_model, best = 4) # a 4-leaf tree
plot(pruned_hiring_tree)
text(pruned_hiring_tree, pretty = 0)