Shaz Assignment

R Markdown

Q#1: Relationship between student population and quarterly pizza sales

 # Observations for Q#1: x is the student population and y the sales
 # figure (see the axis labels used below).
 x <- c(2, 6, 8, 8, 12, 16, 20, 20, 22, 26)
 y <- c(58, 105, 88, 118, 117, 137, 157, 169, 149, 202)

 # Quick scatter plot with the default axis labels.
 plot(x = x, y = y)

 # Same scatter plot with descriptive axis labels and a title.
 plot(
   x = x,
   y = y,
   main = "Simple Regression",
   xlab = "student pop",
   ylab = "sales"
 )

 # Least-squares fit of y on x; auto-printing shows the call and the
 # estimated coefficients.
 lm(formula = y ~ x)

## 
## Call:
## lm(formula = y ~ x)
## 
## Coefficients:
## (Intercept)            x  
##          60            5

 # Full regression report for y on x: residual quantiles, coefficient
 # table with standard errors, R-squared and the overall F-test.
 summary(lm(formula = y ~ x))

## 
## Call:
## lm(formula = y ~ x)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -21.00  -9.75  -3.00  11.25  18.00 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  60.0000     9.2260   6.503 0.000187 ***
## x             5.0000     0.5803   8.617 2.55e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.83 on 8 degrees of freedom
## Multiple R-squared:  0.9027, Adjusted R-squared:  0.8906 
## F-statistic: 74.25 on 1 and 8 DF,  p-value: 2.549e-05

Q#2:

# Load the Q#2 data set from a local CSV file (machine-specific absolute
# path; the first row of the file holds the column names).
orion <- read.csv(
  file = "C:/Users/Abdul Qudoos/Downloads/orion1.csv",
  header = TRUE
)

# NOTE(review): attach() puts orion's columns on the search path. The model
# fitted for this question passes data = orion explicitly, so the attach
# looks unnecessary - confirm before removing.
attach(orion)

# Preview the first six rows.
head(orion)

##   car age miles price
## 1   1   5 57000  8500
## 2   2   4 40000 10300
## 3   3   6 77000  7000
## 4   4   5 60000  8200
## 5   5   5 49000  8900
## 6   6   5 47000  9800

# Multiple linear regression of price on age and miles, using the columns
# of the orion data frame.
model.1 <- lm(formula = price ~ age + miles, data = orion)

# Coefficient estimates, standard errors and overall fit statistics.
summary(model.1)

## 
## Call:
## lm(formula = price ~ age + miles, data = orion)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1236.4  -524.3   102.8   592.6  1154.5 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.830e+04  1.135e+03  16.130 2.19e-07 ***
## age         -9.504e+02  3.874e+02  -2.453   0.0397 *  
## miles       -8.215e-02  2.552e-02  -3.219   0.0123 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 880.5 on 8 degrees of freedom
## Multiple R-squared:  0.9361, Adjusted R-squared:  0.9201 
## F-statistic: 58.61 on 2 and 8 DF,  p-value: 1.666e-05

# Present the coefficient table of model.1 rounded to 5 decimal places.
round(coef(summary(model.1)), digits = 5)

##                Estimate Std. Error  t value Pr(>|t|)
## (Intercept) 18303.52076 1134.76186 16.12983  0.00000
## age          -950.42704  387.41888 -2.45323  0.03974
## miles          -0.08215    0.02552 -3.21889  0.01226

Q#3:

# Build the Q#3 data set by hand: 20 observations of Risk, Age, Pressure
# and smoking status (entered as "Yes"/"No" text).
data <- data.frame(
  Risk = c(
    12, 24, 13, 56, 28, 51, 18, 31, 37, 15,
    22, 36, 15, 48, 15, 36, 8, 34, 3, 37
  ),
  Age = c(
    57, 67, 58, 86, 59, 76, 56, 78, 80, 78,
    71, 70, 67, 77, 60, 82, 66, 80, 62, 59
  ),
  Pressure = c(
    152, 163, 155, 177, 196, 189, 155, 120, 135, 98,
    152, 173, 135, 209, 199, 119, 166, 125, 117, 207
  ),
  Smoker = c(
    "No", "No", "No", "Yes", "No", "Yes", "Yes", "No", "Yes", "No",
    "No", "Yes", "Yes", "Yes", "No", "Yes", "No", "Yes", "No", "Yes"
  )
)

# Recode Smoker as a numeric dummy variable: 1 for "Yes", 0 otherwise.
data[["Smoker"]] <- as.numeric(data[["Smoker"]] == "Yes")

# Show the full data frame, including the recoded column.
print(data)

##    Risk Age Pressure Smoker
## 1    12  57      152      0
## 2    24  67      163      0
## 3    13  58      155      0
## 4    56  86      177      1
## 5    28  59      196      0
## 6    51  76      189      1
## 7    18  56      155      1
## 8    31  78      120      0
## 9    37  80      135      1
## 10   15  78       98      0
## 11   22  71      152      0
## 12   36  70      173      1
## 13   15  67      135      1
## 14   48  77      209      1
## 15   15  60      199      0
## 16   36  82      119      1
## 17    8  66      166      0
## 18   34  80      125      1
## 19    3  62      117      0
## 20   37  59      207      1

# Regress Risk on Age, Pressure and the 0/1 Smoker indicator.
model <- lm(formula = Risk ~ Age + Pressure + Smoker, data = data)

# Report coefficient estimates, their significance and overall fit.
summary(model)

## 
## Call:
## lm(formula = Risk ~ Age + Pressure + Smoker, data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.1064  -1.5715   0.4225   3.4855   8.5561 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -91.75950   15.22276  -6.028 1.76e-05 ***
## Age           1.07674    0.16596   6.488 7.49e-06 ***
## Pressure      0.25181    0.04523   5.568 4.24e-05 ***
## Smoker        8.73987    3.00082   2.912   0.0102 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.757 on 16 degrees of freedom
## Multiple R-squared:  0.8735, Adjusted R-squared:  0.8498 
## F-statistic: 36.82 on 3 and 16 DF,  p-value: 2.064e-07

Q#4:

# Load the Q#4 hiring data from a local CSV file (machine-specific absolute
# path; header row is read by default).
hiring <- read.csv(file = "C:/Users/Abdul Qudoos/Downloads/hire (1).csv")

# Make the columns of hiring reachable by bare name.
# NOTE(review): the model for this question passes data = hiring, so the
# attach looks unnecessary - confirm before removing.
attach(hiring)

# Preview the first six rows of the data frame.
head(hiring)

##   hire educ exp genderM
## 1    0    6   2       0
## 2    0    4   0       1
## 3    1    6   6       1
## 4    1    6   3       1
## 5    0    4   1       0
## 6    1    8   3       0

 # Estimate the logistic regression (binomial family) of hire on educ,
 # exp and genderM.
 model.2 <- glm(
   formula = hire ~ educ + exp + genderM,
   family = binomial(),
   data = hiring
 )

 # Show the fitted model: coefficients, deviances and AIC.
 summary(model.2)

## 
## Call:
## glm(formula = hire ~ educ + exp + genderM, family = binomial(), 
##     data = hiring)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept) -14.2483     6.0805  -2.343   0.0191 *
## educ          1.1549     0.6023   1.917   0.0552 .
## exp           0.9098     0.4293   2.119   0.0341 *
## genderM       5.6037     2.6028   2.153   0.0313 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 35.165  on 27  degrees of freedom
## Residual deviance: 14.735  on 24  degrees of freedom
## AIC: 22.735
## 
## Number of Fisher Scoring iterations: 7

Q#5:

# Load the Q#5 data set from a local CSV file (machine-specific absolute
# path).
permanent <- read.csv(
  file = "C:/Users/Abdul Qudoos/Downloads/permanentJob (1).csv"
)

# Make the columns of permanent reachable by bare name.
# NOTE(review): this attach reports that y is masked by the global y
# created in Q#1 and that age masks the attached orion column, so bare
# names are ambiguous here. The Q#5 model passes data = permanent, which
# avoids the problem - confirm whether the attach is needed at all.
attach(permanent)

## The following object is masked _by_ .GlobalEnv:
## 
##     y

## The following object is masked from orion:
## 
##     age

# Preview the first six rows of the permanent data frame.
head(permanent, n = 6L)

##   age edupost edupri edusec w y
## 1  57       0      0      0 0 0
## 2  48       1      0      0 0 1
## 3  38       0      0      0 0 0
## 4  27       0      0      1 0 0
## 5  23       0      0      1 1 0
## 6  22       0      0      1 0 0

 # Estimate the logistic regression (binomial family) of y on age, the
 # three education indicators and w, using the permanent data frame.
 model.3 <- glm(
   formula = y ~ age + edupri + edupost + edusec + w,
   family = binomial(),
   data = permanent
 )

 # Show the fitted model: coefficients, deviances and AIC.
 summary(model.3)

## 
## Call:
## glm(formula = y ~ age + edupri + edupost + edusec + w, family = binomial(), 
##     data = permanent)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -2.60823    0.49164  -5.305 1.13e-07 ***
## age          0.05491    0.01224   4.486 7.26e-06 ***
## edupri       0.20809    0.38280   0.544 0.586718    
## edupost      3.56272    1.05540   3.376 0.000736 ***
## edusec       1.12428    0.35429   3.173 0.001507 ** 
## w            0.14253    0.34831   0.409 0.682392    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 354.71  on 260  degrees of freedom
## Residual deviance: 301.80  on 255  degrees of freedom
## AIC: 313.8
## 
## Number of Fisher Scoring iterations: 5

Q#6:

# using hiring data 
library(tree)

## Warning: package 'tree' was built under R version 4.3.2

# Reload the hiring data for Q#6 from the same local CSV file used in Q#4.
hiring <- read.csv(file = "C:/Users/Abdul Qudoos/Downloads/hire (1).csv")

# NOTE(review): hiring was already attached in Q#4, so attaching it again
# masks the earlier copy's columns. The tree model below passes
# data = hiring - confirm whether this attach is needed.
attach(hiring)

## The following objects are masked from hiring (pos = 5):
## 
##     educ, exp, genderM, hire

# Preview the first six rows of the reloaded hiring data.
head(hiring, n = 6L)

##   hire educ exp genderM
## 1    0    6   2       0
## 2    0    4   0       1
## 3    1    6   6       1
## 4    1    6   3       1
## 5    0    4   1       0
## 6    1    8   3       0

# The response must be a factor for tree() to fit a classification tree.
hiring$hire <- as.factor(hiring$hire)

# Seed kept from the original script.
set.seed(100)

# Fit the classification tree of hire on educ, exp and genderM.
# Deviance (cross-entropy) is the default impurity measure.
hiring_model <- tree(hire ~ educ + exp + genderM, data = hiring)

# Draw the full tree and label its splits and leaves.
plot(hiring_model)
text(hiring_model, pretty = 0)

# Prune to a 4-leaf tree, which is more manageable and interpretable.
# The result is stored as pruned_model rather than prune.tree so that the
# object does not shadow the prune.tree() function from the tree package
# (e.g. when it is passed by name, as in FUN = prune.tree).
pruned_model <- prune.tree(hiring_model, best = 4)

# Draw and label the pruned tree.
plot(pruned_model)
text(pruned_model, pretty = 0)

Shaz Assignment

Shaz Salim

2023-12-13

R Markdown