Problem Set 6

library(BayesFactor)

## Warning: package 'BayesFactor' was built under R version 3.6.2

## Loading required package: coda

## Warning: package 'coda' was built under R version 3.6.2

## Loading required package: Matrix

## ************
## Welcome to BayesFactor 0.9.12-4.2. If you have questions, please contact Richard Morey (richarddmorey@gmail.com).
## 
## Type BFManual() to open the manual.
## ************

set.seed(1000)
indoor<- sample(c("A","B","C","D"),prob=c(5,8,25,6),
size=200,replace=T)
outdoor <-sample(c("A","B","C","D"),prob=c(5,20,3,6),
size=200,replace=T)
tab1 <- table(indoor,outdoor)
tab1

##       outdoor
## indoor  A  B  C  D
##      A  4 11  3  5
##      B  9 23  1 10
##      C 19 59 10 19
##      D  5 12  2  8

tab2 <- table(c(indoor,outdoor),rep(c("I","O"),each=200))
tab2

##    
##       I   O
##   A  23  37
##   B  43 105
##   C 107  16
##   D  27  42

CHISQ1<-chisq.test(tab1)

## Warning in chisq.test(tab1): Chi-squared approximation may be incorrect

CHISQ1

## 
##  Pearson's Chi-squared test
## 
## data:  tab1
## X-squared = 5.0836, df = 9, p-value = 0.827

CHISQ2<-chisq.test(tab2)
CHISQ2

## 
##  Pearson's Chi-squared test
## 
## data:  tab2
## X-squared = 99.826, df = 3, p-value < 2.2e-16

ContiBF1<-contingencyTableBF(tab1 ,
                   sampleType = "indepMulti", fixedMargin = "cols")
ContiBF1

## Bayes factor analysis
## --------------
## [1] Non-indep. (a=1) : 0.000346898 ±0%
## 
## Against denominator:
##   Null, independence, a = 1 
## ---
## Bayes factor type: BFcontingencyTable, independent multinomial

ContiBF2<-contingencyTableBF(tab2 ,
                   sampleType = "indepMulti", fixedMargin = "cols")

ContiBF2

## Bayes factor analysis
## --------------
## [1] Non-indep. (a=1) : 1.21942e+21 ±0%
## 
## Against denominator:
##   Null, independence, a = 1 
## ---
## Bayes factor type: BFcontingencyTable, independent multinomial

Problem 2:

cardat <- read.table(text="
age gender type origin origin.last carval carval.last
34 F SUV US US 16400 15800
31 M Truck US Europe 16900 16000
47 M Sedan US US 18800 17100
21 F Sedan Japan Japan 16000 15500
42 M SUV US Japan 16800 16100
43 F SUV US US 17200 16300
60 F Truck Europe Europe 19900 17800
37 M Truck Europe Europe 17100 16200
46 F SUV Japan Japan 16900 16300
27 M Sedan US US 16200 15700
50 M SUV US US 18800 17100
64 F SUV Japan US 50700 31700
33 M SUV Japan Japan 16500 15900
39 M Truck US Europe 17000 16200
58 F Sedan Japan US 19400 17500
53 F SUV US Europe 19200 17400
29 F Sedan US Japan 16300 15700
37 F Sedan US US 17300 16300
37 M SUV US Japan 18200 16700
54 F Sedan Japan Japan 24500 19800
46 F SUV Japan Europe 18000 16700
55 F SUV US Japan 28900 21700
46 F Truck US Europe 16600 16100
57 M SUV Europe Europe 24300 19700
40 M SUV US US 16800 16100
27 M Sedan Japan US 16900 16000
58 M SUV Europe Europe 20300 17900
64 M Truck US US 40600 27100
47 M Truck US Europe 18400 16900
32 M Truck US US 15900 15600
43 F Sedan Japan US 17200 16300
66 M Truck Europe Europe 19100 17500
36 F SUV US Japan 16900 16100
68 M Truck US US 69300 40100
54 F Sedan Japan US 17000 16400
64 M Truck Japan Europe 34900 24600
27 M SUV Japan Europe 15800 15500
51 F Sedan Japan Japan 29000 21700
69 M Sedan US Japan 54400 33400
25 F Sedan Japan Japan 15800 15500",header=T)


####Is there an impact of gender on the type of car purchased?

chisq.test(cardat$gender,cardat$type)

## 
##  Pearson's Chi-squared test
## 
## data:  cardat$gender and cardat$type
## X-squared = 6.2934, df = 2, p-value = 0.04299

par(mfrow=c(1,2))
plot(cardat$type,cardat$gender,main="Gender v/s Type Graph", xlab="Car Type", ylab = "Gender")

######Is there a difference in amount paid for a car for men versus women?
#ttestBF(Fgender,cardat$carval)
#ttestBF(Mgender,cardat$carval)

chisq.test(cardat$gender,cardat$carval)

## Warning in chisq.test(cardat$gender, cardat$carval): Chi-squared approximation
## may be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  cardat$gender and cardat$carval
## X-squared = 31.98, df = 31, p-value = 0.4177

ttestBF(cardat$gender,cardat$carval)

## Bayes factor analysis
## --------------
## [1] Alt., r=0.707 : 1.557531e+16 ±0%
## 
## Against denominator:
##   Null, mu1-mu2 = 0 
## ---
## Bayes factor type: BFindepSample, JZS

plot(cardat$gender, cardat$carval, xlab="Gender",ylab="Car Value",main="Gender v/s Car Value")

#######Do people tend to buy vehicles from of the same origin as their last vehicle (US, europe, japan)?

ttestBF(cardat$origin,cardat$origin.last)

## Bayes factor analysis
## --------------
## [1] Alt., r=0.707 : 1.30642 ±0%
## 
## Against denominator:
##   Null, mu1-mu2 = 0 
## ---
## Bayes factor type: BFindepSample, JZS

chisq.test(cardat$origin,cardat$origin.last)

## Warning in chisq.test(cardat$origin, cardat$origin.last): Chi-squared
## approximation may be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  cardat$origin and cardat$origin.last
## X-squared = 12.772, df = 4, p-value = 0.01245

plot(cardat$origin,cardat$type,xlab="Origin Country",ylab="Car Type",main="Car Origin v/s Type")

plot(cardat$origin.last,cardat$type,xlab="Origin Country",ylab="Car Type", main="Previous Car Origin v/s Type")

###########Is there a relationship between driver age and the value of the car?

t.test(cardat$age,cardat$carval)

## 
##  Welch Two Sample t-test
## 
## data:  cardat$age and cardat$carval
## t = -11.989, df = 39, p-value = 1.184e-14
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -26131.95 -18587.20
## sample estimates:
## mean of x mean of y 
##    45.425 22405.000

ttestBF(cardat$age,cardat$carval)

## Bayes factor analysis
## --------------
## [1] Alt., r=0.707 : 1.413795e+16 ±0%
## 
## Against denominator:
##   Null, mu1-mu2 = 0 
## ---
## Bayes factor type: BFindepSample, JZS

chisq.test(cardat$age,cardat$carval)

## Warning in chisq.test(cardat$age, cardat$carval): Chi-squared approximation may
## be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  cardat$age and cardat$carval
## X-squared = 873.33, df = 837, p-value = 0.1863

plot(cardat$age,cardat$carval,xlab="Age of Driver",ylab="Car Value", main="Age of Driver v/s Car Value")

##################################

###What is your best estimate for the value of a car driven by a 32, 52, and 62-year-old?

Reg_Copma <- lm(cardat$carval ~ cardat$age, data=cardat)
predict(Reg_Copma, newdata=data.frame(num=c(32)),interval="prediction",se.fit=T)

## Warning: 'newdata' had 1 row but variables found have 40 rows

## $fit
##          fit         lwr      upr
## 1  15639.509  -2745.7973 34024.81
## 2  13863.012  -4620.8352 32346.86
## 3  23337.661   5116.2318 41559.09
## 4   7941.357 -11028.4458 26911.16
## 5  20376.833   2143.5166 38610.15
## 6  20968.999   2743.1993 39194.80
## 7  31035.813  12546.4537 49525.17
## 8  17416.005   -893.2717 35725.28
## 9  22745.495   4526.8303 40964.16
## 10 11494.350  -7155.2762 30143.98
## 11 25114.157   6869.0255 43359.29
## 12 33404.475  14747.8793 52061.07
## 13 15047.343  -3368.3247 33463.01
## 14 18600.336    329.0964 36871.58
## 15 29851.482  11431.0446 48271.92
## 16 26890.654   8598.7848 45182.52
## 17 12678.681  -5883.1935 31240.56
## 18 17416.005   -893.2717 35725.28
## 19 17416.005   -893.2717 35725.28
## 20 27482.820   9170.2805 45795.36
## 21 22745.495   4526.8303 40964.16
## 22 28074.985   9739.2442 46410.73
## 23 22745.495   4526.8303 40964.16
## 24 29259.316  10869.6151 47649.02
## 25 19192.502    936.4604 37448.54
## 26 11494.350  -7155.2762 30143.98
## 27 29851.482  11431.0446 48271.92
## 28 33404.475  14747.8793 52061.07
## 29 23337.661   5116.2318 41559.09
## 30 14455.178  -3993.3417 32903.70
## 31 20968.999   2743.1993 39194.80
## 32 34588.806  15834.1578 53343.45
## 33 16823.840  -1508.2599 35155.94
## 34 35773.137  16911.0086 54635.27
## 35 27482.820   9170.2805 45795.36
## 36 33404.475  14747.8793 52061.07
## 37 11494.350  -7155.2762 30143.98
## 38 25706.323   7448.1644 43964.48
## 39 36365.303  17445.9484 55284.66
## 40 10310.019  -8436.9466 29056.98
## 
## $se.fit
##        1        2        3        4        5        6        7        8 
## 1862.124 2086.632 1415.515 2965.676 1452.387 1429.179 2098.514 1669.357 
##        9       10       11       12       13       14       15       16 
## 1406.807 2419.949 1488.153 2433.022 1933.947 1564.363 1944.999 1622.123 
##       17       18       19       20       21       22       23       24 
## 2249.303 1669.357 1669.357 1678.065 1406.807 1738.789 1406.807 1872.684 
##       25       26       27       28       29       30       31       32 
## 1520.450 2419.949 1944.999 2433.022 1415.515 2008.895 1429.179 2610.497 
##       33       34       35       36       37       38       39       40 
## 1729.395 2793.072 1678.065 2433.022 2419.949 1526.640 2885.970 2596.997 
## 
## $df
## [1] 38
## 
## $residual.scale
## [1] 8888.928

plot(cardat$carval,cardat$age,xlab="Car Value",ylab="Age of Driver", main="Car Value v/s Age of Driver")

################Is there a relationship between how much someone paid for their previous car and how much they paid for their current car?


chisq.test(cardat$carval,cardat$carval.last)

## Warning in chisq.test(cardat$carval, cardat$carval.last): Chi-squared
## approximation may be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  cardat$carval and cardat$carval.last
## X-squared = 895, df = 744, p-value = 0.0001103

ttestBF(cardat$carval,cardat$carval.last)

## Bayes factor analysis
## --------------
## [1] Alt., r=0.707 : 0.9442107 ±0%
## 
## Against denominator:
##   Null, mu1-mu2 = 0 
## ---
## Bayes factor type: BFindepSample, JZS

t.test(cardat$carval,cardat$carval.last)

## 
##  Welch Two Sample t-test
## 
## data:  cardat$carval and cardat$carval.last
## t = 1.8048, df = 54.796, p-value = 0.0766
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -409.3724 7819.3724
## sample estimates:
## mean of x mean of y 
##     22405     18700

#################Did people tend to pay more for their current car than their previous car?
t.test(cardat$carval,cardat$carval.last)

## 
##  Welch Two Sample t-test
## 
## data:  cardat$carval and cardat$carval.last
## t = 1.8048, df = 54.796, p-value = 0.0766
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -409.3724 7819.3724
## sample estimates:
## mean of x mean of y 
##     22405     18700

plot(cardat$carval,cardat$carval.last,xlab="Current Car Value",ylab="Previous Car Value",main="Current Car Value v/s Previous Car Value")

################Did trucks cost more than SUVs?

TruckSet<-subset(cardat, cardat$type == "Truck")
print(TruckSet)

##    age gender  type origin origin.last carval carval.last
## 2   31      M Truck     US      Europe  16900       16000
## 7   60      F Truck Europe      Europe  19900       17800
## 8   37      M Truck Europe      Europe  17100       16200
## 14  39      M Truck     US      Europe  17000       16200
## 23  46      F Truck     US      Europe  16600       16100
## 28  64      M Truck     US          US  40600       27100
## 29  47      M Truck     US      Europe  18400       16900
## 30  32      M Truck     US          US  15900       15600
## 32  66      M Truck Europe      Europe  19100       17500
## 34  68      M Truck     US          US  69300       40100
## 36  64      M Truck  Japan      Europe  34900       24600

SUVSet<-subset(cardat, cardat$type == "SUV")

mean(SUVSet$carval)

## [1] 20731.25

mean(TruckSet$carval)

## [1] 25972.73

t.test(TruckSet$carval,SUVSet$carval)

## 
##  Welch Two Sample t-test
## 
## data:  TruckSet$carval and SUVSet$carval
## t = 0.96255, df = 13.809, p-value = 0.3523
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -6452.967 16935.922
## sample estimates:
## mean of x mean of y 
##  25972.73  20731.25

plot(cardat$type,cardat$carval, xlab="Car Type", ylab="Car Value",main="Current Car Type v/s Car Value")

##############################
mean(SUVSet$carval.last)

## [1] 17937.5

mean(TruckSet$carval.last)

## [1] 20372.73

t.test(TruckSet$carval.last,SUVSet$carval.last)

## 
##  Welch Two Sample t-test
## 
## data:  TruckSet$carval.last and SUVSet$carval.last
## t = 0.97615, df = 13.873, p-value = 0.3457
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -2920.027  7790.481
## sample estimates:
## mean of x mean of y 
##  20372.73  17937.50

plot(cardat$type,cardat$carval.last, xlab="Car Type", ylab="Car Value",main="Previous Car Type v/s Car Value")

Problem Set 6

Sneha Nimmagadda

20 October 2018