knitr::opts_chunk$set(
echo = TRUE,
message = FALSE,
warning = FALSE
)
knitr::opts_knit$set(root.dir = normalizePath(".."))Introdução
Eu resolvi fazer a análise em Markdown e fazer um upload no RPubs para todos irmos acompanhando. Assim, eu posso descrever um pouco mais do que eu estou fazendo e vocês podem reproduzir as análises.
Importando os dados
library(readxl)
dd <- read_excel("/Users/lnicolao/Meu Drive/GDrive/UFRGS/Pesquisa/SES Experiential Purchases/Survey_CloudResearch/Experience Inquiry_1 Fapergs_July 11, 2023_15.33.xlsx",
col_names = FALSE, skip = 2)
names_dd <- read_excel("/Users/lnicolao/Meu Drive/GDrive/UFRGS/Pesquisa/SES Experiential Purchases/Survey_CloudResearch/Experience Inquiry_1 Fapergs_July 11, 2023_15.33.xlsx",
col_names = FALSE)[1,]
names(dd) <- names_dd
# Antes de ir adiante, eu preciso filtrar as pessoas que falharam a manipulação ao não escrever nada ou texto inapropriado
ds <- subset(dd, dd$Rejected==0)Caracterização do Respondente
Embora esse não seja um estudo descritivo, não custa explorar um pouco os dados e fazer uma caracterização da amostra.
require(knitr)
require(questionr)
ds$Sex.f <- factor(ds$Sex, levels = c(1,2,3), labels = c("Male","Female","Other"))
t.genero <- freq(ds$Sex.f, digits = 2, exclude = NA, total = T)
kable(t.genero, caption = "Distribuição de Respondentes por Gênero")| n | % | |
|---|---|---|
| Male | 108 | 40.00 |
| Female | 158 | 58.52 |
| Other | 4 | 1.48 |
| Total | 270 | 100.00 |
require(ggsci)
require(ggplot2)
ggplot(t.genero[-4,], aes(x="", y=`%`,fill=rownames(t.genero[-4,]), label = round(`%`,2)))+
geom_bar(width = 1, stat = "identity")+
geom_text(position = position_stack(vjust = 0.5),size = 5)+
scale_fill_npg() +
labs(x=NULL,fill="Sexo",y="%",title="Gênero dos Respondentes")+
coord_polar("y", start=0)A maior parte das respondentes se identifica como do sexo feminino.
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 270 39.9 14.24 37.5 38.75 14.08 17 78 61 0.66 -0.23 0.87
ggplot(ds, aes(x=Age_1)) +
geom_histogram(aes(y=..density..),binwidth = 1, color="grey",fill="grey")+
geom_density(alpha=.2, fill="green")+
geom_vline(aes(xintercept=mean(Age_1,na.rm=T)),color="blue",linetype="dashed",size=1)+
labs(title="Idade dos Respondentes",y=NULL,x="Idade") A
média de idade dos reposndentes é de 39.9 anos de idade, com um
desvio-padrão de 14.24 e a distribuição parece que “skew” para os
jovens.
ds$Ethnic.f <- factor(ds$Ethnic, levels = c(0,1,2,3,4,5,6),
labels = c("American Indian", "East Asian", "South Asian", "Black", "Hispanic","White", "Other"))
t.ethnic <- freq(ds$Ethnic.f, digits = 2, exclude = NA, total = T)
kable(t.ethnic, caption = "Distribuição de Respondentes por Etnia")| n | % | |
|---|---|---|
| American Indian | 1 | 0.37 |
| East Asian | 5 | 1.85 |
| South Asian | 4 | 1.48 |
| Black | 51 | 18.89 |
| Hispanic | 30 | 11.11 |
| White | 179 | 66.30 |
| Other | 0 | 0.00 |
| Total | 270 | 100.00 |
A amostra é, predominantemente, branca.
Renda
Embora renda seja parte da caracterização da amostra, eu resolvi separar a análise porque é uma variável de especial interesse nosso.
#Convertendo a escala de SES "Escada" em uma variável numérica
ds$ladder <- ifelse (ds$sesladder_1 == "On", 1,
ifelse (ds$sesladder_2 == "On", 2,
ifelse (ds$sesladder_3 == "On", 3,
ifelse (ds$sesladder_4 == "On", 4,
ifelse (ds$sesladder_5 == "On", 5,
ifelse (ds$sesladder_6 == "On", 6,
ifelse (ds$sesladder_7 == "On", 7,
ifelse (ds$sesladder_8 == "On", 8,
ifelse (ds$sesladder_9 == "On", 9,
ifelse (ds$sesladder_10 == "On", 10, NA))))))))))
require(questionr)
t.ladder <- freq(ds$ladder, digits = 2, exclude = NA, total = T)
kable(t.ladder, caption = "Distribuição das Posiçoẽs na Escada de SES")| n | % | |
|---|---|---|
| 1 | 21 | 7.78 |
| 2 | 27 | 10.00 |
| 3 | 36 | 13.33 |
| 4 | 23 | 8.52 |
| 5 | 42 | 15.56 |
| 6 | 47 | 17.41 |
| 7 | 34 | 12.59 |
| 8 | 19 | 7.04 |
| 9 | 14 | 5.19 |
| 10 | 7 | 2.59 |
| Total | 270 | 100.00 |
# Tratando a Escada de SES como uma variável intervalar
m.ladder <- psych::describe(ds$ladder)
rownames(m.ladder) <- "Escada SES"
kable(round(m.ladder[,c(2:4)],2), caption = "Média de Colocação na Escada de SES")| n | mean | sd | |
|---|---|---|---|
| Escada SES | 270 | 5.01 | 2.36 |
ds$SESlikert.f <- factor(ds$SESlikert, levels = c(1:5), labels = c("Lower class", "Lower middle class", "Middle class",
"Upper middle class", "Upper class"))
t.SESLikert <- freq(ds$SESlikert.f, digits = 2, exclude = NA, total = T)
kable(t.SESLikert, caption = "Social Economic Class")| n | % | |
|---|---|---|
| Lower class | 38 | 14.07 |
| Lower middle class | 82 | 30.37 |
| Middle class | 110 | 40.74 |
| Upper middle class | 34 | 12.59 |
| Upper class | 6 | 2.22 |
| Total | 270 | 100.00 |
ds$Income.f <- factor(ds$Income, levels = c(1:8), labels = c("$15,000 or less", "$15,001-$25,000", "$25,001-$35,000",
"$35,001-$50,000", "$50,001-$75,000", "$75,001-$100,000",
"$100,001-$150,000", "more than $150,000"))
t.Income <- freq(ds$Income.f, digits = 2, exclude = NA, total = T)
kable(t.Income, caption = "Distribuição de Renda dos Respondentes")| n | % | |
|---|---|---|
| $15,000 or less | 26 | 9.63 |
| $15,001-$25,000 | 31 | 11.48 |
| $25,001-$35,000 | 38 | 14.07 |
| $35,001-$50,000 | 48 | 17.78 |
| $50,001-$75,000 | 52 | 19.26 |
| $75,001-$100,000 | 41 | 15.19 |
| $100,001-$150,000 | 19 | 7.04 |
| more than $150,000 | 15 | 5.56 |
| Total | 270 | 100.00 |
#Fazendo um histograma da Renda, para visualizar a distribuição
ggplot(ds, aes(x=Income)) +
geom_histogram(aes(y=..density..),binwidth = 1, color="lightgrey",fill="lightgrey")+
geom_density(alpha=.2, fill="magenta")+
labs(title="Distribuição das faixas de Renda",y=NULL,x="Renda")Exploração das Variáveis de Medida de Consumo (em $)
n.gastos <- c("Lunch","Movies", "Streaming", "Trips", "Concert", "Museums", "Parties", "Church", "BBQ", "GameEvents",
"BodyCare", "Sports","Games","Drinking","Exercise", "Language", "Creativity", "Conferences", "Therapy", "Career",
"OwnEducation", "RelativeEducation","Healthcare")
m.freq.gasto <- describe(ds[,c(20:42)])
rownames(m.freq.gasto) <- n.gastos
m.freq.gasto.ordered <- m.freq.gasto[order(-m.freq.gasto$mean), ]
kable(round(m.freq.gasto.ordered[,c(3:4)],2), caption = "Média de Frequência de Gastos com Experiências")| mean | sd | |
|---|---|---|
| Streaming | 3.71 | 1.26 |
| Lunch | 3.35 | 1.07 |
| BBQ | 3.28 | 1.09 |
| Trips | 3.27 | 1.08 |
| BodyCare | 3.15 | 1.26 |
| Healthcare | 3.09 | 1.24 |
| Movies | 2.94 | 1.16 |
| Games | 2.93 | 1.30 |
| Exercise | 2.76 | 1.32 |
| Drinking | 2.66 | 1.32 |
| Parties | 2.66 | 1.20 |
| OwnEducation | 2.63 | 1.31 |
| Concert | 2.52 | 1.16 |
| Career | 2.50 | 1.39 |
| Museums | 2.34 | 1.22 |
| GameEvents | 2.31 | 1.28 |
| Sports | 2.29 | 1.25 |
| Church | 2.23 | 1.30 |
| Creativity | 2.16 | 1.29 |
| Conferences | 2.12 | 1.25 |
| Therapy | 2.10 | 1.28 |
| RelativeEducation | 1.93 | 1.25 |
| Language | 1.79 | 1.15 |
m.happyexp <- describe(ds[,c(43:46)])
rownames(m.happyexp) <- c("Happiness with Exp Purchases","Happiness Spending Money on them", "How many dollars", "Like to have more money")
kable(round(m.happyexp[,c(3,4)],2), caption = "Médias de Felicidade com Compras Experienciais")| mean | sd | |
|---|---|---|
| Happiness with Exp Purchases | 3.88 | 1.05 |
| Happiness Spending Money on them | 3.83 | 0.96 |
| How many dollars | 415.78 | 2189.71 |
| Like to have more money | 4.23 | 0.95 |
Parece haver umna variação enorme no gasto com compras experienciais. Vou colocar isso em boxplot.
## [1] 800 1500 34232 2500 1000 1000 800 2000 2000 4500 2000 1100
## [13] 1000 1000 1000 10000 700 800 1000 1000 1200
Quem gastou acima de 700 dólares é considerado um outlier. Teve um respondente que colocou 3.4232^{4} dólares! Claramente, alguém que digitou qualquer número.
Exploração das Variáveis de Medida de Consumo em Tempo
n.tempo <- c("Parks", "Reading", "Music", "Movies", "Chatting", "Instrument", "Games",
"CollectiveSports", "IndividualSports", "FreeConcerts", "Museums", "FamilyParties", "ChurchParties",
"Praying", "Studying", "LearningJob", "LearningFun", "Arts", "Volunteering")
m.exptime <- describe(ds[,c(48:66)])
rownames(m.exptime) <- n.tempo
m.freq.gasto.ordered <- m.freq.gasto[order(-m.freq.gasto$mean), ]
kable(round(m.freq.gasto.ordered[,c(3:4)],2), caption = "Média de Frequência de Gastos com Experiências")| mean | sd | |
|---|---|---|
| Streaming | 3.71 | 1.26 |
| Lunch | 3.35 | 1.07 |
| BBQ | 3.28 | 1.09 |
| Trips | 3.27 | 1.08 |
| BodyCare | 3.15 | 1.26 |
| Healthcare | 3.09 | 1.24 |
| Movies | 2.94 | 1.16 |
| Games | 2.93 | 1.30 |
| Exercise | 2.76 | 1.32 |
| Drinking | 2.66 | 1.32 |
| Parties | 2.66 | 1.20 |
| OwnEducation | 2.63 | 1.31 |
| Concert | 2.52 | 1.16 |
| Career | 2.50 | 1.39 |
| Museums | 2.34 | 1.22 |
| GameEvents | 2.31 | 1.28 |
| Sports | 2.29 | 1.25 |
| Church | 2.23 | 1.30 |
| Creativity | 2.16 | 1.29 |
| Conferences | 2.12 | 1.25 |
| Therapy | 2.10 | 1.28 |
| RelativeEducation | 1.93 | 1.25 |
| Language | 1.79 | 1.15 |
m.exptime.ordered <- m.exptime[order(-m.exptime$mean),]
kable(round(m.exptime.ordered[,c(3:4)],2), caption = "Média de Frequência de Tempo Gasto com Experiências")| mean | sd | |
|---|---|---|
| Music | 4.00 | 0.98 |
| Movies | 3.85 | 0.99 |
| Chatting | 3.58 | 1.13 |
| Games | 3.52 | 1.15 |
| LearningFun | 3.27 | 1.13 |
| Reading | 3.19 | 1.25 |
| FamilyParties | 3.13 | 1.15 |
| Parks | 3.11 | 1.19 |
| Praying | 2.89 | 1.45 |
| Arts | 2.76 | 1.37 |
| LearningJob | 2.71 | 1.36 |
| Studying | 2.55 | 1.36 |
| IndividualSports | 2.48 | 1.38 |
| FreeConcerts | 2.44 | 1.22 |
| ChurchParties | 2.37 | 1.37 |
| Museums | 2.37 | 1.26 |
| Volunteering | 2.36 | 1.22 |
| CollectiveSports | 2.21 | 1.38 |
| Instrument | 2.02 | 1.29 |
m.happytime <- describe(ds[,c(67:70)])
rownames(m.happytime) <- c("Happiness with Exp which spend time","Happiness Spending time on Free", "How many time", "Like to have more time")
kable(round(m.happytime[,c(3,4)],2), caption = "Médias de Felicidade com Compras Experienciais e Tempo")| mean | sd | |
|---|---|---|
| Happiness with Exp which spend time | 4.12 | 0.96 |
| Happiness Spending time on Free | 4.29 | 0.87 |
| How many time | 56.05 | 125.01 |
| Like to have more time | 4.15 | 0.91 |
Assim como no caso do dinheiro, parece haver uma variação muito grande no tempo gasto com experiências.
## [1] 200 400 300 150 400 200 180 200 165 400 300 600 1200 160 200
## [16] 200 1000 460 120 150 168 672 200 300 250
Parece que a partir de 120 horas, as repostas são outliers.
Teste de Relações entre variáveis
# Juntando todas as variáveis de medidas para um teste de correlação
d.gastos <- data.frame(ds[,c(20:46)])
n.gastos <- c("Lunch","Movies", "Streaming", "Trips", "Concert", "Museums", "Parties", "Church", "BBQ", "GameEvents",
"BodyCare", "Sports","Games","Drinking","Exercise", "Language", "Creativity", "Conferences", "Therapy", "Career",
"OwnEducation", "RelativeEducation","Healthcare","Happiness with Exp Purchases","Happiness Spending Money on them",
"How many dollars", "Like to have more money")
names(d.gastos) <- n.gastos
require(corrplot)
cor_matrix_spent <- cor(d.gastos)
corrplot(cor_matrix_spent, type = "upper",
method = "square",
tl.col = "gray", tl.srt = 45, tl.cex = 0.5)d.tempo <- data.frame(ds[,c(48:70)])
n.tempo <- c("Parks", "Reading", "Music", "Movies", "Chatting", "Instrument", "Games",
"CollectiveSports", "IndividualSports", "FreeConcerts", "Museums", "FamilyParties", "ChurchParties",
"Praying", "Studying", "LearningJob", "LearningFun", "Arts", "Volunteering",
"Happiness with Exp which spend time","Happiness Spending time on Free", "How many time", "Like to have more time")
names(d.tempo) <- n.tempo
cor_matrix_time <- cor(d.tempo)
corrplot(cor_matrix_time, type = "upper",
method = "square",
tl.col = "gray", tl.srt = 45, tl.cex = 0.5)Regressões (algumas)
Aqui eu testarei algumas regressões mais óbvias, centrando as variáveis nas suas médias para aumentar a interpretatividade dos resultados quando existirem interações.
Efeitos de gastos individuais na felicidade com compras experienciais.
library(sjPlot)
library(sjmisc)
library(sjlabelled)
require(Hmisc)
label(ds[,c(20:42)]) <- as.list(n.gastos[1:23])
m.gastos.happy <- lm(happyexppurchase ~ expmoney_1 + expmoney_2 +expmoney_3 +expmoney_4 +
expmoney_5 +expmoney_6 +expmoney_7 +expmoney_8 +expmoney_9 +
expmoney_10 +expmoney_11 +expmoney_12 +expmoney_13 +expmoney_14 +
expmoney_15 +expmoney_16 +expmoney_17 +expmoney_18 +expmoney_19 +
expmoney_20 +expmoney_21 +expmoney_22 +expmoney_23, data = ds)
summary(m.gastos.happy)##
## Call:
## lm(formula = happyexppurchase ~ expmoney_1 + expmoney_2 + expmoney_3 +
## expmoney_4 + expmoney_5 + expmoney_6 + expmoney_7 + expmoney_8 +
## expmoney_9 + expmoney_10 + expmoney_11 + expmoney_12 + expmoney_13 +
## expmoney_14 + expmoney_15 + expmoney_16 + expmoney_17 + expmoney_18 +
## expmoney_19 + expmoney_20 + expmoney_21 + expmoney_22 + expmoney_23,
## data = ds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.3281 -0.4985 0.1352 0.6047 1.8510
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.014235 0.287381 10.489 <2e-16 ***
## expmoney_1 0.190842 0.072593 2.629 0.0091 **
## expmoney_2 0.030502 0.070426 0.433 0.6653
## expmoney_3 -0.056696 0.057826 -0.980 0.3278
## expmoney_4 -0.019864 0.072403 -0.274 0.7840
## expmoney_5 -0.074636 0.072540 -1.029 0.3045
## expmoney_6 -0.073246 0.073634 -0.995 0.3208
## expmoney_7 -0.053292 0.073461 -0.725 0.4689
## expmoney_8 -0.043355 0.056483 -0.768 0.4435
## expmoney_9 0.086519 0.074144 1.167 0.2444
## expmoney_10 0.106520 0.070930 1.502 0.1344
## expmoney_11 0.089451 0.064154 1.394 0.1645
## expmoney_12 0.018705 0.077895 0.240 0.8104
## expmoney_13 0.073933 0.059557 1.241 0.2156
## expmoney_14 -0.097245 0.063847 -1.523 0.1290
## expmoney_15 0.051373 0.060817 0.845 0.3991
## expmoney_16 0.068714 0.085101 0.807 0.4202
## expmoney_17 -0.088357 0.070621 -1.251 0.2121
## expmoney_18 0.164615 0.072996 2.255 0.0250 *
## expmoney_19 -0.007865 0.058934 -0.133 0.8939
## expmoney_20 -0.023808 0.068424 -0.348 0.7282
## expmoney_21 -0.123787 0.066301 -1.867 0.0631 .
## expmoney_22 -0.083546 0.064293 -1.299 0.1950
## expmoney_23 0.110068 0.061274 1.796 0.0737 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9908 on 246 degrees of freedom
## Multiple R-squared: 0.1791, Adjusted R-squared: 0.1023
## F-statistic: 2.333 on 23 and 246 DF, p-value: 0.0007758
s.m.gastos.happy <- summary(m.gastos.happy)$coefficients
plot_model(m.gastos.happy, terms = rownames(s.m.gastos.happy)[s.m.gastos.happy[,4]<0.05], show.values = TRUE)#comparando os resultados a uma regressão stepwise
require(MASS)
m.gastos.happy.step <- stepAIC(lm(happyexppurchase ~ expmoney_1 + expmoney_2 +expmoney_3 +expmoney_4 +
expmoney_5 +expmoney_6 +expmoney_7 +expmoney_8 +expmoney_9 +
expmoney_10 +expmoney_11 +expmoney_12 +expmoney_13 +expmoney_14 +
expmoney_15 +expmoney_16 +expmoney_17 +expmoney_18 +expmoney_19 +
expmoney_20 +expmoney_21 +expmoney_22 +expmoney_23, data = ds), direction = "both", trace = FALSE)
summary(m.gastos.happy.step)##
## Call:
## lm(formula = happyexppurchase ~ expmoney_1 + expmoney_6 + expmoney_10 +
## expmoney_11 + expmoney_14 + expmoney_18 + expmoney_21 + expmoney_23,
## data = ds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.3116 -0.4823 0.1990 0.6212 1.7483
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.98905 0.23502 12.718 < 2e-16 ***
## expmoney_1 0.20848 0.06762 3.083 0.00227 **
## expmoney_6 -0.11594 0.06167 -1.880 0.06123 .
## expmoney_10 0.09797 0.06020 1.627 0.10488
## expmoney_11 0.08157 0.05688 1.434 0.15274
## expmoney_14 -0.10095 0.05501 -1.835 0.06763 .
## expmoney_18 0.15250 0.06587 2.315 0.02137 *
## expmoney_21 -0.15118 0.05563 -2.717 0.00702 **
## expmoney_23 0.10543 0.05518 1.911 0.05715 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9799 on 261 degrees of freedom
## Multiple R-squared: 0.1481, Adjusted R-squared: 0.122
## F-statistic: 5.674 on 8 and 261 DF, p-value: 1.179e-06
s.m.gastos.happy.step <- summary(m.gastos.happy.step)$coefficients
plot_model(m.gastos.happy.step, terms = rownames(s.m.gastos.happy.step)[s.m.gastos.happy.step[,4]<0.05], show.values = TRUE,
title = "Influência das Atividades na Felicidade com Experiências")O resultado do modelo com Stepwise é, previsivelmente, mais parcimonioso. Fazendo a mesma coisa para as outras variáveis dependentes deste bloco.
m.gastos.money.step <- stepAIC(lm(happymoney ~ expmoney_1 + expmoney_2 +expmoney_3 +expmoney_4 +
expmoney_5 +expmoney_6 +expmoney_7 +expmoney_8 +expmoney_9 +
expmoney_10 +expmoney_11 +expmoney_12 +expmoney_13 +expmoney_14 +
expmoney_15 +expmoney_16 +expmoney_17 +expmoney_18 +expmoney_19 +
expmoney_20 +expmoney_21 +expmoney_22 +expmoney_23, data = ds), direction = "both", trace = FALSE)
summary(m.gastos.money.step)##
## Call:
## lm(formula = happymoney ~ expmoney_1 + expmoney_4 + expmoney_7 +
## expmoney_12 + expmoney_14 + expmoney_15 + expmoney_17 + expmoney_21 +
## expmoney_22, data = ds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.93353 -0.51815 0.07929 0.60500 2.01593
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.63134 0.21535 12.219 < 2e-16 ***
## expmoney_1 0.12924 0.06043 2.139 0.03339 *
## expmoney_4 0.10052 0.05696 1.765 0.07878 .
## expmoney_7 0.16712 0.05968 2.800 0.00549 **
## expmoney_12 0.08996 0.05859 1.535 0.12588
## expmoney_14 -0.08889 0.05288 -1.681 0.09399 .
## expmoney_15 0.10617 0.05000 2.123 0.03467 *
## expmoney_17 0.07738 0.05353 1.446 0.14950
## expmoney_21 -0.09587 0.05092 -1.883 0.06084 .
## expmoney_22 -0.09369 0.05153 -1.818 0.07022 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8757 on 260 degrees of freedom
## Multiple R-squared: 0.1878, Adjusted R-squared: 0.1597
## F-statistic: 6.681 on 9 and 260 DF, p-value: 1.326e-08
s.m.gastos.money.step <- summary(m.gastos.money.step)$coefficients
plot_model(m.gastos.money.step, terms = rownames(s.m.gastos.money.step)[s.m.gastos.money.step[,4]<0.05], show.values = TRUE,
title = "Influência das Atividades na Felicidade com o Valor gasto com Experiências")
Verificando a influência da variável de gastos com a experiências na
experiência de felicidade
##
## Call:
## lm(formula = happyexppurchase ~ moneyspent, data = ds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.8981 -0.8700 0.1275 1.1161 1.1295
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.870e+00 6.479e-02 59.724 <2e-16 ***
## moneyspent 2.851e-05 2.912e-05 0.979 0.328
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.046 on 268 degrees of freedom
## Multiple R-squared: 0.003565, Adjusted R-squared: -0.0001534
## F-statistic: 0.9587 on 1 and 268 DF, p-value: 0.3284
##
## Call:
## lm(formula = happymoney ~ moneyspent, data = ds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.8300 -0.8226 0.1744 0.1853 1.1785
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.821e+00 5.916e-02 64.591 <2e-16 ***
## moneyspent 2.895e-05 2.659e-05 1.089 0.277
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.955 on 268 degrees of freedom
## Multiple R-squared: 0.004404, Adjusted R-squared: 0.0006895
## F-statistic: 1.186 on 1 and 268 DF, p-value: 0.2772
Parece que o dinheiro gasto em experiências não influencia o quanto de felicidade é derivada das experiências. Talvez seja uma questão de existir um efeito moderador da renda. Vou testar isso primeiro com a variável de “escada”
ds$moneyspent.mc <- ds$moneyspent-mean(ds$moneyspent, na.rm = TRUE)
ds$ladder.mc <- ds$ladder-mean(ds$ladder, na.rm = TRUE)
summary(lm(happyexppurchase ~ moneyspent.mc*ladder.mc, data = ds))##
## Call:
## lm(formula = happyexppurchase ~ moneyspent.mc * ladder.mc, data = ds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.0906 -0.6761 0.1236 0.9506 1.3280
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.875e+00 6.523e-02 59.405 <2e-16 ***
## moneyspent.mc -1.677e-05 9.128e-05 -0.184 0.8544
## ladder.mc 5.449e-02 2.755e-02 1.978 0.0489 *
## moneyspent.mc:ladder.mc 8.085e-06 1.915e-05 0.422 0.6733
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.042 on 266 degrees of freedom
## Multiple R-squared: 0.01808, Adjusted R-squared: 0.007002
## F-statistic: 1.632 on 3 and 266 DF, p-value: 0.1822
##
## Call:
## lm(formula = happymoney ~ moneyspent.mc * ladder.mc, data = ds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.1911 -0.6486 0.1345 0.5269 1.5164
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.822e+00 5.846e-02 65.377 < 2e-16 ***
## moneyspent.mc -4.901e-05 8.181e-05 -0.599 0.549640
## ladder.mc 9.268e-02 2.469e-02 3.754 0.000214 ***
## moneyspent.mc:ladder.mc 1.396e-05 1.717e-05 0.813 0.416718
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.934 on 266 degrees of freedom
## Multiple R-squared: 0.05473, Adjusted R-squared: 0.04407
## F-statistic: 5.134 on 3 and 266 DF, p-value: 0.001816
Embora não exista interação entre essas variáveis, parece existir um efeito principal da ladder em si. Os dois efeitos são positivos, ou seja, quanto mais “alto o lugar na escada social”, mais felizes com experiências compradas ($) os respondentes estão.
Efeitos de tempo gasto com atividades na felicidade com compras experienciais.
label(ds[,c(48:66)]) <- as.list(n.tempo[1:19])
m.tempo.happy.step <- stepAIC(lm(happytime1 ~ Exptime_1 + Exptime_2 + Exptime_3 + Exptime_4 +
Exptime_5 + Exptime_6 + Exptime_7 + Exptime_8 + Exptime_9 +
Exptime_10 + Exptime_11 + Exptime_12 + Exptime_13 + Exptime_14 +
Exptime_15 + Exptime_16 + Exptime_17 + Exptime_18 + Exptime_19, data = ds),
direction = "both", trace = FALSE)
summary(m.tempo.happy.step)##
## Call:
## lm(formula = happytime1 ~ Exptime_2 + Exptime_3 + Exptime_6 +
## Exptime_12 + Exptime_13 + Exptime_14, data = ds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.8377 -0.3311 0.1210 0.6257 1.5913
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.04939 0.27416 11.123 < 2e-16 ***
## Exptime_2 0.07197 0.04889 1.472 0.14221
## Exptime_3 0.12284 0.06195 1.983 0.04842 *
## Exptime_6 -0.10938 0.04755 -2.300 0.02223 *
## Exptime_12 0.13403 0.05403 2.481 0.01374 *
## Exptime_13 0.14774 0.05315 2.780 0.00584 **
## Exptime_14 -0.06979 0.04930 -1.416 0.15808
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9181 on 263 degrees of freedom
## Multiple R-squared: 0.1069, Adjusted R-squared: 0.08649
## F-statistic: 5.245 on 6 and 263 DF, p-value: 4.021e-05
s.m.tempo.happy.step <- summary(m.tempo.happy.step)$coefficients
plot_model(m.tempo.happy.step, terms = rownames(s.m.tempo.happy.step)[s.m.tempo.happy.step[,4]<0.05], show.values = TRUE,
title = "Influência do Tempo Gasto com Atividades na Felicidade com Experiências")m.tempo.feel.step <- stepAIC(lm(happytime2 ~ Exptime_1 + Exptime_2 + Exptime_3 + Exptime_4 +
Exptime_5 + Exptime_6 + Exptime_7 + Exptime_8 + Exptime_9 +
Exptime_10 + Exptime_11 + Exptime_12 + Exptime_13 + Exptime_14 +
Exptime_15 + Exptime_16 + Exptime_17 + Exptime_18 + Exptime_19, data = ds),
direction = "both", trace = FALSE)
summary(m.tempo.feel.step)##
## Call:
## lm(formula = happytime2 ~ Exptime_2 + Exptime_3 + Exptime_4 +
## Exptime_6 + Exptime_9 + Exptime_12 + Exptime_13 + Exptime_14 +
## Exptime_15 + Exptime_17, data = ds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.9058 -0.3553 0.1100 0.5642 1.5706
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.84402 0.26680 10.660 <2e-16 ***
## Exptime_2 0.08402 0.04402 1.909 0.0574 .
## Exptime_3 0.12066 0.05784 2.086 0.0379 *
## Exptime_4 0.08054 0.05551 1.451 0.1481
## Exptime_6 -0.08827 0.04489 -1.966 0.0503 .
## Exptime_9 0.07710 0.04313 1.788 0.0750 .
## Exptime_12 0.08576 0.05122 1.675 0.0952 .
## Exptime_13 0.09046 0.04791 1.888 0.0601 .
## Exptime_14 -0.07099 0.04482 -1.584 0.1144
## Exptime_15 -0.09056 0.04544 -1.993 0.0473 *
## Exptime_17 0.09765 0.05426 1.800 0.0731 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8118 on 259 degrees of freedom
## Multiple R-squared: 0.1675, Adjusted R-squared: 0.1354
## F-statistic: 5.213 on 10 and 259 DF, p-value: 5.673e-07
s.m.tempo.feel.step <- summary(m.tempo.feel.step)$coefficients
plot_model(m.tempo.feel.step, terms = rownames(s.m.tempo.feel.step)[s.m.tempo.feel.step[,4]<0.05], show.values = TRUE,
title = "Influência do Tempo Gasto com Atividades na Felicidade")Renda e Felicidade com Experiências Gratuítas
##
## Call:
## lm(formula = happytime2 ~ ladder.mc, data = ds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.3712 -0.3136 -0.1697 0.7151 0.8303
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.28519 0.05307 80.745 <2e-16 ***
## ladder.mc 0.02878 0.02252 1.278 0.202
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.872 on 268 degrees of freedom
## Multiple R-squared: 0.006056, Adjusted R-squared: 0.002348
## F-statistic: 1.633 on 1 and 268 DF, p-value: 0.2024
##
## Call:
## lm(formula = happytime2 ~ SESlikert, data = ds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.3079 -0.2918 -0.2597 0.7082 0.7403
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.24369 0.15379 27.595 <2e-16 ***
## SESlikert 0.01605 0.05581 0.288 0.774
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8746 on 268 degrees of freedom
## Multiple R-squared: 0.0003085, Adjusted R-squared: -0.003422
## F-statistic: 0.08269 on 1 and 268 DF, p-value: 0.7739
model.freecost3 <- lm(happytime2 ~ Income, data = ds) #Com a renda declarada
summary(model.freecost3)##
## Call:
## lm(formula = happytime2 ~ Income, data = ds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4569 -0.3533 -0.1346 0.6812 0.8654
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.08861 0.12871 31.766 <2e-16 ***
## Income 0.04603 0.02747 1.676 0.095 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8701 on 268 degrees of freedom
## Multiple R-squared: 0.01037, Adjusted R-squared: 0.006675
## F-statistic: 2.808 on 1 and 268 DF, p-value: 0.09498
Novas Análises (Combinadas na Nossa Reunião de Agosto de 2023)
require(psych)
ds$experience.time <- data.frame(ds$Exptime_1, ds$Exptime_2, ds$Exptime_3, ds$Exptime_4, ds$Exptime_5, ds$Exptime_6,
ds$Exptime_7, ds$Exptime_8, ds$Exptime_9, ds$Exptime_10, ds$Exptime_11, ds$Exptime_12,
ds$Exptime_13, ds$Exptime_14, ds$Exptime_15, ds$Exptime_16, ds$Exptime_17, ds$Exptime_18,
ds$Exptime_19)
require(nFactors)
fa.parallel(ds$experience.time)## Parallel analysis suggests that the number of factors = 4 and the number of components = 2
Aparentemente, temos espaço para dois fatores. Vamos rodar uma análise fatorial com uma rotação Varimax para ver quais são esses fatores.
fa.exptime <- factanal(ds$experience.time, factors=2, scores = c("regression"), rotation = "varimax")
print(fa.exptime)##
## Call:
## factanal(x = ds$experience.time, factors = 2, scores = c("regression"), rotation = "varimax")
##
## Uniquenesses:
## ds.Exptime_1 ds.Exptime_2 ds.Exptime_3 ds.Exptime_4 ds.Exptime_5
## 0.662 0.814 0.616 0.720 0.580
## ds.Exptime_6 ds.Exptime_7 ds.Exptime_8 ds.Exptime_9 ds.Exptime_10
## 0.615 0.687 0.515 0.568 0.499
## ds.Exptime_11 ds.Exptime_12 ds.Exptime_13 ds.Exptime_14 ds.Exptime_15
## 0.558 0.636 0.526 0.666 0.621
## ds.Exptime_16 ds.Exptime_17 ds.Exptime_18 ds.Exptime_19
## 0.552 0.575 0.654 0.419
##
## Loadings:
## Factor1 Factor2
## ds.Exptime_1 0.532 0.236
## ds.Exptime_2 0.199 0.383
## ds.Exptime_3 0.619
## ds.Exptime_4 0.527
## ds.Exptime_5 0.223 0.608
## ds.Exptime_6 0.602 0.150
## ds.Exptime_7 0.196 0.524
## ds.Exptime_8 0.660 0.223
## ds.Exptime_9 0.584 0.302
## ds.Exptime_10 0.671 0.226
## ds.Exptime_11 0.652 0.131
## ds.Exptime_12 0.312 0.516
## ds.Exptime_13 0.687
## ds.Exptime_14 0.557 0.155
## ds.Exptime_15 0.488 0.376
## ds.Exptime_16 0.509 0.434
## ds.Exptime_17 0.363 0.541
## ds.Exptime_18 0.452 0.377
## ds.Exptime_19 0.745 0.162
##
## Factor1 Factor2
## SS loadings 4.695 2.823
## Proportion Var 0.247 0.149
## Cumulative Var 0.247 0.396
##
## Test of the hypothesis that 2 factors are sufficient.
## The chi square statistic is 319.25 on 134 degrees of freedom.
## The p-value is 3.75e-17
# Perform factor analysis
library(psych)
fa <- fa(r = ds$experience.time,
nfactors = 2,
rotate = "varimax")
fa.diagram(fa)Criando a nova métrica de Renda (per Shah)
ds$Income.n <- 0
ds$Income.n[ds$Income == 1] <- 15000
ds$Income.n[ds$Income == 2] <- 20000
ds$Income.n[ds$Income == 3] <- 30000
ds$Income.n[ds$Income == 4] <- 42500
ds$Income.n[ds$Income == 5] <- 62500
ds$Income.n[ds$Income == 6] <- 87500
ds$Income.n[ds$Income == 7] <- 125000
ds$Income.n[ds$Income == 8] <- 150000
table(ds$Income.n)##
## 15000 20000 30000 42500 62500 87500 125000 150000
## 26 31 38 48 52 41 19 15
ds$Income.per <- ds$Income.n/sqrt(ds$HH_1)
ds$Income.log <- log(ds$Income.per)
histogram(ds$Income.log)Testando os modelos com essa nova variável de Renda
##
## Call:
## lm(formula = happytime2 ~ Income.log, data = ds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.3976 -0.3477 -0.1342 0.7044 0.8837
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.08954 0.77385 3.992 8.45e-05 ***
## Income.log 0.11653 0.07524 1.549 0.123
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8708 on 268 degrees of freedom
## Multiple R-squared: 0.00887, Adjusted R-squared: 0.005172
## F-statistic: 2.398 on 1 and 268 DF, p-value: 0.1226
##
## Call:
## lm(formula = happytime2 ~ Income.log * exptime.fa, data = ds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4893 -0.3704 0.1474 0.6555 1.1975
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.67799 0.77384 4.753 3.29e-06 ***
## Income.log 0.06003 0.07520 0.798 0.425
## exptime.fa 0.84315 0.73920 1.141 0.255
## Income.log:exptime.fa -0.06343 0.07172 -0.884 0.377
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8523 on 266 degrees of freedom
## Multiple R-squared: 0.0577, Adjusted R-squared: 0.04707
## F-statistic: 5.429 on 3 and 266 DF, p-value: 0.001224
##
## Call:
## lm(formula = happyexppurchase ~ Income.log, data = ds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.0378 -0.7147 0.1118 1.0092 1.3054
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.44798 0.92695 2.641 0.00875 **
## Income.log 0.13971 0.09013 1.550 0.12230
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.043 on 268 degrees of freedom
## Multiple R-squared: 0.008886, Adjusted R-squared: 0.005188
## F-statistic: 2.403 on 1 and 268 DF, p-value: 0.1223