回归满意度影响

Author

尹俊贺

Quarto

Quarto enables you to weave together content and executable code into a finished document. To learn more about Quarto see https://quarto.org.

Running Code

数据读取

library(readxl)
data <- read_excel("C:/Users/24966/Desktop/Excercise/307676931_按文本_电影文化挪用效果调查问卷_256_256.xlsx")

names(data)[which(names(data) == "3、您对电影《花木兰》的满意度有多高?")] <- "mulan_satisfaction"
names(data)[which(names(data) == "4、您认为电影《花木兰》多大程度上体现出花木兰历史故事的文化内涵?")] <- "mulan_culture"
names(data)[which(names(data) == "5、您认为电影《花木兰》中中国文化元素的含量有多高")] <- "mulan_elements"

花木兰单变量回归

model_1 <- lm(mulan_satisfaction ~ mulan_culture, data = data)
model_2 <- lm(mulan_satisfaction ~ mulan_elements, data = data)

花木兰多元回归

model_3 <- lm(mulan_satisfaction ~ mulan_culture+mulan_elements, data = data)

功夫熊猫单变量回归

names(data)[which(names(data) == "6、您对电影《功夫熊猫》的满意度有多高?")] <- "panda_satisfaction"
names(data)[which(names(data) == "7、您认为电影《功夫熊猫》多大程度上体现出中国功夫的文化内涵")] <- "panda_kungfu"
names(data)[which(names(data) == "8、您认为电影《功夫熊猫》中中国文化元素的含量有多高")] <- "panda_elements"

# 去除缺失值
df <- na.omit(data[, c("panda_satisfaction", "panda_kungfu", "panda_elements")])

# 多元线性回归模型
model_4 <- lm(panda_satisfaction ~ panda_kungfu , data = df)

model_5 <- lm(panda_satisfaction ~ panda_elements, data = df)

功夫熊猫多元回归

model_6 <- lm(panda_satisfaction ~ panda_kungfu+panda_elements , data = df)
# 重命名列(推荐,可避免中文出错)
names(data)[which(names(data) == "9、您接触或学习外来文化的兴趣有多高?")] <- "interest_foreign"
names(data)[which(names(data) == "10、您认为外来文化对社会的重要性如何?")] <- "importance_foreign"
names(data)[which(names(data) == "11、您认为文化交融会促进本土文化的发展吗?")] <- "integration_effect"

# 计算这三列的行平均值,创建新变量
data$culture_openness <- rowMeans(data[, c("interest_foreign", "importance_foreign", "integration_effect")], na.rm = TRUE)

# 查看结果
head(data[, c("interest_foreign", "importance_foreign", "integration_effect", "culture_openness")])
# A tibble: 6 × 4
  interest_foreign importance_foreign integration_effect culture_openness
             <dbl>              <dbl>              <dbl>            <dbl>
1                7                  7                  7             7   
2                4                  4                  4             4   
3                6                  5                  7             6   
4                5                  3                  4             4   
5                6                  6                  7             6.33
6                6                  4                  4             4.67

包容度和满意度回归(功夫熊猫)

model_7 <- lm(panda_satisfaction ~ culture_openness, data = data)
model_8 <- lm(mulan_satisfaction ~ culture_openness, data = data)
summary(model_1)

Call:
lm(formula = mulan_satisfaction ~ mulan_culture, data = data)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.9470 -0.2433  0.0530  0.2011  3.7567 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)    0.68781    0.15024   4.578 7.34e-06 ***
mulan_culture  0.85184    0.03119  27.308  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.7302 on 255 degrees of freedom
Multiple R-squared:  0.7452,    Adjusted R-squared:  0.7442 
F-statistic: 745.7 on 1 and 255 DF,  p-value: < 2.2e-16
summary(model_2)

Call:
lm(formula = mulan_satisfaction ~ mulan_elements, data = data)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.7219 -0.5588  0.1151  0.4412  3.9520 

Coefficients:
               Estimate Std. Error t value Pr(>|t|)    
(Intercept)     0.53713    0.20107   2.671  0.00804 ** 
mulan_elements  0.83695    0.03988  20.984  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.876 on 255 degrees of freedom
Multiple R-squared:  0.6333,    Adjusted R-squared:  0.6318 
F-statistic: 440.3 on 1 and 255 DF,  p-value: < 2.2e-16
summary(model_3)

Call:
lm(formula = mulan_satisfaction ~ mulan_culture + mulan_elements, 
    data = data)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.2661 -0.2134  0.0352  0.2367  3.3365 

Coefficients:
               Estimate Std. Error t value Pr(>|t|)    
(Intercept)     0.23349    0.15825   1.475    0.141    
mulan_culture   0.61706    0.04776  12.921  < 2e-16 ***
mulan_elements  0.31577    0.05090   6.204 2.22e-09 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.6818 on 254 degrees of freedom
Multiple R-squared:  0.7787,    Adjusted R-squared:  0.777 
F-statistic: 446.9 on 2 and 254 DF,  p-value: < 2.2e-16
summary(model_4)

Call:
lm(formula = panda_satisfaction ~ panda_kungfu, data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-5.5984 -0.2740  0.0638  0.4016  2.7127 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)   1.96288    0.25758   7.621 4.97e-13 ***
panda_kungfu  0.66222    0.04655  14.227  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.8408 on 255 degrees of freedom
Multiple R-squared:  0.4425,    Adjusted R-squared:  0.4403 
F-statistic: 202.4 on 1 and 255 DF,  p-value: < 2.2e-16
summary(model_5)

Call:
lm(formula = panda_satisfaction ~ panda_elements, data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-5.5330 -0.2435  0.1118  0.4670  2.4012 

Coefficients:
               Estimate Std. Error t value Pr(>|t|)    
(Intercept)      2.0198     0.2672   7.558 7.36e-13 ***
panda_elements   0.6447     0.0478  13.487  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.8603 on 255 degrees of freedom
Multiple R-squared:  0.4164,    Adjusted R-squared:  0.4141 
F-statistic: 181.9 on 1 and 255 DF,  p-value: < 2.2e-16
summary(model_6)

Call:
lm(formula = panda_satisfaction ~ panda_kungfu + panda_elements, 
    data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-5.6904 -0.3169  0.0418  0.3625  2.5063 

Coefficients:
               Estimate Std. Error t value Pr(>|t|)    
(Intercept)     1.56466    0.26465   5.912 1.08e-08 ***
panda_kungfu    0.41163    0.07248   5.679 3.70e-08 ***
panda_elements  0.32062    0.07275   4.407 1.55e-05 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.812 on 254 degrees of freedom
Multiple R-squared:  0.4821,    Adjusted R-squared:  0.478 
F-statistic: 118.2 on 2 and 254 DF,  p-value: < 2.2e-16
summary(model_7)

Call:
lm(formula = panda_satisfaction ~ culture_openness, data = data)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.2634 -0.6128  0.0860  0.6883  2.7848 

Coefficients:
                 Estimate Std. Error t value Pr(>|t|)    
(Intercept)        2.6429     0.3490   7.573 6.71e-13 ***
culture_openness   0.5241     0.0619   8.467 2.02e-15 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.9949 on 255 degrees of freedom
Multiple R-squared:  0.2194,    Adjusted R-squared:  0.2164 
F-statistic: 71.68 on 1 and 255 DF,  p-value: 2.015e-15
summary(model_8)

Call:
lm(formula = mulan_satisfaction ~ culture_openness, data = data)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.0855 -0.6374  0.1385  0.9145  3.2587 

Coefficients:
                 Estimate Std. Error t value Pr(>|t|)    
(Intercept)       2.73324    0.49337    5.54 7.53e-08 ***
culture_openness  0.33603    0.08751    3.84 0.000155 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.406 on 255 degrees of freedom
Multiple R-squared:  0.05466,   Adjusted R-squared:  0.05096 
F-statistic: 14.75 on 1 and 255 DF,  p-value: 0.0001553

可视化部分

library(ggplot2)
df$fitted <- fitted(model_7)
df$residuals <- resid(model_7)

ggplot(data, aes(x = culture_openness, y = panda_satisfaction)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", color = "blue") +
  labs(title = "文化开放度对《功夫熊猫》满意度的影响",
       x = "文化开放度 (culture_openness)",
       y = "满意度 (panda_satisfaction)") +
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'

df$fitted <- fitted(model_8)
df$residuals <- resid(model_8)

ggplot(data, aes(x = culture_openness, y = mulan_satisfaction)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", color = "blue") +
  labs(title = "文化开放度对《花木兰》满意度的影响",
       x = "文化开放度 (culture_openness)",
       y = "满意度 (mulan_satisfaction)") +
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'

df$fitted <- fitted(model_5)
df$residuals <- resid(model_5)

ggplot(data, aes(x = panda_elements, y =  panda_satisfaction)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", color = "blue") +
  labs(title = "熊猫元素对功夫熊猫满意度的影响",
       x = "熊猫元素 (culture_openness)",
       y = "满意度 (mulan_satisfaction)") +
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'

df$fitted <- fitted(model_4)
df$residuals <- resid(model_4)

ggplot(data, aes(x = panda_kungfu, y =  panda_satisfaction)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", color = "blue") +
  labs(title = "功夫元素对功夫熊猫满意度的影响",
       x = "功夫元素 (culture_openness)",
       y = "满意度 (mulan_satisfaction)") +
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'

df$fitted <- fitted(model_2)
df$residuals <- resid(model_2)

ggplot(data, aes(x = mulan_elements, y =  mulan_satisfaction)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", color = "blue") +
  labs(title = "木兰元素对花木兰满意度的影响",
       x = "木兰元素 (culture_openness)",
       y = "满意度 (mulan_satisfaction)") +
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'

df$fitted <- fitted(model_1)
df$residuals <- resid(model_1)

ggplot(data, aes(x = mulan_culture, y =  mulan_satisfaction)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", color = "blue") +
  labs(title = "木兰文化对花木兰满意度的影响",
       x = "木兰文化(culture_openness)",
       y = "满意度 (mulan_satisfaction)") +
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'

# 加载库
library(readxl)
library(ggplot2)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
# 读取数据
# 替换掉列名中的中文,简化变量名
names(data)[which(names(data) == "1、您的年龄段是?")] <- "age_group"

# 创建文化开放度变量
data$culture_openness <- rowMeans(data[, c("interest_foreign", "importance_foreign", "integration_effect")], na.rm = TRUE)

# 分类文化开放度为“低中高”
data$openness_level <- cut(
  data$culture_openness,
  breaks = c(-Inf, 4.5, 6.5, Inf),
  labels = c("低", "中", "高")
)

# 生成堆积图数据
plot_data <- data %>%
  filter(!is.na(age_group) & !is.na(openness_level)) %>%
  group_by(age_group, openness_level) %>%
  summarise(count = n(), .groups = "drop")

# 画堆积柱状图
ggplot(plot_data, aes(x = age_group, y = count, fill = openness_level)) +
  geom_bar(stat = "identity") +
  labs(title = "不同年龄段的文化开放度分布",
       x = "年龄段",
       y = "人数",
       fill = "文化开放度") +
  theme_minimal()