This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
Note: this analysis was performed using the open-source software R and RStudio.
Group 1’s regression analysis of our survey data.
# Read the raw survey export, keeping text columns as character vectors
df <- read.csv(file = "Group1Survey.csv", stringsAsFactors = FALSE, header = TRUE)

# The first two data rows carry metadata rather than responses; discard them
df <- df[-seq_len(2), ]

# Coerce every column to numeric while keeping the data-frame shape
# (assigning into df[] replaces the columns in place)
df[] <- lapply(df, as.numeric)

# Inspect column types and the first few values of each column
str(df)
## 'data.frame': 16 obs. of 13 variables:
## $ Q1. : num 1 1 1 1 1 1 1 1 1 1 ...
## $ Q2. : num 21 21 21 21 20 31 22 20 22 21 ...
## $ Q3. : num 1 1 2 2 1 1 2 1 1 1 ...
## $ Q4. : num 1 1 2 1 1 1 2 1 1 1 ...
## $ Q5. : num 1 1 1 1 1 1 1 1 1 1 ...
## $ Q6. : num 1 1 2 1 1 1 1 1 2 1 ...
## $ Q7. : num 1 1 1 1 1 2 2 2 1 1 ...
## $ Q8. : num 5 5 5 0 6 7 7 7 6 1 ...
## $ Q9. : num 1 1 3 1 1 1 2 1 2 1 ...
## $ Q10.: num 5 4 10 0 6 2 8 3 7 2 ...
## $ Q11.: num 1 2 2 3 1 2 3 2 1 2 ...
## $ Q12.: num 1 1 1 1 1 1 1 1 2 1 ...
## $ Q13.: num 1 1 1 1 1 1 1 1 1 1 ...
# Per-column summary statistics (min, quartiles, median, mean, max)
summary(df)
## Q1. Q2. Q3. Q4. Q5.
## Min. :0.0000 Min. :20.00 Min. :1.00 Min. :1.000 Min. :1
## 1st Qu.:1.0000 1st Qu.:21.00 1st Qu.:1.00 1st Qu.:1.000 1st Qu.:1
## Median :1.0000 Median :21.00 Median :1.00 Median :1.000 Median :1
## Mean :0.9375 Mean :21.75 Mean :1.25 Mean :1.125 Mean :1
## 3rd Qu.:1.0000 3rd Qu.:22.00 3rd Qu.:1.25 3rd Qu.:1.000 3rd Qu.:1
## Max. :1.0000 Max. :31.00 Max. :2.00 Max. :2.000 Max. :1
## Q6. Q7. Q8. Q9.
## Min. :1.000 Min. :1.000 Min. :0.000 Min. :1.000
## 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:5.000 1st Qu.:1.000
## Median :1.000 Median :1.000 Median :5.000 Median :1.000
## Mean :1.188 Mean :1.312 Mean :4.938 Mean :1.312
## 3rd Qu.:1.000 3rd Qu.:2.000 3rd Qu.:7.000 3rd Qu.:1.250
## Max. :2.000 Max. :2.000 Max. :8.000 Max. :3.000
## Q10. Q11. Q12. Q13.
## Min. : 0.00 Min. :1.000 Min. :1.000 Min. :1
## 1st Qu.: 2.75 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1
## Median : 5.00 Median :2.000 Median :1.000 Median :1
## Mean : 4.75 Mean :1.875 Mean :1.125 Mean :1
## 3rd Qu.: 6.25 3rd Qu.:2.250 3rd Qu.:1.000 3rd Qu.:1
## Max. :10.00 Max. :3.000 Max. :2.000 Max. :1
# Drop every respondent with a missing value in any column so that all later
# steps work on complete rows only
df <- na.omit(df)

# Store Q3 as a factor rather than a number
df$Q3. <- as.factor(df$Q3.)

# Linear regression of Q8 on Q2 and Q6.
# Q5 is deliberately left out: every respondent gave the same answer
# (Min = Max = 1 in summary(df)), so the column is perfectly collinear with
# the intercept and lm() can only report NA for it
# ("1 not defined because of singularities"). Removing it leaves the remaining
# estimates, residuals and fit statistics unchanged.
model <- lm(Q8. ~ Q2. + Q6., data = df)
summary(model)
##
## Call:
## lm(formula = Q8. ~ Q2. + Q6., data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.8839 -0.2651 0.2797 1.6116 3.2797
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.7792 6.2038 0.126 0.902
## Q2. 0.1637 0.2668 0.613 0.550
## Q6. 0.5040 1.6833 0.299 0.769
##
## Residual standard error: 2.628 on 13 degrees of freedom
## Multiple R-squared: 0.03418, Adjusted R-squared: -0.1144
## F-statistic: 0.23 on 2 and 13 DF, p-value: 0.7977
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Work on complete responses only
df_clean <- na.omit(df)

# Mean Q8 response within each level of Q6
summary_df <- summarise(
  group_by(df_clean, Q6.),
  avg_switch = mean(Q8.)
)

# Bar chart: one bar per Q6 level, bar height is the group mean of Q8
ggplot(data = summary_df, mapping = aes(x = factor(Q6.), y = avg_switch)) +
  geom_col(fill = "steelblue") +
  ggtitle("Effect of Gas Prices on Switching Transportation") +
  xlab("Gas Price Impact (Q6)") +
  ylab("Average Likelihood to Switch (Q8)") +
  theme_minimal()
library(ggplot2)
# Scatter of individual responses (Q6 on x, Q8 on y) overlaid with a fitted
# linear-model line and its standard-error band
ggplot(data = df, mapping = aes(x = Q6., y = Q8.)) +
  geom_point(alpha = 0.6, color = "blue") +
  geom_smooth(se = TRUE, color = "red", method = "lm") +
  theme_minimal() +
  ggtitle("Effect of Gas Prices on Switching Transportation") +
  xlab("Impact of Gas Prices (Q6)") +
  ylab("Likelihood to Switch Transportation (Q8)")
## `geom_smooth()` using formula = 'y ~ x'
library(dplyr)
library(ggplot2)
# Work on complete responses only
df_clean <- na.omit(df)

# Mean Q9 response within each level of Q6
summary_df <- summarise(
  group_by(df_clean, Q6.),
  avg_ev_switch = mean(Q9.)
)

# Bar chart: one bar per Q6 level, bar height is the group mean of Q9
ggplot(data = summary_df, mapping = aes(x = factor(Q6.), y = avg_ev_switch)) +
  geom_col(fill = "darkgreen") +
  ggtitle("Effect of Gas Prices on Switching to Electric Vehicles") +
  xlab("Gas Price Impact (Q6)") +
  ylab("Average Likelihood to Switch to EV (Q9)") +
  theme_minimal()
library(ggplot2)
# Work on complete responses only
df_clean <- na.omit(df)

# Scatter of individual responses (Q6 on x, Q9 on y) overlaid with a fitted
# linear-model line and its standard-error band
ggplot(data = df_clean, mapping = aes(x = Q6., y = Q9.)) +
  geom_point(alpha = 0.6, color = "darkgreen") +
  geom_smooth(se = TRUE, color = "black", method = "lm") +
  theme_minimal() +
  ggtitle("Effect of Gas Prices on Likelihood of Switching to EV") +
  xlab("Gas Price Impact (Q6)") +
  ylab("Likelihood to Switch to EV (Q9)")
## `geom_smooth()` using formula = 'y ~ x'