R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

Note: This analysis was performed using the open-source software R and RStudio.

Objective

This document presents Group 1’s regression analysis of our survey data.

# ---- Import the survey export ----
# Read the raw CSV; text columns are kept as character so they can be
# coerced explicitly below.
df <- read.csv(
  file = "Group1Survey.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)

# Discard the first two data rows, which hold metadata rather than responses.
df <- df[-(1:2), ]

# Coerce every column to numeric in place (`df[] <-` keeps the data.frame
# shape and column names). Entries that cannot be parsed become NA.
df[] <- lapply(df, as.numeric)

# Inspect the resulting column types
str(df)
## 'data.frame':    16 obs. of  13 variables:
##  $ Q1. : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ Q2. : num  21 21 21 21 20 31 22 20 22 21 ...
##  $ Q3. : num  1 1 2 2 1 1 2 1 1 1 ...
##  $ Q4. : num  1 1 2 1 1 1 2 1 1 1 ...
##  $ Q5. : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ Q6. : num  1 1 2 1 1 1 1 1 2 1 ...
##  $ Q7. : num  1 1 1 1 1 2 2 2 1 1 ...
##  $ Q8. : num  5 5 5 0 6 7 7 7 6 1 ...
##  $ Q9. : num  1 1 3 1 1 1 2 1 2 1 ...
##  $ Q10.: num  5 4 10 0 6 2 8 3 7 2 ...
##  $ Q11.: num  1 2 2 3 1 2 3 2 1 2 ...
##  $ Q12.: num  1 1 1 1 1 1 1 1 2 1 ...
##  $ Q13.: num  1 1 1 1 1 1 1 1 1 1 ...
# Per-column descriptive statistics (min, quartiles, median, mean, max)
summary(df) 
##       Q1.              Q2.             Q3.            Q4.             Q5.   
##  Min.   :0.0000   Min.   :20.00   Min.   :1.00   Min.   :1.000   Min.   :1  
##  1st Qu.:1.0000   1st Qu.:21.00   1st Qu.:1.00   1st Qu.:1.000   1st Qu.:1  
##  Median :1.0000   Median :21.00   Median :1.00   Median :1.000   Median :1  
##  Mean   :0.9375   Mean   :21.75   Mean   :1.25   Mean   :1.125   Mean   :1  
##  3rd Qu.:1.0000   3rd Qu.:22.00   3rd Qu.:1.25   3rd Qu.:1.000   3rd Qu.:1  
##  Max.   :1.0000   Max.   :31.00   Max.   :2.00   Max.   :2.000   Max.   :1  
##       Q6.             Q7.             Q8.             Q9.       
##  Min.   :1.000   Min.   :1.000   Min.   :0.000   Min.   :1.000  
##  1st Qu.:1.000   1st Qu.:1.000   1st Qu.:5.000   1st Qu.:1.000  
##  Median :1.000   Median :1.000   Median :5.000   Median :1.000  
##  Mean   :1.188   Mean   :1.312   Mean   :4.938   Mean   :1.312  
##  3rd Qu.:1.000   3rd Qu.:2.000   3rd Qu.:7.000   3rd Qu.:1.250  
##  Max.   :2.000   Max.   :2.000   Max.   :8.000   Max.   :3.000  
##       Q10.            Q11.            Q12.            Q13.  
##  Min.   : 0.00   Min.   :1.000   Min.   :1.000   Min.   :1  
##  1st Qu.: 2.75   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:1  
##  Median : 5.00   Median :2.000   Median :1.000   Median :1  
##  Mean   : 4.75   Mean   :1.875   Mean   :1.125   Mean   :1  
##  3rd Qu.: 6.25   3rd Qu.:2.250   3rd Qu.:1.000   3rd Qu.:1  
##  Max.   :10.00   Max.   :3.000   Max.   :2.000   Max.   :1
# ---- Linear model: Q8 explained by Q2 and Q6 ----
# Drop any row that still contains an NA after the numeric coercion above.
df <- na.omit(df)

# Q3. is stored as a factor from here on; it is not used in the model below.
df$Q3. <- as.factor(df$Q3.)

# Q5. is deliberately left out of the formula: every respondent gave the same
# answer (min = max = 1 in the summary above), so the column is perfectly
# collinear with the intercept and lm() can only report NA for it
# ("1 not defined because of singularities"). Dropping it leaves the fitted
# values and the remaining estimates unchanged.
model <- lm(Q8. ~ Q2. + Q6., data = df)
summary(model)
## 
## Call:
## lm(formula = Q8. ~ Q2. + Q6., data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.8839 -0.2651  0.2797  1.6116  3.2797 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   0.7792     6.2038   0.126    0.902
## Q2.           0.1637     0.2668   0.613    0.550
## Q6.           0.5040     1.6833   0.299    0.769
## 
## Residual standard error: 2.628 on 13 degrees of freedom
## Multiple R-squared:  0.03418,    Adjusted R-squared:  -0.1144 
## F-statistic:  0.23 on 2 and 13 DF,  p-value: 0.7977
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)

# ---- Mean Q8 response for each Q6 answer ----
# Keep only complete rows before aggregating.
df_clean <- na.omit(df)

# One row per distinct Q6. value, holding the mean of Q8. within that group.
summary_df <- summarise(
  group_by(df_clean, Q6.),
  avg_switch = mean(Q8.)
)

# Column chart of the group means; Q6. is shown as a discrete category and
# bar heights are taken directly from avg_switch.
ggplot(data = summary_df, mapping = aes(x = factor(Q6.), y = avg_switch)) +
  geom_col(fill = "steelblue") +
  labs(
    x = "Gas Price Impact (Q6)",
    y = "Average Likelihood to Switch (Q8)",
    title = "Effect of Gas Prices on Switching Transportation"
  ) +
  theme_minimal()

library(ggplot2)

# ---- Q8 against Q6: raw responses with a fitted straight line ----
# Points are semi-transparent so overlapping responses remain visible; the
# smoother fits a linear model and draws its confidence band.
ggplot(data = df, mapping = aes(x = Q6., y = Q8.)) +
  geom_point(alpha = 0.6, color = "blue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(
    x = "Impact of Gas Prices (Q6)",
    y = "Likelihood to Switch Transportation (Q8)",
    title = "Effect of Gas Prices on Switching Transportation"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

library(dplyr)
library(ggplot2)

# ---- Mean Q9 response for each Q6 answer ----
# Keep only complete rows before aggregating.
df_clean <- na.omit(df)

# One row per distinct Q6. value, holding the mean of Q9. within that group.
summary_df <- summarise(
  group_by(df_clean, Q6.),
  avg_ev_switch = mean(Q9.)
)

# Column chart of the group means; Q6. is shown as a discrete category and
# bar heights are taken directly from avg_ev_switch.
ggplot(data = summary_df, mapping = aes(x = factor(Q6.), y = avg_ev_switch)) +
  geom_col(fill = "darkgreen") +
  labs(
    x = "Gas Price Impact (Q6)",
    y = "Average Likelihood to Switch to EV (Q9)",
    title = "Effect of Gas Prices on Switching to Electric Vehicles"
  ) +
  theme_minimal()

library(ggplot2)

# ---- Q9 against Q6: raw responses with a fitted straight line ----
# Keep only complete rows for plotting.
df_clean <- na.omit(df)

# Semi-transparent points plus a linear-model smoother with its
# confidence band.
ggplot(data = df_clean, mapping = aes(x = Q6., y = Q9.)) +
  geom_point(alpha = 0.6, color = "darkgreen") +
  geom_smooth(method = "lm", se = TRUE, color = "black") +
  labs(
    x = "Gas Price Impact (Q6)",
    y = "Likelihood to Switch to EV (Q9)",
    title = "Effect of Gas Prices on Likelihood of Switching to EV"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'