This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# 1. 라이브러리 로드
library(readxl)
library(tidyverse)
library(cluster)
library(factoextra)

# 2. Load the survey data (change the file name to match your own file!)
file_path <- "investment.xlsx"
df <- file_path %>% read_excel()

# ==========================================================
# Replace the long Korean question headers with short names,
# addressing the columns by position
# ==========================================================
# As long as the Google Form question order has not changed, renaming by
# position is the most reliable approach.
# (Run colnames(df) afterwards to double-check, just in case!)
#
# Positions and the names they receive:
#   10-12 : investment behaviour
#             Risk_Asset_Ratio  (Q2-3, share of risky assets)
#             Short_Term_Ratio  (Q2-4, share of short-term trading)
#             Leverage          (Q2-5, use of leverage)
#   39-44 : O1-O6, get-rich-quick tendency items
#   45-49 : A1-A5, anxiety / relative-deprivation items
names(df)[c(10:12, 39:49)] <- c(
  "Risk_Asset_Ratio", "Short_Term_Ratio", "Leverage",
  paste0("O", 1:6),
  paste0("A", 1:5)
)

# Sanity check: print the renamed columns to confirm the new names
print(names(df)[c(10:12, 39:49)])

# ==========================================================
# 3. Data preprocessing (scoring)
# ==========================================================

# Convert a ratio-band answer (text containing e.g. "30~40") into the
# midpoint of that band.
#
# answer: character vector of survey answers.
# Returns a numeric vector of the same length. Missing answers and answers
# that match no band fall back to the lowest band midpoint (5).
ratio_band_to_midpoint <- function(answer) {
  case_when(
    # "90~100" contains the substring "0~10", and case_when() returns the
    # first matching branch, so the top band has to be tested before the
    # bottom band. Otherwise every "90~100" answer is scored 5, not 95.
    str_detect(answer, "90~100") ~ 95,
    str_detect(answer, "0~10") ~ 5,
    str_detect(answer, "10~20") ~ 15,
    str_detect(answer, "20~30") ~ 25,
    str_detect(answer, "30~40") ~ 35,
    str_detect(answer, "40~50") ~ 45,
    str_detect(answer, "50~60") ~ 55,
    str_detect(answer, "60~70") ~ 65,
    str_detect(answer, "70~80") ~ 75,
    str_detect(answer, "80~90") ~ 85,
    TRUE ~ 5 # missing or unexpected answers are treated as the minimum
  )
}

data_processed <- df %>%
  mutate(
    # 1) Risky-asset share and short-term trading share: text band -> midpoint
    Risk_Score = ratio_band_to_midpoint(Risk_Asset_Ratio),
    Short_Score = ratio_band_to_midpoint(Short_Term_Ratio),
    # 2) Leverage answer -> ordinal score 0-3, keyed on a word in the answer;
    #    missing or unexpected answers score 0
    Lev_Score = case_when(
      str_detect(Leverage, "전혀") ~ 0,
      str_detect(Leverage, "예전") ~ 1,
      str_detect(Leverage, "최근") ~ 2,
      str_detect(Leverage, "현재") ~ 3,
      TRUE ~ 0
    ),
    # 3) Per-respondent mean of each item block, ignoring missing items
    #    (a row whose items are all missing yields NaN)
    O_Mean = rowMeans(select(., O1:O6), na.rm = TRUE),
    A_Mean = rowMeans(select(., A1:A5), na.rm = TRUE)
  )

# ==========================================================
# 4. Cluster analysis and visualisation
# ==========================================================
# Build the clustering input: the four score columns, rows with missing
# values removed, each column standardised
cluster_data <- data_processed[
  c("Risk_Score", "Short_Score", "Lev_Score", "O_Mean")
] %>%
  drop_na() %>%
  scale()

# Run k-means with K = 3 (seed fixed so the result is reproducible)
set.seed(42)
kmeans_result <- kmeans(x = cluster_data, centers = 3, nstart = 25)

# [Plot] Visualise the clusters
fviz_cluster(
  object = kmeans_result,
  data = cluster_data,
  main = "2030 투자자 유형 군집 분석",
  geom = "point",
  ellipse.type = "convex",
  ggtheme = theme_minimal()
)

# [Create the additional Cluster variable]
# --- [Cluster analysis is re-run from here] ---
# Select the clustering features and record which respondents have all of
# them. Rows with a missing feature cannot be clustered, so the mask is kept
# to line the k-means labels back up with the rows of data_processed.
cluster_features <- data_processed %>%
  select(Risk_Score, Short_Score, Lev_Score, O_Mean)
has_all_features <- complete.cases(cluster_features)

# Standardised input for k-means (complete rows only)
cluster_data_temp <- scale(cluster_features[has_all_features, ])

# Run k-means (K = 3)
set.seed(42)
kmeans_result <- kmeans(cluster_data_temp, centers = 3, nstart = 25)

# Important: attach the result back onto the original data.
# kmeans() returns one label per clustered row. If any row was excluded above,
# assigning that vector directly would not match nrow(data_processed). Labels
# are therefore written only to the rows that were clustered; the remaining
# rows get NA.
cluster_labels <- rep(NA_integer_, nrow(data_processed))
cluster_labels[has_all_features] <- kmeans_result$cluster
data_processed$Cluster <- as.factor(cluster_labels)
# -----------------------------------

# Per-cluster mean of each score, reshaped to long format
# (one row per Cluster x Variable) for plotting
cluster_summary <- data_processed %>%
  group_by(Cluster) %>%
  summarise(
    # Share of risky assets
    Risk = mean(Risk_Score, na.rm = TRUE),
    # Share of short-term trading
    ShortTerm = mean(Short_Score, na.rm = TRUE),
    # Leverage tendency (multiplied by 20 to match the bar heights)
    Leverage = 20 * mean(Lev_Score, na.rm = TRUE),
    # Get-rich-quick tendency (multiplied by 20 to match the bar heights)
    HanTang = 20 * mean(O_Mean, na.rm = TRUE),
    # Return an ungrouped result (avoids grouping-related errors later)
    .groups = "drop"
  ) %>%
  pivot_longer(
    cols = -Cluster, # every summary column: Risk, ShortTerm, Leverage, HanTang
    names_to = "Variable",
    values_to = "Value"
  )

Including Plots

Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.

2030세대 한탕주의 투자 문화 분석 보고서

R Markdown

Including Plots