# R Markdown

# LESSON 1: BASIC OPERATIONS
# 1. Install the common packages

# List the data sets available in the attached packages.
data()

# Install tidyverse only when it is not already available, so re-running the
# script does not reinstall it every time.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}

# Load the libraries used in this script.

# Plotting: ggplot2
library(ggplot2)

# Input handling and data cleaning: dplyr, tidyr, readr (CSV files),
# readxl (Excel files)
library(dplyr)
library(tidyr)
library(readr)
library(readxl)

# Inspect the data

# Print the built-in CO2 data set
CO2

# Open the data in the viewer
View(CO2)

# Show the first rows directly in the console
head(CO2)

# Assess the quality of the data set: column types, then per-column summaries
str(CO2)
summary(CO2)

# Explore the data

# Keep only the required columns
CO2 %>%
  select(Type, conc)

# Filter rows by a condition
CO2 %>%
  filter(conc > 500)

# ------ PRACTICE ------

# Read the built-in sample data set and open its help page
mpg
?mpg

# Look at the structure of the data
head(mpg)

# Assess the data profile
summary(mpg)

# Explore the data

# Pick a single column
mpg$hwy

# Keep the rows that satisfy a condition
mpg %>%
  filter(hwy > 20)

# Keep only some columns, restricted to the rows that satisfy the condition
mpg_loc <- mpg %>%
  select(hwy, cty) %>%
  filter(hwy > 20)

head(mpg_loc)

# Plotting

# Base-graphics scatter plot of displ (x) against hwy (y)
plot(mpg$displ, mpg$hwy)

# The same scatter plot with ggplot2, coloured by class. Columns are referenced
# by bare name inside aes(); using mpg$class there bypasses the data argument
# and labels the legend "mpg$class".
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy, color = class))

# Homework: explore the Titanic, iris and diamonds data sets

# Count the number of rows for each manufacturer
hangxe <- mpg %>%
  group_by(manufacturer) %>%
  summarise(soluong = n())

head(hangxe)

View(hangxe)

# Count the number of rows for each year
namsx <- mpg %>%
  group_by(year) %>%
  summarise(soluong = n())

head(namsx)

# The same per-manufacturer count using table()
dem_hx <- table(mpg$manufacturer)
dem_hx

# Per-manufacturer summary of hwy: count, max, min, mean and median.
# This overwrites the simpler `hangxe` count table created above.
# NOTE(review): tonxang ("fuel-hungry") holds max(hwy) and tietkiem
# ("economical") holds min(hwy); if hwy is miles per gallon the two names
# look swapped -- confirm before relying on them.
hangxe <- mpg %>%
  group_by(manufacturer) %>%
  summarise(
    soluong = n(),
    tonxang = max(hwy),
    tietkiem = min(hwy),
    tb_tieuthu = mean(hwy),
    trungvi = median(hwy)
  )
hangxe

# The same summary grouped by class
loaixe <- mpg %>%
  group_by(class) %>%
  summarise(
    soluong = n(),
    tonxang = max(hwy),
    tietkiem = min(hwy),
    tb_tieuthu = mean(hwy),
    trungvi = median(hwy)
  )

loaixe

# Plotting

# Fuel-efficiency chart: histogram of hwy, first with base graphics and then
# with ggplot2
hist(mpg$hwy)

ggplot(data = mpg) +
  geom_histogram(mapping = aes(x = hwy))

# Comparison chart: bar chart of the number of rows in each class
ggplot(data = mpg) +
  geom_bar(mapping = aes(x = class))


# Part 2: Analysing custom data

# 1. Read data from Excel files (first sheet of each workbook)
Superstore <- read_excel("3.SUPERSTORE.xlsx", sheet = 1)

head(Superstore)

View(Superstore)

Creditdata <- read_excel("creditdata.xlsx", sheet = 1)

# Clean missing values: work on a copy and turn every NA into 0
credit_data <- Creditdata
credit_data[is.na(credit_data)] <- 0

# Replace spaces in the column names with underscores
names(credit_data) <- gsub(" ", "_", names(credit_data))

View(credit_data)

# Statistics: number of rows for each Term
theonhom <- credit_data %>%
  group_by(Term) %>%
  summarise(soluong = n())

theonhom

# Chart: bar chart of Home_Ownership with Term mapped to the colour aesthetic
# NOTE(review): for bars, `color` sets the outline only; `fill = Term` may have
# been intended -- confirm.
ggplot(data = credit_data) +
  geom_bar(mapping = aes(x = Home_Ownership, color = Term))