library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag(): dplyr, stats
library(stringr)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(readxl)
path = dir("D:/TKT/RImportData", full.names = TRUE)
data_path = path[str_detect(path, "SME")]
data11 = read.csv("D:/TKT/RImportData/SME2011.xlsx")
data13 = read.csv("D:/TKT/RImportData/SME2013.xlsx")
#Tim tat ca cac file co ten chua "SME" trong file RImportData
str(data_path)
## chr [1:5] "D:/TKT/RImportData/SME2011.xlsx" ...
#Xem file
View(data11)
View(data13)
#Xem qua du lieu cho thay cac cot bien sap xep rat lon xon, khong the ghep file ngay
#Xem su trung khop cua cac cot
dim(data11)
## [1] 2656 120
dim(data13)
## [1] 2575 125
c11 = c(colnames(data11))
c13 = c(colnames(data13))
c = 0
meg = function(colna1, colna2) {
xl = length(colna1)
yl = length(colna2)
if (xl < yl) {l = xl} else {l = yl}
for (i in 15:l) {
x = colna1[i]
y = colna2[i - 1]
if (x == y) { c = c(c, colna1[i])} else {c = c(c, as.character(i))}
}
return(c)
}
meg(c11, c13)
## [1] "0" "CEOethnic" "CEOeducation"
## [4] "CEOprofessional" "CEOexperience" "CEOpartymember"
## [7] "machinetype" "machine3year" "machine3to5year"
## [10] "machine6to10year" "machine11to20year" "machine20year"
## [13] "outsource" "subcontractor" "certificate"
## [16] "advertise" "advertisespent" "salehousehold"
## [19] "saletourist" "salegovern" "saledomestic"
## [22] "salestate" "saleFDI" "saleexport"
## [25] "export" "exporttype" "exportratio"
## [28] "china" "japan" "asean"
## [31] "asian" "us" "eu"
## [34] "russia" "nonasia" "costmarketing"
## [37] "servicepurchase" "investtotal" "investland"
## [40] "investbuilding" "investequip" "investRD"
## [43] "investtrain" "investpatent" "investfirm"
## [46] "investothers" "assets" "debt"
## [49] "labortotal" "manager" "professional"
## [52] "productionworker" "66" "67"
## [55] "networkbusiness1" "networkbusiness2" "networkbank"
## [58] "networkpolitician" "networkother" "networktotal"
## [61] "association" "newproduct" "motivation1"
## [64] "modification" "motivation2" "newprocess"
## [67] "motivation3" "barrierinnovation" "constraintgrowth"
## [70] "governtax" "governloan" "governtraining"
## [73] "governtrade" "governquality" "governother"
## [76] "89" "90" "91"
## [79] "92" "93" "94"
## [82] "95" "96" "97"
## [85] "98" "99" "100"
## [88] "101" "102" "103"
## [91] "104" "105" "106"
## [94] "107" "108" "109"
## [97] "110" "111" "112"
## [100] "113" "114" "115"
## [103] "116" "117" "118"
## [106] "119" "120"
#Hop nhat du lieu
data11_13 = read_excel("D:/TKT/RImportData/SME2011_2013.xlsx")
## Warning in read_fun(path = path, sheet = sheet, limits = limits, shim =
## shim, : Expecting numeric in C1635 / R1635C3: got 'Kh«ng cã th«ng tin'
## Warning in read_fun(path = path, sheet = sheet, limits = limits, shim =
## shim, : Expecting numeric in C4803 / R4803C3: got 'Kh«ng cã th«ng tin'
## Warning in read_fun(path = path, sheet = sheet, limits = limits, shim =
## shim, : Expecting numeric in C5067 / R5067C3: got 'Kh«ng cã th«ng tin'
## Warning in read_fun(path = path, sheet = sheet, limits = limits, shim =
## shim, : Expecting numeric in C5167 / R5167C3: got 'Kh«ng cã th«ng tin'
data11_13 = as.data.frame(data11_13)
#Chuan hoa firmid
colna = colnames(data11_13)
rna = c(4, 4)
dem = 0
dim(data11_13)
## [1] 5229 114
for (i in 2:5228) {
x = data11_13$firmid[i]
y = data11_13$firmid[i + 1]
if (x < y) {
dem = 0
dem = dem + 1
rna = c(rna, y)
} else if (dem < 2) {
dem = dem + 1
rna = c(rna, y)
}
}
#x la data.frame chua firmid chuan hoa
x = data.frame(firmid = rna)
#Chuan hoa data theo firmid, moi firmid xuat hien nhieu nhat 2 lan, neu qua thi xoa
dem = 0
mydata = data11_13
for (i in 2:5228) {
x = data11_13$firmid[i]
y = data11_13$firmid[i + 1]
z = i + 1
if (x < y) {
dem = 0
dem = dem + 1
} else if (dem < 2) {
dem = dem + 1
} else (mydata = mydata[- z, ])
}
dim(mydata) #Co the thay so dong cua mydata trung voi firmid chuan
## [1] 5109 114
#Xem du lieu da duoc xu li
View(mydata)
#Xuat du lieu duoi dang file excel
setwd("D:/")
write_excel_csv(mydata, "SME2011_2013_edited.xlsx")