# Create data frame
# Inventory table with one row per product variation (20 rows, 7 columns).
# Every column is supplied as character strings, including Item_ID and Price.
df <- local({
  # Number of variation rows for each of the ten items, in row order.
  variations_per_item <- c(2, 2, 2, 2, 3, 2, 2, 2, 1, 2)

  # Item-level values are listed once per item and repeated for each of that
  # item's variations.
  per_item <- function(values) rep(values, times = variations_per_item)

  data.frame(
    Category = rep(
      c(
        "Electronics", "Home Appliances", "Clothing", "Books",
        "Sports Equipment"
      ),
      times = c(4, 4, 5, 4, 3)
    ),
    Item_Name = per_item(c(
      "Smartphone", "Laptop", "Refrigerator", "Washing Machine", "T-Shirt",
      "Jeans", "Fiction Novel", "Non-Fiction Guide", "Basketball",
      "Tennis Racket"
    )),
    Item_ID = per_item(c(
      "101", "102", "201", "202", "301", "302", "401", "402", "501", "502"
    )),
    # "-" marks items listed without a brand (the two book titles).
    Brand = per_item(c(
      "TechBrand", "CompuBrand", "HomeCool", "CleanTech", "FashionCo",
      "DenimWorks", "-", "-", "SportsGear", "RacketPro"
    )),
    Price = per_item(c(
      "699.99", "1099.99", "899.99", "499.99", "19.99", "49.99", "14.99",
      "24.99", "29.99", "89.99"
    )),
    # Variation-level values are unique per row, so they are listed in full.
    Variation_ID = c(
      "101-A", "101-B",
      "102-A", "102-B",
      "201-A", "201-B",
      "202-A", "202-B",
      "301-A", "301-B", "301-C",
      "302-A", "302-B",
      "401-A", "401-B",
      "402-A", "402-B",
      "501-A",
      "502-A", "502-B"
    ),
    Variation_Details = c(
      "Color: Black, Storage: 64GB",
      "Color: White, Storage: 128GB",
      "Color: Silver, Storage: 256GB",
      "Color: Space Gray, Storage: 512GB",
      "Color: Stainless Steel, Capacity: 20 cu ft",
      "Color: White, Capacity: 18 cu ft",
      "Type: Front Load, Capacity: 4.5 cu ft",
      "Type: Top Load, Capacity: 5.0 cu ft",
      "Color: Blue, Size: S",
      "Color: Red, Size: M",
      "Color: Green, Size: L",
      "Color: Dark Blue, Size: 32",
      "Color: Light Blue, Size: 34",
      "Format: Hardcover, Language: English",
      "Format: Paperback, Language: Spanish",
      "Format: eBook, Language: English",
      "Format: Paperback, Language: French",
      "Size: Size 7, Color: Orange",
      "Material: Graphite, Color: Black",
      "Material: Aluminum, Color: Silver"
    )
  )
})
# Preview the first six rows.
head(df)
## Category Item_Name Item_ID Brand Price Variation_ID
## 1 Electronics Smartphone 101 TechBrand 699.99 101-A
## 2 Electronics Smartphone 101 TechBrand 699.99 101-B
## 3 Electronics Laptop 102 CompuBrand 1099.99 102-A
## 4 Electronics Laptop 102 CompuBrand 1099.99 102-B
## 5 Home Appliances Refrigerator 201 HomeCool 899.99 201-A
## 6 Home Appliances Refrigerator 201 HomeCool 899.99 201-B
## Variation_Details
## 1 Color: Black, Storage: 64GB
## 2 Color: White, Storage: 128GB
## 3 Color: Silver, Storage: 256GB
## 4 Color: Space Gray, Storage: 512GB
## 5 Color: Stainless Steel, Capacity: 20 cu ft
## 6 Color: White, Capacity: 18 cu ft
# JSON
# Serialize the data frame as a JSON array holding one object per row
# (dataframe = "rows"), pretty-printed for readability.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
json <- toJSON(x = df, dataframe = "rows", pretty = TRUE)
head(json)
## [
## {
## "Category": "Electronics",
## "Item_Name": "Smartphone",
## "Item_ID": "101",
## "Brand": "TechBrand",
## "Price": "699.99",
## "Variation_ID": "101-A",
## "Variation_Details": "Color: Black, Storage: 64GB"
## },
## {
## "Category": "Electronics",
## "Item_Name": "Smartphone",
## "Item_ID": "101",
## "Brand": "TechBrand",
## "Price": "699.99",
## "Variation_ID": "101-B",
## "Variation_Details": "Color: White, Storage: 128GB"
## },
## {
## "Category": "Electronics",
## "Item_Name": "Laptop",
## "Item_ID": "102",
## "Brand": "CompuBrand",
## "Price": "1099.99",
## "Variation_ID": "102-A",
## "Variation_Details": "Color: Silver, Storage: 256GB"
## },
## {
## "Category": "Electronics",
## "Item_Name": "Laptop",
## "Item_ID": "102",
## "Brand": "CompuBrand",
## "Price": "1099.99",
## "Variation_ID": "102-B",
## "Variation_Details": "Color: Space Gray, Storage: 512GB"
## },
## {
## "Category": "Home Appliances",
## "Item_Name": "Refrigerator",
## "Item_ID": "201",
## "Brand": "HomeCool",
## "Price": "899.99",
## "Variation_ID": "201-A",
## "Variation_Details": "Color: Stainless Steel, Capacity: 20 cu ft"
## },
## {
## "Category": "Home Appliances",
## "Item_Name": "Refrigerator",
## "Item_ID": "201",
## "Brand": "HomeCool",
## "Price": "899.99",
## "Variation_ID": "201-B",
## "Variation_Details": "Color: White, Capacity: 18 cu ft"
## },
## {
## "Category": "Home Appliances",
## "Item_Name": "Washing Machine",
## "Item_ID": "202",
## "Brand": "CleanTech",
## "Price": "499.99",
## "Variation_ID": "202-A",
## "Variation_Details": "Type: Front Load, Capacity: 4.5 cu ft"
## },
## {
## "Category": "Home Appliances",
## "Item_Name": "Washing Machine",
## "Item_ID": "202",
## "Brand": "CleanTech",
## "Price": "499.99",
## "Variation_ID": "202-B",
## "Variation_Details": "Type: Top Load, Capacity: 5.0 cu ft"
## },
## {
## "Category": "Clothing",
## "Item_Name": "T-Shirt",
## "Item_ID": "301",
## "Brand": "FashionCo",
## "Price": "19.99",
## "Variation_ID": "301-A",
## "Variation_Details": "Color: Blue, Size: S"
## },
## {
## "Category": "Clothing",
## "Item_Name": "T-Shirt",
## "Item_ID": "301",
## "Brand": "FashionCo",
## "Price": "19.99",
## "Variation_ID": "301-B",
## "Variation_Details": "Color: Red, Size: M"
## },
## {
## "Category": "Clothing",
## "Item_Name": "T-Shirt",
## "Item_ID": "301",
## "Brand": "FashionCo",
## "Price": "19.99",
## "Variation_ID": "301-C",
## "Variation_Details": "Color: Green, Size: L"
## },
## {
## "Category": "Clothing",
## "Item_Name": "Jeans",
## "Item_ID": "302",
## "Brand": "DenimWorks",
## "Price": "49.99",
## "Variation_ID": "302-A",
## "Variation_Details": "Color: Dark Blue, Size: 32"
## },
## {
## "Category": "Clothing",
## "Item_Name": "Jeans",
## "Item_ID": "302",
## "Brand": "DenimWorks",
## "Price": "49.99",
## "Variation_ID": "302-B",
## "Variation_Details": "Color: Light Blue, Size: 34"
## },
## {
## "Category": "Books",
## "Item_Name": "Fiction Novel",
## "Item_ID": "401",
## "Brand": "-",
## "Price": "14.99",
## "Variation_ID": "401-A",
## "Variation_Details": "Format: Hardcover, Language: English"
## },
## {
## "Category": "Books",
## "Item_Name": "Fiction Novel",
## "Item_ID": "401",
## "Brand": "-",
## "Price": "14.99",
## "Variation_ID": "401-B",
## "Variation_Details": "Format: Paperback, Language: Spanish"
## },
## {
## "Category": "Books",
## "Item_Name": "Non-Fiction Guide",
## "Item_ID": "402",
## "Brand": "-",
## "Price": "24.99",
## "Variation_ID": "402-A",
## "Variation_Details": "Format: eBook, Language: English"
## },
## {
## "Category": "Books",
## "Item_Name": "Non-Fiction Guide",
## "Item_ID": "402",
## "Brand": "-",
## "Price": "24.99",
## "Variation_ID": "402-B",
## "Variation_Details": "Format: Paperback, Language: French"
## },
## {
## "Category": "Sports Equipment",
## "Item_Name": "Basketball",
## "Item_ID": "501",
## "Brand": "SportsGear",
## "Price": "29.99",
## "Variation_ID": "501-A",
## "Variation_Details": "Size: Size 7, Color: Orange"
## },
## {
## "Category": "Sports Equipment",
## "Item_Name": "Tennis Racket",
## "Item_ID": "502",
## "Brand": "RacketPro",
## "Price": "89.99",
## "Variation_ID": "502-A",
## "Variation_Details": "Material: Graphite, Color: Black"
## },
## {
## "Category": "Sports Equipment",
## "Item_Name": "Tennis Racket",
## "Item_ID": "502",
## "Brand": "RacketPro",
## "Price": "89.99",
## "Variation_ID": "502-B",
## "Variation_Details": "Material: Aluminum, Color: Silver"
## }
## ]
Pros:
- JSON is easy to read and write due to its simple syntax.
- JSON is less verbose than XML, making it more efficient in terms of
data size.
- JSON is supported by most modern programming languages.
Cons:
- JSON supports only a limited set of data types: strings, numbers,
objects, arrays, booleans, and null.
- JSON does not support comments.
- JSON does not have a built-in schema definition.
# HTML
# Bind the data frame to `html` and render it as an HTML table.
html <- df
tableHTML(html)
1 |
Electronics |
Smartphone |
101 |
TechBrand |
699.99 |
101-A |
Color: Black, Storage: 64GB |
2 |
Electronics |
Smartphone |
101 |
TechBrand |
699.99 |
101-B |
Color: White, Storage: 128GB |
3 |
Electronics |
Laptop |
102 |
CompuBrand |
1099.99 |
102-A |
Color: Silver, Storage: 256GB |
4 |
Electronics |
Laptop |
102 |
CompuBrand |
1099.99 |
102-B |
Color: Space Gray, Storage: 512GB |
5 |
Home Appliances |
Refrigerator |
201 |
HomeCool |
899.99 |
201-A |
Color: Stainless Steel, Capacity: 20 cu ft |
6 |
Home Appliances |
Refrigerator |
201 |
HomeCool |
899.99 |
201-B |
Color: White, Capacity: 18 cu ft |
7 |
Home Appliances |
Washing Machine |
202 |
CleanTech |
499.99 |
202-A |
Type: Front Load, Capacity: 4.5 cu ft |
8 |
Home Appliances |
Washing Machine |
202 |
CleanTech |
499.99 |
202-B |
Type: Top Load, Capacity: 5.0 cu ft |
9 |
Clothing |
T-Shirt |
301 |
FashionCo |
19.99 |
301-A |
Color: Blue, Size: S |
10 |
Clothing |
T-Shirt |
301 |
FashionCo |
19.99 |
301-B |
Color: Red, Size: M |
11 |
Clothing |
T-Shirt |
301 |
FashionCo |
19.99 |
301-C |
Color: Green, Size: L |
12 |
Clothing |
Jeans |
302 |
DenimWorks |
49.99 |
302-A |
Color: Dark Blue, Size: 32 |
13 |
Clothing |
Jeans |
302 |
DenimWorks |
49.99 |
302-B |
Color: Light Blue, Size: 34 |
14 |
Books |
Fiction Novel |
401 |
- |
14.99 |
401-A |
Format: Hardcover, Language: English |
15 |
Books |
Fiction Novel |
401 |
- |
14.99 |
401-B |
Format: Paperback, Language: Spanish |
16 |
Books |
Non-Fiction Guide |
402 |
- |
24.99 |
402-A |
Format: eBook, Language: English |
17 |
Books |
Non-Fiction Guide |
402 |
- |
24.99 |
402-B |
Format: Paperback, Language: French |
18 |
Sports Equipment |
Basketball |
501 |
SportsGear |
29.99 |
501-A |
Size: Size 7, Color: Orange |
19 |
Sports Equipment |
Tennis Racket |
502 |
RacketPro |
89.99 |
502-A |
Material: Graphite, Color: Black |
20 |
Sports Equipment |
Tennis Racket |
502 |
RacketPro |
89.99 |
502-B |
Material: Aluminum, Color: Silver |
Pros:
- HTML is easy to learn and use, making it accessible for
beginners.
- HTML allows for integration with CSS and JavaScript for enhanced
styling and functionality.
- HTML is supported by all web browsers, ensuring compatibility.
Cons:
- Limited to structuring content; does not define how it looks
(requires CSS).
- Can become cluttered with excessive use of tags, making maintenance
difficult.
- Not suitable for complex applications without additional
technologies (e.g., JavaScript).
# XML
# Convert the data frame to an XML document with df_to_xml(). In the printed
# result the document root is <root encoding="UTF-8">, each data frame row is
# a <record> element, and each column appears as a child tag of its record
# (e.g. <Category>, <Item_Name>).
# NOTE(review): the NULL/FALSE arguments below look like the function's
# defaults written out explicitly; confirm against the package documentation
# before removing any of them.
xml <- df
df_to_xml(
xml,
fields = "tags",
record.tag = "record",
field.names = NULL,
only.fields = NULL,
exclude.fields = NULL,
root.node = "root",
xml.file = NULL,
non.exist = NULL,
encoding = "UTF-8",
no.return = FALSE
)
## {xml_document}
## <root encoding="UTF-8">
## [1] <record>\n <Category>Electronics</Category>\n <Item_Name>Smartphone</I ...
## [2] <record>\n <Category>Electronics</Category>\n <Item_Name>Smartphone</I ...
## [3] <record>\n <Category>Electronics</Category>\n <Item_Name>Laptop</Item_ ...
## [4] <record>\n <Category>Electronics</Category>\n <Item_Name>Laptop</Item_ ...
## [5] <record>\n <Category>Home Appliances</Category>\n <Item_Name>Refrigera ...
## [6] <record>\n <Category>Home Appliances</Category>\n <Item_Name>Refrigera ...
## [7] <record>\n <Category>Home Appliances</Category>\n <Item_Name>Washing M ...
## [8] <record>\n <Category>Home Appliances</Category>\n <Item_Name>Washing M ...
## [9] <record>\n <Category>Clothing</Category>\n <Item_Name>T-Shirt</Item_Na ...
## [10] <record>\n <Category>Clothing</Category>\n <Item_Name>T-Shirt</Item_Na ...
## [11] <record>\n <Category>Clothing</Category>\n <Item_Name>T-Shirt</Item_Na ...
## [12] <record>\n <Category>Clothing</Category>\n <Item_Name>Jeans</Item_Name ...
## [13] <record>\n <Category>Clothing</Category>\n <Item_Name>Jeans</Item_Name ...
## [14] <record>\n <Category>Books</Category>\n <Item_Name>Fiction Novel</Item ...
## [15] <record>\n <Category>Books</Category>\n <Item_Name>Fiction Novel</Item ...
## [16] <record>\n <Category>Books</Category>\n <Item_Name>Non-Fiction Guide</ ...
## [17] <record>\n <Category>Books</Category>\n <Item_Name>Non-Fiction Guide</ ...
## [18] <record>\n <Category>Sports Equipment</Category>\n <Item_Name>Basketba ...
## [19] <record>\n <Category>Sports Equipment</Category>\n <Item_Name>Tennis R ...
## [20] <record>\n <Category>Sports Equipment</Category>\n <Item_Name>Tennis R ...
Pros:
- XML supports a wide range of data types and can represent complex
data hierarchies.
- XML is self-descriptive, making it easier to understand the data
structure.
- Well-formed documents can be validated against schemas (e.g., DTD,
XSD) for consistency.
Cons:
- More verbose than JSON, leading to larger file sizes.
- Parsing can be more complex and slower compared to simpler
formats.
- Requires strict adherence to syntax rules, which can lead to errors
if not followed.
# Parquet
# Write the data frame to a Parquet file (relative path, so it is resolved
# against the current working directory), then read it back to show the
# round-tripped table.
parquet <- df
# Single definition of the file path so the write and the read cannot drift
# apart.
parquet_file <- "inventory.parquet"
write_parquet(parquet, parquet_file)
read_parquet(parquet_file)
## Category Item_Name Item_ID Brand Price Variation_ID
## 1 Electronics Smartphone 101 TechBrand 699.99 101-A
## 2 Electronics Smartphone 101 TechBrand 699.99 101-B
## 3 Electronics Laptop 102 CompuBrand 1099.99 102-A
## 4 Electronics Laptop 102 CompuBrand 1099.99 102-B
## 5 Home Appliances Refrigerator 201 HomeCool 899.99 201-A
## 6 Home Appliances Refrigerator 201 HomeCool 899.99 201-B
## 7 Home Appliances Washing Machine 202 CleanTech 499.99 202-A
## 8 Home Appliances Washing Machine 202 CleanTech 499.99 202-B
## 9 Clothing T-Shirt 301 FashionCo 19.99 301-A
## 10 Clothing T-Shirt 301 FashionCo 19.99 301-B
## 11 Clothing T-Shirt 301 FashionCo 19.99 301-C
## 12 Clothing Jeans 302 DenimWorks 49.99 302-A
## 13 Clothing Jeans 302 DenimWorks 49.99 302-B
## 14 Books Fiction Novel 401 - 14.99 401-A
## 15 Books Fiction Novel 401 - 14.99 401-B
## 16 Books Non-Fiction Guide 402 - 24.99 402-A
## 17 Books Non-Fiction Guide 402 - 24.99 402-B
## 18 Sports Equipment Basketball 501 SportsGear 29.99 501-A
## 19 Sports Equipment Tennis Racket 502 RacketPro 89.99 502-A
## 20 Sports Equipment Tennis Racket 502 RacketPro 89.99 502-B
## Variation_Details
## 1 Color: Black, Storage: 64GB
## 2 Color: White, Storage: 128GB
## 3 Color: Silver, Storage: 256GB
## 4 Color: Space Gray, Storage: 512GB
## 5 Color: Stainless Steel, Capacity: 20 cu ft
## 6 Color: White, Capacity: 18 cu ft
## 7 Type: Front Load, Capacity: 4.5 cu ft
## 8 Type: Top Load, Capacity: 5.0 cu ft
## 9 Color: Blue, Size: S
## 10 Color: Red, Size: M
## 11 Color: Green, Size: L
## 12 Color: Dark Blue, Size: 32
## 13 Color: Light Blue, Size: 34
## 14 Format: Hardcover, Language: English
## 15 Format: Paperback, Language: Spanish
## 16 Format: eBook, Language: English
## 17 Format: Paperback, Language: French
## 18 Size: Size 7, Color: Orange
## 19 Material: Graphite, Color: Black
## 20 Material: Aluminum, Color: Silver
Pros:
- Columnar format reduces storage space by compressing similar data
types together, leading to better compression ratios.
- Enables faster read times for analytical queries, as only the
necessary columns are read from disk.
- Designed to work well with data processing frameworks, making it
ideal for large datasets.
Cons:
- More complex than row-based formats, which may require additional
learning for new users.
- Writing data can be slower compared to row-oriented formats,
especially for small, frequent writes.
- Less suitable for use cases that require frequent updates or
transactions, as it is optimized for read-heavy operations.