# Load necessary libraries
library(jsonlite)
library(XML)
library(arrow)
## Warning: package 'arrow' was built under R version 4.4.3
##
## Attaching package: 'arrow'
## The following object is masked from 'package:utils':
##
## timestamp
# Build the inventory data frame (20 rows, 7 columns).
# Values that repeat over consecutive rows are written once and expanded with
# rep(); the run lengths are kept local so no helper variables leak into the
# global environment.
inventory_data <- local({
  # Consecutive row counts per category and per item, in row order.
  rows_per_category <- c(4, 4, 5, 4, 3)
  rows_per_item <- c(2, 2, 2, 2, 3, 2, 2, 2, 1, 2)

  data.frame(
    Category = rep(
      c("Electronics", "Home Appliances", "Clothing", "Books",
        "Sports Equipment"),
      times = rows_per_category
    ),
    ItemName = rep(
      c("Smartphone", "Laptop", "Refrigerator", "Washing Machine",
        "T-Shirt", "Jeans", "Fiction Novel", "Non-Fiction Guide",
        "Basketball", "Tennis Racket"),
      times = rows_per_item
    ),
    ItemID = rep(
      c(101, 102, 201, 202, 301, 302, 401, 402, 501, 502),
      times = rows_per_item
    ),
    # The two book titles have no brand recorded.
    Brand = rep(
      c("TechBrand", "CompuBrand", "HomeCool", "CleanTech",
        "FashionCo", "DenimWorks", NA, NA, "SportsGear", "RacketPro"),
      times = rows_per_item
    ),
    Price = rep(
      c(699.99, 1099.99, 899.99, 499.99, 19.99, 49.99, 14.99, 24.99,
        29.99, 89.99),
      times = rows_per_item
    ),
    # The three sports-equipment rows carry no variation identifier.
    VariationID = c(
      "101-A", "101-B",
      "102-A", "102-B",
      "201-A", "201-B",
      "202-A", "202-B",
      "301-A", "301-B", "301-C",
      "302-A", "302-B",
      "401-A", "401-B",
      "402-A", "402-B",
      NA, NA, NA
    ),
    VariationDetails = c(
      # Electronics
      "Color: Black; Storage: 64GB",
      "Color: White; Storage: 128GB",
      "Color: Silver; Storage: 256GB",
      "Color: Space Gray; Storage: 512GB",
      # Home Appliances
      "Color: Stainless Steel; Capacity: 20 cu ft",
      "Color: White; Capacity: 18 cu ft",
      "Type: Front Load; Capacity: 4.5 cu ft",
      "Type: Top Load; Capacity: 5.0 cu ft",
      # Clothing
      "Color: Blue; Size: S",
      "Color: Red; Size: M",
      "Color: Green; Size: L",
      "Color: Dark Blue; Size: 32",
      "Color: Light Blue; Size: 34",
      # Books
      "Format: Hardcover; Language: English",
      "Format: Paperback; Language: Spanish",
      "Format: eBook; Language: English",
      "Format: Paperback; Language: French",
      # Sports Equipment
      "Size: Size7; Color: Orange",
      "Material: Graphite; Color: Black",
      "Material: Aluminum; Color: Silver"
    )
  )
})
# Export to JSON
# The path is held in one variable so the export and the read-back below
# always refer to the same file.
json_file <- "./inventory.json"
# `write_json()` from the `jsonlite` package serializes the data frame to a
# JSON file. `digits = NA` requests maximum numeric precision; jsonlite's
# default keeps only four decimal digits and would silently round anything
# more precise.
write_json(inventory_data, path = json_file, digits = NA)
# Import JSON back into R for verification
# `fromJSON()` parses the file that was just written so the round-tripped
# result can be compared with the original by eye.
json_data <- fromJSON(json_file)
print(json_data)
## Category ItemName ItemID Brand Price VariationID
## 1 Electronics Smartphone 101 TechBrand 699.99 101-A
## 2 Electronics Smartphone 101 TechBrand 699.99 101-B
## 3 Electronics Laptop 102 CompuBrand 1099.99 102-A
## 4 Electronics Laptop 102 CompuBrand 1099.99 102-B
## 5 Home Appliances Refrigerator 201 HomeCool 899.99 201-A
## 6 Home Appliances Refrigerator 201 HomeCool 899.99 201-B
## 7 Home Appliances Washing Machine 202 CleanTech 499.99 202-A
## 8 Home Appliances Washing Machine 202 CleanTech 499.99 202-B
## 9 Clothing T-Shirt 301 FashionCo 19.99 301-A
## 10 Clothing T-Shirt 301 FashionCo 19.99 301-B
## 11 Clothing T-Shirt 301 FashionCo 19.99 301-C
## 12 Clothing Jeans 302 DenimWorks 49.99 302-A
## 13 Clothing Jeans 302 DenimWorks 49.99 302-B
## 14 Books Fiction Novel 401 <NA> 14.99 401-A
## 15 Books Fiction Novel 401 <NA> 14.99 401-B
## 16 Books Non-Fiction Guide 402 <NA> 24.99 402-A
## 17 Books Non-Fiction Guide 402 <NA> 24.99 402-B
## 18 Sports Equipment Basketball 501 SportsGear 29.99 <NA>
## 19 Sports Equipment Tennis Racket 502 RacketPro 89.99 <NA>
## 20 Sports Equipment Tennis Racket 502 RacketPro 89.99 <NA>
## VariationDetails
## 1 Color: Black; Storage: 64GB
## 2 Color: White; Storage: 128GB
## 3 Color: Silver; Storage: 256GB
## 4 Color: Space Gray; Storage: 512GB
## 5 Color: Stainless Steel; Capacity: 20 cu ft
## 6 Color: White; Capacity: 18 cu ft
## 7 Type: Front Load; Capacity: 4.5 cu ft
## 8 Type: Top Load; Capacity: 5.0 cu ft
## 9 Color: Blue; Size: S
## 10 Color: Red; Size: M
## 11 Color: Green; Size: L
## 12 Color: Dark Blue; Size: 32
## 13 Color: Light Blue; Size: 34
## 14 Format: Hardcover; Language: English
## 15 Format: Paperback; Language: Spanish
## 16 Format: eBook; Language: English
## 17 Format: Paperback; Language: French
## 18 Size: Size7; Color: Orange
## 19 Material: Graphite; Color: Black
## 20 Material: Aluminum; Color: Silver
# Export to HTML
html_file <- "./inventory.html"

# Replace the characters that are significant in HTML with character
# references so that cell or header text can never be parsed as markup.
# "&" is handled first; otherwise the ampersands introduced by the other two
# replacements would be escaped a second time.
escape_html <- function(text) {
  text <- gsub("&", "&amp;", text, fixed = TRUE)
  text <- gsub("<", "&lt;", text, fixed = TRUE)
  gsub(">", "&gt;", text, fixed = TRUE)
}

# Header row: one <th> per column name.
header_row <- paste0(
  "<tr>",
  paste0("<th>", escape_html(colnames(inventory_data)), "</th>",
         collapse = ""),
  "</tr>\n"
)

# Body rows are built column-wise: every column becomes a character vector of
# <td> cells, and pasting those vectors together element-wise yields one
# string per data-frame row. NA values end up as the literal text NA in the
# cell because paste0() renders a missing string that way.
cell_columns <- lapply(inventory_data, function(column) {
  paste0("<td>", escape_html(as.character(column)), "</td>")
})
body_rows <- if (nrow(inventory_data) > 0) {
  # unname() keeps column names from being matched to paste0() arguments.
  paste0("<tr>", do.call(paste0, unname(cell_columns)), "</tr>\n")
} else {
  # paste0() would recycle zero-length input into a single empty row.
  character(0)
}

# Assemble the page once instead of regrowing the string for every row.
html_table <- paste0(
  "<table border='1'>\n",
  header_row,
  paste0(body_rows, collapse = ""),
  "</table>"
)
writeLines(html_table, html_file)

# Import HTML back into R for verification (optional)
# `readHTMLTable()` from the `XML` package parses the tables found in the
# file that was just written.
html_data <- readHTMLTable(html_file)
print(html_data)
## $`NULL`
## Category ItemName ItemID Brand Price VariationID
## 1 Electronics Smartphone 101 TechBrand 699.99 101-A
## 2 Electronics Smartphone 101 TechBrand 699.99 101-B
## 3 Electronics Laptop 102 CompuBrand 1099.99 102-A
## 4 Electronics Laptop 102 CompuBrand 1099.99 102-B
## 5 Home Appliances Refrigerator 201 HomeCool 899.99 201-A
## 6 Home Appliances Refrigerator 201 HomeCool 899.99 201-B
## 7 Home Appliances Washing Machine 202 CleanTech 499.99 202-A
## 8 Home Appliances Washing Machine 202 CleanTech 499.99 202-B
## 9 Clothing T-Shirt 301 FashionCo 19.99 301-A
## 10 Clothing T-Shirt 301 FashionCo 19.99 301-B
## 11 Clothing T-Shirt 301 FashionCo 19.99 301-C
## 12 Clothing Jeans 302 DenimWorks 49.99 302-A
## 13 Clothing Jeans 302 DenimWorks 49.99 302-B
## 14 Books Fiction Novel 401 NA 14.99 401-A
## 15 Books Fiction Novel 401 NA 14.99 401-B
## 16 Books Non-Fiction Guide 402 NA 24.99 402-A
## 17 Books Non-Fiction Guide 402 NA 24.99 402-B
## 18 Sports Equipment Basketball 501 SportsGear 29.99 NA
## 19 Sports Equipment Tennis Racket 502 RacketPro 89.99 NA
## 20 Sports Equipment Tennis Racket 502 RacketPro 89.99 NA
## VariationDetails
## 1 Color: Black; Storage: 64GB
## 2 Color: White; Storage: 128GB
## 3 Color: Silver; Storage: 256GB
## 4 Color: Space Gray; Storage: 512GB
## 5 Color: Stainless Steel; Capacity: 20 cu ft
## 6 Color: White; Capacity: 18 cu ft
## 7 Type: Front Load; Capacity: 4.5 cu ft
## 8 Type: Top Load; Capacity: 5.0 cu ft
## 9 Color: Blue; Size: S
## 10 Color: Red; Size: M
## 11 Color: Green; Size: L
## 12 Color: Dark Blue; Size: 32
## 13 Color: Light Blue; Size: 34
## 14 Format: Hardcover; Language: English
## 15 Format: Paperback; Language: Spanish
## 16 Format: eBook; Language: English
## 17 Format: Paperback; Language: French
## 18 Size: Size7; Color: Orange
## 19 Material: Graphite; Color: Black
## 20 Material: Aluminum; Color: Silver
# Explanation:
# The `readHTMLTable()` function reads the HTML table back into R as a list or data frame.
# Export to XML
xml_file <- "./inventory.xml"
# Create the document and root <InventoryDataSet> in it. Passing `doc = doc`
# makes the node the document's root element, so saving the document below
# writes a complete XML file rather than a detached node.
doc <- newXMLDoc()
root <- newXMLNode("InventoryDataSet", doc = doc)
# Column names double as element names; look them up once, outside the loops.
# NOTE(review): this relies on every column name being a valid XML element
# name, which holds for the columns defined in this script.
column_names <- names(inventory_data)
for (i in seq_len(nrow(inventory_data))) {
  # One <ItemDetails> element per data-frame row, attached to the root as it
  # is created.
  item_node <- newXMLNode("ItemDetails", parent = root)
  for (j in seq_along(column_names)) {
    # One child element per column, holding that row's value as its text.
    newXMLNode(column_names[j], inventory_data[i, j], parent = item_node)
  }
}
saveXML(doc, file = xml_file)
## [1] "./inventory.xml"
# The resulting file is `inventory.xml`: a single <InventoryDataSet> root
# containing one <ItemDetails> element per row of the data frame.
# Import XML back into R for verification
xml_data <- xmlParse(xml_file)
xml_df <- xmlToDataFrame(xml_data)
print(xml_df)
## Category ItemName ItemID Brand Price VariationID
## 1 Electronics Smartphone 101 TechBrand 699.99 101-A
## 2 Electronics Smartphone 101 TechBrand 699.99 101-B
## 3 Electronics Laptop 102 CompuBrand 1099.99 102-A
## 4 Electronics Laptop 102 CompuBrand 1099.99 102-B
## 5 Home Appliances Refrigerator 201 HomeCool 899.99 201-A
## 6 Home Appliances Refrigerator 201 HomeCool 899.99 201-B
## 7 Home Appliances Washing Machine 202 CleanTech 499.99 202-A
## 8 Home Appliances Washing Machine 202 CleanTech 499.99 202-B
## 9 Clothing T-Shirt 301 FashionCo 19.99 301-A
## 10 Clothing T-Shirt 301 FashionCo 19.99 301-B
## 11 Clothing T-Shirt 301 FashionCo 19.99 301-C
## 12 Clothing Jeans 302 DenimWorks 49.99 302-A
## 13 Clothing Jeans 302 DenimWorks 49.99 302-B
## 14 Books Fiction Novel 401 NA 14.99 401-A
## 15 Books Fiction Novel 401 NA 14.99 401-B
## 16 Books Non-Fiction Guide 402 NA 24.99 402-A
## 17 Books Non-Fiction Guide 402 NA 24.99 402-B
## 18 Sports Equipment Basketball 501 SportsGear 29.99 NA
## 19 Sports Equipment Tennis Racket 502 RacketPro 89.99 NA
## 20 Sports Equipment Tennis Racket 502 RacketPro 89.99 NA
## VariationDetails
## 1 Color: Black; Storage: 64GB
## 2 Color: White; Storage: 128GB
## 3 Color: Silver; Storage: 256GB
## 4 Color: Space Gray; Storage: 512GB
## 5 Color: Stainless Steel; Capacity: 20 cu ft
## 6 Color: White; Capacity: 18 cu ft
## 7 Type: Front Load; Capacity: 4.5 cu ft
## 8 Type: Top Load; Capacity: 5.0 cu ft
## 9 Color: Blue; Size: S
## 10 Color: Red; Size: M
## 11 Color: Green; Size: L
## 12 Color: Dark Blue; Size: 32
## 13 Color: Light Blue; Size: 34
## 14 Format: Hardcover; Language: English
## 15 Format: Paperback; Language: Spanish
## 16 Format: eBook; Language: English
## 17 Format: Paperback; Language: French
## 18 Size: Size7; Color: Orange
## 19 Material: Graphite; Color: Black
## 20 Material: Aluminum; Color: Silver
# Explanation:
# The `xmlParse()` function parses the XML file into an R object.
# The `xmlToDataFrame()` function converts it back into a data frame for verification.
# Export to Parquet
# Parquet is a binary, columnar storage format aimed at large-scale analytics;
# `write_parquet()` from the `arrow` package writes the data frame to disk in
# that format.
inventory_data |>
  write_parquet(sink = "./inventory.parquet")
# Import Parquet back into R for verification
# `read_parquet()` loads the file that was just written so the round trip can
# be inspected.
parquet_data <- read_parquet(file = "./inventory.parquet")
print(parquet_data)
## Category ItemName ItemID Brand Price VariationID
## 1 Electronics Smartphone 101 TechBrand 699.99 101-A
## 2 Electronics Smartphone 101 TechBrand 699.99 101-B
## 3 Electronics Laptop 102 CompuBrand 1099.99 102-A
## 4 Electronics Laptop 102 CompuBrand 1099.99 102-B
## 5 Home Appliances Refrigerator 201 HomeCool 899.99 201-A
## 6 Home Appliances Refrigerator 201 HomeCool 899.99 201-B
## 7 Home Appliances Washing Machine 202 CleanTech 499.99 202-A
## 8 Home Appliances Washing Machine 202 CleanTech 499.99 202-B
## 9 Clothing T-Shirt 301 FashionCo 19.99 301-A
## 10 Clothing T-Shirt 301 FashionCo 19.99 301-B
## 11 Clothing T-Shirt 301 FashionCo 19.99 301-C
## 12 Clothing Jeans 302 DenimWorks 49.99 302-A
## 13 Clothing Jeans 302 DenimWorks 49.99 302-B
## 14 Books Fiction Novel 401 <NA> 14.99 401-A
## 15 Books Fiction Novel 401 <NA> 14.99 401-B
## 16 Books Non-Fiction Guide 402 <NA> 24.99 402-A
## 17 Books Non-Fiction Guide 402 <NA> 24.99 402-B
## 18 Sports Equipment Basketball 501 SportsGear 29.99 <NA>
## 19 Sports Equipment Tennis Racket 502 RacketPro 89.99 <NA>
## 20 Sports Equipment Tennis Racket 502 RacketPro 89.99 <NA>
## VariationDetails
## 1 Color: Black; Storage: 64GB
## 2 Color: White; Storage: 128GB
## 3 Color: Silver; Storage: 256GB
## 4 Color: Space Gray; Storage: 512GB
## 5 Color: Stainless Steel; Capacity: 20 cu ft
## 6 Color: White; Capacity: 18 cu ft
## 7 Type: Front Load; Capacity: 4.5 cu ft
## 8 Type: Top Load; Capacity: 5.0 cu ft
## 9 Color: Blue; Size: S
## 10 Color: Red; Size: M
## 11 Color: Green; Size: L
## 12 Color: Dark Blue; Size: 32
## 13 Color: Light Blue; Size: 34
## 14 Format: Hardcover; Language: English
## 15 Format: Paperback; Language: Spanish
## 16 Format: eBook; Language: English
## 17 Format: Paperback; Language: French
## 18 Size: Size7; Color: Orange
## 19 Material: Graphite; Color: Black
## 20 Material: Aluminum; Color: Silver
# Pros and Cons of Each Format
# JSON is a widely used, human-readable format that is well suited to nested data structures. It is common in APIs and web applications because of its simplicity and cross-platform compatibility. However, JSON becomes unwieldy for large datasets and lacks the efficiency of binary formats such as Parquet.
# HTML is primarily designed for the visual presentation of tabular data in web browsers. It is easy to view and interpret, but it is not optimized for data analysis or storage.
# XML is a highly structured format that supports complex hierarchies. It is suitable for applications requiring detailed data organization. However, XML tends to be more verbose than JSON, which makes it inefficient for large datasets.
# Parquet is a columnar storage format that excels at large-scale analytics. It is highly efficient thanks to its compression capabilities and compact binary structure, making it ideal for big data applications. However, Parquet requires specialized libraries or tools for reading and writing, which can limit accessibility.
#In summary, JSON is best suited for lightweight data exchange, HTML for visualization purposes, XML for structured hierarchical data, and Parquet for efficient storage and analytics of large datasets. Choosing the appropriate format depends on the specific requirements of the task at hand.