How to Load an XML Set into the Database
Depending on the database where you want to load the parsing results, configure settings for SQL databases or NoSQL databases.
The test database is located at the following path: data-samples\basic-sample-2\database.db. The table that stores links to the original XML files is called xml_files and has the following schema (you can read about the minimal required schema for working with the database in this article):
CREATE TABLE xml_files (
id INTEGER PRIMARY KEY AUTOINCREMENT,
xml_path TEXT,
parsing_status TEXT,
filling_percent_stat NUMERIC,
processed_date TEXT,
CONSTRAINT unique_xml_path UNIQUE (xml_path)
);
General view of the populated table:
id | xml_path | filling_percent_stat | parsing_status | processed_date |
---|---|---|---|---|
1 | basic-sample-2/example_1.xml | NULL | NULL | NULL |
2 | basic-sample-2/example_2.xml | NULL | NULL | NULL |
3 | basic-sample-2/example_3.xml | NULL | NULL | NULL |
4 | basic-sample-2/example_4.xml | NULL | NULL | NULL |
5 | basic-sample-2/example_5.xml | NULL | NULL | NULL |
The xml_files column stores the relative path to the processing files.
Test XMLs for processing:
example_1.xml
<?xml version="1.0"?>
<customers>
<customer>
<name>Tesla</name>
<keyPerson>Elon Musk</keyPerson>
<UsaResident>Yes</UsaResident>
<foundedYear>2003</foundedYear>
<department>
<name>Tesla Engineering</name>
<address>1501 Page Mill Road Palo Alto, Framingham, USA</address>
</department>
<department>
<name>Gigafactory Texas</name>
<address>1 Tesla Road Austin, TX 78725, USA</address>
</department>
</customer>
<customer>
<name>Boston Dynamics</name>
<keyPerson>Robert Playter</keyPerson>
<UsaResident>Yes</UsaResident>
<foundedYear>1992</foundedYear>
<department>
<name>Headquarters</name>
<address>200 Smith St, Waltham, USA</address>
</department>
</customer>
<customer>
<name>Bank of Korea</name>
<keyPerson>Kim Seong Tae</keyPerson>
<UsaResident>No</UsaResident>
<foundedYear>1950</foundedYear>
<department>
<name>Main Department</name>
<address>67, Sejong-daero, Jung-gu, Seoul, South Korea</address>
</department>
</customer>
</customers>
example_2.xml
<?xml version="1.0"?>
<customers>
<customer>
<name>Apple Inc.</name>
<keyPerson>Tim Cook</keyPerson>
<UsaResident>Yes</UsaResident>
<foundedYear>1976</foundedYear>
<department>
<name>Software Engineering</name>
<address>1 Apple Park Way, Cupertino, CA, USA</address>
</department>
</customer>
<customer>
<name>Microsoft Corporation</name>
<keyPerson>Satya Nadella</keyPerson>
<UsaResident>Yes</UsaResident>
<foundedYear>1975</foundedYear>
</customer>
<customer>
<name>Amazon.com Inc.</name>
<keyPerson>Andy Jassy</keyPerson>
<UsaResident>Yes</UsaResident>
<foundedYear>1994</foundedYear>
<department>
<name>Amazon Web Services</name>
<address>410 Terry Ave N, Seattle, WA, USA</address>
</department>
</customer>
<customer>
<name>Samsung Group</name>
<keyPerson>Lee Kun-hee</keyPerson>
<UsaResident>No</UsaResident>
<foundedYear>1938</foundedYear>
<department>
<name>Samsung Electronics</name>
<address>129 Samsung-ro, Su-dong, Yeongtong-gu, Suwon-si, Gyeonggi-do, South Korea</address>
</department>
<department>
<name>Samsung Heavy Industries</name>
<address>508, Maegok-dong, Changwon, South Korea</address>
</department>
</customer>
</customers>
example_3.xml
<?xml version="1.0"?>
<customers>
<customer>
<name>Volkswagen Group</name>
<keyPerson>Herbert Diess</keyPerson>
<UsaResident>No</UsaResident>
<foundedYear>1937</foundedYear>
<department>
<name>Volkswagen Passenger Cars</name>
<address>Berliner Ring 2, 38440 Wolfsburg, Germany</address>
</department>
<department>
<name>Audi</name>
<address>Auto-Union-Str. 1, 85045 Ingolstadt, Germany</address>
</department>
</customer>
<customer>
<name>Unilever</name>
<keyPerson>Alan Jope</keyPerson>
<UsaResident>No</UsaResident>
<foundedYear>1930</foundedYear>
<department>
<name>Unilever Food Solutions</name>
<address>Rotterdam, Netherlands</address>
</department>
</customer>
<customer>
<name>Nestle</name>
<keyPerson>Ulf Mark Schneider</keyPerson>
<UsaResident>No</UsaResident>
<foundedYear>1866</foundedYear>
<department>
<name>Nestle Waters</name>
<address>50 Rue Carnot, 92240 Malakoff, France</address>
</department>
<department>
<name>Nestle Purina PetCare</name>
<address>50 Avenue de La Couronne, 1050 Ixelles, Belgium</address>
</department>
</customer>
<customer>
<name>Siemens</name>
<keyPerson>Roland Busch</keyPerson>
<UsaResident>No</UsaResident>
<foundedYear>1847</foundedYear>
<department>
<name>Siemens Energy</name>
<address>Werner-von-Siemens-Str. 1, 80333 Munich, Germany</address>
</department>
</customer>
<customer>
<name>Philips</name>
<keyPerson>Frans van Houten</keyPerson>
<UsaResident>No</UsaResident>
<foundedYear>1891</foundedYear>
<department>
<name>Healthcare</name>
<address>Amstelplein 2, 1096 BC Amsterdam, Netherlands</address>
</department>
</customer>
</customers>
example_4.xml
<?xml version="1.0"?>
<customers>
<customer>
<name>Netflix</name>
<keyPerson>Reed Hastings</keyPerson>
<UsaResident>Yes</UsaResident>
<foundedYear>1997</foundedYear>
<department>
<name>Content Production</name>
<address>123 Entertainment Ave, Los Gatos, CA 95032, USA</address>
</department>
</customer>
<customer>
<name>SpaceX</name>
<keyPerson>Elon Musk</keyPerson>
<UsaResident>Yes</UsaResident>
<foundedYear>2002</foundedYear>
<department>
<name>Space Exploration</name>
<address>1 Rocket Road Hawthorne, CA 90250, USA</address>
</department>
<department>
<name>Starlink</name>
<address>123 SpaceX Blvd, Hawthorne, CA 90250, USA</address>
</department>
</customer>
<customer>
<name>Amazon Web Services</name>
<keyPerson>Andy Jassy</keyPerson>
<UsaResident>Yes</UsaResident>
<foundedYear>2006</foundedYear>
<department>
<name>Cloud Services</name>
</department>
<department>
<name>AI and Machine Learning</name>
</department>
</customer>
<customer>
<name>Toyota</name>
<keyPerson>Akio Toyoda</keyPerson>
<UsaResident>No</UsaResident>
<foundedYear>1937</foundedYear>
<department>
<name>Research and Development</name>
<address>Toyota-cho, Toyota City, Aichi Prefecture 471-8571, Japan</address>
</department>
</customer>
<customer>
<name>Adobe Systems</name>
<keyPerson>Shantanu Narayen</keyPerson>
<UsaResident>Yes</UsaResident>
<foundedYear>1982</foundedYear>
<department>
<name>Software Development</name>
<address>345 Park Ave, San Jose, CA 95110, USA</address>
</department>
<department>
<name>Marketing</name>
<address>234 Adobe Way, San Jose, CA 95110, USA</address>
</department>
</customer>
</customers>
example_5.xml
<?xml version="1.0"?>
<customers>
<customer>
<name>Ford Motor Company</name>
<keyPerson>Jim Farley</keyPerson>
<UsaResident>Yes</UsaResident>
<foundedYear>1903</foundedYear>
<department>
<name>Automotive Design</name>
<address>Dearborn, Michigan, USA</address>
</department>
</customer>
<customer>
<name>Alphabet Inc.</name>
<keyPerson>Sundar Pichai</keyPerson>
<UsaResident>Yes</UsaResident>
<foundedYear>2015</foundedYear>
<department>
<name>Google Search</name>
<address>1600 Amphitheatre Parkway, Mountain View, CA, USA</address>
</department>
</customer>
<customer>
<name>General Electric</name>
<keyPerson>H. Lawrence Culp Jr.</keyPerson>
<UsaResident>Yes</UsaResident>
<foundedYear>1892</foundedYear>
<department>
<name>Power</name>
<address>41 Farnsworth St, Boston, MA 02210, USA</address>
</department>
</customer>
<customer>
<name>Sony Corporation</name>
<keyPerson>Kenichiro Yoshida</keyPerson>
<foundedYear>1946</foundedYear>
<department>
</customers>
Open the project created in the previous step - basic-sample
.
Make sure the setting is enabled: Settings⮞Write to DB columns filling stat
Go to the Database tab
If multi-processor processing is required, you will need to configure multiple Job
. In this example, the default Job
is used without specifying data selection settings. The processing will be done by one instance of SmartXML. The settings specified in the interface reflect the structure of the table xml_files created in the first step. For more details on configuring multiple Job
, you can read here.
Press Get XML Count to find out the number of files in the queue for processing.
Start parsing by clicking Start Processing
After processing, the table xml_files will look like this:
id | xml_path | filling_percent_stat | parsing_status | processed_date |
---|---|---|---|---|
1 | basic-sample-2/example_1.xml | 100 | success | 2024-03-21 15:10 |
2 | basic-sample-2/example_2.xml | 100 | success | 2024-03-21 15:10 |
3 | basic-sample-2/example_3.xml | 100 | success | 2024-03-21 15:10 |
4 | basic-sample-2/example_4.xml | 95 | success | 2024-03-21 15:10 |
5 | basic-sample-2/example_5.xml | 83 | success | 2024-03-21 15:10 |
File parsing was successful. The value in filling_percent_stat indicates the percentage of data extracted from the total expected volume. As seen in the rows containing NULL
in example_4.xml
and example_5.xml
, some fields that we expected to find are missing.
Results for SQL Database:
Data loaded into the customers table:
id | company_name | key_person | usa_resident | founded_year |
---|---|---|---|---|
1 | Tesla | Elon Musk | 1 | 2003 |
2 | Boston Dynamics | Robert Playter | 1 | 1992 |
3 | Bank of Korea | Kim Seong Tae | 0 | 1950 |
4 | Apple Inc. | Tim Cook | 1 | 1976 |
5 | Microsoft Corporation | Satya Nadella | 1 | 1975 |
6 | Amazon.com Inc. | Andy Jassy | 1 | 1994 |
7 | Samsung Group | Lee Kun-hee | 0 | 1938 |
8 | Volkswagen Group | Herbert Diess | 0 | 1937 |
9 | Unilever | Alan Jope | 0 | 1930 |
10 | Nestle | Ulf Mark Schneider | 0 | 1866 |
11 | Siemens | Roland Busch | 0 | 1847 |
12 | Philips | Frans van Houten | 0 | 1891 |
13 | Netflix | Reed Hastings | 1 | 1997 |
14 | SpaceX | Elon Musk | 1 | 2002 |
15 | Amazon Web Services | Andy Jassy | 1 | 2006 |
16 | Toyota | Akio Toyoda | 0 | 1937 |
17 | Adobe Systems | Shantanu Narayen | 1 | 1982 |
18 | Ford Motor Company | Jim Farley | 1 | 1903 |
19 | Alphabet Inc. | Sundar Pichai | 1 | 2015 |
20 | General Electric | H. Lawrence Culp Jr. | 1 | 1892 |
21 | Sony Corporation | Kenichiro Yoshida | NULL | 1946 |
Data loaded into the departments table:
id | company_name | department_name | department_address |
---|---|---|---|
1 | Tesla | Tesla Engineering | 1501 Page Mill Road Palo Alto, Framingham, USA |
2 | Tesla | Gigafactory Texas | 1 Tesla Road Austin, TX 78725, USA |
3 | Boston Dynamics | Headquarters | 200 Smith St, Waltham, USA |
4 | Bank of Korea | Main Department | 67, Sejong-daero, Jung-gu, Seoul, South Korea |
5 | Apple Inc. | Software Engineering | 1 Apple Park Way, Cupertino, CA, USA |
6 | Amazon.com Inc. | Amazon Web Services | 410 Terry Ave N, Seattle, WA, USA |
7 | Samsung Group | Samsung Electronics | 129 Samsung-ro, Su-dong, Yeongtong-gu, Suwon-si, Gyeonggi-do, South Korea |
8 | Samsung Group | Samsung Heavy Industries | 508, Maegok-dong, Changwon, South Korea |
9 | Volkswagen Group | Volkswagen Passenger Cars | Berliner Ring 2, 38440 Wolfsburg, Germany |
10 | Volkswagen Group | Audi | Auto-Union-Str. 1, 85045 Ingolstadt, Germany |
11 | Unilever | Unilever Food Solutions | Rotterdam, Netherlands |
12 | Nestle | Nestle Waters | 50 Rue Carnot, 92240 Malakoff, France |
13 | Nestle | Nestle Purina PetCare | 50 Avenue de La Couronne, 1050 Ixelles, Belgium |
14 | Siemens | Siemens Energy | Werner-von-Siemens-Str. 1, 80333 Munich, Germany |
15 | Philips | Healthcare | Amstelplein 2, 1096 BC Amsterdam, Netherlands |
16 | Netflix | Content Production | 123 Entertainment Ave, Los Gatos, CA 95032, USA |
17 | SpaceX | Space Exploration | 1 Rocket Road Hawthorne, CA 90250, USA |
18 | SpaceX | Starlink | 123 SpaceX Blvd, Hawthorne, CA 90250, USA |
19 | Amazon Web Services | Cloud Services | Toyota-cho, Toyota City, Aichi Prefecture 471-8571, Japan |
20 | Amazon Web Services | AI and Machine Learning | 345 Park Ave, San Jose, CA 95110, USA |
21 | Toyota | Research and Development | 234 Adobe Way, San Jose, CA 95110, USA |
22 | Adobe Systems | Software Development | NULL |
23 | Adobe Systems | Marketing | NULL |
24 | Ford Motor Company | Automotive Design | Dearborn, Michigan, USA |
25 | Alphabet Inc. | Google Search | 1600 Amphitheatre Parkway, Mountain View, CA, USA |
26 | General Electric | Power | 41 Farnsworth St, Boston, MA 02210, USA |
27 | Sony Corporation | NULL | NULL |
NULL values indicate fields that were missing in the processed files.
Results of XML loading into NoSQL:
Data loaded into the sample
collection:
[
{
"_key": "1393",
"_id": "sample/1393",
"_rev": "_g1BqrES---",
"customers": [
{
"company_name": "Ford Motor Company",
"key_person": "Jim Farley",
"usa_resident": true,
"founded_year": 1903,
"departments": [
{
"department_name": "Automotive Design",
"department_address": "Dearborn, Michigan, USA"
}
]
},
{
"company_name": "Alphabet Inc.",
"key_person": "Sundar Pichai",
"usa_resident": true,
"founded_year": 2015,
"departments": [
{
"department_name": "Google Search",
"department_address": "1600 Amphitheatre Parkway, Mountain View, CA, USA"
}
]
},
{
"company_name": "General Electric",
"key_person": "H. Lawrence Culp Jr.",
"usa_resident": true,
"founded_year": 1892,
"departments": [
{
"department_name": "Power",
"department_address": "41 Farnsworth St, Boston, MA 02210, USA"
}
]
},
{
"company_name": "Sony Corporation",
"key_person": "Kenichiro Yoshida",
"usa_resident": null,
"founded_year": 1946,
"departments": [
{
"department_name": null,
"department_address": null
}
]
}
]
},
{
"_key": "1395",
"_id": "sample/1395",
"_rev": "_g1BqrHa---",
"customers": [
{
"company_name": "Netflix",
"key_person": "Reed Hastings",
"usa_resident": true,
"founded_year": 1997,
"departments": [
{
"department_name": "Content Production",
"department_address": "123 Entertainment Ave, Los Gatos, CA 95032, USA"
}
]
},
{
"company_name": "SpaceX",
"key_person": "Elon Musk",
"usa_resident": true,
"founded_year": 2002,
"departments": [
{
"department_name": "Space Exploration",
"department_address": "1 Rocket Road Hawthorne, CA 90250, USA"
},
{
"department_name": "Starlink",
"department_address": "123 SpaceX Blvd, Hawthorne, CA 90250, USA"
}
]
},
{
"company_name": "Amazon Web Services",
"key_person": "Andy Jassy",
"usa_resident": true,
"founded_year": 2006,
"departments": [
{
"department_name": "Cloud Services",
"department_address": "Toyota-cho, Toyota City, Aichi Prefecture 471-8571, Japan"
},
{
"department_name": "AI and Machine Learning",
"department_address": "345 Park Ave, San Jose, CA 95110, USA"
}
]
},
{
"company_name": "Toyota",
"key_person": "Akio Toyoda",
"usa_resident": false,
"founded_year": 1937,
"departments": [
{
"department_name": "Research and Development",
"department_address": "234 Adobe Way, San Jose, CA 95110, USA"
}
]
},
{
"company_name": "Adobe Systems",
"key_person": "Shantanu Narayen",
"usa_resident": true,
"founded_year": 1982,
"departments": [
{
"department_name": "Software Development",
"department_address": null
},
{
"department_name": "Marketing",
"department_address": null
}
]
}
]
},
{
"_key": "1397",
"_id": "sample/1397",
"_rev": "_g1BqrLG---",
"customers": [
{
"company_name": "Volkswagen Group",
"key_person": "Herbert Diess",
"usa_resident": false,
"founded_year": 1937,
"departments": [
{
"department_name": "Volkswagen Passenger Cars",
"department_address": "Berliner Ring 2, 38440 Wolfsburg, Germany"
},
{
"department_name": "Audi",
"department_address": "Auto-Union-Str. 1, 85045 Ingolstadt, Germany"
}
]
},
{
"company_name": "Unilever",
"key_person": "Alan Jope",
"usa_resident": false,
"founded_year": 1930,
"departments": [
{
"department_name": "Unilever Food Solutions",
"department_address": "Rotterdam, Netherlands"
}
]
},
{
"company_name": "Nestle",
"key_person": "Ulf Mark Schneider",
"usa_resident": false,
"founded_year": 1866,
"departments": [
{
"department_name": "Nestle Waters",
"department_address": "50 Rue Carnot, 92240 Malakoff, France"
},
{
"department_name": "Nestle Purina PetCare",
"department_address": "50 Avenue de La Couronne, 1050 Ixelles, Belgium"
}
]
},
{
"company_name": "Siemens",
"key_person": "Roland Busch",
"usa_resident": false,
"founded_year": 1847,
"departments": [
{
"department_name": "Siemens Energy",
"department_address": "Werner-von-Siemens-Str. 1, 80333 Munich, Germany"
}
]
},
{
"company_name": "Philips",
"key_person": "Frans van Houten",
"usa_resident": false,
"founded_year": 1891,
"departments": [
{
"department_name": "Healthcare",
"department_address": "Amstelplein 2, 1096 BC Amsterdam, Netherlands"
}
]
}
]
},
{
"_key": "1399",
"_id": "sample/1399",
"_rev": "_g1BqrNu---",
"customers": [
{
"company_name": "Apple Inc.",
"key_person": "Tim Cook",
"usa_resident": true,
"founded_year": 1976,
"departments": [
{
"department_name": "Software Engineering",
"department_address": "1 Apple Park Way, Cupertino, CA, USA"
}
]
},
{
"company_name": "Microsoft Corporation",
"key_person": "Satya Nadella",
"usa_resident": true,
"founded_year": 1975
},
{
"company_name": "Amazon.com Inc.",
"key_person": "Andy Jassy",
"usa_resident": true,
"founded_year": 1994,
"departments": [
{
"department_name": "Amazon Web Services",
"department_address": "410 Terry Ave N, Seattle, WA, USA"
}
]
},
{
"company_name": "Samsung Group",
"key_person": "Lee Kun-hee",
"usa_resident": false,
"founded_year": 1938,
"departments": [
{
"department_name": "Samsung Electronics",
"department_address": "129 Samsung-ro, Su-dong, Yeongtong-gu, Suwon-si, Gyeonggi-do, South Korea"
},
{
"department_name": "Samsung Heavy Industries",
"department_address": "508, Maegok-dong, Changwon, South Korea"
}
]
}
]
},
{
"_key": "1401",
"_id": "sample/1401",
"_rev": "_g1BqrQS---",
"customers": [
{
"company_name": "Tesla",
"key_person": "Elon Musk",
"usa_resident": true,
"founded_year": 2003,
"departments": [
{
"department_name": "Tesla Engineering",
"department_address": "1501 Page Mill Road Palo Alto, Framingham, USA"
},
{
"department_name": "Gigafactory Texas",
"department_address": "1 Tesla Road Austin, TX 78725, USA"
}
]
},
{
"company_name": "Boston Dynamics",
"key_person": "Robert Playter",
"usa_resident": true,
"founded_year": 1992,
"departments": [
{
"department_name": "Headquarters",
"department_address": "200 Smith St, Waltham, USA"
}
]
},
{
"company_name": "Bank of Korea",
"key_person": "Kim Seong Tae",
"usa_resident": false,
"founded_year": 1950,
"departments": [
{
"department_name": "Main Department",
"department_address": "67, Sejong-daero, Jung-gu, Seoul, South Korea"
}
]
}
]
}
]