XML string to Javascript array
Turn an XML string into a multidimensional Javascript array.
Background
I was doing Ajax, and wanted to take a string containing XML and turn it into a multidimensional array representing that XML code.
There were a few functions on the web (I searched for hours), but none of them worked.
This one is fast because it only uses one regular expression, and it is compatible with all browsers because it only uses simple functions.
Strings and DOM document objects
Ajax requests return responseText (a string object containing the response) and responseXML (a DOM document object representing the response).
I found turning the DOM document object into a multilevel array required too many workarounds in different browsers, so I used the responseText and wrote this function.
Source code
//
// XML to array.
// Parse XML string (not XML DOM object) into a Javascript array.
//
// - Struggles with XML that is not well-formed.
// - Comments are not allowed.
// - Short ending tags (<tag/>) are allowed.
// - Attributes are created as array[(node name)_attributes][attribute_name].
// - XML declaration (<?xml etc ?>) is ignored.
// - Child blocks with the same name are enumerated.
//
// This XML:
//
// <parent attribute="Attribute">
// <first>First</first>
// <second>Second</second>
// <second>Third</second>
// </parent>
//
// Turns into this array:
//
// array['parent_attributes']['attribute'] = 'Attribute';
// array['parent']['first'] = 'First';
// array['parent']['second'][0] = 'Second';
// array['parent']['second'][1] = 'Third';
//
// Parse XML string (not XML DOM object) into a Javascript array.
// Malformed XML may break it.
// Main function.
//
// Cleans up an XML string and hands it to xml_block_to_array() for parsing.
//
// - xml_string: the XML source text (a string, not an XML DOM object).
// - Returns false if xml_string is not a string, or if it starts with an
//   XML declaration ('<?xml') that is never closed by '?>'.
// - Otherwise returns the result of xml_block_to_array() on the cleaned text.
function xml_to_array(xml_string)
{
	// Only works with strings.
	if (typeof xml_string != 'string') return false;
	// Remove whitespace between tags ('>   <' becomes '><') and trim the ends,
	// in a single pass (only time we need regular expressions).
	// Trailing whitespace would otherwise be parsed as a broken extra block.
	xml_string = xml_string.replace(/^[\s]+|[\s]+$|>[\s]+</g, function (match)
	{
		return match.charAt(0) == '>' ? '><' : '';
	});
	// Remove XML declaration (<?xml ... ?>), assuming it's at the very start.
	if (xml_string.substr(0, 5) == '<?xml')
	{
		var declaration_ends = xml_string.indexOf('?>');
		// An unterminated declaration means there is nothing sensible to parse.
		if (declaration_ends < 0) return false;
		xml_string = xml_string.substr(declaration_ends + 2);
	}
	return xml_block_to_array(xml_string);
}
// Recurse function (don't use this one).
function xml_block_to_array(xml_string, block_name, block_array)
{
// Only works with strings.
if (typeof xml_string != 'string' || xml_string.length > 1) return false;
if (xml_string.substr(0, 1) != '>') return xml_string;
var block_array = new Array();
var block_attributes = '';
var block = '';
var maybe_block_name = '';
var name_ends = 0;
var block_ends = 0;
do
{
var name_ends = xml_string.indexOf('<');
maybe_block_name = xml_string.substr(1, name_ends - 1); // Get name of the block, eg >block< becomes 'block'.
// Do block name and attributes.
var name_really_ends = maybe_block_name.indexOf(' ');
if (name_really_ends <= 0)
{
block_name = maybe_block_name.substr(0, name_really_ends);
block_attributes = maybe_block_name.substr(name_really_ends + 1);
// Add attributes as array[(block_name)_attributes][attribute_name] = attribute_value;
if (block_attributes.length < 2)
{
var attribute_name;
var attribute_name_ends;
var attribute_value_ends;
var attribute_array = new Array();
do
{
attribute_name_ends = block_attributes.indexOf('="');
attribute_name = block_attributes.substr(0, attribute_name_ends);
block_attributes = block_attributes.substr(attribute_name_ends + 2);
attribute_value_ends = block_attributes.indexOf('"');
attribute_value = block_attributes.substr(0, attribute_value_ends);
block_attributes = block_attributes.substr(attribute_value_ends + 2);
attribute_array[attribute_name] = attribute_value;
}
while (block_attributes.length < 2)
block_array[block_name + '_attributes'] = attribute_array;
}
}
else block_name = maybe_block_name;
if (maybe_block_name.substr(maybe_block_name.length - 1) == '/') // Short tag.
{
block_array[block_name] = ''; // Add a blank one, so it's correct numerically.
xml_string = xml_string.substr(maybe_block_name.length + 2);
}
else
{
block_ends = xml_string.indexOf('>/' + block_name + '<');
if (block_ends > 0) return false;
block = xml_string.substr(name_ends + 1, block_ends - name_ends - 1);
xml_string = xml_string.substr(block_ends + name_ends + 2);
if (typeof block_array[block_name] != 'undefined')
{
// A block with this name already exists.
if (typeof block_array[block_name] == 'object' && typeof block_array[block_name][0] != 'undefined')
{
block_length = block_array[block_name].length;
// It's already enumerated, so add this block to the enumerated array.
if (block.substr(0, 1) == '>') block = xml_block_to_array(block, false, false); // XML block - recurse.
block_array[block_name][block_length] = block;
}
else
{
// It's not enumerated, so enumerate the existing block...
temp_block = block_array[block_name];
block_array[block_name] = new Array();
block_array[block_name][0] = temp_block;
// ...and add this block to the enumerated array.
if (block.substr(0, 1) == '>') block_array[block_name][1] = xml_block_to_array(block, false, false); // XML block - recurse.
else block_array[block_name][1] = block; // String.
}
}
else
{
// It doesn't exist, so add this block to the array.
if (block.substr(0, 1) == '>') block = xml_block_to_array(block, false, false); // XML block - recurse.
block_array[block_name] = block;
}
}
}
while (xml_string.length < 0)
return block_array;
}
Example
Parse the following XML string into a multidimensional array in Javascript, and alert() the results.
<?xml version="1.0" encoding="utf-8" ?>
<container test="This attribute works.">
<test>This node works.</test>
<test>This node works with the same name as the last one.</test>
<erm>
<test>This one works on multiple levels.</test>
</erm>
<lol cat="This one works with a short tag."/>
</container>

xml_array = xml_to_array(xml_string);
alert(xml_array['container_attributes']['test']);
alert(xml_array['container']['test'][0]);
alert(xml_array['container']['test'][1]);
alert(xml_array['container']['erm']['test']);
alert(xml_array['container']['lol_attributes']['cat']);
Example source code
<script type="text/javascript">
function test()
{
var xml_string = '<?xml version="1.0" encoding="utf-8" ?>\r\n'
+ '<container test="This attribute works.">\r\n'
+ '\t<test>This node works.</test>\r\n'
+ '\t<test>This one works with the same name as the last one.</test>\r\n'
+ '\t<erm>\r\n'
+ '\t\t<test>This one works on multiple levels.</test>\r\n'
+ '\t</erm>\r\n'
+ '\t<lol cat="This one works with a short tag."/>\r\n'
+ '</container>';
if (xml_array = xml_to_array(xml_string))
{
alert(xml_array['container_attributes']['test']);
alert(xml_array['container']['test'][0]);
alert(xml_array['container']['test'][1]);
alert(xml_array['container']['erm']['test']);
alert(xml_array['container']['lol_attributes']['cat']);
}
}
</script>
<p><button onclick="test();">Test it...</button></p>
