Parse JSON to an xmerl-Compatible XML Tree via a Simple XML State Machine
Updated Aug 5: rewrote json_parser.erl based on tonyg's RFC4627 implementation and fixed some bugs.
In my previous blog post, "A Simple XML State Machine Accepting SAX Events to Build xmerl Compatible XML Tree: icalendar demo", I wrote a simple state machine to parse icalendar into an xmerl-compatible XML tree. This time, I'll use the same state machine to parse a JSON expression into an xmerl-compatible XML tree. The work is fairly simple:
The result will be something like:
Now you can fetch an element with:
Next time, I'll write a simple Erlang Data state machine, which will parse icalendar and json to simple Erlang Lists + Tuples.
The code of xml_sm.erl can be found in my previous blog post.
In my previous blog post, "A Simple XML State Machine Accepting SAX Events to Build xmerl Compatible XML Tree: icalendar demo", I wrote a simple state machine to parse icalendar into an xmerl-compatible XML tree. This time, I'll use the same state machine to parse a JSON expression into an xmerl-compatible XML tree. The work is fairly simple:
-module(json_parser).
-define(stateMachine, fun xml_sm:state/2).
-export([parse/1]).
-export([test/0]).
%% @doc Parses JSON text (a binary or a character list) by driving the
%% state machine named by the ?stateMachine macro with SAX-style events.
%%
%% A binary is converted to a character list first. The return value is
%% whatever the state machine returns for the `{endDocument}' event; test/0
%% in this module matches that result against `{ok, Xml}'.
parse(Bin) when is_binary(Bin) ->
    parse(binary_to_list(Bin));
parse(Text) ->
    Started = ?stateMachine({startDocument}, undefined),
    Parsed = parse_value(skip_ws(Text), Started),
    %% The endDocument result is the function's value; binding it to an
    %% otherwise unused variable only produced a compiler warning.
    ?stateMachine({endDocument}, Parsed).
%% @doc Parses the single JSON value at the head of the input and feeds the
%% corresponding events to the state machine.
%%
%% Return shapes, by kind of input:
%%   - empty input: `States' unchanged;
%%   - object: whatever parse_object/2 returns, after an `obj' startElement;
%%   - array: whatever parse_array/2 returns (no wrapper element is emitted);
%%   - quoted string (double or single quote): `{Rest, States}' after a
%%     `characters' event carrying the string;
%%   - anything else: read with parse_number/2 and emitted as a `characters'
%%     event, returning `{Rest, States}'.
parse_value([], States) ->
    States;
parse_value([${ | Tail], States) ->
    Opened = ?stateMachine({startElement, [], obj, [], []}, States),
    parse_object(skip_ws(Tail), Opened);
parse_value([$[ | Tail], States) ->
    parse_array(skip_ws(Tail), States);
parse_value([Quote | Tail], States) when Quote == $" orelse Quote == $' ->
    {Remaining, Str} = parse_string(Tail, [], Quote),
    Emitted = ?stateMachine({characters, Str}, States),
    {Remaining, Emitted};
parse_value(Input, States) ->
    {Remaining, Number} = parse_number(skip_ws(Input), []),
    Emitted = ?stateMachine({characters, Number}, States),
    {Remaining, Emitted}.
%% @doc Parses the members of a JSON object whose opening brace has already
%% been consumed by the caller.
%%
%% Each `key: value' member becomes a startElement named after the key, the
%% events of the value, and an endElement. On the closing brace an `obj'
%% endElement is emitted; the result is the bare state when nothing but
%% whitespace follows, otherwise `{Rest, States}'.
parse_object([$} | Tail], States) ->
    Closed = ?stateMachine({endElement, [], obj, []}, States),
    case skip_ws(Tail) of
        %% Nothing left after the brace: this is the final result
        [] -> Closed;
        Remaining -> {Remaining, Closed}
    end;
parse_object([$, | Tail], States) ->
    %% Member separator: skip it and continue with the next member
    parse_object(skip_ws(Tail), States);
parse_object([Quote | Tail], States) when Quote == $" orelse Quote == $' ->
    %% Whitespace directly after the opening quote is skipped, so it never
    %% becomes part of the key.
    {AfterKey, Key} = parse_string(skip_ws(Tail), [], Quote),
    %% NOTE(review): list_to_atom/1 creates an atom per distinct key and atoms
    %% are never garbage collected -- confirm the input is trusted.
    Opened = ?stateMachine({startElement, [], list_to_atom(Key), [], []}, States),
    [$: | AfterColon] = skip_ws(AfterKey),
    {AfterValue, WithValue} = parse_value(skip_ws(AfterColon), Opened),
    %% The closing event is sent with the name `undefined'
    MemberClosed = ?stateMachine({endElement, [], undefined, []}, WithValue),
    parse_object(skip_ws(AfterValue), MemberClosed).
%% @doc Parses the items of a JSON array whose opening bracket has already
%% been consumed by the caller.
%%
%% Items are handed to parse_value/2 one after another; commas between them
%% are skipped. Returns `{Rest, States}' once the closing bracket is reached,
%% where Rest is the input following the bracket.
parse_array([$] | Remaining], States) ->
    {Remaining, States};
parse_array([$, | Tail], States) ->
    parse_array(skip_ws(Tail), States);
parse_array(Input, States) ->
    {AfterItem, WithItem} = parse_value(Input, States),
    parse_array(skip_ws(AfterItem), WithItem).
%% @doc Reads a quoted string whose opening quote has already been consumed.
%%
%% Quote is the character that closes the string and Acc holds the characters
%% collected so far, in reverse order. Returns `{Rest, String}' where Rest is
%% the input following the closing quote.
%%
%% Backslash escapes are decoded: a `\uXXXX' sequence becomes the code point
%% given by its four hex digits, the JSON control escapes (b, f, n, r, t)
%% become the matching control characters, and any other escaped character
%% (including the quote itself) is taken literally instead of ending the
%% string. Input that ends before the closing quote matches no clause and
%% crashes with function_clause.
parse_string([Quote | T], Acc, Quote) ->
    {T, lists:reverse(Acc)};
parse_string([$\\, $u, A, B, C, D | T], Acc, Quote) ->
    parse_string(T, [list_to_integer([A, B, C, D], 16) | Acc], Quote);
parse_string([$\\, H | T], Acc, Quote) ->
    parse_string(T, [unescape_char(H) | Acc], Quote);
parse_string([H | T], Acc, Quote) ->
    parse_string(T, [H | Acc], Quote).

%% Maps the character following a backslash to the character it denotes.
unescape_char($b) -> $\b;
unescape_char($f) -> $\f;
unescape_char($n) -> $\n;
unescape_char($r) -> $\r;
unescape_char($t) -> $\t;
unescape_char(Other) -> Other.
%% @doc Collects the characters of an unquoted value (a number or any other
%% bare token) into a string.
%%
%% Acc holds the characters collected so far, in reverse order. Collection
%% stops, without consuming the stopping character, at a comma, a closing
%% brace or bracket, or a whitespace/control character (code =< 32), and also
%% at the end of the input. Returns `{Rest, Chars}'.
parse_number([], Acc) ->
    %% End of input terminates the token instead of crashing
    {[], lists:reverse(Acc)};
parse_number([H | _] = Rest, Acc) when H =:= $,; H =:= $}; H =:= $]; H =< 32 ->
    {Rest, lists:reverse(Acc)};
parse_number([H | T], Acc) ->
    parse_number(T, [H | Acc]).
%% @doc Drops leading characters whose code is 32 (space) or lower and
%% returns the remaining input; anything else is returned unchanged.
skip_ws([Char | Remaining]) when Char =< 32 ->
    skip_ws(Remaining);
skip_ws(Input) ->
    Input.
%% @doc Manual smoke test: parses a sample document, prints the exported XML
%% text to the `user' device, then prints the result of an XPath query for
%% the first business' latitude text.
test() ->
    Json = "
{'businesses': [{'address1': '650 Mission Street',
'address2': '',
'avg_rating': 4.5,
'categories': [{'category_filter': 'localflavor',
'name': 'Local Flavor',
'search_url': 'http://lightpole.net/search'}],
'city': 'San Francisco',
'distance': 0.085253790020942688,
'id': '4kMBvIEWPxWkWKFN__8SxQ',
'latitude': 37.787185668945298,
'longitude': -122.40093994140599},
{'address1': '25 Maiden Lane',
'address2': '',
'avg_rating': 5.0,
'categories': [{'category_filter': 'localflavor',
'name': 'Local Flavor',
'search_url': 'http://lightpole.net/search'}],
'city': 'San Francisco',
'distance': 0.23186808824539185,
'id': 'O1zPF_b7RyEY_NNsizX7Yw',
'latitude': 37.788387,
'longitude': -122.40401}]}
",
    %% Crashes with badmatch unless parsing yields {ok, Xml}
    {ok, Xml} = parse(Json),
    Rendered = lists:flatten(xmerl:export_simple([Xml], xmerl_xml)),
    io:fwrite(user, "Parsed: ~n~p~n", [Rendered]),
    LatitudeNodes = xmerl_xpath:string("/obj/businesses/obj[1]/latitude/text()", Xml),
    io:format(user, "Latitude1: ~p~n", [LatitudeNodes]).
The result will be something like:
<?xml version=\"1.0\"?>
<obj>
<businesses>
<obj>
<address1>650 Mission Street</address1>
<address2></address2>
<avg_rating>4.5</avg_rating>
<categories>
<obj>
<category_filter>localflavor</category_filter>
<name>Local Flavor</name>
<search_url>http://lightpole.net/search</search_url>
</obj>
</categories>
<city>San Francisco</city>
<distance>0.085253790020942688</distance>
<id>4kMBvIEWPxWkWKFN__8SxQ</id>
<latitude>37.787185668945298</latitude>
<longitude>-122.40093994140599</longitude>
</obj>
<obj>
<address1>25 Maiden Lane</address1>
<address2></address2>
<avg_rating>5.0</avg_rating>
<categories>
<obj>
<category_filter>localflavor</category_filter>
<name>Local Flavor</name>
<search_url>http://lightpole.net/search</search_url>
</obj>
</categories>
<city>San Francisco</city>
<distance>0.23186808824539185</distance>
<id>O1zPF_b7RyEY_NNsizX7Yw</id>
<latitude>37.788387</latitude>
<longitude>-122.40401</longitude>
</obj>
</businesses>
</obj>
Now you can fetch an element with:
> [Latitude1] = xmerl_xpath:string("/obj/businesses/obj[1]/latitude/text()", Xml),
> Latitude1#xmlText.value.
"37.787185668945298"
Next time, I'll write a simple Erlang Data state machine, which will parse icalendar and json to simple Erlang Lists + Tuples.
The code of xml_sm.erl can be found in my previous blog post.