A Simple POET State Machine Accepting SAX Events to Build Plain Old Erlang Term
Per my previous blogs, "A Simple XML State Machine Accepting SAX Events to Build xmerl Compatible XML Tree: icalendar demo" and "Parse JSON to xmerl Compatible XML Tree via A Simple XML State Machine", I wrote a simple XML state machine that receives SAX events to build an xmerl-compatible XML tree.
This time, it's a simple POET (Plain Old Erlang Term) state machine, which receives SAX events and builds the data in the form of lists and tuples.
The result will be something like:
The previous iCal and JSON examples can be parsed to POET by modifying the front-end parser a bit.
This time, it's a simple POET (Plain Old Erlang Term) state machine, which receives SAX events and builds the data in the form of lists and tuples.
%%% A state machine which receives sax events and builds a Plain Old Erlang Term
-module(poet_sm).
-export([state/2]).
-export([test/0
]).
%% One level of the SAX-to-POET state machine. state/2 keeps a stack (list) of
%% these records: one per currently open element, plus a document-level entry
%% pushed on startDocument.
-record(poetsmState, {
qname = undefined,  % QName given at startElement; stays undefined for the document-level entry
attributes = [],    % {Name, Value} pairs, held in reverse of the input order until endElement
content = [],       % child text values and objects, newest first; reversed at endElement
parents = []        % [{QName, Count}], innermost element first; Count is bumped on each characters event
}).
%% Feed a list of SAX events through state/2, starting with no state.
%% Returns {ok, TopObject, RemainingEvents} as soon as state/2 yields
%% {ok, TopObject}, {error, Reason} as soon as it yields an error, and
%% {ok, [], []} if the events run out before either happens.
receive_events(Events) ->
    receive_events(Events, undefined).

receive_events([], _Stack) ->
    {ok, [], []};
receive_events([Event | Remaining], Stack) ->
    receive_events_step(state(Event, Stack), Remaining).

%% Decide what to do with the result of a single state/2 call:
%% finish, fail, or keep consuming events with the new stack.
receive_events_step({ok, TopObject}, Remaining) ->
    {ok, TopObject, Remaining};
receive_events_step({error, Reason}, _Remaining) ->
    {error, Reason};
receive_events_step(NextStack, Remaining) ->
    receive_events(Remaining, NextStack).
%% @doc Advance the POET state machine by one SAX event.
%%
%% StateStack is whatever the previous call returned: a list of
%% #poetsmState{} records (innermost open element first, document-level
%% entry last) or, once the root element has been closed, {ok, TopObject}.
%%
%% Handled events:
%%   {startDocument}                           -> fresh one-entry stack
%%   {startElement, Uri, LocalName, QName, Attrs}
%%                                             -> stack with a new entry pushed;
%%                                                Attrs is a list of {AtomKey, Value}
%%   {characters, Chars}                       -> stack with Chars added to the
%%                                                innermost element's content
%%   {endElement, Uri, LocalName, QName}       -> {ok, Object} when the root
%%                                                element closes, otherwise the
%%                                                stack with Object added to the
%%                                                parent's content
%%   {endDocument}                             -> {ok, TopObject} if the root was
%%                                                closed, else {error, Reason}
%%
%% An object is {Name, Content} when the element has no attributes and
%% {Name, Attributes, Content} otherwise.
%%
%% Throws a flat string when an endElement QName is neither undefined nor
%% equal to the name of the element being closed. Any other event, or a
%% stack of the wrong shape, crashes (function_clause / badmatch /
%% case_clause).
state({startDocument}, _StateStack) ->
    [#poetsmState{}];

state({endDocument}, {ok, TopObject}) ->
    {ok, TopObject};
state({endDocument}, StateStack) ->
    %% The root element was never closed. Keep the diagnostic print, but
    %% return a reason the caller can match on (io:fwrite/3 itself only
    %% returns ok, which is useless as an error reason).
    io:fwrite(user,
              "Bad object match, StateStack is: ~n~p~n",
              [StateStack]),
    {error, {bad_state_stack, StateStack}};

state({startElement, _Uri, _LocalName, QName, Attrs}, StateStack) ->
    [#poetsmState{parents = Parents} | _] = StateStack,
    %% Attributes are accumulated newest-first; endElement reverses them.
    Attributes = lists:foldl(
                   fun({Key, Value}, Acc) ->
                           [{atom_to_list(Key), to_poet_value(Value)} | Acc]
                   end, [], Attrs),
    NewState = #poetsmState{qname = QName,
                            attributes = Attributes,
                            content = [],
                            parents = [{QName, 0} | Parents]},
    [NewState | StateStack];

state({endElement, _Uri, _LocalName, QName}, StateStack) ->
    [#poetsmState{qname = ElemName,
                  attributes = Attributes,
                  content = Content} | StatesPrev] = StateStack,
    ok = check_end_name(QName, ElemName),
    %% Name the object after the element that was opened: when the closing
    %% event passes undefined ("don't care") the object must not end up
    %% tagged as undefined.
    Object = build_object(ElemName,
                          lists:reverse(Attributes),
                          lists:reverse(Content)),
    case StatesPrev of
        [_DocumentState] ->
            %% Only the document-level entry is left: this was the root.
            {ok, Object};
        [ParentState | Other] ->
            #poetsmState{content = ParentContent} = ParentState,
            [ParentState#poetsmState{content = [Object | ParentContent]}
             | Other]
    end;

state({characters, Characters}, StateStack) ->
    [State | StatesPrev] = StateStack,
    #poetsmState{content = Content,
                 parents = [{Parent, Pos} | ParentsPrev]} = State,
    UpdatedState = State#poetsmState{
                     content = [to_poet_value(Characters) | Content],
                     parents = [{Parent, Pos + 1} | ParentsPrev]},
    [UpdatedState | StatesPrev].

%% Verify that the name on an endElement event agrees with the open element.
%% undefined means the caller does not care; a mismatch is thrown.
check_end_name(undefined, _ElemName) ->
    ok;
check_end_name(QName, ElemName) when QName == ElemName ->
    ok;
check_end_name(QName, ElemName) ->
    throw(lists:flatten(io_lib:format(
                          "Element name match error: ~p should be ~p~n",
                          [QName, ElemName]))).

%% Assemble the finished term for an element; the attribute slot is omitted
%% entirely when there are no attributes.
build_object(Name, [], Content) ->
    {Name, Content};
build_object(Name, Attributes, Content) ->
    {Name, Attributes, Content}.
%% Convert a SAX value to its POET representation.
%% Strings (lists) and atoms become binaries, because a list in POET denotes
%% an array; every other term is passed through untouched.
%% list_to_binary/1 is used for the conversion, so the list has to be an
%% iolist (bytes 0..255, binaries, nested lists) or the call fails with badarg.
to_poet_value(Text) when is_list(Text) ->
    list_to_binary(Text);
to_poet_value(Atom) when is_atom(Atom) ->
    list_to_binary(atom_to_list(Atom));
to_poet_value(Other) ->
    Other.
%% Smoke test: stream a small feed document through the state machine,
%% print the result and assert that it is the expected POET term.
%% Returns the parsed term; crashes with badmatch on any mismatch.
test() ->
    Expected =
        {feed,
         [{"link", <<"http://lightpole.net">>}, {"author", <<"Caoyuan">>}],
         [<<"feed text">>,
          {entry, [{"tag", <<"Erlang, Function">>}], [<<"Entry1's text">>]},
          {entry, [<<"Entry2's text">>]}]},
    FeedAttrs = [{link, "http://lightpole.net"}, {author, "Caoyuan"}],
    Events =
        [{startDocument},
         {startElement, "", feed, feed, FeedAttrs},
         {characters, "feed text"},
         {startElement, "", entry, entry, [{tag, "Erlang, Function"}]},
         {characters, "Entry1's text"},
         {endElement, "", entry, entry},
         {startElement, "", entry, entry, []},
         {characters, "Entry2's text"},
         {endElement, "", entry, entry},
         {endElement, "", feed, feed},
         {endDocument}],
    %% Streaming:
    {ok, Poet1, _Rest} = receive_events(Events),
    io:fwrite(user, "Streaming Result: ~n~p~n", [Poet1]),
    Expected = Poet1.
The result will be something like:
{feed,[{"link",<<"http://lightpole.net">>},{"author",<<"Caoyuan">>}],
[<<"feed text">>,
{entry,[{"tag",<<"Erlang, Function">>}],[<<"Entry1's text">>]},
{entry,[<<"Entry2's text">>]}]}
The previous iCal and JSON examples can be parsed to POET by modifying the front-end parser a bit.