A Simple POET State Machine Accepting SAX Events to Build Plain Old Erlang Term


Per previous blogs:
  • A Simple XML State Machine Accepting SAX Events to Build xmerl Compatible XML Tree: icalendar demo
  • Parse JSON to xmerl Compatible XML Tree via A Simple XML State Machine
  • I wrote a simple XML state machine that receives SAX events to build an xmerl-compatible XML tree.
    This time, it's a simple POET (Plain Old Erlang Term) state machine, which receives SAX events to build the data in the form of lists and tuples.
    %%% A state machine which receives sax events and builds a Plain Old Erlang Term
    
    
    -module(poet_sm).
    
    -export([state/2]).
    
    -export([test/0
            ]).
    
    %% One frame of the state stack: the element currently being built.
    -record(poetsmState, {
        %% QName taken from the startElement event; left undefined for the
        %% document-level frame created on startDocument.
        qname = undefined,
        %% Converted {Name, Value} attribute pairs, accumulated in reverse
        %% order (reversed back when the element is closed).
        attributes = [],
        %% Converted character data and finished child objects, newest first
        %% (reversed back when the element is closed).
        content = [],
        %% {QName, Count} entries: this element first, then its ancestors.
        %% Count is incremented for each characters event the element receives.
        parents = []
    }).
    
    
    %% @doc Feed a list of SAX events through state/2, starting with no stack.
    receive_events(Events) ->
        receive_events(Events, undefined).

    %% @doc Feed events one at a time until state/2 reports a result.
    %% Returns {ok, TopObject, RemainingEvents} as soon as the top object is
    %% complete, {error, Reason} if state/2 reports an error, and
    %% {ok, [], []} when the events run out before either happens.
    receive_events([Event | Remaining], Stack) ->
        handle_transition(state(Event, Stack), Remaining);
    receive_events([], _Stack) ->
        {ok, [], []}.

    %% Decide what to do with the value state/2 returned for one event:
    %% stop on a tagged result, otherwise carry on with the new stack.
    handle_transition({ok, TopObject}, Remaining) ->
        {ok, TopObject, Remaining};
    handle_transition({error, Reason}, _Remaining) ->
        {error, Reason};
    handle_transition(NextStack, Remaining) ->
        receive_events(Remaining, NextStack).
    
    %% @doc Advance the state machine by one SAX event.
    %%
    %% StateStack is a list of #poetsmState{} frames, innermost element first,
    %% with the document-level frame at the bottom. The return value is:
    %%   * a new state stack, while the document is still being built;
    %%   * {ok, Object}, when the endElement of the top element is received
    %%     (or when endDocument is given that same {ok, Object} value);
    %%   * {error, {bad_object_match, StateStack}}, when endDocument is given
    %%     anything other than {ok, Object}.
    %%
    %% Object is {Name, Content} for an element without attributes and
    %% {Name, Attributes, Content} otherwise.
    %%
    %% Throws a flat string when an endElement QName is neither undefined nor
    %% equal to the name recorded at the matching startElement.
    state({startDocument}, _StateStack) ->
        %% Start from a single, empty document-level frame.
        [#poetsmState{}];
    state({endDocument}, StateStack) ->
        case StateStack of
            {ok, TopObject} ->
                {ok, TopObject};
            _ ->
                %% Print the diagnostic, then return a reason the caller can
                %% match on (io:fwrite itself only returns 'ok').
                io:fwrite(
                    user,
                    "Bad object match, StateStack is: ~n~p~n",
                    [StateStack]),
                {error, {bad_object_match, StateStack}}
        end;
    state({startElement, _Uri, _LocalName, QName, Attrs}, StateStack) ->
        %% Peek at the enclosing frame to inherit its ancestor chain.
        [#poetsmState{parents = Parents} | _] = StateStack,
        %% Attributes are accumulated newest-first; endElement reverses them.
        Attributes = lists:foldl(
            fun({Key, Value}, Acc) ->
                    [{atom_to_list(Key), to_poet_value(Value)} | Acc]
            end, [], Attrs),
        %% Push a fresh frame for the element being opened.
        NewState = #poetsmState{qname = QName,
                                attributes = Attributes,
                                content = [],
                                parents = [{QName, 0} | Parents]},
        [NewState | StateStack];
    state({endElement, _Uri, _LocalName, QName}, StateStack) ->
        %% Pop the frame of the element being closed.
        [State | StatesPrev] = StateStack,
        #poetsmState{qname = ElemName,
                     attributes = Attributes,
                     content = Content,
                     parents = Parents} = State,
        %% An undefined QName means the caller does not care about the name;
        %% otherwise it must agree with the name seen at startElement.
        if  QName == undefined; QName == ElemName ->
                ok;
            true ->
                throw(lists:flatten(io_lib:format(
                    "Element name match error: ~p should be ~p~n",
                    [QName, ElemName])))
        end,
        %% The frame being closed must belong to an element, not the document.
        [_ | _] = Parents,
        %% Tag the object with the name recorded at startElement so that a
        %% "don't care" endElement does not produce an 'undefined' tag.
        Object =
            case Attributes of
                [] ->
                    {ElemName, lists:reverse(Content)};
                _ ->
                    {ElemName, lists:reverse(Attributes), lists:reverse(Content)}
            end,
        case StatesPrev of
            [_DocumentState] ->
                %% Only the document frame is left: this was the top element.
                {ok, Object};
            [ParentState | Other] ->
                %% Append the finished object to the parent's content.
                #poetsmState{content = ParentContent} = ParentState,
                ParentState1 =
                    ParentState#poetsmState{content = [Object | ParentContent]},
                [ParentState1 | Other]
        end;
    state({characters, Characters}, StateStack) ->
        [State | StatesPrev] = StateStack,
        #poetsmState{content = Content,
                     parents = Parents} = State,
        %% Bump the counter kept for the current element.
        [{Parent, Pos} | ParentsPrev] = Parents,
        Value = to_poet_value(Characters),
        UpdatedState = State#poetsmState{content = [Value | Content],
                                         parents = [{Parent, Pos + 1} | ParentsPrev]},
        [UpdatedState | StatesPrev].
    
    %% @doc Convert a SAX value into its POET representation.
    %%
    %% Atoms and strings become binaries, because a list in POET denotes an
    %% array rather than text. Strings are treated as Unicode character data
    %% and encoded as UTF-8, so code points above 255 are accepted instead of
    %% raising badarg as list_to_binary/1 would. Any other term is returned
    %% unchanged.
    %%
    %% Raises badarg when a list is not valid Unicode character data.
    to_poet_value(Name) when is_atom(Name) ->
        to_poet_value(atom_to_list(Name));
    to_poet_value(Chars) when is_list(Chars) ->
        case unicode:characters_to_binary(Chars) of
            Bin when is_binary(Bin) ->
                Bin;
            _Invalid ->
                %% {error, _, _} or {incomplete, _, _}: keep failing loudly.
                error(badarg, [Chars])
        end;
    to_poet_value(Value) ->
        Value.
    
    %% @doc Smoke test: stream a small feed document through receive_events/1,
    %% print the resulting term and assert that it has the expected shape.
    test() ->
        FeedAttrs = [{link, "http://lightpole.net"}, {author, "Caoyuan"}],
        FirstEntry = [
            {startElement, "", entry, entry, [{tag, "Erlang, Function"}]},
            {characters, "Entry1's text"},
            {endElement, "", entry, entry}
        ],
        SecondEntry = [
            {startElement, "", entry, entry, []},
            {characters, "Entry2's text"},
            {endElement, "", entry, entry}
        ],
        SaxEvents =
            [{startDocument},
             {startElement, "", feed, feed, FeedAttrs},
             {characters, "feed text"}]
            ++ FirstEntry
            ++ SecondEntry
            ++ [{endElement, "", feed, feed},
                {endDocument}],

        %% Streaming: the top object is returned as soon as the feed closes.
        {ok, Result, _Unconsumed} = receive_events(SaxEvents),
        io:fwrite(user, "Streaming Result: ~n~p~n", [Result]),

        Expected =
            {feed,
             [{"link", <<"http://lightpole.net">>}, {"author", <<"Caoyuan">>}],
             [<<"feed text">>,
              {entry, [{"tag", <<"Erlang, Function">>}], [<<"Entry1's text">>]},
              {entry, [<<"Entry2's text">>]}]},
        Expected = Result.
    

    The result will be something like:
    {feed,[{"link",<<"http://lightpole.net">>},{"author",<<"Caoyuan">>}],
          [<<"feed text">>,
           {entry,[{"tag",<<"Erlang, Function">>}],[<<"Entry1's text">>]},
           {entry,[<<"Entry2's text">>]}]}
    

    The previous iCal and JSON examples can be parsed to POET by modifying the front-end parser a bit.