/* xer_support.c -- source listing from the biosig4c++ package */

/*
 * Copyright (c) 2003, 2004 X/IO Labs, xiolabs.com.
 * Copyright (c) 2003, 2004, 2005 Lev Walkin <vlm@lionet.info>.
 *    All rights reserved.
 * Redistribution and modifications are permitted subject to BSD license.
 */
#include <asn_system.h>
#include <xer_support.h>

/*
 * Tokenizer states.
 * The numeric value of the current state is stored in the caller's
 * *stateContext between pxml_parse() calls, so the numbering is spelled
 * out explicitly and must stay as it is.
 */
typedef enum {
      ST_TEXT                 = 0,  /* Plain text, outside of any markup */
      ST_TAG_START            = 1,  /* '<' consumed, next byte decides */
      ST_TAG_BODY             = 2,  /* Inside a tag */
      ST_TAG_QUOTE_WAIT       = 3,  /* '=' consumed inside a tag */
      ST_TAG_QUOTED_STRING    = 4,  /* Inside a "..." attribute value */
      ST_TAG_UNQUOTED_STRING  = 5,  /* Inside a bare attribute value */
      ST_COMMENT_WAIT_DASH1   = 6,  /* "<!" consumed, first '-' expected */
      ST_COMMENT_WAIT_DASH2   = 7,  /* "<!-" consumed, second '-' expected */
      ST_COMMENT              = 8,  /* Inside a comment */
      ST_COMMENT_CLO_DASH2    = 9,  /* '-' consumed inside a comment */
      ST_COMMENT_CLO_RT       = 10  /* "--" consumed, '>' expected */
} pstate_e;

/*
 * Chunk type reported to the callback when a chunk is complete.
 * TOKEN_CB_FINAL() indexes this table with the chunk type it was given
 * (PXML_TAG or PXML_COMMENT in this file).
 * NOTE(review): the positions assume pxml_chunk_type_e is declared in the
 * order TEXT, TAG, COMMENT, TAG_END, COMMENT_END -- confirm against
 * xer_support.h.
 * The table is never written to, hence const.
 */
static const pxml_chunk_type_e final_chunk_type[] = {
      PXML_TEXT,
      PXML_TAG_END,
      PXML_COMMENT_END,
      PXML_TAG_END,
      PXML_COMMENT_END,
};


/*
 * Character class of every byte value:
 *   0 - none of the below
 *   1 - whitespace (TAB, LF, FF, CR, SPACE)
 *   2 - decimal digit
 *   3 - ASCII letter
 * Only the first 128 entries are listed; the upper half is implicitly
 * zero-initialized, so bytes >= 0x80 belong to no class.
 * The table is read-only, hence const.
 */
static const int
_charclass[256] = {
      0,0,0,0,0,0,0,0, 0,1,1,0,1,1,0,0,
      0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
      1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
      2,2,2,2,2,2,2,2, 2,2,0,0,0,0,0,0,   /* 01234567 89       */
      0,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,   /*  ABCDEFG HIJKLMNO */
      3,3,3,3,3,3,3,3, 3,3,3,0,0,0,0,0,   /* PQRSTUVW XYZ      */
      0,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,   /*  abcdefg hijklmno */
      3,3,3,3,3,3,3,3, 3,3,3,0,0,0,0,0    /* pqrstuvw xyz      */
};
/* The cast keeps the index in 0..255 even when c is a negative char. */
#define WHITESPACE(c)   (_charclass[(unsigned char)(c)] == 1)
#define ALNUM(c)  (_charclass[(unsigned char)(c)] >= 2)
#define ALPHA(c)  (_charclass[(unsigned char)(c)] == 3)

/*
 * Byte values of the markup characters the tokenizer reacts to.
 * They are given numerically (ASCII, which UTF-8 shares) rather than as
 * character literals.
 */
enum {
      EXCLAM = 0x21,    /* '!' */
      CQUOTE = 0x22,    /* '"' */
      CDASH  = 0x2d,    /* '-' */
      CSLASH = 0x2f,    /* '/' */
      LANGLE = 0x3c,    /* '<' */
      CEQUAL = 0x3d,    /* '=' */
      RANGLE = 0x3e,    /* '>' */
      CQUEST = 0x3f     /* '?' */
};

/*
 * Hand the pending chunk [chunk_start, p) to the token callback and move
 * the parser to a new state.
 *
 * These macros only work inside pxml_parse(): they read and write its
 * locals (p, chunk_start, state), call its cb/key arguments and jump to
 * its "finish" label.
 *
 *   type          chunk type passed to the callback
 *   _ns           state to enter afterwards
 *   _current_too  non-zero to include the byte at *p in the chunk
 *   _final        unused; kept so that existing invocations stay valid
 *
 * An empty chunk is not reported, only the state changes.
 * If the callback returns less than the chunk length, scanning stops via
 * "goto finish" with chunk_start left at the unreported chunk; the new
 * state is recorded in that case only when the current byte was included
 * and the callback returned -1.
 *
 * Every macro argument is parenthesized and evaluated exactly once.
 */
#define     TOKEN_CB_CALL(type, _ns, _current_too, _final) do {   \
            int _ret;                                             \
            const pstate_e ns = (_ns);                            \
            const int _too = (_current_too);                      \
            const ssize_t _sz = (p - chunk_start) + _too;         \
            if(!_sz) {                                            \
                  /* Shortcut: nothing to report */               \
                  state = ns;                                     \
                  break;                                          \
            }                                                     \
            _ret = cb((type), chunk_start, _sz, key);             \
            if(_ret < _sz) {                                      \
                  if(_too && _ret == -1)                          \
                        state = ns;                               \
                  goto finish;                                    \
            }                                                     \
            chunk_start = p + _too;                               \
            state = ns;                                           \
      } while(0)

/* Report a chunk that may continue in a later callback. */
#define TOKEN_CB(_type, _ns, _current_too)                  \
      TOKEN_CB_CALL((_type), (_ns), (_current_too), 0)

/* Report a completed chunk, using the type from final_chunk_type[]. */
#define TOKEN_CB_FINAL(_type, _ns, _current_too)            \
      TOKEN_CB_CALL(final_chunk_type[(_type)], (_ns), (_current_too), 1)

/*
 * Incremental XML tokenizer.
 *
 * Scans size bytes at xmlbuf and reports text runs, tags and comments to
 * cb, which the TOKEN_CB* macros call with the chunk type, the chunk
 * start, the chunk length and key. The state machine position is loaded
 * from *stateContext on entry and stored back before returning.
 *
 * Returns the offset from xmlbuf up to which chunks were handed to cb.
 * At the end of the buffer, pending plain text or comment interior is
 * reported as a partial PXML_TEXT / PXML_COMMENT chunk; anything pending
 * in another state is not reported and not counted in the return value.
 * Scanning stops early when cb returns less than the chunk length.
 */
ssize_t pxml_parse(int *stateContext, const void *xmlbuf, size_t size, pxml_callback_f *cb, void *key) {
      pstate_e state = (pstate_e)*stateContext;
      const char *chunk_start = (const char *)xmlbuf;
      const char *p = chunk_start;
      const char *const buf_end = chunk_start + size;

      /*
       * The names p, chunk_start, state, cb, key and the label "finish"
       * are used by the TOKEN_CB* macros and must keep these names.
       */
      for(; p < buf_end; p++) {
        const int ch = *(const unsigned char *)p;

        switch(state) {
        case ST_TEXT:
            /* Plain text lasts until a '<' shows up. */
            if(ch == LANGLE) {
                  /* Report the text so far; '<' opens the next chunk */
                  TOKEN_CB(PXML_TEXT, ST_TAG_START, 0);
            }
            break;

        case ST_TAG_START:
            /* The byte following '<' tells what kind of markup this is. */
            if(ch == EXCLAM) {
                  state = ST_COMMENT_WAIT_DASH1;
            } else if(ch == CSLASH || ALPHA(ch)) {
                  state = ST_TAG_BODY;
            } else {
                  /*
                   * Neither a tag nor a comment, something like
                   * "3 < 4": flush as text, this byte included.
                   */
                  TOKEN_CB(PXML_TEXT, ST_TEXT, 1);
            }
            break;

        case ST_TAG_BODY:
            if(ch == RANGLE) {
                  /* The tag is complete */
                  TOKEN_CB_FINAL(PXML_TAG, ST_TEXT, 1);
            } else if(ch == LANGLE) {
                  /*
                   * A new '<' before the closing '>': the tag seen
                   * so far is still reported as a tag
                   * (Mozilla-compatible).
                   */
                  TOKEN_CB_FINAL(PXML_TAG, ST_TAG_START, 0);
            } else if(ch == CEQUAL) {
                  state = ST_TAG_QUOTE_WAIT;
            }
            break;

        case ST_TAG_QUOTE_WAIT:
            /* Right after '=' inside a tag. */
            if(ch == CQUOTE) {
                  state = ST_TAG_QUOTED_STRING;
            } else if(ch == RANGLE) {
                  /* The tag is complete */
                  TOKEN_CB_FINAL(PXML_TAG, ST_TEXT, 1);
            } else if(!WHITESPACE(ch)) {
                  /* The value is not quoted */
                  state = ST_TAG_UNQUOTED_STRING;
            }
            break;

        case ST_TAG_QUOTED_STRING:
            /* Everything up to the closing quote belongs to the value. */
            if(ch == CQUOTE)
                  state = ST_TAG_BODY;
            break;

        case ST_TAG_UNQUOTED_STRING:
            if(WHITESPACE(ch)) {
                  /* The value is over, back to the tag body */
                  state = ST_TAG_BODY;
            } else if(ch == RANGLE) {
                  /* The tag is complete */
                  TOKEN_CB_FINAL(PXML_TAG, ST_TEXT, 1);
            }
            break;

        case ST_COMMENT_WAIT_DASH1:
            /* After "<!": a '-' may open a comment, else an ordinary tag. */
            state = (ch == CDASH) ? ST_COMMENT_WAIT_DASH2 : ST_TAG_BODY;
            break;

        case ST_COMMENT_WAIT_DASH2:
            /* After "<!-": a second '-' completes "<!--". */
            state = (ch == CDASH) ? ST_COMMENT : ST_TAG_BODY;
            break;

        case ST_COMMENT:
            if(ch == CDASH)
                  state = ST_COMMENT_CLO_DASH2;
            break;

        case ST_COMMENT_CLO_DASH2:
            /* A lone '-' does not close the comment. */
            state = (ch == CDASH) ? ST_COMMENT_CLO_RT : ST_COMMENT;
            break;

        case ST_COMMENT_CLO_RT:
            if(ch == RANGLE) {
                  TOKEN_CB_FINAL(PXML_COMMENT, ST_TEXT, 1);
            } else if(ch != CDASH) {
                  /* Further dashes keep waiting for '>' */
                  state = ST_COMMENT;
            }
            break;
        } /* switch(state) */
      } /* for() */

      /*
       * End of buffer: pass on what has been gathered, but only in the
       * states whose chunks may be reported in parts.
       */
      if(p != chunk_start) {
            if(state == ST_COMMENT) {
                  TOKEN_CB(PXML_COMMENT, state, 0);
            } else if(state == ST_TEXT) {
                  TOKEN_CB(PXML_TEXT, state, 0);
            }
      }

finish:
      *stateContext = (int)state;
      return chunk_start - (const char *)xmlbuf;
}


/* Listing generated by Doxygen 1.6.0 */