Main Page | Modules | Data Structures | Directories | File List | Data Fields | Globals

parser.c

Go to the documentation of this file.
00001 /*
00002  * Copyright (c) 2005, 2006 by KoanLogic s.r.l. <http://www.koanlogic.com>
00003  * All rights reserved.
00004  *
00005  * This file is part of KLone, and as such it is subject to the license stated
00006  * in the LICENSE file which you have received as part of this distribution.
00007  *
00008  * $Id: parser.c,v 1.11 2006/01/30 20:04:27 tho Exp $
00009  */
00010 
00011 #include "klone_conf.h"
00012 #include <stdio.h>
00013 #include <stdlib.h>
00014 #include <ctype.h>
00015 #include <klone/klone.h>
00016 #include <klone/translat.h>
00017 #include <klone/parser.h>
00018 
/* parser states (values are stored in parser_t state/prev_state) */
enum {
    S_START           = 0,  /* nothing consumed yet                        */
    S_IN_DOUBLE_QUOTE = 1,  /* inside a "..." literal within a code block  */
    S_IN_SINGLE_QUOTE = 2,  /* inside a '...' literal within a code block  */
    S_HTML            = 3,  /* plain page text outside of <% ... %>        */
    S_WAIT_PERC       = 4,  /* got '<', next char decides if it is "<%"    */
    S_START_CODE      = 5,  /* just after "<%", reading the command char   */
    S_CODE            = 6,  /* inside a code block                         */
    S_WAIT_GT         = 7,  /* got '%' in code, next char decides if "%>"  */
    S_EAT_NEWLINE     = 8   /* skipping CR/LF characters                   */
};
00031 
/* ASCII codes of the end-of-line characters used for line counting */
enum {
    LF = 0x0A,  /* line feed       */
    CR = 0x0D   /* carriage return */
};
00033 
00034 static int parser_on_block(parser_t *p, const char *buf, size_t sz)
00035 {
00036     dbg_err_if (p == NULL);
00037     dbg_err_if (buf == NULL);
00038 
00039     for(;;)
00040     {
00041         switch(p->state)
00042         {
00043         case S_START:
00044             /* empty file */
00045             return 0;
00046         case S_IN_DOUBLE_QUOTE:
00047         case S_IN_SINGLE_QUOTE: 
00048             if(p->state != p->prev_state)
00049             {
00050                 p->state = p->prev_state;
00051                 continue;
00052             } else
00053                 return 0;
00054         case S_HTML: 
00055         case S_WAIT_PERC:
00056             if(sz && p->cb_html)
00057                 dbg_err_if(p->cb_html(p, p->cb_arg, buf, sz));
00058             return 0;
00059         case S_START_CODE:
00060         case S_CODE:
00061         case S_WAIT_GT:
00062             if(sz && p->cb_code)
00063                 dbg_err_if(p->cb_code(p, p->cmd_code, p->cb_arg, buf, sz));
00064             return 0;
00065         }
00066     }
00067 
00068     return 0;
00069 err:
00070     return ~0;
00071 }
00072 
00073 int parser_run(parser_t *p)
00074 {
00075     enum { BUFSZ = 262144 }; /* a big buffer is good to better zip *.kl1 */
00076     #define set_state( s ) \
00077         do { tmp = p->state; p->state = s; p->prev_state = tmp; } while(0)
00078     #define fetch_next_char()                                           \
00079         do { prev = c;                                                  \
00080             dbg_err_if((rc = io_getc(p->in, &c)) < 0);                  \
00081             if(rc == 0) break;                                          \
00082             if( (c == CR || c == LF) && prev != (c == CR ? LF : CR))    \
00083                 p->line++;                                              \
00084         } while(0)
00085     int tmp;
00086     char c = 0, prev;
00087     char buf[BUFSZ];
00088     size_t idx = 0;
00089     ssize_t rc;
00090 
00091     dbg_err_if (p == NULL);
00092     
00093     buf[0] = 0;
00094     prev = 0;
00095 
00096     dbg_err_if(p->line > 1);
00097 
00098     fetch_next_char();
00099 
00100     while(rc > 0)
00101     {
00102         prev = c;
00103         switch(p->state)
00104         {
00105         case S_START:
00106             set_state(S_HTML);
00107             continue;
00108         case S_IN_DOUBLE_QUOTE:
00109             if(c == '"' && prev != '\\')
00110                 set_state(p->prev_state);
00111             break;
00112         case S_IN_SINGLE_QUOTE:
00113             if(c == '\'' && prev != '\\')
00114                 set_state(p->prev_state);
00115             break;
00116         case S_HTML:
00117             if(c == '<')
00118                 set_state(S_WAIT_PERC);
00119             break;
00120         case S_WAIT_PERC:
00121             if(c == '%')
00122             {
00123                 if(idx && --idx) /* erase < */
00124                 {
00125                     buf[idx] = 0;
00126                     dbg_err_if(parser_on_block(p, buf, idx));
00127                     buf[0] = 0; idx = 0;
00128                 }
00129                 set_state(S_START_CODE);
00130                 p->code_line = p->line; /* save start code line number  */
00131                 fetch_next_char();      /* get cmd char (!,@,etc.)      */
00132                 continue;
00133             } else {
00134                 set_state(S_HTML);
00135                 continue;
00136             }
00137             break;
00138         case S_START_CODE:
00139             if(isspace(c))
00140                 p->cmd_code = 0;
00141             else {
00142                 p->cmd_code = c;
00143                 fetch_next_char();
00144             }
00145             set_state(S_CODE);
00146             continue;
00147         case S_CODE:
00148             if(c == '"')
00149                 set_state(S_IN_DOUBLE_QUOTE);
00150             else if(c == '\'')
00151                 set_state(S_IN_SINGLE_QUOTE);
00152             else if(c == '%') 
00153                 set_state(S_WAIT_GT);
00154             break;
00155         case S_WAIT_GT:
00156             if(c == '>')
00157             {
00158                 if(idx && --idx) /* erase % */
00159                 {
00160                     buf[idx] = 0;
00161                     dbg_err_if(parser_on_block(p, buf, idx));
00162                     buf[0] = 0; idx = 0;
00163                 }
00164                 fetch_next_char();
00165                 p->cmd_code = 0;
00166                 set_state(S_HTML);
00167                 continue;
00168             } else {
00169                 set_state(S_CODE);
00170                 continue;
00171             }
00172             break;
00173         case S_EAT_NEWLINE:
00174             if(c == CR || c == LF)
00175             {
00176                 fetch_next_char();
00177                 continue; /* eat it */
00178             }
00179             set_state(S_HTML);
00180             continue;
00181         default:
00182             dbg_err_if("unknown parser state");
00183         }
00184         buf[idx++] = c;
00185         if(idx == BUFSZ - 1)
00186         {
00187             buf[idx] = 0;
00188             dbg_err_if(parser_on_block(p, buf, idx));
00189             buf[0] = 0; idx = 0;
00190         }
00191 
00192         fetch_next_char();
00193     }
00194 
00195     if(idx)
00196     {
00197         buf[idx] = 0;
00198         dbg_err_if(parser_on_block(p, buf, idx));
00199         buf[0] = 0; idx = 0;
00200     }
00201 
00202     return 0;
00203 err:
00204     return ~0;
00205 }
00206 
00207 void parser_set_cb_code(parser_t *p, parser_cb_code_t cb)
00208 {
00209     dbg_ifb (p == NULL) return;
00210     p->cb_code = cb;
00211 }
00212 
00213 void parser_set_cb_html(parser_t *p, parser_cb_html_t cb)
00214 {
00215     dbg_ifb (p == NULL) return;
00216     p->cb_html = cb;
00217 }
00218 
00219 void parser_set_cb_arg(parser_t *p, void *opaque)
00220 {
00221     dbg_ifb (p == NULL) return;
00222     p->cb_arg = opaque;
00223 }
00224 
00225 void parser_set_io(parser_t *p, io_t *in, io_t *out)
00226 {
00227     dbg_ifb (p == NULL) return;
00228     p->in = in;
00229     p->out = out;
00230 }
00231 
00232 int parser_free(parser_t *t)
00233 {
00234     U_FREE(t);
00235     return 0;
00236 }
00237 
00238 int parser_reset(parser_t *p)
00239 {
00240     dbg_return_if (p == NULL, ~0);
00241 
00242     p->line = 1;
00243     p->state = p->prev_state = S_START;
00244     p->cmd_code = 0;
00245 
00246     return 0;
00247 }
00248 
00249 int parser_create(parser_t **pt)
00250 {
00251     parser_t *p = NULL;
00252 
00253     dbg_return_if (pt == NULL, ~0);
00254     
00255     p = (parser_t*)u_zalloc(sizeof(parser_t));
00256     dbg_err_if(p == NULL);
00257 
00258     (void) parser_reset(p);
00259 
00260     *pt = p;
00261 
00262     return 0;
00263 err:
00264     if(p)
00265         parser_free(p);
00266     return ~0;
00267 }

←Products
© 2005-2006 - KoanLogic S.r.l. - All rights reserved