Main Page | Modules | Data Structures | Directories | File List | Data Fields | Globals

parser.c

Go to the documentation of this file.
00001 /*
00002  * Copyright (c) 2005, 2006 by KoanLogic s.r.l. <http://www.koanlogic.com>
00003  * All rights reserved.
00004  *
00005  * This file is part of KLone, and as such it is subject to the license stated
00006  * in the LICENSE file which you have received as part of this distribution.
00007  *
00008  * $Id: parser.c,v 1.12 2006/09/28 03:05:38 tat Exp $
00009  */
00010 
00011 #include "klone_conf.h"
00012 #include <stdio.h>
00013 #include <stdlib.h>
00014 #include <ctype.h>
00015 #include <klone/klone.h>
00016 #include <klone/translat.h>
00017 #include <klone/parser.h>
00018 
/*
 * Parser states.
 *
 * The numeric values are the implicit ones (0..8); they are spelled out
 * here only to make them visible at a glance.
 */
enum {
    S_START           = 0,  /* state installed by parser_reset()           */
    S_IN_DOUBLE_QUOTE = 1,  /* inside a "..." string                        */
    S_IN_SINGLE_QUOTE = 2,  /* inside a '...' string                        */
    S_HTML            = 3,  /* plain text outside of <% ... %>              */
    S_WAIT_PERC       = 4,  /* '<' seen in HTML, waiting for a '%'          */
    S_START_CODE      = 5,  /* "<%" seen, next char may be a command char   */
    S_CODE            = 6,  /* inside <% ... %>                             */
    S_WAIT_GT         = 7,  /* '%' seen in code, waiting for a '>'          */
    S_EAT_NEWLINE     = 8   /* skipping CR/LF characters                    */
};
00031 
/* line terminator characters used for line counting and newline skipping */
enum {
    LF = 0x0A,  /* line feed       */
    CR = 0x0D   /* carriage return */
};
00033 
00034 static int parser_on_block(parser_t *p, const char *buf, size_t sz)
00035 {
00036     dbg_err_if (p == NULL);
00037     dbg_err_if (buf == NULL);
00038 
00039     for(;;)
00040     {
00041         switch(p->state)
00042         {
00043         case S_START:
00044             /* empty file */
00045             return 0;
00046         case S_IN_DOUBLE_QUOTE:
00047         case S_IN_SINGLE_QUOTE: 
00048             if(p->state != p->prev_state)
00049             {
00050                 p->state = p->prev_state;
00051                 continue;
00052             } else
00053                 return 0;
00054         case S_HTML: 
00055         case S_WAIT_PERC:
00056             if(sz && p->cb_html)
00057                 dbg_err_if(p->cb_html(p, p->cb_arg, buf, sz));
00058             return 0;
00059         case S_START_CODE:
00060         case S_CODE:
00061         case S_WAIT_GT:
00062             if(sz && p->cb_code)
00063                 dbg_err_if(p->cb_code(p, p->cmd_code, p->cb_arg, buf, sz));
00064             return 0;
00065         }
00066     }
00067 
00068     return 0;
00069 err:
00070     return ~0;
00071 }
00072 
00073 int parser_run(parser_t *p)
00074 {
00075     enum { BUFSZ = 262144 }; /* a big buffer is good to better zip *.kl1 */
00076     #define set_state( s ) \
00077         do { tmp = p->state; p->state = s; p->prev_state = tmp; } while(0)
00078     #define fetch_next_char()                                           \
00079         do { prev = c;                                                  \
00080             dbg_err_if((rc = io_getc(p->in, &c)) < 0);                  \
00081             if(rc == 0) break;                                          \
00082             if( (c == CR || c == LF) && prev != (c == CR ? LF : CR))    \
00083                 p->line++;                                              \
00084         } while(0)
00085     int tmp;
00086     char c = 0, prev;
00087     char buf[BUFSZ];
00088     size_t idx = 0;
00089     ssize_t rc;
00090 
00091     dbg_err_if (p == NULL);
00092     
00093     buf[0] = 0;
00094     prev = 0;
00095 
00096     dbg_err_if(p->line > 1);
00097 
00098     fetch_next_char();
00099 
00100     while(rc > 0)
00101     {
00102         prev = c;
00103         switch(p->state)
00104         {
00105         case S_START:
00106             set_state(S_HTML);
00107             continue;
00108         case S_IN_DOUBLE_QUOTE:
00109             if(c == '"' && prev != '\\')
00110                 set_state(p->prev_state);
00111             break;
00112         case S_IN_SINGLE_QUOTE:
00113             if(c == '\'' && prev != '\\')
00114                 set_state(p->prev_state);
00115             break;
00116         case S_HTML:
00117             if(c == '<')
00118                 set_state(S_WAIT_PERC);
00119             break;
00120         case S_WAIT_PERC:
00121             if(c == '%')
00122             {
00123                 if(idx && --idx) /* erase < */
00124                 {
00125                     buf[idx] = 0;
00126                     dbg_err_if(parser_on_block(p, buf, idx));
00127                     buf[0] = 0; idx = 0;
00128                 }
00129                 set_state(S_START_CODE);
00130                 p->code_line = p->line; /* save start code line number  */
00131                 fetch_next_char();      /* get cmd char (!,@,etc.)      */
00132                 continue;
00133             } else {
00134                 set_state(S_HTML);
00135                 continue;
00136             }
00137             break;
00138         case S_START_CODE:
00139             if(isspace(c))
00140                 p->cmd_code = 0;
00141             else {
00142                 p->cmd_code = c;
00143                 fetch_next_char();
00144             }
00145             set_state(S_CODE);
00146             continue;
00147         case S_CODE:
00148             if(c == '%') 
00149                 set_state(S_WAIT_GT);
00150             break;
00151         case S_WAIT_GT:
00152             if(c == '>')
00153             {
00154                 if(idx && --idx) /* erase % */
00155                 {
00156                     buf[idx] = 0;
00157                     dbg_err_if(parser_on_block(p, buf, idx));
00158                     buf[0] = 0; idx = 0;
00159                 }
00160                 fetch_next_char();
00161                 p->cmd_code = 0;
00162                 set_state(S_HTML);
00163                 continue;
00164             } else {
00165                 set_state(S_CODE);
00166                 continue;
00167             }
00168             break;
00169         case S_EAT_NEWLINE:
00170             if(c == CR || c == LF)
00171             {
00172                 fetch_next_char();
00173                 continue; /* eat it */
00174             }
00175             set_state(S_HTML);
00176             continue;
00177         default:
00178             dbg_err_if("unknown parser state");
00179         }
00180         buf[idx++] = c;
00181         if(idx == BUFSZ - 1)
00182         {
00183             buf[idx] = 0;
00184             dbg_err_if(parser_on_block(p, buf, idx));
00185             buf[0] = 0; idx = 0;
00186         }
00187 
00188         fetch_next_char();
00189     }
00190 
00191     if(idx)
00192     {
00193         buf[idx] = 0;
00194         dbg_err_if(parser_on_block(p, buf, idx));
00195         buf[0] = 0; idx = 0;
00196     }
00197 
00198     return 0;
00199 err:
00200     return ~0;
00201 }
00202 
00203 void parser_set_cb_code(parser_t *p, parser_cb_code_t cb)
00204 {
00205     dbg_ifb (p == NULL) return;
00206     p->cb_code = cb;
00207 }
00208 
00209 void parser_set_cb_html(parser_t *p, parser_cb_html_t cb)
00210 {
00211     dbg_ifb (p == NULL) return;
00212     p->cb_html = cb;
00213 }
00214 
00215 void parser_set_cb_arg(parser_t *p, void *opaque)
00216 {
00217     dbg_ifb (p == NULL) return;
00218     p->cb_arg = opaque;
00219 }
00220 
00221 void parser_set_io(parser_t *p, io_t *in, io_t *out)
00222 {
00223     dbg_ifb (p == NULL) return;
00224     p->in = in;
00225     p->out = out;
00226 }
00227 
00228 int parser_free(parser_t *t)
00229 {
00230     U_FREE(t);
00231     return 0;
00232 }
00233 
00234 int parser_reset(parser_t *p)
00235 {
00236     dbg_return_if (p == NULL, ~0);
00237 
00238     p->line = 1;
00239     p->state = p->prev_state = S_START;
00240     p->cmd_code = 0;
00241 
00242     return 0;
00243 }
00244 
00245 int parser_create(parser_t **pt)
00246 {
00247     parser_t *p = NULL;
00248 
00249     dbg_return_if (pt == NULL, ~0);
00250     
00251     p = (parser_t*)u_zalloc(sizeof(parser_t));
00252     dbg_err_if(p == NULL);
00253 
00254     (void) parser_reset(p);
00255 
00256     *pt = p;
00257 
00258     return 0;
00259 err:
00260     if(p)
00261         parser_free(p);
00262     return ~0;
00263 }

←Products
© 2005-2006 - KoanLogic S.r.l. - All rights reserved