00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include "klone_conf.h"
00012 #include <stdio.h>
00013 #include <stdlib.h>
00014 #include <ctype.h>
00015 #include <klone/klone.h>
00016 #include <klone/translat.h>
00017 #include <klone/parser.h>
00018
00019
/* States of the page parser's finite state machine (see parser_run). */
enum {
    S_START           = 0,  /* initial state, installed by parser_reset()   */
    S_IN_DOUBLE_QUOTE = 1,  /* inside a "..." literal found in a code block */
    S_IN_SINGLE_QUOTE = 2,  /* inside a '...' literal found in a code block */
    S_HTML            = 3,  /* plain text outside of any code block         */
    S_WAIT_PERC       = 4,  /* '<' seen in text, waiting for '%'            */
    S_START_CODE      = 5,  /* "<%" seen, next char may be a command code   */
    S_CODE            = 6,  /* inside a code block                          */
    S_WAIT_GT         = 7,  /* '%' seen in code, waiting for '>'            */
    S_EAT_NEWLINE     = 8   /* skipping CR/LF characters before more text   */
};
00031
/* Character codes of the two end-of-line characters used for line counting. */
enum {
    LF = 0x0A,  /* line feed       */
    CR = 0x0D   /* carriage return */
};
00033
00034 static int parser_on_block(parser_t *p, const char *buf, size_t sz)
00035 {
00036 dbg_err_if (p == NULL);
00037 dbg_err_if (buf == NULL);
00038
00039 for(;;)
00040 {
00041 switch(p->state)
00042 {
00043 case S_START:
00044
00045 return 0;
00046 case S_IN_DOUBLE_QUOTE:
00047 case S_IN_SINGLE_QUOTE:
00048 if(p->state != p->prev_state)
00049 {
00050 p->state = p->prev_state;
00051 continue;
00052 } else
00053 return 0;
00054 case S_HTML:
00055 case S_WAIT_PERC:
00056 if(sz && p->cb_html)
00057 dbg_err_if(p->cb_html(p, p->cb_arg, buf, sz));
00058 return 0;
00059 case S_START_CODE:
00060 case S_CODE:
00061 case S_WAIT_GT:
00062 if(sz && p->cb_code)
00063 dbg_err_if(p->cb_code(p, p->cmd_code, p->cb_arg, buf, sz));
00064 return 0;
00065 }
00066 }
00067
00068 return 0;
00069 err:
00070 return ~0;
00071 }
00072
00073 int parser_run(parser_t *p)
00074 {
00075 enum { BUFSZ = 262144 };
00076 #define set_state( s ) \
00077 do { tmp = p->state; p->state = s; p->prev_state = tmp; } while(0)
00078 #define fetch_next_char() \
00079 do { prev = c; \
00080 dbg_err_if((rc = io_getc(p->in, &c)) < 0); \
00081 if(rc == 0) break; \
00082 if( (c == CR || c == LF) && prev != (c == CR ? LF : CR)) \
00083 p->line++; \
00084 } while(0)
00085 int tmp;
00086 char c = 0, prev;
00087 char buf[BUFSZ];
00088 size_t idx = 0;
00089 ssize_t rc;
00090
00091 dbg_err_if (p == NULL);
00092
00093 buf[0] = 0;
00094 prev = 0;
00095
00096 dbg_err_if(p->line > 1);
00097
00098 fetch_next_char();
00099
00100 while(rc > 0)
00101 {
00102 prev = c;
00103 switch(p->state)
00104 {
00105 case S_START:
00106 set_state(S_HTML);
00107 continue;
00108 case S_IN_DOUBLE_QUOTE:
00109 if(c == '"' && prev != '\\')
00110 set_state(p->prev_state);
00111 break;
00112 case S_IN_SINGLE_QUOTE:
00113 if(c == '\'' && prev != '\\')
00114 set_state(p->prev_state);
00115 break;
00116 case S_HTML:
00117 if(c == '<')
00118 set_state(S_WAIT_PERC);
00119 break;
00120 case S_WAIT_PERC:
00121 if(c == '%')
00122 {
00123 if(idx && --idx)
00124 {
00125 buf[idx] = 0;
00126 dbg_err_if(parser_on_block(p, buf, idx));
00127 buf[0] = 0; idx = 0;
00128 }
00129 set_state(S_START_CODE);
00130 p->code_line = p->line;
00131 fetch_next_char();
00132 continue;
00133 } else {
00134 set_state(S_HTML);
00135 continue;
00136 }
00137 break;
00138 case S_START_CODE:
00139 if(isspace(c))
00140 p->cmd_code = 0;
00141 else {
00142 p->cmd_code = c;
00143 fetch_next_char();
00144 }
00145 set_state(S_CODE);
00146 continue;
00147 case S_CODE:
00148 if(c == '"')
00149 set_state(S_IN_DOUBLE_QUOTE);
00150 else if(c == '\'')
00151 set_state(S_IN_SINGLE_QUOTE);
00152 else if(c == '%')
00153 set_state(S_WAIT_GT);
00154 break;
00155 case S_WAIT_GT:
00156 if(c == '>')
00157 {
00158 if(idx && --idx)
00159 {
00160 buf[idx] = 0;
00161 dbg_err_if(parser_on_block(p, buf, idx));
00162 buf[0] = 0; idx = 0;
00163 }
00164 fetch_next_char();
00165 p->cmd_code = 0;
00166 set_state(S_HTML);
00167 continue;
00168 } else {
00169 set_state(S_CODE);
00170 continue;
00171 }
00172 break;
00173 case S_EAT_NEWLINE:
00174 if(c == CR || c == LF)
00175 {
00176 fetch_next_char();
00177 continue;
00178 }
00179 set_state(S_HTML);
00180 continue;
00181 default:
00182 dbg_err_if("unknown parser state");
00183 }
00184 buf[idx++] = c;
00185 if(idx == BUFSZ - 1)
00186 {
00187 buf[idx] = 0;
00188 dbg_err_if(parser_on_block(p, buf, idx));
00189 buf[0] = 0; idx = 0;
00190 }
00191
00192 fetch_next_char();
00193 }
00194
00195 if(idx)
00196 {
00197 buf[idx] = 0;
00198 dbg_err_if(parser_on_block(p, buf, idx));
00199 buf[0] = 0; idx = 0;
00200 }
00201
00202 return 0;
00203 err:
00204 return ~0;
00205 }
00206
00207 void parser_set_cb_code(parser_t *p, parser_cb_code_t cb)
00208 {
00209 dbg_ifb (p == NULL) return;
00210 p->cb_code = cb;
00211 }
00212
00213 void parser_set_cb_html(parser_t *p, parser_cb_html_t cb)
00214 {
00215 dbg_ifb (p == NULL) return;
00216 p->cb_html = cb;
00217 }
00218
00219 void parser_set_cb_arg(parser_t *p, void *opaque)
00220 {
00221 dbg_ifb (p == NULL) return;
00222 p->cb_arg = opaque;
00223 }
00224
00225 void parser_set_io(parser_t *p, io_t *in, io_t *out)
00226 {
00227 dbg_ifb (p == NULL) return;
00228 p->in = in;
00229 p->out = out;
00230 }
00231
00232 int parser_free(parser_t *t)
00233 {
00234 U_FREE(t);
00235 return 0;
00236 }
00237
00238 int parser_reset(parser_t *p)
00239 {
00240 dbg_return_if (p == NULL, ~0);
00241
00242 p->line = 1;
00243 p->state = p->prev_state = S_START;
00244 p->cmd_code = 0;
00245
00246 return 0;
00247 }
00248
00249 int parser_create(parser_t **pt)
00250 {
00251 parser_t *p = NULL;
00252
00253 dbg_return_if (pt == NULL, ~0);
00254
00255 p = (parser_t*)u_zalloc(sizeof(parser_t));
00256 dbg_err_if(p == NULL);
00257
00258 (void) parser_reset(p);
00259
00260 *pt = p;
00261
00262 return 0;
00263 err:
00264 if(p)
00265 parser_free(p);
00266 return ~0;
00267 }