dfa.cpp
Go to the documentation of this file.
00001 /* -*- mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- */ 00002 /* 00003 * Main authors: 00004 * Christian Schulte <schulte@gecode.org> 00005 * 00006 * Copyright: 00007 * Christian Schulte, 2004 00008 * 00009 * Last modified: 00010 * $Date: 2009-09-29 20:18:44 +0200 (Tue, 29 Sep 2009) $ by $Author: schulte $ 00011 * $Revision: 9773 $ 00012 * 00013 * This file is part of Gecode, the generic constraint 00014 * development environment: 00015 * http://www.gecode.org 00016 * 00017 * Permission is hereby granted, free of charge, to any person obtaining 00018 * a copy of this software and associated documentation files (the 00019 * "Software"), to deal in the Software without restriction, including 00020 * without limitation the rights to use, copy, modify, merge, publish, 00021 * distribute, sublicense, and/or sell copies of the Software, and to 00022 * permit persons to whom the Software is furnished to do so, subject to 00023 * the following conditions: 00024 * 00025 * The above copyright notice and this permission notice shall be 00026 * included in all copies or substantial portions of the Software. 00027 * 00028 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 00029 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 00030 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 00031 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 00032 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 00033 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 00034 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 00035 * 00036 */ 00037 00038 #include <gecode/int.hh> 00039 00040 namespace Gecode { namespace Int { namespace Extensional { 00041 00045 class TransByI_State { 00046 public: 00047 forceinline bool 00048 operator ()(const DFA::Transition& x, const DFA::Transition& y) { 00049 return x.i_state < y.i_state; 00050 } 00051 forceinline static void 00052 sort(DFA::Transition t[], int n) { 00053 TransByI_State tbis; 00054 Support::quicksort<DFA::Transition,TransByI_State>(t,n,tbis); 00055 } 00056 }; 00057 00061 class TransBySymbol { 00062 public: 00063 forceinline bool 00064 operator ()(const DFA::Transition& x, const DFA::Transition& y) { 00065 return x.symbol < y.symbol; 00066 } 00067 forceinline static void 00068 sort(DFA::Transition t[], int n) { 00069 TransBySymbol tbs; 00070 Support::quicksort<DFA::Transition,TransBySymbol>(t,n,tbs); 00071 } 00072 }; 00073 00077 class TransBySymbolI_State { 00078 public: 00079 forceinline bool 00080 operator ()(const DFA::Transition& x, const DFA::Transition& y) { 00081 return ((x.symbol < y.symbol) || 00082 ((x.symbol == y.symbol) && (x.i_state < y.i_state))); 00083 } 00084 forceinline static void 00085 sort(DFA::Transition t[], int n) { 00086 TransBySymbolI_State tbsi; 00087 Support::quicksort<DFA::Transition,TransBySymbolI_State>(t,n,tbsi); 00088 } 00089 }; 00090 00094 class TransByO_State { 00095 public: 00096 forceinline bool 00097 operator ()(const DFA::Transition& x, const DFA::Transition& y) { 00098 return x.o_state < y.o_state; 00099 } 00100 forceinline static void 00101 sort(DFA::Transition t[], int n) { 00102 TransByO_State tbos; 00103 Support::quicksort<DFA::Transition,TransByO_State>(t,n,tbos); 00104 } 00105 }; 00106 00107 00111 class StateGroup { 00112 public: 00113 int state; 00114 int group; 00115 }; 00116 00120 class StateGroupByGroup { 00121 public: 00122 forceinline bool 00123 operator ()(const StateGroup& x, const StateGroup& y) { 00124 return ((x.group < y.group) || 00125 ((x.group == y.group) && (x.state < y.state))); 00126 } 00127 static void 00128 sort(StateGroup sg[], int n) { 00129 StateGroupByGroup o; 00130 Support::quicksort<StateGroup,StateGroupByGroup>(sg,n,o); 00131 } 00132 }; 00133 00137 class GroupStates { 00138 public: 00139 StateGroup* fst; 00140 StateGroup* lst; 00141 }; 00142 00144 enum StateInfo { 00145 SI_NONE = 0, 00146 SI_FROM_START = 1, 00147 SI_TO_FINAL = 2, 00148 SI_FINAL = 4 00149 }; 00150 00151 }}} 00152 00153 namespace Gecode { 00154 00155 DFA::DFA(int start, Transition t_spec[], int f_spec[], bool minimize) { 00156 using namespace Int; 00157 using namespace Extensional; 00158 // Compute number of states and transitions 00159 int n_states = start; 00160 int n_trans = 0; 00161 for (Transition* t = &t_spec[0]; t->i_state >= 0; t++) { 00162 n_states = std::max(n_states,t->i_state); 00163 n_states = std::max(n_states,t->o_state); 00164 n_trans++; 00165 } 00166 for (int* f = &f_spec[0]; *f >= 0; f++) 00167 n_states = std::max(n_states,*f); 00168 n_states++; 00169 00170 // Temporary structure for transitions 00171 Transition* trans = heap.alloc<Transition>(n_trans); 00172 for (int i = n_trans; i--; ) 00173 trans[i] = t_spec[i]; 00174 // Temporary structures for finals 00175 int* final = heap.alloc<int>(n_states+1); 00176 bool* is_final = heap.alloc<bool>(n_states+1); 00177 int n_finals = 0; 00178 for (int i = n_states+1; i--; ) 00179 is_final[i] = false; 00180 for (int* f = &f_spec[0]; *f != -1; f++) { 00181 is_final[*f] = true; 00182 final[n_finals++] = *f; 00183 } 00184 00185 if (minimize) { 00186 // Sort transitions by symbol and i_state 00187 TransBySymbolI_State::sort(trans, n_trans); 00188 Transition** idx = heap.alloc<Transition*>(n_trans+1); 00189 // idx[i]...idx[i+1]-1 gives where transitions for symbol i start 00190 int n_symbols = 0; 00191 { 00192 int j = 0; 00193 while (j < n_trans) { 00194 idx[n_symbols++] = &trans[j]; 00195 int s = trans[j].symbol; 00196 while ((j < n_trans) && (s == trans[j].symbol)) 00197 j++; 00198 } 00199 idx[n_symbols] = &trans[j]; 00200 assert(j == n_trans); 00201 } 00202 // Map states to groups 00203 int* s2g = heap.alloc<int>(n_states+1); 00204 StateGroup* part = heap.alloc<StateGroup>(n_states+1); 00205 GroupStates* g2s = heap.alloc<GroupStates>(n_states+1); 00206 // Initialize: final states is group one, all other group zero 00207 for (int i = n_states+1; i--; ) { 00208 part[i].state = i; 00209 part[i].group = is_final[i] ? 1 : 0; 00210 s2g[i] = part[i].group; 00211 } 00212 // Important: the state n_state is the dead state, conceptually 00213 // if there is no transition for a symbol and input state, it is 00214 // assumed that there is an implicit transition to n_state 00215 00216 // Set up the indexing data structure, sort by group 00217 StateGroupByGroup::sort(part,n_states+1); 00218 int n_groups; 00219 if (part[0].group == part[n_states].group) { 00220 // No final states, just one group 00221 g2s[0].fst = &part[0]; g2s[0].lst = &part[n_states+1]; 00222 n_groups = 1; 00223 } else { 00224 int i = 0; 00225 assert(part[0].group == 0); 00226 do i++; while (part[i].group == 0); 00227 g2s[0].fst = &part[0]; g2s[0].lst = &part[i]; 00228 g2s[1].fst = &part[i]; g2s[1].lst = &part[n_states+1]; 00229 n_groups = 2; 00230 } 00231 00232 // Do the refinement 00233 { 00234 int m_groups; 00235 do { 00236 m_groups = n_groups; 00237 // Iterate over symbols 00238 for (int sidx = n_symbols; sidx--; ) { 00239 // Iterate over groups 00240 for (int g = n_groups; g--; ) { 00241 // Ignore singleton groups 00242 if (g2s[g].lst-g2s[g].fst > 1) { 00243 // Apply transitions to group states 00244 // This exploits that both transitions as well as 00245 // stategroups are sorted by (input) state 00246 Transition* t = idx[sidx]; 00247 Transition* t_lst = idx[sidx+1]; 00248 for (StateGroup* sg = g2s[g].fst; sg<g2s[g].lst; sg++) { 00249 while ((t < t_lst) && (t->i_state < sg->state)) 00250 t++; 00251 // Compute group resulting from transition 00252 if ((t < t_lst) && (t->i_state == sg->state)) 00253 sg->group = s2g[t->o_state]; 00254 else 00255 sg->group = s2g[n_states]; // Go to dead state 00256 } 00257 // Sort group by groups from transitions 00258 StateGroupByGroup::sort(g2s[g].fst, 00259 static_cast<int>(g2s[g].lst-g2s[g].fst)); 00260 // Group must be split? 00261 if (g2s[g].fst->group != (g2s[g].lst-1)->group) { 00262 // Skip first group 00263 StateGroup* sg = g2s[g].fst+1; 00264 while ((sg-1)->group == sg->group) 00265 sg++; 00266 // Start splitting 00267 StateGroup* lst = g2s[g].lst; 00268 g2s[g].lst = sg; 00269 while (sg < lst) { 00270 s2g[sg->state] = n_groups; 00271 g2s[n_groups].fst = sg++; 00272 while ((sg < lst) && ((sg-1)->group == sg->group)) { 00273 s2g[sg->state] = n_groups; sg++; 00274 } 00275 g2s[n_groups++].lst = sg; 00276 } 00277 } 00278 } 00279 } 00280 } 00281 } while (n_groups != m_groups); 00282 // New start state 00283 start = s2g[start]; 00284 // Compute new final states 00285 n_finals = 0; 00286 for (int g = n_groups; g--; ) 00287 for (StateGroup* sg = g2s[g].fst; sg < g2s[g].lst; sg++) 00288 if (is_final[sg->state]) { 00289 final[n_finals++] = g; 00290 break; 00291 } 00292 // Compute representatives 00293 int* s2r = heap.alloc<int>(n_states+1); 00294 for (int i = n_states+1; i--; ) 00295 s2r[i] = -1; 00296 for (int g = n_groups; g--; ) 00297 s2r[g2s[g].fst->state] = g; 00298 // Clean transitions 00299 int j = 0; 00300 for (int i = 0; i<n_trans; i++) 00301 if (s2r[trans[i].i_state] != -1) { 00302 trans[j].i_state = s2g[trans[i].i_state]; 00303 trans[j].symbol = trans[i].symbol; 00304 trans[j].o_state = s2g[trans[i].o_state]; 00305 j++; 00306 } 00307 n_trans = j; 00308 n_states = n_groups; 00309 heap.free<int>(s2r,n_states+1); 00310 } 00311 heap.free<GroupStates>(g2s,n_states+1); 00312 heap.free<StateGroup>(part,n_states+1); 00313 heap.free<int>(s2g,n_states+1); 00314 heap.free<Transition*>(idx,n_trans+1); 00315 } 00316 00317 // Do a reachability analysis for all states starting from start state 00318 Gecode::Support::StaticStack<int,Heap> visit(heap,n_states); 00319 int* state = heap.alloc<int>(n_states); 00320 for (int i=n_states; i--; ) 00321 state[i] = SI_NONE; 00322 00323 Transition** idx = heap.alloc<Transition*>(n_states+1); 00324 { 00325 // Sort all transitions according to i_state and create index structure 00326 // idx[i]...idx[i+1]-1 gives where transitions for state i start 00327 TransByI_State::sort(trans, n_trans); 00328 { 00329 int j = 0; 00330 for (int i=0; i<n_states; i++) { 00331 idx[i] = &trans[j]; 00332 while ((j < n_trans) && (i == trans[j].i_state)) 00333 j++; 00334 } 00335 idx[n_states] = &trans[j]; 00336 assert(j == n_trans); 00337 } 00338 00339 state[start] = SI_FROM_START; 00340 visit.push(start); 00341 while (!visit.empty()) { 00342 int s = visit.pop(); 00343 for (Transition* t = idx[s]; t < idx[s+1]; t++) 00344 if (!(state[t->o_state] & SI_FROM_START)) { 00345 state[t->o_state] |= SI_FROM_START; 00346 visit.push(t->o_state); 00347 } 00348 } 00349 } 00350 00351 // Do a reachability analysis for all states to a final state 00352 { 00353 // Sort all transitions according to o_state and create index structure 00354 // idx[i]...idx[i+1]-1 gives where transitions for state i start 00355 TransByO_State::sort(trans, n_trans); 00356 { 00357 int j = 0; 00358 for (int i=0; i<n_states; i++) { 00359 idx[i] = &trans[j]; 00360 while ((j < n_trans) && (i == trans[j].o_state)) 00361 j++; 00362 } 00363 idx[n_states] = &trans[j]; 00364 assert(j == n_trans); 00365 } 00366 00367 for (int i = n_finals; i--; ) { 00368 state[final[i]] |= (SI_TO_FINAL | SI_FINAL); 00369 visit.push(final[i]); 00370 } 00371 while (!visit.empty()) { 00372 int s = visit.pop(); 00373 for (Transition* t = idx[s]; t < idx[s+1]; t++) 00374 if (!(state[t->i_state] & SI_TO_FINAL)) { 00375 state[t->i_state] |= SI_TO_FINAL; 00376 visit.push(t->i_state); 00377 } 00378 } 00379 } 00380 heap.free<Transition*>(idx,n_states+1); 00381 heap.free<int>(final,n_states+1); 00382 heap.free<bool>(is_final,n_states+1); 00383 00384 // Now all reachable states are known (also the final ones) 00385 int* re = heap.alloc<int>(n_states); 00386 for (int i = n_states; i--; ) 00387 re[i] = -1; 00388 00389 // Renumber states 00390 int m_states = 0; 00391 // Start state gets zero 00392 re[start] = m_states++; 00393 00394 // Renumber final states 00395 for (int i = n_states; i--; ) 00396 if ((state[i] == (SI_FINAL | SI_FROM_START | SI_TO_FINAL)) && (re[i] < 0)) 00397 re[i] = m_states++; 00398 // If start state is final, final states start from zero, otherwise from one 00399 int final_fst = (state[start] & SI_FINAL) ? 0 : 1; 00400 int final_lst = m_states; 00401 // final_fst...final_lst-1 are the final states 00402 00403 // Renumber remaining states 00404 for (int i = n_states; i--; ) 00405 if ((state[i] == (SI_FROM_START | SI_TO_FINAL)) && (re[i] < 0)) 00406 re[i] = m_states++; 00407 00408 // Count number of remaining transitions 00409 int m_trans = 0; 00410 for (int i = n_trans; i--; ) 00411 if ((re[trans[i].i_state] >= 0) && (re[trans[i].o_state] >= 0)) 00412 m_trans++; 00413 00414 // All done... Construct the automaton 00415 DFAI* d = new DFAI(m_trans); 00416 d->n_states = m_states; 00417 d->n_trans = m_trans; 00418 d->final_fst = final_fst; 00419 d->final_lst = final_lst; 00420 { 00421 int j = 0; 00422 Transition* r = &d->trans[0]; 00423 for (int i = 0; i<n_trans; i++) 00424 if ((re[trans[i].i_state] >= 0) && (re[trans[i].o_state] >= 0)) { 00425 r[j].symbol = trans[i].symbol; 00426 r[j].i_state = re[trans[i].i_state]; 00427 r[j].o_state = re[trans[i].o_state]; 00428 j++; 00429 } 00430 TransBySymbol::sort(r,m_trans); 00431 } 00432 { 00433 // Count number of symbols 00434 unsigned int n_symbols = 0; 00435 for (int i = 0; i<m_trans; ) { 00436 int s = d->trans[i++].symbol; 00437 n_symbols++; 00438 while ((i<m_trans) && (d->trans[i].symbol == s)) 00439 i++; 00440 } 00441 d->n_symbols = n_symbols; 00442 } 00443 { 00444 // Compute maximal degree 00445 unsigned int max_degree = 0; 00446 unsigned int* deg = heap.alloc<unsigned int>(m_states); 00447 00448 // Compute in-degree per state 00449 for (int i = m_states; i--; ) 00450 deg[i] = 0; 00451 for (int i = m_trans; i--; ) 00452 deg[d->trans[i].o_state]++; 00453 for (int i = m_states; i--; ) 00454 max_degree = std::max(max_degree,deg[i]); 00455 00456 // Compute out-degree per state 00457 for (int i = m_states; i--; ) 00458 deg[i] = 0; 00459 for (int i = m_trans; i--; ) 00460 deg[d->trans[i].i_state]++; 00461 for (int i = m_states; i--; ) 00462 max_degree = std::max(max_degree,deg[i]); 00463 00464 heap.free<unsigned int>(deg,m_states); 00465 00466 // Compute transitions per symbol 00467 { 00468 int i=0; 00469 while (i < m_trans) { 00470 int j=i++; 00471 while ((i < m_trans) && 00472 (d->trans[j].symbol == d->trans[i].symbol)) 00473 i++; 00474 max_degree = std::max(max_degree,static_cast<unsigned int>(i-j)); 00475 } 00476 } 00477 00478 d->max_degree = max_degree; 00479 } 00480 00481 d->fill(); 00482 object(d); 00483 heap.free<int>(re,n_states); 00484 heap.free<int>(state,n_states); 00485 heap.free<Transition>(trans,n_trans); 00486 } 00487 00488 SharedHandle::Object* 00489 DFA::DFAI::copy(void) const { 00490 DFAI* d = new DFAI(n_trans); 00491 d->n_states = n_states; 00492 d->n_symbols = n_symbols; 00493 d->n_trans = n_trans; 00494 d->max_degree = max_degree; 00495 d->final_fst = final_fst; 00496 d->final_lst = final_lst; 00497 heap.copy<Transition>(&d->trans[0], &trans[0], n_trans); 00498 d->fill(); 00499 return d; 00500 } 00501 00502 void 00503 DFA::DFAI::fill(void) { 00504 // Compute smallest logarithm larger than n_symbols 00505 n_log = 1; 00506 while (n_symbols >= static_cast<unsigned int>(1<<n_log)) 00507 n_log++; 00508 // Allocate memory 00509 table = heap.alloc<HashEntry>(1<<n_log); 00510 // Initialize table 00511 for (int i=(1<<n_log); i--; ) 00512 table[i].fst = table[i].lst = NULL; 00513 int mask = (1 << n_log) - 1; 00514 // Enter transitions to table 00515 for (int i = 0; i<n_trans; ) { 00516 int s = trans[i].symbol; 00517 Transition* fst = &trans[i]; 00518 i++; 00519 while ((i<n_trans) && (trans[i].symbol == s)) 00520 i++; 00521 Transition* lst = &trans[i]; 00522 // Enter with linear collision resolution 00523 int p = s & mask; 00524 while (table[p].fst != NULL) 00525 p = (p+1) & mask; 00526 table[p].symbol = s; 00527 table[p].fst = fst; 00528 table[p].lst = lst; 00529 } 00530 } 00531 00532 } 00533 00534 // STATISTICS: int-prop 00535