strutils.cc

00001 /*
00002  * Worldvisions Weaver Software:
00003  *   Copyright (C) 1997-2002 Net Integration Technologies, Inc.
00004  * 
00005  * Various useful string-based utilities.
00006  *
00007  */
00008 #include "strutils.h"
00009 #include "wvbuf.h"
00010 #include <ctype.h>
00011 #include <stdio.h>
00012 #include <string.h>
00013 #include <time.h>
00014 #include <errno.h>
00015 
00016 #ifndef _WIN32
00017 //#include <uuid.h>
00018 #include <errno.h>
00019 #include <netdb.h>
00020 #include <unistd.h>
00021 #else
00022 #undef errno
00023 #define errno GetLastError()
00024 #define strcasecmp _stricmp
00025 #include <winsock2.h>
00026 #include <direct.h>
00027 #ifndef EACCES
00028 #define EACCES 0xfff
00029 #endif
00030 #endif
00031 
// Strips any trailing CR/LF characters from 'string', then appends the
// character 'c' followed by a terminating NUL.
//
// NOTE: You need a buffer that's at least one character bigger than the
// current length of the string, including the terminating NUL (the extra
// byte is used when there is no trailing CR/LF to overwrite).
//
// Returns 'string', or NULL if string == NULL.
char *terminate_string(char *string, char c)
{
    if (string == NULL)
        return NULL;

    // Track a length rather than walking a pointer backwards, so that an
    // empty string (or one made only of CR/LF) never produces a pointer
    // before the start of the buffer.
    size_t len = strlen(string);
    while (len > 0 && (string[len - 1] == '\r' || string[len - 1] == '\n'))
        --len;

    string[len] = c;
    string[len + 1] = 0;

    return string;
}
00057 
00058 
// Trims whitespace off the front and end of a string.  Modifies the string:
// trailing whitespace is overwritten with NULs, and the returned pointer
// points at the first non-whitespace character inside the same buffer.
// Specifically DOES allow string==NULL; returns NULL in that case.
char *trim_string(char *string)
{
    if (string == NULL)
        return NULL;

    // The <ctype.h> classifiers need a value representable as unsigned char;
    // a plain char may be negative, so cast before calling isspace().
    // Using a length also avoids pointing before the buffer when the string
    // is empty.
    size_t len = strlen(string);
    while (len > 0 && isspace((unsigned char)string[len - 1]))
        string[--len] = 0;

    char *p = string;
    while (isspace((unsigned char)*p))
        p++;

    return p;
}
00080 
00081 
// Cuts the string off at the first occurrence of 'c': that character and
// everything after it are overwritten with NUL bytes.
// Modifies the string in place and returns it; returns NULL if
// string == NULL.
char *trim_string(char *string, char c)
{
    if (string == NULL)
        return NULL;

    // strchr() also matches the terminating NUL when c == 0; in that case
    // strlen(cut) is 0 and nothing is wiped.
    char *cut = strchr(string, c);
    if (cut != NULL)
        memset(cut, 0, strlen(cut));

    return string;
}
00101 
00102 
00103 // return the string formed by concatenating string 'a' and string 'b' with
00104 // the 'sep' character between them.  For example,
00105 //    spacecat("xx", "yy", ";")
00106 // returns "xx;yy", and
00107 //    spacecat("xx;;", "yy", ";")
00108 // returns "xx;;;yy", and
00109 //    spacecat("xx;;", "yy", ";", true)
00110 // returns "xx;yy".
00111 //
00112 // This function is much faster than the more obvious WvString("%s;%s", a, b),
00113 // so it's useful when you're producing a *lot* of string data.
00114 WvString spacecat(WvStringParm a, WvStringParm b, char sep, bool onesep)
00115 {
00116     size_t alen = strlen(a);
00117     size_t blen = strlen(b);
00118 
00119     // If we only want one separator, eat away at the back of string a
00120     if (onesep && alen)
00121     {
00122         while (a[alen-1] == sep)
00123             --alen;
00124     }
00125 
00126     // Create the destination string, and give it an appropriate size.
00127     // Then, fill it with string a.
00128     WvString s;
00129     s.setsize(alen + blen + 2);
00130     char *cptr = s.edit();
00131 
00132     memcpy(cptr, a, alen);
00133 
00134     // Write the separator in the appropriate spot.
00135     cptr[alen] = sep;
00136 
00137     // If we only want one separator, eat away at the from of string b.
00138     size_t boffset = 0;
00139     if (onesep)
00140     {
00141         while (b[boffset] == sep)
00142             ++boffset;
00143     }
00144 
00145     // Now copy the second half of the string in and terminate with a NUL.
00146     memcpy(cptr+alen+1, b.cstr()+boffset, blen-boffset);
00147     cptr[alen+1+blen-boffset] = 0;
00148 
00149     return s;
00150 }
00151 
00152 
00153 // Replaces whitespace characters with nonbreaking spaces.
00154 char *non_breaking(char * string)
00155 {
00156     if (string == NULL)
00157         return (NULL);
00158 
00159     WvDynBuf buf;
00160 
00161     while (*string)
00162     {
00163         if (isspace(*string))
00164             buf.putstr("&nbsp;");
00165         else 
00166             buf.putch(*string);
00167         string++;
00168     }
00169 
00170     WvString s(buf.getstr());
00171     char *nbstr = new char[s.len() + 1];
00172     return strcpy(nbstr, s.edit());
00173 }
00174 
00175 
// Scans the first 'length' bytes at _string and overwrites every byte equal
// to c1 with c2.  The scan is purely length-driven, so embedded NULs do not
// stop it; a length of zero or less does nothing.
void replace_char(void *_string, char c1, char c2, int length)
{
    char *cursor = (char *)_string;

    for (int remaining = length; remaining > 0; --remaining, ++cursor)
    {
        if (*cursor == c1)
            *cursor = c2;
    }
}
00185 
// Snip off the first part of 'haystack' if it consists of 'needle'.
//
// Returns a pointer just past the needle when haystack starts with it, and
// haystack itself otherwise.  A NULL haystack yields NULL; a NULL needle
// yields haystack unchanged.
char *snip_string(char *haystack, char *needle)
{
    if(!haystack)
        return NULL;
    if(!needle)
        return haystack;

    // Only a match at the very start counts, so compare the prefix directly
    // instead of letting strstr() search the whole haystack.
    size_t needle_len = strlen(needle);
    if (strncmp(haystack, needle, needle_len) == 0)
        return haystack + needle_len;

    return haystack;
}
00199 
00200 
// Converts every character of 'string' to lower case in place, using
// tolower().  Returns 'string'; a NULL string is returned untouched.
char *strlwr(char *string)
{
    if (string == NULL)
        return string;

    // tolower() needs a value representable as unsigned char; a plain char
    // may be negative for bytes >= 0x80.
    for (char *p = string; *p; p++)
        *p = (char)tolower((unsigned char)*p);

    return string;
}
00212 
00213 
// Converts every character of 'string' to upper case in place, using
// toupper().  Returns 'string'; a NULL string is returned untouched.
char *strupr(char *string)
{
    if (string == NULL)
        return string;

    // toupper() needs a value representable as unsigned char; a plain char
    // may be negative for bytes >= 0x80.
    for (char *p = string; *p; p++)
        *p = (char)toupper((unsigned char)*p);

    return string;
}
00225 
00226 
// true if all the characters in the string 'p' are isalnum().
// An empty string counts as a word.  'p' must not be NULL (asserted).
bool is_word(const char *p)
{
    assert(p);

    for (; *p; p++)
    {
        // isalnum() needs a value representable as unsigned char; a plain
        // char may be negative for bytes >= 0x80.
        if (!isalnum((unsigned char)*p))
            return false;
    }

    return true;
}
00240 
00241 
00242 // produce a hexadecimal dump of the data buffer in 'buf' of length 'len'.
00243 // it is formatted with 16 bytes per line; each line has an address offset,
00244 // hex representation, and printable representation.
00245 WvString hexdump_buffer(const void *_buf, size_t len, bool charRep)
00246 {
00247     const unsigned char *buf = (const unsigned char *)_buf;
00248     size_t count, count2, top;
00249     WvString out;
00250 
00251     out.setsize(len / 16 * 80 + 80);
00252     char *cptr = out.edit();
00253     
00254     for (count = 0; count < len; count+=16)
00255     {
00256         top = len-count < 16 ? len-count : 16;
00257         cptr += sprintf(cptr, "[%03X] ", (unsigned int)count);
00258         
00259         // dump hex values
00260         for (count2 = 0; count2 < top; count2++)
00261         {
00262             if (count2 && !(count2 % 4))
00263                 *cptr++ = ' ';
00264             cptr += sprintf(cptr, "%02X", buf[count+count2]);
00265         }
00266         
00267         // print horizontal separation
00268         for (count2 = top; count2 < 16; count2++)
00269         {
00270             if (count2 && !(count2 % 4))
00271             {
00272                 strcat(cptr, "   ");
00273                 cptr += 3;
00274             }
00275             else
00276             {
00277                 strcat(cptr, "  ");
00278                 cptr += 2;
00279             }
00280         }
00281         
00282         *cptr++ = ' ';
00283         
00284         // dump character representation
00285         if (charRep)
00286             for (count2 = 0; count2 < top; count2++)
00287                 *cptr++ = (isprint(buf[count+count2])
00288                            ? buf[count+count2] : '.');
00289 
00290         *cptr++ = '\n';
00291     }
00292     *cptr = 0;
00293     return out;
00294 }
00295 
00296 
// Reports whether 'c' is one of the two line-ending characters:
// line feed ('\n') or carriage return ('\r').
bool isnewline(char c)
{
    switch (c)
    {
    case '\n':
    case '\r':
        return true;
    default:
        return false;
    }
}
00302 
00303 // ex: WvString foo = web_unescape("I+am+text.%0D%0A");
00304 WvString web_unescape(const char *str, bool no_space)
00305 {
00306     const char *iptr;
00307     char *optr;
00308     char *idx1, *idx2;
00309     static const char hex[] = "0123456789ABCDEF";
00310     WvString in, intmp(str), out;
00311  
00312     in = trim_string(intmp.edit());
00313     out.setsize(strlen(in) + 1);
00314 
00315     optr = out.edit();
00316     for (iptr = in, optr = out.edit(); *iptr; iptr++)
00317     {
00318         if (*iptr == '+' && !no_space)
00319             *optr++ = ' ';
00320         else if (*iptr == '%' && iptr[1] && iptr[2])
00321         {
00322             idx1 = strchr(hex, toupper((unsigned char) iptr[1]));
00323             idx2 = strchr(hex, toupper((unsigned char) iptr[2]));
00324 
00325             if (idx1 && idx2)
00326                 *optr++ = ((idx1 - hex) << 4) | (idx2 - hex);
00327 
00328             iptr += 2;
00329         }
00330         else
00331             *optr++ = *iptr;
00332     }
00333 
00334     *optr = 0;
00335 
00336     return out;
00337 }
00338 
00339 
00340 // And it's magic companion: url_encode
00341 WvString url_encode(WvStringParm stuff)
00342 {
00343     unsigned int i;
00344     WvDynBuf retval;
00345 
00346     for (i=0; i < stuff.len(); i++)
00347     {
00348         if (isalnum(stuff[i]) || strchr("/_.-~", stuff[i]))
00349         {
00350             retval.put(&stuff[i], 1);
00351         }               
00352         else            
00353         {               
00354             char buf[4];
00355             sprintf(buf, "%%%02x", stuff[i] & 0xff);
00356             retval.put(&buf, 3);
00357         }
00358     }
00359     return retval.getstr();
00360 }
00361 
00362 WvString diff_dates(time_t t1, time_t t2)
00363 {
00364     char out[25]; //Should be more then enough
00365     double diff = difftime(t1, t2);
00366     if(diff < 0)
00367         diff = -diff;
00368     if(diff > (60 * 60 * 24))
00369         //give a touch more granularity then the rest
00370         sprintf(out, "%.1f day(s)", diff / (60 * 60 * 24));
00371     else if(diff > (60 * 60)) 
00372         sprintf(out, "%.0f hour(s)", diff / (60 * 60));
00373     else if(diff > 60)
00374         sprintf(out, "%.0f minute(s)", diff / 60);
00375     else
00376         sprintf(out, "%.0f second(s)", diff);
00377     return out;
00378 }
00379 
00380 WvString rfc822_date(time_t when)
00381 {
00382     WvString out;
00383     out.setsize(80);
00384 
00385     if (when < 0)
00386         when = time(NULL);
00387 
00388     struct tm *tmwhen = localtime(&when);
00389     strftime(out.edit(), 80, "%a, %d %b %Y %H:%M:%S %z", tmwhen);
00390 
00391     return out;
00392 }
00393 
00394 
00395 WvString backslash_escape(WvStringParm s1)
00396 {
00397     // stick a backslash in front of every !isalnum() character in s1
00398     if (!s1)
00399         return "";
00400 
00401     WvString s2;
00402     s2.setsize(s1.len() * 2 + 1);
00403 
00404     const char *p1 = s1;
00405     char *p2 = s2.edit();
00406     while (*p1)
00407     {
00408         if (!isalnum(*p1))
00409             *p2++ = '\\';
00410         *p2++ = *p1++;
00411     }
00412     *p2 = 0;
00413 
00414     return s2;
00415 }
00416 
00417 
00418 int strcount(WvStringParm s, const char c)
00419 {
00420     int n=0;
00421     const char *p = s;
00422     while ((p=strchr(p, c)) != NULL && p++)
00423         n++;
00424 
00425     return n;
00426 }
00427 
00428 
00429 WvString encode_hostname_as_DN(WvStringParm hostname)
00430 {
00431     WvString dn("");
00432     
00433     WvStringList fqdnlist;
00434     WvStringList::Iter i(fqdnlist);
00435     
00436     fqdnlist.split(hostname, ".");
00437     for (i.rewind(); i.next(); )
00438         dn.append("dc=%s,", *i);
00439     dn.append("cn=%s", hostname);
00440     
00441     return dn;
00442 }
00443 
00444 
00445 WvString nice_hostname(WvStringParm name)
00446 {
00447     WvString nice;
00448     char *optr, *optr_start;
00449     const char *iptr;
00450     bool last_was_dash;
00451     
00452     nice.setsize(name.len() + 2);
00453 
00454     iptr = name;
00455     optr = optr_start = nice.edit();
00456     if (!isascii(*iptr) || !isalnum(*(const unsigned char *)iptr))
00457         *optr++ = 'x'; // DNS names must start with a letter!
00458     
00459     last_was_dash = false;
00460     for (; *iptr; iptr++)
00461     {
00462         if (!isascii(*iptr))
00463             continue; // skip it entirely
00464         
00465         if (*iptr == '-' || *iptr == '_')
00466         {
00467             if (last_was_dash)
00468                 continue;
00469             last_was_dash = true;
00470             *optr++ = '-';
00471         }
00472         else if (isalnum(*(const unsigned char *)iptr) || *iptr == '.')
00473         {
00474             *optr++ = *iptr;
00475             last_was_dash = false;
00476         }
00477     }
00478     
00479     if (optr > optr_start && !isalnum(*(const unsigned char *)(optr-1)))
00480         *optr++ = 'x'; // must _end_ in a letter/number too!
00481     
00482     *optr++ = 0;
00483     
00484     if (!nice.len())
00485         return "UNKNOWN";
00486     
00487     return nice;
00488 }
00489 
00490 
00491 WvString getfilename(WvStringParm fullname)
00492 {
00493     WvString tmp(fullname);
00494     char *cptr = strrchr(tmp.edit(), '/');
00495     
00496     if (!cptr) // no slash at all
00497         return fullname;
00498     else if (!cptr[1]) // terminating slash
00499     {
00500         *cptr = 0;
00501         return getfilename(tmp);
00502     }
00503     else // no terminating slash
00504         return cptr+1;
00505 }
00506 
00507 
00508 WvString getdirname(WvStringParm fullname)
00509 {
00510     WvString tmp(fullname);
00511     char *cptr = strrchr(tmp.edit(), '/');
00512     
00513     if (!cptr) // no slash at all
00514         return ".";
00515     else if (!cptr[1]) // terminating slash
00516     {
00517         *cptr = 0;
00518         return getdirname(tmp);
00519     }
00520     else // no terminating slash
00521     {
00522         *cptr = 0;
00523         return !tmp ? WvString("/") : tmp;
00524     }
00525 }
00526 
00527 // Programmatically determine the units.  In order, these are:
00528 // bytes, kilobytes, megabytes, gigabytes, terabytes, petabytes,
00529 // exabytes, zettabytes, yottabytes.  Note that these are SI
00530 // prefixes, not binary ones.
00531 
// This structure allows us to choose between SI-prefixes which are
// powers of 10, and IEC-prefixes which are powers of 2.
// Tables of these are terminated by an entry whose name is NULL.
struct prefix_t
{
    const char *name;           // prefix text printed in front of the unit
    unsigned long long base;    // multiplier for this prefix; 0 in the
                                // tables' "Z"/"Y" entries
};
00539 
00540 // SI-prefixes:
00541 // kilo, mega, giga, tera, peta, and exa.
00542 static const prefix_t si[] =
00543 {
00544     { "k", 1000ull },
00545     { "M", 1000ull * 1000ull },
00546     { "G", 1000ull * 1000ull * 1000ull },
00547     { "T", 1000ull * 1000ull * 1000ull * 1000ull },
00548     { "P", 1000ull * 1000ull * 1000ull * 1000ull * 1000ull},
00549     { "E", 1000ull * 1000ull * 1000ull * 1000ull * 1000ull * 1000ull},
00550     { "Z", 0 },
00551     { "Y", 0 },
00552     { NULL, 0 }
00553 };
00554 
00555 // IEC-prefixes:
00556 // kibi, mebi, gibi, tebi, pebi, and exbi.
00557 static const prefix_t iec[] =
00558 {
00559     { "Ki", 1024ull },
00560     { "Mi", 1024ull * 1024ull},
00561     { "Gi", 1024ull * 1024ull * 1024ull },
00562     { "Ti", 1024ull * 1024ull * 1024ull * 1024ull },
00563     { "Pi", 1024ull * 1024ull * 1024ull * 1024ull * 1024ull},
00564     { "Ei", 1024ull * 1024ull * 1024ull * 1024ull * 1024ull * 1024ull},
00565     { "Zi", 0 },
00566     { "Yi", 0 },
00567     { NULL, 0 }
00568 };
00569 
00570 
00571 // This function expects size to be ten-times the actual number.
00572 static inline unsigned long long _sizetoa_rounder(RoundingMethod method,
00573                                                   unsigned long long size,
00574                                                   unsigned long long remainder,
00575                                                   unsigned long long base)
00576 {
00577     unsigned long long half = base / 2;
00578     unsigned long long significant_digits = size / base;
00579     switch (method)
00580     {
00581     case ROUND_DOWN:
00582         break;
00583 
00584     case ROUND_UP:
00585         if (remainder || (size % base))
00586             ++significant_digits;
00587         break;
00588 
00589     case ROUND_UP_AT_POINT_FIVE:
00590         if ((size % base) >= half)
00591             ++significant_digits;
00592         break;
00593 
00594     case ROUND_DOWN_AT_POINT_FIVE:
00595         unsigned long long r = size % base;
00596         if ((r > half) || (remainder && (r == half)))
00597             ++significant_digits;
00598         break;
00599     }
00600     return significant_digits;
00601 }
00602 
00603 
// This function helps sizetoa() and sizektoa() below.  It takes a
// bunch of digits, and the default unit (indexed by size); and turns
// them into a WvString that's formatted to human-readable rounded
// sizes, with one decimal place.
//
// Parameters:
//   size            - number of blocks
//   blocksize       - bytes per block; must be non-zero (asserted)
//   rounding_method - passed through to _sizetoa_rounder()
//   prefixes        - NULL-name-terminated prefix table (si or iec)
//   unit            - unit text appended after the prefix
//
// You must be very careful here never to add anything to size.
// Otherwise, you might cause an overflow to occur.  Similarly, you
// must be careful when you subtract or you might cause an underflow.
static WvString _sizetoa(unsigned long long size, unsigned long blocksize,
                         RoundingMethod rounding_method,
                         const prefix_t *prefixes, WvStringParm unit)
{
    assert(blocksize);

    // To understand rounding, consider the display of the value 999949.
    // For each rounding method the string displayed should be:
    // ROUND_DOWN: 999.9 kB
    // ROUND_UP_AT_POINT_FIVE: 999.9 kB
    // ROUND_UP: 1.0 MB
    // On the other hand, for the value 999950, the strings should be:
    // ROUND_DOWN: 999.9 kB
    // ROUND_DOWN_AT_POINT_FIVE: 999.9 kB
    // ROUND_UP_AT_POINT_FIVE: 1.0 MB
    // ROUND_UP: 1.0 MB

    // Deal with blocksizes without overflowing.
    // group_base is the step between consecutive prefixes (the base of the
    // table's first entry).  Each whole factor of group_base divided out of
    // blocksize is remembered in 'shift' as one prefix step.
    const unsigned long long group_base = prefixes[0].base;
    int shift = 0;
    unsigned long prev_blocksize = 0;
    while (blocksize >= group_base)
    {
        prev_blocksize = blocksize;
        blocksize /= group_base;
        ++shift;
    }

    // If we have a very large blocksize, make sure to keep enough of
    // it to make rounding possible.
    // (This undoes the last division above unless the value before it was
    // exactly group_base.)
    if (prev_blocksize && prev_blocksize != group_base)
    {
        blocksize = prev_blocksize;
        --shift;
    }

    // p indexes the prefix chosen for 'size' alone; -1 means no prefix step
    // has been taken yet.  The final prefix index is p + shift.
    int p = -1;
    unsigned long long significant_digits = size * 10;
    unsigned int remainder = 0;
    if (significant_digits < size)
    {
        // A really big size.  We'll divide by a grouping before going up one.
        // (size * 10 wrapped around; the part divided away is kept in
        // 'remainder' so the rounder can still take it into account.)
        remainder = size % group_base;
        size /= group_base;
        ++shift;
    }
    // Step up through the prefixes until the tenths value drops below one
    // full group or the table has no further named prefix.
    while (size >= group_base)
    {
        ++p;
        significant_digits = _sizetoa_rounder(rounding_method,
                                              size * 10,
                                              remainder,
                                              prefixes[p].base);
        if (significant_digits < (group_base * 10)
            || !prefixes[p + shift + 1].name)
            break;
    }

    // Correct for blocksizes that aren't powers of group_base.
    if (blocksize > 1)
    {
        significant_digits *= blocksize;
        // The multiplication may push the value past one group again, so
        // keep dividing by group_base and moving up a prefix while a named
        // prefix is still available.
        while (significant_digits >= (group_base * 10)
               && prefixes[p + shift + 1].name)
        {
            significant_digits = _sizetoa_rounder(rounding_method,
                                                  significant_digits,
                                                  0,
                                                  group_base);
            ++p;
        }
    }

    // Now we can return our result.
    // significant_digits holds tenths: the last digit is the decimal place.
    return WvString("%s.%s %s%s",
                    significant_digits / 10,
                    significant_digits % 10,
                    prefixes[p + shift].name,
                    unit);
}
00692 
00693 WvString sizetoa(unsigned long long blocks, unsigned long blocksize,
00694                  RoundingMethod rounding_method)
00695 {
00696     unsigned long long bytes = blocks * blocksize;
00697 
00698     // Test if we are dealing in just bytes.
00699     if (bytes < 1000 && bytes >= blocks)
00700         return WvString("%s bytes", bytes);
00701 
00702     return _sizetoa(blocks, blocksize, rounding_method, si, "B");
00703 }
00704 
00705 
00706 WvString sizektoa(unsigned long long kbytes, RoundingMethod rounding_method)
00707 {
00708     if (kbytes < 1000)
00709         return WvString("%s kB", kbytes);
00710 
00711     return sizetoa(kbytes, 1000, rounding_method);
00712 }
00713 
00714 WvString sizeitoa(unsigned long long blocks, unsigned long blocksize,
00715                   RoundingMethod rounding_method)
00716 {
00717     unsigned long long bytes = blocks * blocksize;
00718 
00719     // Test if we are dealing in just bytes.
00720     if (bytes < 1024 && bytes >= blocks)
00721         return WvString("%s bytes", bytes);
00722 
00723     return _sizetoa(blocks, blocksize, rounding_method, iec, "B");
00724 }
00725 
00726 
00727 WvString sizekitoa(unsigned long long kbytes, RoundingMethod rounding_method)
00728 {
00729     if (kbytes < 1024)
00730         return WvString("%s KiB", kbytes);
00731 
00732     return sizeitoa(kbytes, 1024, rounding_method);
00733 }
00734 
00735 WvString secondstoa(unsigned int total_seconds)
00736 {
00737     WvString result("");
00738 
00739     unsigned int days = total_seconds / (3600*24);
00740     total_seconds %= (3600*24);
00741     unsigned int hours = total_seconds / 3600;
00742     total_seconds %= 3600;
00743     unsigned int mins = total_seconds / 60;
00744     unsigned int secs = total_seconds % 60; 
00745 
00746     int num_elements = (days > 0) + (hours > 0) + (mins > 0);
00747 
00748     if (days > 0)
00749     {
00750         result.append(days);
00751         result.append(days > 1 ? " days" : " day");
00752         num_elements--;
00753         if (num_elements > 1)
00754             result.append(", ");
00755         else if (num_elements == 1)
00756             result.append(" and ");
00757     }
00758     if (hours > 0)
00759     {
00760         result.append(hours);
00761         result.append(hours > 1 ? " hours" : " hour");
00762         num_elements--;
00763         if (num_elements > 1)
00764             result.append(", ");
00765         else if (num_elements == 1)
00766             result.append(" and ");
00767     }
00768     if (mins > 0)
00769     {
00770         result.append(mins);
00771         result.append(mins > 1 ? " minutes" : " minute");
00772     }
00773     if (days == 0 && hours == 0 && mins == 0)
00774     {
00775         result.append(secs);
00776         result.append(secs != 1 ? " seconds" : " second");
00777     }
00778 
00779     return result;
00780 }
00781 
00782 WvString strreplace(WvStringParm s, WvStringParm a, WvStringParm b)
00783 {
00784     WvDynBuf buf;
00785     const char *sptr = s, *eptr;
00786     
00787     while ((eptr = strstr(sptr, a)) != NULL)
00788     {
00789         buf.put(sptr, eptr-sptr);
00790         buf.putstr(b);
00791         sptr = eptr + strlen(a);
00792     }
00793     
00794     buf.put(sptr, strlen(sptr));
00795     
00796     return buf.getstr();
00797 }
00798 
00799 WvString undupe(WvStringParm s, char c)
00800 {
00801     WvDynBuf out;
00802 
00803     bool last = false;
00804 
00805     for (int i = 0; s[i] != '\0'; i++)
00806     {
00807         if (s[i] != c)
00808         {
00809             out.putch(s[i]);
00810             last = false;
00811         }
00812         else if (!last)
00813         {
00814             out.putch(c);
00815             last = true;
00816         }
00817     }
00818     
00819     return out.getstr();
00820 }
00821 
00822 
00823 WvString rfc1123_date(time_t t)
00824 {
00825     struct tm *tm = gmtime(&t);
00826     WvString s;
00827 
00828     s.setsize(128);
00829     strftime(s.edit(), 128, "%a, %d %b %Y %H:%M:%S GMT", tm);
00830 
00831     return s;
00832 }
00833 
00834 
// Searches the NULL-terminated array 'table' for an entry equal to 'str'
// and returns the index of the first match, or -1 if there is none.
// Comparison uses strcmp() when 'case_sensitive' is true and strcasecmp()
// otherwise.
int lookup(const char *str, const char * const *table, bool case_sensitive)
{
    int index = 0;

    for (const char * const *entry = table; *entry; ++entry, ++index)
    {
        const int diff = case_sensitive
            ? strcmp(str, *entry)
            : strcasecmp(str, *entry);

        if (diff == 0)
            return index;
    }

    return -1;
}
00853 
00854 
00855 WvString hostname()
00856 {
00857     int maxlen = 0;
00858     for (;;)
00859     {
00860         maxlen += 80;
00861         char *name = new char[maxlen];
00862         int result = gethostname(name, maxlen);
00863         if (result == 0)
00864         {
00865             WvString hostname(name);
00866             deletev name;
00867             return hostname;
00868         }
00869 #ifdef _WIN32
00870         assert(errno == WSAEFAULT);
00871 #else
00872         assert(errno == EINVAL);
00873 #endif
00874     }
00875 }
00876 
00877 
00878 WvString fqdomainname() 
00879 {
00880     struct hostent *myhost;
00881 
00882     myhost = gethostbyname(hostname());
00883     if (myhost)
00884         return myhost->h_name;
00885     else
00886         return WvString::null;
00887 }
00888 
00889 
00890 WvString wvgetcwd()
00891 {
00892     int maxlen = 0;
00893     for (;;)
00894     {
00895         maxlen += 80;
00896         char *name = new char[maxlen];
00897         char *res = getcwd(name, maxlen);
00898         if (res)
00899         {
00900             WvString s(name);
00901             deletev name;
00902             return s;
00903         }
00904         if (errno == EACCES || errno == ENOENT)
00905             return "."; // can't deal with those errors
00906         assert(errno == ERANGE); // buffer too small
00907     }
00908 }
00909 
00910 
00911 WvString metriculate(const off_t i)
00912 {
00913     WvString res;
00914     int digits=0;
00915     int digit=0;
00916     long long int j=i;
00917     char *p;
00918 
00919     while (j)
00920     {
00921         digits++;
00922         j/=10;
00923     }
00924 
00925     j=i;
00926     // setsize says it takes care of the terminating NULL char
00927     res.setsize(digits + ((digits - 1) / 3) + ((j < 0) ? 1 : 0));
00928     p = res.edit();
00929     if (j < 0)
00930     {
00931         *p++ = '-';
00932         j = -j;
00933     }
00934 
00935     p += digits + ((digits - 1) / 3);
00936     *p-- = '\0';
00937 
00938     for (digit=0; digit<digits; digit++)
00939     {
00940         *p-- = '0' + ( j%10 );
00941         if (((digit+1) % 3) == 0 && digit < digits - 1)
00942             *p-- = ' ';
00943         j /= 10;
00944     }
00945 
00946     return res;
00947 }
00948 
00949 
00950 WvString afterstr(WvStringParm line, WvStringParm a)
00951 {
00952     if (!line || !a)
00953         return WvString::null;
00954 
00955     char *loc = strstr(line, a);
00956     if (loc == 0)
00957         return "";
00958 
00959     loc += a.len();
00960     WvString ret = loc;
00961     ret.unique();
00962     return ret;
00963 }
00964 
00965 
00966 WvString beforestr(WvStringParm line, WvStringParm a)
00967 {
00968     if (!line || !a)
00969         return WvString::null;
00970 
00971     WvString ret = line;
00972     ret.unique();     
00973     char *loc = strstr(ret, a);
00974 
00975     if (loc == 0)
00976         return line;
00977 
00978     loc[0] = '\0';
00979     return ret;
00980 }
00981 
00982 
00983 WvString substr(WvString line, unsigned int pos, unsigned int len)
00984 {
00985     const char *tmp = line.cstr();
00986     if (pos > line.len()-1)
00987         return "";
00988     tmp += pos;
00989 
00990     WvString ret = tmp;
00991     char *tmp2 = ret.edit();
00992     if (pos + len < line.len())
00993         tmp2[len] = '\0';
00994 
00995     return ret;
00996 }
00997 
// Table of extra escapes: '{' is paired with the text "\<" and '}' with
// "\>".  The { 0, NULL } entry marks the end of the table.
// NOTE(review): the name suggests these are for Tcl-style strings; the code
// that consumes this table is not visible here -- confirm.
const CStrExtraEscape CSTR_TCLSTR_ESCAPES[3] =
{
    { '{', "\\<" },
    { '}', "\\>" },
    { 0, NULL }
};
01004 
// Returns the text used to represent the byte 'ch' inside a C-style string
// literal.  The result points into a static table indexed by the byte
// value:
//   - NUL is "\0"; BEL, BS, TAB, LF, VT and CR use their letter escapes
//     (\a \b \t \n \v \r);
//   - double quote and backslash are backslash-escaped;
//   - the remaining control characters, DEL and all bytes >= 0x80 are
//     written as \xNN with two upper-case hex digits;
//   - every other printable ASCII character stands for itself.
static inline const char *cstr_escape_char(char ch)
{
    static const char *xlat[256] =
    {
        "\\0", "\\x01", "\\x02", "\\x03", "\\x04", "\\x05", "\\x06", "\\a", 
        "\\b", "\\t", "\\n", "\\v", "\\x0C", "\\r", "\\x0E", "\\x0F", 
        "\\x10", "\\x11", "\\x12", "\\x13", "\\x14", "\\x15", "\\x16", "\\x17", 
        "\\x18", "\\x19", "\\x1A", "\\x1B", "\\x1C", "\\x1D", "\\x1E", "\\x1F", 
        " ", "!", "\\\"", "#", "$", "%", "&", "'", 
        "(", ")", "*", "+", ",", "-", ".", "/", 
        "0", "1", "2", "3", "4", "5", "6", "7", 
        "8", "9", ":", ";", "<", "=", ">", "?", 
        "@", "A", "B", "C", "D", "E", "F", "G", 
        "H", "I", "J", "K", "L", "M", "N", "O", 
        "P", "Q", "R", "S", "T", "U", "V", "W", 
        "X", "Y", "Z", "[", "\\\\", "]", "^", "_", 
        "`", "a", "b", "c", "d", "e", "f", "g", 
        "h", "i", "j", "k", "l", "m", "n", "o", 
        "p", "q", "r", "s", "t", "u", "v", "w", 
        "x", "y", "z", "{", "|", "}", "~", "\\x7F", 
        "\\x80", "\\x81", "\\x82", "\\x83", "\\x84", "\\x85", "\\x86", "\\x87", 
        "\\x88", "\\x89", "\\x8A", "\\x8B", "\\x8C", "\\x8D", "\\x8E", "\\x8F", 
        "\\x90", "\\x91", "\\x92", "\\x93", "\\x94", "\\x95", "\\x96", "\\x97", 
        "\\x98", "\\x99", "\\x9A", "\\x9B", "\\x9C", "\\x9D", "\\x9E", "\\x9F", 
        "\\xA0", "\\xA1", "\\xA2", "\\xA3", "\\xA4", "\\xA5", "\\xA6", "\\xA7", 
        "\\xA8", "\\xA9", "\\xAA", "\\xAB", "\\xAC", "\\xAD", "\\xAE", "\\xAF", 
        "\\xB0", "\\xB1", "\\xB2", "\\xB3", "\\xB4", "\\xB5", "\\xB6", "\\xB7", 
        "\\xB8", "\\xB9", "\\xBA", "\\xBB", "\\xBC", "\\xBD", "\\xBE", "\\xBF", 
        "\\xC0", "\\xC1", "\\xC2", "\\xC3", "\\xC4", "\\xC5", "\\xC6", "\\xC7", 
        "\\xC8", "\\xC9", "\\xCA", "\\xCB", "\\xCC", "\\xCD", "\\xCE", "\\xCF", 
        "\\xD0", "\\xD1", "\\xD2", "\\xD3", "\\xD4", "\\xD5", "\\xD6", "\\xD7", 
        "\\xD8", "\\xD9", "\\xDA", "\\xDB", "\\xDC", "\\xDD", "\\xDE", "\\xDF", 
        "\\xE0", "\\xE1", "\\xE2", "\\xE3", "\\xE4", "\\xE5", "\\xE6", "\\xE7", 
        "\\xE8", "\\xE9", "\\xEA", "\\xEB", "\\xEC", "\\xED", "\\xEE", "\\xEF", 
        "\\xF0", "\\xF1", "\\xF2", "\\xF3", "\\xF4", "\\xF5", "\\xF6", "\\xF7", 
        "\\xF8", "\\xF9", "\\xFA", "\\xFB", "\\xFC", "\\xFD", "\\xFE", "\\xFF"
    };
    // Index as unsigned so bytes >= 0x80 do not become negative offsets.
    return xlat[(unsigned char)ch];
}
01044 
// Returns the numeric value (0-15) of the hexadecimal digit 'ch', accepting
// '0'-'9', 'A'-'F' and 'a'-'f'; returns -1 for any other character.
// Relies on the ASCII layout, where each of those three ranges is
// contiguous.
static inline int hex_digit_val(char ch)
{
    const unsigned char u = (unsigned char)ch;

    if (u >= '0' && u <= '9')
        return u - '0';
    if (u >= 'A' && u <= 'F')
        return u - 'A' + 10;
    if (u >= 'a' && u <= 'f')
        return u - 'a' + 10;

    return -1;
}
01084 
01085 static inline bool cstr_unescape_char(const char *&cstr, char &ch)
01086 {
01087     if (*cstr == '\\')
01088     {
01089         ++cstr;
01090     
01091         switch (*cstr)
01092         {
01093             case '"': ch = '"'; break;
01094             case 't': ch = '\t'; break;
01095             case 'n': ch = '\n'; break;
01096             case '\\': ch = '\\'; break;
01097             case 'r': ch = '\r'; break;
01098             case 'a': ch = '\a'; break;
01099             case 'v': ch = '\v'; break;
01100             case 'b': ch = '\b'; break;
01101             case '0': ch = '\0'; break;
01102             case 'x':
01103             {
01104                 int vals[2];
01105                 int i;
01106                 for (i=0; i<2; ++i)
01107                 {
01108                     if ((vals[i] = hex_digit_val(*++cstr)) == -1)
01109                         return false;
01110                 }
01111                 ch = (vals[0] << 4) | vals[1];
01112             }
01113             break;
01114             default: return false;
01115         }
01116         
01117         ++cstr;
01118         
01119         return true;
01120     }
01121     else
01122     {
01123         ch = *cstr++;
01124         return true;
01125     }
01126 }
01127 
01128 WvString cstr_escape(const void *data, size_t size,
01129         const CStrExtraEscape extra_escapes[])
01130 {
01131     if (!data) return WvString::null;
01132 
01133     const char *cdata = (const char *)data;
01134 
01135     WvString result;
01136     result.setsize(4*size + 3); // We could do better but it would slow us down
01137     char *cstr = result.edit();
01138     
01139     *cstr++ = '\"';
01140     while (size-- > 0)
01141     {
01142         const char *esc = NULL;
01143         if (extra_escapes)
01144         {
01145             const CStrExtraEscape *extra = &extra_escapes[0];
01146             while (extra->ch && extra->esc)
01147             {
01148                 if (*cdata == extra->ch)
01149                 {
01150                     esc = extra->esc;
01151                     break;
01152                 }
01153                 
01154                 ++extra;
01155             }
01156         }
01157         if (!esc) esc = cstr_escape_char(*cdata);
01158         ++cdata;
01159         while (*esc) *cstr++ = *esc++;
01160     }
01161     *cstr++ = '\"';
01162     *cstr = '\0';
01163     
01164     return result;
01165 }
01166 
01167 bool cstr_unescape(WvStringParm cstr, void *data, size_t max_size, size_t &size,
01168         const CStrExtraEscape extra_escapes[])
01169 {
01170     const char *q = cstr;
01171     char *cdata = (char *)data;
01172     
01173     if (!q) goto misformatted;
01174     size = 0;
01175     
01176     for (;;)
01177     {
01178         while (isspace(*q)) q++;
01179         if (*q == '\0') break;
01180 
01181         if (*q++ != '\"') goto misformatted;
01182         while (*q && *q != '\"')
01183         {
01184             bool found = false;
01185             char unesc;
01186             if (extra_escapes)
01187             {
01188                 const CStrExtraEscape *extra = &extra_escapes[0];
01189                 while (extra->ch && extra->esc)
01190                 {
01191                     size_t len = strlen(extra->esc);
01192                     if (strncmp(extra->esc, q, len) == 0)
01193                     {
01194                         unesc = extra->ch;
01195                         q += len;
01196                         found = true;
01197                         break;
01198                     }
01199                     
01200                     ++extra;
01201                 }
01202             }
01203             if (!found && !cstr_unescape_char(q, unesc)) goto misformatted;
01204             if (size++ < max_size && cdata) *cdata++ = unesc;
01205         }
01206         if (*q++ != '\"') goto misformatted;
01207     }
01208     
01209     return size <= max_size;
01210 
01211 misformatted:
01212 
01213     size = 0;
01214     return false;
01215 }
01216 
01217 WvString local_date(time_t when)
01218 {
01219     WvString out;
01220     out.setsize(80);
01221 
01222     if (when < 0)
01223         when = time(NULL);
01224 
01225     struct tm *tmwhen = localtime(&when);
01226     strftime(out.edit(), 80, "%b %d %I:%M:%S %p", tmwhen);
01227 
01228     return out;
01229 }
01230 
01231 WvString intl_time(time_t when)
01232 {
01233     WvString out;
01234     out.setsize(12);
01235 
01236     if (when < 0)
01237         when = time(NULL);
01238 
01239     struct tm *tmwhen = localtime(&when); 
01240     strftime(out.edit(), 12, "%H:%M:%S", tmwhen);
01241 
01242     return out;
01243 }
01244 
01245 WvString intl_date(time_t when)
01246 {
01247     WvString out;
01248     out.setsize(16);
01249 
01250     if (when < 0)
01251         when = time(NULL);
01252 
01253     struct tm *tmwhen = localtime(&when); 
01254     strftime(out.edit(), 16, "%Y-%m-%d", tmwhen);
01255 
01256     return out;
01257 }
01258 
01259 WvString intl_datetime(time_t when)
01260 {
01261     WvString out;
01262     out.setsize(24);
01263 
01264     if (when < 0)
01265         when = time(NULL);
01266 
01267     struct tm *tmwhen = localtime(&when); 
01268     strftime(out.edit(), 24, "%Y-%m-%d %H:%M:%S", tmwhen);
01269 
01270     return out;
01271 }
01272 
01273 
// Returns the difference, in seconds, between local time and GMT at
// instant t, computed by running both broken-down representations back
// through mktime() and subtracting.
time_t intl_gmtoff(time_t t)
{
    // Local wall-clock fields, with the DST flag cleared before the
    // round trip through mktime().
    struct tm *fields = localtime(&t);
    fields->tm_isdst = 0;
    const time_t from_local = mktime(fields);

    // The same instant expressed as GMT fields, then read back by mktime().
    fields = gmtime(&t);
    const time_t from_gmt = mktime(fields);

    return from_local - from_gmt;
}
01288 
01289 
01290 // Removes any trailing punctuation ('.', '?', or '!') from the line
01291 WvString depunctuate(WvStringParm line)
01292 {
01293     WvString ret = line;
01294     char * edit = ret.edit();
01295     int last = ret.len() - 1;
01296     if (edit[last] == '.' || edit[last] == '?' || edit[last] == '!')
01297         edit[last] = '\0';
01298 
01299     return ret;
01300 }
01301 
01302 
01303 WvString ptr2str(void* ptr)
01304 {
01305     char buf[(sizeof(ptr) * 2) + 3];
01306     int rv;
01307 
01308     rv = snprintf(buf, sizeof(buf), "%p", ptr);
01309 
01310     assert(rv != -1);
01311 
01312     return buf;
01313 }
01314 
01315 
01316 // Reads the contents of a symlink.  Returns WvString::null on error.
01317 WvString wvreadlink(WvStringParm path)
01318 {
01319 #ifdef _WIN32
01320     return WvString::null; // no such thing as a symlink on Windows
01321 #else
01322     WvString result;
01323     int size = 64;
01324     for (;;)
01325     {
01326         result.setsize(size);
01327         int readlink_result = readlink(path, result.edit(), size);
01328         if (readlink_result == -1)
01329             return WvString::null;
01330         if (readlink_result < size)
01331         {
01332             result.edit()[readlink_result] = '\0';
01333             break;
01334         }
01335         size = 2*size; // increase buffer size
01336     }
01337     return result;
01338 #endif
01339 }
01340 

Generated on Thu Jan 24 16:50:54 2008 for WvStreams by  doxygen 1.5.4