00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 using namespace std;
00012
00013
00014 #include <set>
00015 #include <vector>
00016 #include <string>
00017
00018 extern "C" {
00019 #include "define.h"
00020 #include "lzfu.h"
00021 }
00022
// ---- forward declarations of the functions defined later in this file ----

// command line help and version banner
void usage(void);
void version(void);

// replaces path separator characters in fname, in place
char *check_filename(char *fname);

// LDIF output helpers
void print_ldif_single(const char *attr, const char *value);
void print_ldif_address(const char *attr, int nvalues, char *value, ...);
void print_ldif_dn(const char *attr, const char *value, const char *base);
void print_ldif_multi(const char *dn, const char *value);
void print_ldif_two(const char *attr, const char *value1, const char *value2);
void print_escaped_dn(const char *value);

// joins up to nvalues name parts into the caller supplied buffer cn
void build_cn(char *cn, size_t len, int nvalues, char *value, ...);
00033
00034 char *prog_name;
00035 pst_file pstfile;
00036 bool old_schema = false;
00037 char *ldap_base = NULL;
00038 int ldif_extra_line_count = 0;
00039 iconv_t cd = 0;
00040 vector<string> ldap_class;
00041 vector<string> ldif_extra_line;
00042
00043
00045
/**
 * Ordering predicate for C strings that ignores case, so that two
 * spellings differing only in case are treated as the same set key.
 */
struct ltstr {
    bool operator()(const char* s1, const char* s2) const {
        const int order = strcasecmp(s1, s2);
        return order < 0;
    }
};

/** Set of C string pointers, ordered and compared without regard to case. */
typedef std::set<const char *, ltstr> string_set;

/** Every common name handed out so far; the strings are owned by this set. */
static string_set all_strings;
00055
00056
00058
00059
00060 static void free_strings(string_set &s);
00061 static void free_strings(string_set &s)
00062 {
00063 for (string_set::iterator i=s.begin(); i!=s.end(); i++) {
00064 free((void*)*i);
00065 }
00066 s.clear();
00067 }
00068
00069
00071
00072
00073 static const char* register_string(string_set &s, const char *name);
00074 static const char* register_string(string_set &s, const char *name) {
00075 string_set::const_iterator i = s.find(name);
00076 if (i != s.end()) return *i;
00077 char *x = strdup(name);
00078 s.insert(x);
00079 return x;
00080 }
00081
00082
00084
00085
00086 static const char* register_string(const char *name);
00087 static const char* register_string(const char *name) {
00088 return register_string(all_strings, name);
00089 }
00090
00091
00093
00094
00095 static const char* unique_string(const char *name);
00096 static const char* unique_string(const char *name) {
00097 int unique = 2;
00098 string_set::iterator i = all_strings.find(name);
00099 if (i == all_strings.end()) return register_string(name);
00100 while (true) {
00101 vector<char> n(strlen(name)+10);
00102 snprintf(&n[0], n.size(), "%s %d", name, unique++);
00103 string_set::iterator i = all_strings.find(&n[0]);
00104 if (i == all_strings.end()) return register_string(&n[0]);
00105 }
00106 }
00107
00108
00109 static void process(pst_desc_ll *d_ptr);
00110 static void process(pst_desc_ll *d_ptr) {
00111 pst_item *item = NULL;
00112 while (d_ptr) {
00113 if (d_ptr->desc) {
00114 item = pst_parse_item(&pstfile, d_ptr);
00115 DEBUG_INFO(("item pointer is %p\n", item));
00116 if (item) {
00117 if (item->folder && d_ptr->child && strcasecmp(item->file_as, "Deleted Items")) {
00118
00119 fprintf(stderr, "entering folder %s\n", item->file_as);
00120 process(d_ptr->child);
00121
00122 } else if (item->contact && (item->type == PST_TYPE_CONTACT)) {
00123
00124 char cn[1000];
00125
00126 build_cn(cn, sizeof(cn), 4,
00127 item->contact->display_name_prefix,
00128 item->contact->first_name,
00129 item->contact->surname,
00130 item->contact->suffix);
00131 if (cn[0] != 0) {
00132
00133 const char *ucn = unique_string(cn);
00134
00135 print_ldif_dn("dn", ucn, ldap_base);
00136 print_ldif_single("cn", ucn);
00137 if (item->contact->first_name) {
00138 print_ldif_two("givenName",
00139 item->contact->display_name_prefix,
00140 item->contact->first_name);
00141 }
00142 if (item->contact->surname) {
00143 print_ldif_two("sn",
00144 item->contact->surname,
00145 item->contact->suffix);
00146 }
00147 else if (item->contact->company_name) {
00148 print_ldif_single("sn", item->contact->company_name);
00149 }
00150 else
00151 print_ldif_single("sn", ucn);
00152
00153 if (old_schema) {
00154 if (item->contact->job_title)
00155 print_ldif_single("personalTitle", item->contact->job_title);
00156 if (item->contact->company_name)
00157 print_ldif_single("company", item->contact->company_name);
00158 }
00159 else {
00160
00161 if (item->contact->job_title)
00162 print_ldif_single("title", item->contact->job_title);
00163 if (item->contact->company_name)
00164 print_ldif_single("o", item->contact->company_name);
00165 }
00166 if (item->contact->address1 && *item->contact->address1)
00167 print_ldif_single("mail", item->contact->address1);
00168 if (item->contact->address2 && *item->contact->address2)
00169 print_ldif_single("mail", item->contact->address2);
00170 if (item->contact->address3 && *item->contact->address3)
00171 print_ldif_single("mail", item->contact->address3);
00172 if (item->contact->address1a && *item->contact->address1a)
00173 print_ldif_single("mail", item->contact->address1a);
00174 if (item->contact->address2a && *item->contact->address2a)
00175 print_ldif_single("mail", item->contact->address2a);
00176 if (item->contact->address3a && *item->contact->address3a)
00177 print_ldif_single("mail", item->contact->address3a);
00178
00179 if (old_schema) {
00180 if (item->contact->business_address) {
00181 if (item->contact->business_po_box)
00182 print_ldif_single("postalAddress", item->contact->business_po_box);
00183 if (item->contact->business_street)
00184 print_ldif_multi("postalAddress", item->contact->business_street);
00185 if (item->contact->business_city)
00186 print_ldif_single("l", item->contact->business_city);
00187 if (item->contact->business_state)
00188 print_ldif_single("st", item->contact->business_state);
00189 if (item->contact->business_postal_code)
00190 print_ldif_single("postalCode", item->contact->business_postal_code);
00191 }
00192 else if (item->contact->home_address) {
00193 if (item->contact->home_po_box)
00194 print_ldif_single("postalAddress", item->contact->home_po_box);
00195 if (item->contact->home_street)
00196 print_ldif_multi("postalAddress", item->contact->home_street);
00197 if (item->contact->home_city)
00198 print_ldif_single("l", item->contact->home_city);
00199 if (item->contact->home_state)
00200 print_ldif_single("st", item->contact->home_state);
00201 if (item->contact->home_postal_code)
00202 print_ldif_single("postalCode", item->contact->home_postal_code);
00203 }
00204 else if (item->contact->other_address) {
00205 if (item->contact->other_po_box)
00206 print_ldif_single("postalAddress", item->contact->other_po_box);
00207 if (item->contact->other_street)
00208 print_ldif_multi("postalAddress", item->contact->other_street);
00209 if (item->contact->other_city)
00210 print_ldif_single("l", item->contact->other_city);
00211 if (item->contact->other_state)
00212 print_ldif_single("st", item->contact->other_state);
00213 if (item->contact->other_postal_code)
00214 print_ldif_single("postalCode", item->contact->other_postal_code);
00215 }
00216 }
00217 else {
00218
00219 if (item->contact->business_address) {
00220 print_ldif_address("postalAddress", 6,
00221 item->contact->business_po_box,
00222 item->contact->business_street,
00223 item->contact->business_city,
00224 item->contact->business_state,
00225 item->contact->business_postal_code,
00226 item->contact->business_country);
00227 if (item->contact->business_city)
00228 print_ldif_single("l", item->contact->business_city);
00229 if (item->contact->business_state)
00230 print_ldif_single("st", item->contact->business_state);
00231 if (item->contact->business_postal_code)
00232 print_ldif_single("postalCode", item->contact->business_postal_code);
00233 }
00234 else if (item->contact->home_address) {
00235 if (item->contact->home_city)
00236 print_ldif_single("l", item->contact->home_city);
00237 if (item->contact->home_state)
00238 print_ldif_single("st", item->contact->home_state);
00239 if (item->contact->home_postal_code)
00240 print_ldif_single("postalCode", item->contact->home_postal_code);
00241 }
00242 else if (item->contact->other_address) {
00243 print_ldif_address("postalAddress", 6,
00244 item->contact->other_po_box,
00245 item->contact->other_street,
00246 item->contact->other_city,
00247 item->contact->other_state,
00248 item->contact->other_postal_code,
00249 item->contact->other_country);
00250 if (item->contact->other_city)
00251 print_ldif_single("l", item->contact->other_city);
00252 if (item->contact->other_state)
00253 print_ldif_single("st", item->contact->other_state);
00254 if (item->contact->other_postal_code)
00255 print_ldif_single("postalCode", item->contact->other_postal_code);
00256 }
00257 if (item->contact->home_address) {
00258 print_ldif_address("homePostalAddress", 6,
00259 item->contact->home_po_box,
00260 item->contact->home_street,
00261 item->contact->home_city,
00262 item->contact->home_state,
00263 item->contact->home_postal_code,
00264 item->contact->home_country);
00265 }
00266 }
00267
00268 if (item->contact->business_fax)
00269 print_ldif_single("facsimileTelephoneNumber", item->contact->business_fax);
00270 else if (item->contact->home_fax)
00271 print_ldif_single("facsimileTelephoneNumber", item->contact->home_fax);
00272
00273 if (item->contact->business_phone)
00274 print_ldif_single("telephoneNumber", item->contact->business_phone);
00275 if (item->contact->home_phone)
00276 print_ldif_single("homePhone", item->contact->home_phone);
00277
00278 if (item->contact->car_phone)
00279 print_ldif_single("mobile", item->contact->car_phone);
00280 else if (item->contact->mobile_phone)
00281 print_ldif_single("mobile", item->contact->mobile_phone);
00282 else if (item->contact->other_phone)
00283 print_ldif_single("mobile", item->contact->other_phone);
00284
00285 if (!old_schema) {
00286 if (item->contact->business_homepage)
00287 print_ldif_single("labeledURI", item->contact->business_homepage);
00288 if (item->contact->personal_homepage)
00289 print_ldif_single("labeledURI", item->contact->personal_homepage);
00290 }
00291
00292 if (item->comment)
00293 print_ldif_single("description", item->comment);
00294
00295 for (vector<string>::size_type i=0; i<ldap_class.size(); i++)
00296 print_ldif_single("objectClass", ldap_class[i].c_str());
00297 printf("\n");
00298 }
00299 }
00300 else {
00301 DEBUG_INFO(("item is not a contact\n"));
00302 }
00303 }
00304 pst_freeItem(item);
00305 }
00306 d_ptr = d_ptr->next;
00307 }
00308 }
00309
00310
00311
00312
00313
00314 void print_ldif_single(const char *attr, const char *value)
00315 {
00316 size_t len;
00317 bool is_safe_string = true;
00318 bool needs_code_conversion = false;
00319 bool space_flag = false;
00320
00321
00322 while (*value == ' ') value++;
00323 len = strlen(value) + 1;
00324 vector<char> buffer(len);
00325 char *p = &buffer[0];
00326
00327
00328
00329 if (*value == ':' || *value == '<')
00330 is_safe_string = false;
00331 for (;;) {
00332 char ch = *value++;
00333
00334 if (ch == 0 || ch == '\n')
00335 break;
00336 else if (ch == '\r')
00337 continue;
00338 else if (ch == ' ') {
00339 space_flag = true;
00340 continue;
00341 }
00342 else {
00343 if ((ch & 0x80) == 0x80) {
00344 needs_code_conversion = true;
00345 is_safe_string = false;
00346 }
00347 if (space_flag) {
00348 *p++ = ' ';
00349 space_flag = false;
00350 }
00351 *p++ = ch;
00352 }
00353 }
00354 *p = 0;
00355 if (is_safe_string) {
00356 printf("%s: %s\n", attr, &buffer[0]);
00357 return;
00358 }
00359
00360 if (needs_code_conversion && cd != 0) {
00361 size_t inlen = p - &buffer[0];
00362 size_t utf8_len = 2 * inlen + 1;
00363 vector<char> utf8_buffer(utf8_len);
00364 char *utf8_p = &utf8_buffer[0];
00365
00366 iconv(cd, NULL, NULL, NULL, NULL);
00367 p = &buffer[0];
00368 int ret = iconv(cd, (ICONV_CONST char**)&p, &inlen, &utf8_p, &utf8_len);
00369
00370 if (ret >= 0) {
00371 *utf8_p = 0;
00372 p = base64_encode(&utf8_buffer[0], utf8_p - &utf8_buffer[0]);
00373 }
00374 else
00375 p = base64_encode(&buffer[0], buffer.size());
00376 }
00377 else
00378 p = base64_encode(&buffer[0], buffer.size());
00379 printf("%s:: %s\n", attr, p);
00380 free(p);
00381 }
00382
00383
00384
00385
00386 void print_ldif_address(const char *attr, int nvalues, char *value, ...)
00387 {
00388 bool space_flag = false;
00389 bool newline_flag = false;
00390 char *address = NULL;
00391 int len = 0;
00392 int i = 0;
00393 va_list ap;
00394
00395 va_start(ap, value);
00396
00397 while (!value) {
00398 nvalues--;
00399 if (nvalues == 0) {
00400 va_end(ap);
00401 return;
00402 }
00403 value = va_arg(ap, char *);
00404 }
00405 for (;;) {
00406 char ch = *value++;
00407
00408 if (ch == 0 || ch == '\n') {
00409 do {
00410 value = NULL;
00411 nvalues--;
00412 if (nvalues == 0) break;
00413 value = va_arg(ap, char *);
00414 } while (!value);
00415 if (!value) break;
00416 space_flag = true;
00417 newline_flag = true;
00418 }
00419 else if (ch == '\r')
00420 continue;
00421 else if (ch == '\n') {
00422 newline_flag = true;
00423 continue;
00424 }
00425 else if (ch == ' ') {
00426 space_flag = true;
00427 continue;
00428 }
00429 else {
00430 if (i > (len-5)) {
00431 len += 256;
00432 address = (char *)realloc(address, len);
00433 }
00434 if (newline_flag) {
00435 address[i++] = '$';
00436 newline_flag = false;
00437 space_flag = false;
00438 }
00439 else if (space_flag) {
00440 address[i++] = ' ';
00441 space_flag = false;
00442 }
00443 if (ch == '$' || ch == '\\') address[i++] = '\\';
00444 address[i++] = ch;
00445 }
00446 }
00447 va_end(ap);
00448 if (i == 0) return;
00449 address[i] = 0;
00450 print_ldif_single(attr, address);
00451 free(address);
00452 }
00453
00454
00455 void print_ldif_multi(const char *dn, const char *value)
00456 {
00457 const char *n;
00458 while ((n = strchr(value, '\n'))) {
00459 print_ldif_single(dn, value);
00460 value = n + 1;
00461 }
00462 print_ldif_single(dn, value);
00463 }
00464
00465
00466 void print_ldif_two(const char *attr, const char *value1, const char *value2)
00467 {
00468 size_t len1, len2;
00469 if (value1 && *value1)
00470 len1 = strlen(value1);
00471 else {
00472 print_ldif_single(attr, value2);
00473 return;
00474 }
00475
00476 if (value2 && *value2)
00477 len2 = strlen(value2);
00478 else {
00479 print_ldif_single(attr, value1);
00480 return;
00481 }
00482
00483 vector<char> value(len1 + len2 + 2);
00484 memcpy(&value[0], value1, len1);
00485 value[len1] = ' ';
00486 memcpy(&value[0] + len1 + 1, value2, len2 + 1);
00487 print_ldif_single(attr, &value[0]);
00488 }
00489
00490
/**
 * Build a common name from several name parts.
 *
 * The variable arguments are nvalues "char *" parts, the first being
 * value. NULL parts are skipped. Only the first line of each part is
 * used, carriage returns are removed, and parts and words are separated
 * by exactly one space with none at either end. The result is cut short
 * when it does not fit and is always terminated. cn is left untouched
 * when it is NULL or len is 0, and is set to the empty string when
 * nvalues is not positive or every part is NULL.
 *
 * @param cn       output buffer
 * @param len      size of the output buffer in bytes
 * @param nvalues  number of parts passed, counting value
 * @param value    first part, may be NULL
 */
void build_cn(char *cn, size_t len, int nvalues, char *value, ...)
{
    // without room for the terminator nothing can be stored
    if (!cn || len == 0) return;
    cn[0] = 0;
    if (nvalues <= 0) return;

    bool space_flag = false;
    size_t i = 0;
    va_list ap;

    va_start(ap, value);

    // find the first part that is present
    while (!value) {
        if (--nvalues <= 0) {
            va_end(ap);
            return;
        }
        value = va_arg(ap, char *);
    }

    for (;;) {
        const char ch = *value++;

        if (ch == 0 || ch == '\n') {
            // end of this part (or of its first line), move on to the next part
            do {
                value = NULL;
                if (--nvalues <= 0) break;
                value = va_arg(ap, char *);
            } while (!value);
            if (!value) break;
            space_flag = true;
        }
        else if (ch == '\r') {
            continue;
        }
        else if (ch == ' ') {
            space_flag = true;
        }
        else {
            if (space_flag) {
                // a separator is only stored when the next character fits as well
                if (i > 0) {
                    if (i + 2 < len) cn[i++] = ' ';
                    else break;
                }
                space_flag = false;
            }
            if (i + 1 < len) cn[i++] = ch;
            else break;
        }
    }
    cn[i] = 0;
    va_end(ap);
}
00542
00543
00544 int main(int argc, char* const* argv) {
00545 pst_desc_ll *d_ptr;
00546 char *fname = NULL;
00547 int c;
00548 char *d_log = NULL;
00549 prog_name = argv[0];
00550 pst_item *item = NULL;
00551
00552 while ((c = getopt(argc, argv, "b:c:C:d:l:oVh"))!= -1) {
00553 switch (c) {
00554 case 'b':
00555 ldap_base = optarg;
00556 break;
00557 case 'c':
00558 ldap_class.push_back(string(optarg));
00559 break;
00560 case 'C':
00561 cd = iconv_open("UTF-8", optarg);
00562 if (cd == (iconv_t)(-1)) {
00563 fprintf(stderr, "I don't know character set \"%s\"!\n\n", optarg);
00564 fprintf(stderr, "Type: \"iconv --list\" to get list of known character sets\n");
00565 return 1;
00566 }
00567 break;
00568 case 'd':
00569 d_log = optarg;
00570 break;
00571 case 'h':
00572 usage();
00573 exit(0);
00574 break;
00575 case 'l':
00576 ldif_extra_line.push_back(string(optarg));
00577 break;
00578 case 'o':
00579 old_schema = true;
00580 break;
00581 case 'V':
00582 version();
00583 exit(0);
00584 break;
00585 default:
00586 usage();
00587 exit(1);
00588 break;
00589 }
00590 }
00591
00592 if ((argc > optind) && (ldap_base)) {
00593 fname = argv[optind];
00594 } else {
00595 usage();
00596 exit(2);
00597 }
00598
00599 #ifdef DEBUG_ALL
00600
00601 if (!d_log) d_log = "pst2ldif.log";
00602 #endif
00603 DEBUG_INIT(d_log);
00604 DEBUG_REGISTER_CLOSE();
00605 DEBUG_ENT("main");
00606 RET_DERROR(pst_open(&pstfile, fname), 1, ("Error opening File\n"));
00607 RET_DERROR(pst_load_index(&pstfile), 2, ("Index Error\n"));
00608
00609 pst_load_extended_attributes(&pstfile);
00610
00611 d_ptr = pstfile.d_head;
00612 item = (pst_item*)pst_parse_item(&pstfile, d_ptr);
00613 if (!item || !item->message_store) {
00614 DEBUG_RET();
00615 DIE(("main: Could not get root record\n"));
00616 }
00617
00618 d_ptr = pst_getTopOfFolders(&pstfile, item);
00619 if (!d_ptr) {
00620 DEBUG_RET();
00621 DIE(("Top of folders record not found. Cannot continue\n"));
00622 }
00623
00624 pst_freeItem(item);
00625
00626 if (old_schema && (strlen(ldap_base) > 2)) {
00627 char *ldap_org = strdup(ldap_base+2);
00628 char *temp = strchr(ldap_org, ',');
00629 if (temp) {
00630 *temp = '\0';
00631
00632 printf("dn: %s\n", ldap_base);
00633 printf("o: %s\n", ldap_org);
00634 printf("objectClass: organization\n\n");
00635 printf("dn: cn=root, %s\n", ldap_base);
00636 printf("cn: root\n");
00637 printf("sn: root\n");
00638 for (vector<string>::size_type i=0; i<ldap_class.size(); i++)
00639 print_ldif_single("objectClass", ldap_class[i].c_str());
00640 printf("\n");
00641 }
00642 }
00643
00644 process(d_ptr->child);
00645 pst_close(&pstfile);
00646 DEBUG_RET();
00647 free_strings(all_strings);
00648 if (cd) iconv_close(cd);
00649
00650 return 0;
00651 }
00652
00653
00654 void usage(void) {
00655 version();
00656 printf("Usage: %s [OPTIONS] {PST FILENAME}\n", prog_name);
00657 printf("OPTIONS:\n");
00658 printf("\t-V\t- Version. Display program version\n");
00659 printf("\t-C charset\t- assumed character set of non-ASCII characters\n");
00660 printf("\t-b ldapbase\t- set the LDAP base value\n");
00661 printf("\t-c class\t- set the class of the LDAP objects (may contain more than one)\n");
00662 printf("\t-d <filename>\t- Debug to file. This is a binary log. Use readpstlog to print it\n");
00663 printf("\t-h\t- Help. This screen\n");
00664 printf("\t-l line\t- extra line to insert in the LDIF file for each contact\n");
00665 printf("\t-o\t- use old schema, default is new schema\n");
00666 }
00667
00668
00669 void version(void) {
00670 printf("pst2ldif v%s\n", VERSION);
00671 #if BYTE_ORDER == BIG_ENDIAN
00672 printf("Big Endian implementation being used.\n");
00673 #elif BYTE_ORDER == LITTLE_ENDIAN
00674 printf("Little Endian implementation being used.\n");
00675 #else
00676 # error "Byte order not supported by this library"
00677 #endif
00678 #ifdef __GNUC__
00679 printf("GCC %d.%d : %s %s\n", __GNUC__, __GNUC_MINOR__, __DATE__, __TIME__);
00680 #endif
00681 }
00682
00683
/**
 * Replace every '/', '\' and ':' in fname with '_', in place.
 *
 * @param fname  string to modify, may be NULL
 * @return       fname
 */
char *check_filename(char *fname) {
    if (fname == NULL) {
        return fname;
    }
    for (char *cursor = fname; *cursor != '\0'; ++cursor) {
        switch (*cursor) {
            case '/':
            case '\\':
            case ':':
                *cursor = '_';
                break;
            default:
                break;
        }
    }
    return fname;
}
00695
00696
00697
00698 void print_ldif_dn(const char *attr, const char *value, const char *base)
00699 {
00700 printf("dn: cn=");
00701
00702 while (*value == ' ')
00703 value++;
00704
00705 print_escaped_dn(value);
00706 if (base && base[0]) {
00707 printf(", %s", base);
00708 }
00709 printf("\n");
00710 return;
00711 }
00712
00713
00714 void print_escaped_dn(const char *value)
00715 {
00716 char ch;
00717 bool needs_code_conversion = false;
00718 char *utf8_buffer = NULL;
00719
00720
00721 if (cd) {
00722 const char *p = value;
00723 while (*p) {
00724 if (*p++ & 0x80) {
00725 needs_code_conversion = true;
00726 break;
00727 }
00728 }
00729 }
00730
00731 if (needs_code_conversion) {
00732 size_t inlen = strlen(value);
00733 size_t utf8_len = 2 * inlen + 1;
00734 char *p = (char *)value;
00735 char *utf8_p = utf8_buffer;
00736
00737 utf8_buffer = (char *)malloc(utf8_len);
00738 utf8_p = utf8_buffer;
00739 iconv(cd, NULL, NULL, NULL, NULL);
00740 if (iconv(cd, (ICONV_CONST char**)&p, &inlen, &utf8_p, &utf8_len) >= 0) {
00741 *utf8_p = 0;
00742 value = utf8_buffer;
00743 }
00744 }
00745
00746
00747 if (*value == '#' || *value == ' ')
00748 putchar('\\');
00749
00750 while ((ch = *value++) != 0) {
00751 if (((ch & 0x80) != 0) || (ch <= 0x1F))
00752
00753 printf("\\%2.2X", ch & 0xFF);
00754 else switch (ch) {
00755 case '\\':
00756 case '"' :
00757 case '+' :
00758 case ',' :
00759 case ';' :
00760 case '<' :
00761 case '>' :
00762 putchar('\\');
00763
00764 default:
00765 putchar(ch);
00766 }
00767 }
00768 if (utf8_buffer) free((void *)utf8_buffer);
00769 return;
00770 }