Main Page | Namespace List | Alphabetical List | Data Structures | Directories | File List | Data Fields | Globals

pst2ldif.cpp

Go to the documentation of this file.
00001 /*
00002 
00003 Copyright (c) 2004 Carl Byington - 510 Software Group, released under
00004 the GPL version 2 or any later version at your choice available at
00005 http://www.fsf.org/licenses/gpl.txt
00006 
00007 Based on readpst.c by David Smith
00008 
00009 */
00010 
00011 using namespace std;
00012 
00013 // needed for std c++ collections
00014 #include <set>
00015 
00016 extern "C" {
00017     #include "define.h"
00018     #include "libstrfunc.h"
00019     #include "libpst.h"
00020     #include "common.h"
00021     #include "timeconv.h"
00022     #include "lzfu.h"
00023 }
00024 
00025 int32_t   usage();
00026 int32_t   version();
00027 char       *my_stristr(char *haystack, char *needle);
00028 char       *check_filename(char *fname);
00029 const char *single(char *str);
00030 const char *folded(char *str);
00031 void        multi(const char *fmt, char *str);
00032 char       *rfc2426_escape(char *str);
00033 int32_t     chr_count(char *str, char x);
00034 
00035 char *prog_name;
00036 pst_file pstfile;
00037 char *ldap_base  = NULL;    // 'o=some.domain.tld, c=US'
00038 char *ldap_class = NULL;    // 'newPerson'
00039 char *ldap_org   = NULL;    // 'o=some.domain.tld', computed from ldap_base
00040 
00041 
// Ordering predicate for C strings that ignores letter case, so two
// strings that differ only in case compare as equivalent keys.
struct ltstr {
    bool operator()(const char* lhs, const char* rhs) const {
        const int order = strcasecmp(lhs, rhs);
        return order < 0;
    }
};
// A set of C strings kept in the case-insensitive order defined above.
typedef std::set<const char *, ltstr>    string_set;
// The one file-wide set; it holds the cn values that have been handed out.
static string_set all_strings;
00053 
00054 
00056 // helper to free all the strings in a set
00057 //
00058 static void free_strings(string_set &s);
00059 static void free_strings(string_set &s)
00060 {
00061     for (string_set::iterator i=s.begin(); i!=s.end(); i++) {
00062         free((void*)*i);
00063     }
00064     s.clear();
00065 }
00066 
00067 
00069 // helper to register a string in a string set
00070 //
00071 static const char* register_string(string_set &s, const char *name);
00072 static const char* register_string(string_set &s, const char *name) {
00073     string_set::const_iterator i = s.find(name);
00074     if (i != s.end()) return *i;
00075     char *x = strdup(name);
00076     s.insert(x);
00077     return x;
00078 }
00079 
00081 // register a global string
00082 //
00083 static const char* register_string(const char *name);
00084 static const char* register_string(const char *name) {
00085     return register_string(all_strings, name);
00086 }
00087 
00088 
00090 // make a unique string
00091 //
00092 static const char* unique_string(const char *name);
00093 static const char* unique_string(const char *name) {
00094     int  unique = 2;
00095     string_set::iterator i = all_strings.find(name);
00096     if (i == all_strings.end()) return register_string(name);
00097     while (true) {
00098         char n[strlen(name)+10];
00099         snprintf(n, sizeof(n), "%s %d", name, unique++);
00100         string_set::iterator i = all_strings.find(n);
00101         if (i == all_strings.end()) return register_string(n);
00102     }
00103 }
00104 
00105 
// Remove leading and trailing blanks.
//
// Leading blanks are skipped by advancing the returned pointer; trailing
// blanks are overwritten with NUL in place, so the caller's buffer is
// modified. Returns a pointer into the original buffer.
//
// The scan for trailing blanks works with a one-past-the-end pointer, so
// an empty (or all blank) string never forms a pointer before the start
// of the buffer.
//
static char *trim(char *name);
static char *trim(char *name) {
    while (*name == ' ') name++;
    char *end = name + strlen(name);    // points at the terminating NUL
    while ((end > name) && (end[-1] == ' ')) *--end = '\0';
    return name;
}
00117 
00118 
00119 static void process(pst_desc_ll *d_ptr);
00120 static void process(pst_desc_ll *d_ptr) {
00121     pst_item *item = NULL;
00122     while (d_ptr) {
00123         if (d_ptr->desc) {
00124             item = pst_parse_item(&pstfile, d_ptr);
00125             DEBUG_INFO(("item pointer is %p\n", item));
00126             if (item) {
00127                 if (item->folder && d_ptr->child && strcasecmp(item->file_as, "Deleted Items")) {
00128                     //if this is a non-empty folder other than deleted items, we want to recurse into it
00129                     fprintf(stderr, "entering folder %s\n", item->file_as);
00130                     process(d_ptr->child);
00131 
00132                 } else if (item->contact && (item->type == PST_TYPE_CONTACT)) {
00133                     // deal with a contact
00134                     char cn[1000];
00135                     snprintf(cn, sizeof(cn), "%s %s %s %s",
00136                         single(item->contact->display_name_prefix),
00137                         single(item->contact->first_name),
00138                         single(item->contact->surname),
00139                         single(item->contact->suffix));
00140                     if (strcmp(cn, "   ")) {
00141                         // have a valid cn
00142                         const char *ucn = unique_string(folded(trim(cn)));
00143                         printf("dn: cn=%s, %s\n", ucn, ldap_base);
00144                         printf("cn: %s\n", ucn);
00145                         if (item->contact->first_name) {
00146                             snprintf(cn, sizeof(cn), "%s %s",
00147                                 single(item->contact->display_name_prefix),
00148                                 single(item->contact->first_name));
00149                             printf("givenName: %s\n", trim(cn));
00150                         }
00151                         if (item->contact->surname) {
00152                             snprintf(cn, sizeof(cn), "%s %s",
00153                                 single(item->contact->surname),
00154                                 single(item->contact->suffix));
00155                             printf("sn: %s\n", trim(cn));
00156                         }
00157                         else if (item->contact->company_name) {
00158                             printf("sn: %s\n", single(item->contact->company_name));
00159                         }
00160                         else
00161                             printf("sn: %s\n", ucn);    // use cn as sn if we cannot find something better
00162 
00163                         if (item->contact->job_title)
00164                             printf("personalTitle: %s\n", single(item->contact->job_title));
00165                         if (item->contact->company_name)
00166                             printf("company: %s\n", single(item->contact->company_name));
00167                         if (item->contact->address1  && *item->contact->address1)
00168                             printf("mail: %s\n", single(item->contact->address1));
00169                         if (item->contact->address2  && *item->contact->address2)
00170                             printf("mail: %s\n", single(item->contact->address2));
00171                         if (item->contact->address3  && *item->contact->address3)
00172                             printf("mail: %s\n", single(item->contact->address3));
00173                         if (item->contact->address1a && *item->contact->address1a)
00174                             printf("mail: %s\n", single(item->contact->address1a));
00175                         if (item->contact->address2a && *item->contact->address2a)
00176                             printf("mail: %s\n", single(item->contact->address2a));
00177                         if (item->contact->address3a && *item->contact->address3a)
00178                             printf("mail: %s\n", single(item->contact->address3a));
00179                         if (item->contact->business_address) {
00180                             if (item->contact->business_po_box)
00181                                 printf("postalAddress: %s\n", single(item->contact->business_po_box));
00182                             if (item->contact->business_street)
00183                                 multi("postalAddress: %s\n", item->contact->business_street);
00184                             if (item->contact->business_city)
00185                                 printf("l: %s\n", single(item->contact->business_city));
00186                             if (item->contact->business_state)
00187                                 printf("st: %s\n", single(item->contact->business_state));
00188                             if (item->contact->business_postal_code)
00189                                 printf("postalCode: %s\n", single(item->contact->business_postal_code));
00190                         }
00191                         else if (item->contact->home_address) {
00192                             if (item->contact->home_po_box)
00193                                 printf("postalAddress: %s\n", single(item->contact->home_po_box));
00194                             if (item->contact->home_street)
00195                                 multi("postalAddress: %s\n", item->contact->home_street);
00196                             if (item->contact->home_city)
00197                                 printf("l: %s\n", single(item->contact->home_city));
00198                             if (item->contact->home_state)
00199                                 printf("st: %s\n", single(item->contact->home_state));
00200                             if (item->contact->home_postal_code)
00201                                 printf("postalCode: %s\n", single(item->contact->home_postal_code));
00202                         }
00203                         else if (item->contact->other_address) {
00204                             if (item->contact->other_po_box)
00205                                 printf("postalAddress: %s\n", single(item->contact->other_po_box));
00206                             if (item->contact->other_street)
00207                                 multi("postalAddress: %s\n", item->contact->other_street);
00208                             if (item->contact->other_city)
00209                                 printf("l: %s\n", single(item->contact->other_city));
00210                             if (item->contact->other_state)
00211                                 printf("st: %s\n", single(item->contact->other_state));
00212                             if (item->contact->other_postal_code)
00213                                 printf("postalCode: %s\n", single(item->contact->other_postal_code));
00214                         }
00215                         if (item->contact->business_fax)
00216                             printf("facsimileTelephoneNumber: %s\n", single(item->contact->business_fax));
00217                         else if (item->contact->home_fax)
00218                             printf("facsimileTelephoneNumber: %s\n", single(item->contact->home_fax));
00219 
00220                         if (item->contact->business_phone)
00221                             printf("telephoneNumber: %s\n", single(item->contact->business_phone));
00222                         if (item->contact->home_phone)
00223                             printf("homePhone: %s\n", single(item->contact->home_phone));
00224 
00225                         if (item->contact->car_phone)
00226                             printf("mobile: %s\n", single(item->contact->car_phone));
00227                         else if (item->contact->mobile_phone)
00228                             printf("mobile: %s\n", single(item->contact->mobile_phone));
00229                         else if (item->contact->other_phone)
00230                             printf("mobile: %s\n", single(item->contact->other_phone));
00231 
00232 
00233                         if (item->comment)
00234                             printf("description: %s\n", single(item->comment));
00235 
00236                         printf("objectClass: %s\n\n", ldap_class);
00237                     }
00238                 }
00239                 else {
00240                     DEBUG_INFO(("item is not a contact\n"));
00241                 }
00242             }
00243             pst_freeItem(item);
00244         }
00245         d_ptr = d_ptr->next;
00246     }
00247 }
00248 
00249 
00250 int main(int argc, char** argv) {
00251     pst_desc_ll *d_ptr;
00252     char *fname = NULL;
00253     char *temp = NULL;        //temporary char pointer
00254     char c;
00255     char *d_log = NULL;
00256     prog_name = argv[0];
00257     pst_item *item = NULL;
00258 
00259     while ((c = getopt(argc, argv, "b:c:d:Vh"))!= -1) {
00260         switch (c) {
00261         case 'b':
00262             ldap_base = optarg;
00263             temp = strchr(ldap_base, ',');
00264             if (temp) {
00265                 *temp = '\0';
00266                 ldap_org = strdup(ldap_base+2); // assume first 2 chars are o=
00267                 *temp = ',';
00268             }
00269             break;
00270         case 'c':
00271             ldap_class = optarg;
00272             break;
00273         case 'd':
00274             d_log = optarg;
00275             break;
00276         case 'h':
00277             usage();
00278             exit(0);
00279             break;
00280         case 'V':
00281             version();
00282             exit(0);
00283             break;
00284         default:
00285             usage();
00286             exit(1);
00287             break;
00288         }
00289     }
00290 
00291     if ((argc > optind) && (ldap_base) && (ldap_class) && (ldap_org)) {
00292         fname = argv[optind];
00293     } else {
00294         usage();
00295         exit(2);
00296     }
00297 
00298     #ifdef DEBUG_ALL
00299         // force a log file
00300         if (!d_log) d_log = "pst2ldif.log";
00301     #endif
00302     DEBUG_INIT(d_log);
00303     DEBUG_REGISTER_CLOSE();
00304     DEBUG_ENT("main");
00305     RET_DERROR(pst_open(&pstfile, fname), 1, ("Error opening File\n"));
00306     RET_DERROR(pst_load_index(&pstfile), 2, ("Index Error\n"));
00307 
00308     pst_load_extended_attributes(&pstfile);
00309 
00310     d_ptr = pstfile.d_head; // first record is main record
00311     item  = (pst_item*)pst_parse_item(&pstfile, d_ptr);
00312     if (!item || !item->message_store) {
00313         DEBUG_RET();
00314         DIE(("main: Could not get root record\n"));
00315     }
00316 
00317     d_ptr = pst_getTopOfFolders(&pstfile, item);
00318     if (!d_ptr) {
00319         DEBUG_RET();
00320         DIE(("Top of folders record not found. Cannot continue\n"));
00321     }
00322 
00323     pst_freeItem(item);
00324 
00325     // write the ldap header
00326     printf("dn: %s\n", ldap_base);
00327     printf("o: %s\n", ldap_org);
00328     printf("objectClass: organization\n\n");
00329     printf("dn: cn=root, %s\n", ldap_base);
00330     printf("cn: root\n");
00331     printf("objectClass: %s\n\n", ldap_class);
00332 
00333     process(d_ptr->child);  // do the children of TOPF
00334     pst_close(&pstfile);
00335     DEBUG_RET();
00336     free_strings(all_strings);
00337     return 0;
00338 }
00339 
00340 
00341 int usage() {
00342     version();
00343     printf("Usage: %s [OPTIONS] {PST FILENAME}\n", prog_name);
00344     printf("OPTIONS:\n");
00345     printf("\t-h\t- Help. This screen\n");
00346     printf("\t-V\t- Version. Display program version\n");
00347     printf("\t-b ldapbase\t- set the ldap base value\n");
00348     printf("\t-c class   \t- set the class of the ldap objects\n");
00349     return 0;
00350 }
00351 
00352 
00353 int version() {
00354     printf("pst2ldif v%s\n", VERSION);
00355 #if BYTE_ORDER == BIG_ENDIAN
00356     printf("Big Endian implementation being used.\n");
00357 #elif BYTE_ORDER == LITTLE_ENDIAN
00358     printf("Little Endian implementation being used.\n");
00359 #else
00360 #  error "Byte order not supported by this library"
00361 #endif
00362 #ifdef __GNUC__
00363     printf("GCC %d.%d : %s %s\n", __GNUC__, __GNUC_MINOR__, __DATE__, __TIME__);
00364 #endif
00365     return 0;
00366 }
00367 
00368 
// my_stristr varies from strstr in that its searches are case-insensitive.
//
// Returns a pointer to the first position in haystack at which the whole
// of needle occurs, comparing characters without regard to case, or NULL
// when there is no such position or when either argument is NULL.
// As with strstr, an empty needle matches at the start of haystack.
//
// Every start position is tried in turn, so a match that begins inside a
// failed partial match (e.g. "ab" in "aab") is found, and a needle that is
// only partly matched at the very end of haystack is not reported.
char * my_stristr(char *haystack, char *needle) {
    if (haystack == NULL || needle == NULL)
        return NULL;
    if (*needle == '\0')
        return haystack;
    for (char *start = haystack; *start != '\0'; start++) {
        char *x = start;
        char *y = needle;
        // tolower() requires a value representable as unsigned char
        while (*y != '\0' && *x != '\0'
               && tolower((unsigned char)*x) == tolower((unsigned char)*y)) {
            x++;
            y++;
        }
        if (*y == '\0')
            return start;   // the whole needle matched here
    }
    return NULL;
}
00389 
00390 
// Make a string usable as a file name by replacing, in place, every
// '/', '\' and ':' with an underscore. Returns fname itself; a NULL
// argument is returned unchanged.
char *check_filename(char *fname) {
    if (fname != NULL) {
        for (char *cursor = fname; *cursor != '\0'; ++cursor) {
            switch (*cursor) {
                case '/':
                case '\\':
                case ':':
                    *cursor = '_';  // character we don't want in a file name
                    break;
                default:
                    break;
            }
        }
    }
    return fname;
}
00402 
00403 
00404 const char *single(char *str) {
00405     if (!str) return "";
00406     char *ret = rfc2426_escape(str);
00407     char *n = strchr(ret, '\n');
00408     if (n) *n = '\0';
00409     return ret;
00410 }
00411 
00412 
00413 const char *folded(char *str) {
00414     if (!str) return "";
00415     char *ret = rfc2426_escape(str);
00416     char *n = ret;
00417     while ((n = strchr(n, '\n'))) {
00418         *n = ' ';
00419     }
00420     n = ret;
00421     while ((n = strchr(n, ','))) {
00422         *n = ' ';
00423     }
00424     return ret;
00425 }
00426 
00427 
00428 void multi(const char *fmt, char *str) {
00429     if (!str) return;
00430     char *ret = rfc2426_escape(str);
00431     char *n = ret;
00432     while ((n = strchr(ret, '\n'))) {
00433         *n = '\0';
00434         printf(fmt, ret);
00435         ret = n+1;
00436     }
00437     if (*ret) printf(fmt, ret);
00438 }
00439 
00440 
// Escape a string: every backslash and every semicolon is preceded by a
// backslash, and every carriage return is dropped.
//
// Returns NULL for a NULL argument. Returns str itself, untouched, when
// it contains none of those characters. Otherwise returns a pointer to a
// static buffer owned by this function; that buffer is reused (and may be
// moved) by the next call that needs to escape something, so the result
// must be consumed before calling again.
//
// If the buffer cannot be grown the previous buffer is kept and str is
// returned unescaped rather than writing through a NULL pointer.
char *rfc2426_escape(char *str) {
    static char* buf = NULL;
    if (str == NULL)
        return NULL;

    // one pass to measure the input and count what has to change
    size_t len = 0, escapes = 0, returns = 0;
    for (const char *p = str; *p != '\0'; p++, len++) {
        if (*p == '\\' || *p == ';')
            escapes++;
        else if (*p == '\r')
            returns++;
    }
    if (escapes == 0 && returns == 0)
        // there isn't any extra space required
        return str;

    // one extra byte per escaped character, one less per dropped cr,
    // and don't forget room for the NUL
    size_t needed = len + escapes - returns + 1;
    char *grown = (char*) realloc(buf, needed);
    if (grown == NULL)
        return str;     // buf is still valid and still owned by us
    buf = grown;

    char *out = buf;
    for (const char *in = str; *in != '\0'; in++) {
        if (*in == '\r')
            continue;   // skip cr
        if (*in == '\\' || *in == ';')
            *out++ = '\\';
        *out++ = *in;
    }
    *out = '\0'; // NUL-terminate the string (buf)
    return buf;
}
00483 
00484 
// Count how many times the character x occurs in the NUL terminated
// string str. str must not be NULL.
int chr_count(char *str, char x) {
    int hits = 0;
    for (const char *p = str; *p != '\0'; ++p) {
        hits += (*p == x) ? 1 : 0;
    }
    return hits;
}
00494 

Generated on Tue Aug 5 12:06:14 2008 for 'LibPst' by  doxygen 1.3.9.1