// Logo Search packages:
// Sourcecode: beagle version File versions  Download package

// KMailIndexer.cs

//
// KMailIndexer.cs
//
// Copyright (C) 2005 Novell, Inc.
// Copyright (C) 2005 Debajyoti Bera
//
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
//

using System;
using System.Collections;
using System.IO;

using Beagle.Util;
using Beagle.Daemon;

namespace Beagle.Daemon.KMailQueryable {
      
      /**
       * Main indexer class
       * The bulk of the indexing work is done here
       */
      public class KMailIndexer {
            // Root directory of the mail folder tree handled by this indexer;
            // assigned once in the constructor.
            private string mail_root;
            public string MailRoot {
                  get { return mail_root; }
            }
            // Account name for this folder; stored on every indexable as
            // the "fixme:account" property.
            private string account_name;
            public string AccountName {
                  get { return account_name; }
            }
            // Lower-case path suffixes of mail folders not to scan (see IgnoreFolder).
            private ArrayList excludes;
            // list of maildir directories which store mails in cur/, new/, tmp/ subdirs
            private ArrayList mail_directories;
            // list of directories which contain mbox files and other mail folders
            private ArrayList folder_directories;
            // list of mbox files (full paths)
            private ArrayList mbox_files;
            // The queryable this indexer works for; it provides the scheduler
            // and creates the add/remove tasks.
            private KMailQueryable queryable;
            public KMailQueryable Queryable {
                get { return queryable; }
            }
            
            // Parent directory accepted by the most recent successful IsMailDir
            // check; a repeated check for the same maildir skips the list lookup.
            private string lastGoodDirPath = ""; // cache last successful directory

            /**
             * Create an indexer for the mail tree rooted at root.
             * queryable - the owning queryable, used later for scheduling tasks
             * account   - account name recorded on the indexables
             * root      - top-level mail directory
             * Nothing is scanned here; the folder lists start out empty.
             */
            public KMailIndexer (KMailQueryable queryable, string account, string root)
            {
                  this.queryable = queryable;
                  this.account_name = account;
                  this.mail_root = root;

                  this.mail_directories = new ArrayList ();
                  Logger.Log.Debug ("mail_directories created for:" + root + " (" + this.mail_directories.Count + ")");
                  this.folder_directories = new ArrayList ();
                  this.mbox_files = new ArrayList ();

                  // folders whose path ends with one of these names are skipped
                  this.excludes = new ArrayList (new string[] { "spam", "outbox", "trash", "drafts" });
            }

            /**
             * inotify callback
             *
             * watch   - the watch the event was delivered on
             * path    - directory part of the affected entry
             * subitem - name of the affected entry; combined with path to
             *           form the full path
             * srcpath - source of a move, may be null
             * type    - event flags
             *
             * The event is dispatched to exactly one of the cases below;
             * events matching none of them (e.g. a moved directory) are ignored.
             */
            private void OnInotifyEvent (Inotify.Watch watch,
                                   string path,
                                   string subitem,
                                   string srcpath,
                                   Inotify.EventType type)
            {
                  //FIXME this case should NEVER occur, still it does
                  if (mail_directories == null) {
                        Logger.Log.Debug ("*** WEIRD AVIRAM CASE for :" + mail_root);
                        Logger.Log.Debug ("Received inotify event{3} for {4}: path={0}, subitem={1}, srcpath={2}", path, subitem, srcpath, type, mail_root);
                        return;
                  }

                  // events without an entry name carry nothing to act on;
                  // a null subitem would also make Path.Combine throw
                  if (subitem == null || subitem == "")
                        return;
                  string fullPath = Path.Combine (path, subitem);
                  bool is_directory = (type & Inotify.EventType.IsDirectory) != 0;

                  // we need to watch for all kinds of events - this is tricky

                  // Case: new file is created
                  // - if it is in one of the mail_directories, index it as a maildir mail
                  // - otherwise index it if it is (or announces) an mbox file
                  if ((type & Inotify.EventType.Create) != 0 && !is_directory) {
                        if (IsMailDir (path)) {
                              Indexable indexable = MaildirMessageToIndexable (fullPath);
                              AddIndexableTask (indexable, fullPath);
                        } else {
                              // add mbox file to mbox_files
                              string mbox = GetMboxFile (path, subitem);
                              if (mbox != null) {
                                    // the mbox and its index file can both lead here;
                                    // keep a single entry so that Remove () really forgets it
                                    if (!mbox_files.Contains (mbox))
                                          mbox_files.Add (mbox);
                                    IndexMbox (mbox, true);
                              }
                        }
                        return;
                  }

                  // Case: file is deleted
                  // - if it is a mail file, we might like it to be deleted
                  if ((type & Inotify.EventType.MovedFrom) != 0 ||
                      ((type & Inotify.EventType.Delete) != 0 && !is_directory)) {
                        if (IsMailDir (path))
                              RemoveMail (fullPath);
                        else if (mbox_files.Contains (fullPath)) {
                              RemoveMbox (fullPath);
                              mbox_files.Remove (fullPath);
                        }
                        return;
                  }

                  // Case: file is moved
                  // - files are moved from tmp/new to cur
                  // - need to delete from the source
                  if ((type & Inotify.EventType.MovedTo) != 0 && !is_directory) {
                        if (IsMailDir (path)) {
                              Indexable indexable = MaildirMessageToIndexable (fullPath);
                              AddIndexableTask (indexable, fullPath);
                        }
                        // NOTE(review): srcpath is treated as a file path by the
                        // ".compacted" check below, while IsMailDir expects a cur/ or
                        // new/ directory - confirm whether the parent directory of
                        // srcpath was intended here
                        if (IsMailDir (srcpath))
                              RemoveMail (srcpath);
                        if (mbox_files.Contains (fullPath)) {
                              // check if this because of compaction, in which case need to delete previous mbox
                              if (srcpath != null && srcpath.EndsWith ("." + subitem + ".compacted"))
                                    RemoveMbox (fullPath);
                              // FIXME need to ensure IndexMbox is scheduled *after* RemoveMbox finishes
                              // RemoveMbox creates a job with immediate priority while
                              // IndexMbox creates a job with the default priority of a generator
                              // Is there a better way to ensure the order ?
                              IndexMbox (fullPath, true);
                        }
                        return;
                  }

                  // Case: file is modified i.e. there was no create event but closewrite event
                  // - possibly some mbox was changed
                  // FIXME kmail doesnt physically delete the deleted mails from mbox files unless compacted
                  // - which means one has to read the .index files to find deleted messages...
                  // - need to find the format of the .index/.index.ids etc files and parse them
                  if ((type & Inotify.EventType.Modify) != 0 && !is_directory) {
                        if (mbox_files.Contains (fullPath))
                              IndexMbox (fullPath, false);
                        return;
                  }

                  // Case: a directory is created:
                  // well watch it anyway but also make sure its a maildir directory
                  // if it a maildir directory, then add it to maildir_dirs
                  if ((type & Inotify.EventType.Create) != 0 && is_directory) {
                        if (!IgnoreFolder (fullPath)) {
                              Watch (fullPath);
                              UpdateDirectories(fullPath);
                        }
                        return;
                  }

                  // Case: if a directory is deleted:
                  // remove watch
                  if ((type & Inotify.EventType.Delete) != 0 && is_directory) {
                        // NOTE(review): watch is the watch this event arrived on -
                        // confirm it belongs to the deleted directory and not to its parent
                        watch.Unsubscribe ();
                        mail_directories.Remove (fullPath);
                        folder_directories.Remove (fullPath);
                        return;
                  }

                  // Case: directory is moved
                  // FIXME: implement renaming of mail folders
                  
            }

            /**
             * Subscribe OnInotifyEvent to path and, breadth first, to every
             * directory below it. Directories that do not exist (any more)
             * are skipped; nothing happens if path itself does not exist.
             */
            public void Watch (string path)
            {
                  DirectoryInfo top = new DirectoryInfo (path);
                  if (! top.Exists)
                        return;

                  // events of interest inside a watched directory
                  Inotify.EventType wanted = Inotify.EventType.Create
                                             | Inotify.EventType.Delete
                                             | Inotify.EventType.MovedFrom
                                             | Inotify.EventType.MovedTo;

                  Queue todo = new Queue ();
                  todo.Enqueue (top);

                  // the queue starts out non-empty, so test after each round
                  do {
                        DirectoryInfo current = todo.Dequeue () as DirectoryInfo;

                        if (current.Exists) {
                              //log.Debug ("Adding inotify watch to " + current.FullName);
                              Inotify.Subscribe (current.FullName, OnInotifyEvent, wanted);

                              foreach (DirectoryInfo child in DirectoryWalker.GetDirectoryInfos (current))
                                    todo.Enqueue (child);
                        }
                  } while (todo.Count > 0);
            }
            
            /**
             * Recursively traverse the files and directories under mail_root
             * to find files that need to be indexed and directories that
             * need to be watched for changes.
             *
             * A mail folder "aaa" is recognised by its index file ".aaa.index":
             * - if "aaa" is a directory it is recorded as a maildir (and watched)
             * - if "aaa" is a file it is recorded as an mbox file
             * - if ".aaa.directory" exists it holds sub-folders and is crawled too
             * Afterwards one generator task is scheduled for all maildirs found
             * and one task per mbox file. Does nothing if mail_root does not exist.
             */
            public void Crawl ()
            {
                  if (!Directory.Exists (mail_root))
                        return;

                  const string index_suffix = ".index";

                  // events of interest in directories that can hold mbox files
                  Inotify.EventType folder_events = Inotify.EventType.Create
                                                    | Inotify.EventType.Delete
                                                    | Inotify.EventType.MovedFrom
                                                    | Inotify.EventType.MovedTo
                                                    | Inotify.EventType.Modify;

                  mail_directories.Clear ();
                  folder_directories.Clear ();
                  mbox_files.Clear();

                  Queue pending = new Queue ();
                  pending.Enqueue (mail_root);
                  folder_directories.Add (mail_root);
                  // add inotify watch to root folder
                  if (Inotify.Enabled)
                        Inotify.Subscribe (mail_root, OnInotifyEvent, folder_events);

                  while (pending.Count > 0) {

                        string dir = (string) pending.Dequeue ();
                        Logger.Log.Debug ("Searching for mbox and maildirs in " + dir);

                        foreach (FileInfo fi in DirectoryWalker.GetFileInfos (dir)) {
                              string indexFile = fi.Name;
                              // only ".<non-empty name>.index" denotes a mail folder; names
                              // like ".index" or "..index" would otherwise give an invalid
                              // or empty folder name
                              if (indexFile.Length <= index_suffix.Length + 1
                                  || indexFile [0] != '.'
                                  || !indexFile.EndsWith (index_suffix))
                                    continue;
                              // strip the leading dot and the trailing ".index"
                              string mailFolderName =
                                    indexFile.Substring (1, indexFile.Length - index_suffix.Length - 1);
                              string mailFolder = Path.Combine (dir, mailFolderName);
                              if (IgnoreFolder (mailFolder))
                                    continue;
                              if (Directory.Exists (mailFolder)) {
                                    mail_directories.Add (mailFolder);
                                    if (Inotify.Enabled)
                                          Watch (mailFolder);
                              } else if (File.Exists (mailFolder)) {
                                    mbox_files.Add (mailFolder);
                              }
                              // if there is a directory with name .<mailFolderName>.directory
                              // then it contains sub-folders
                              string subFolder =
                                    Path.Combine (dir, "." + mailFolderName + ".directory");
                              if (Directory.Exists (subFolder)) {
                                    pending.Enqueue (subFolder);
                                    folder_directories.Add (subFolder);
                                    if (Inotify.Enabled)
                                          Inotify.Subscribe (subFolder, OnInotifyEvent, folder_events);
                              }
                        }
                  }

                  // copy the contents as mail_directories, mbox_files might change due to async events
                  ArrayList _mail_directories = new ArrayList (mail_directories);
                  ArrayList _mbox_files = new ArrayList (mbox_files);

                  if (queryable.ThisScheduler.ContainsByTag (mail_root)) {
                        // NOTE(review): returning here also skips the mbox files
                        // below - confirm that this is intended
                        Logger.Log.Debug ("Not adding task for already running task: {0}", mail_root);
                        return;
                  } else {
                        KMaildirIndexableGenerator generator = new KMaildirIndexableGenerator (this, _mail_directories);
                        AddIIndexableTask (generator, mail_root);
                  }

                  foreach (string mbox_file in _mbox_files) {
                        IndexMbox (mbox_file, true);
                  }
            }

            /**
             * Schedule a single indexable with immediate priority.
             * The task is tagged with tag; a null indexable is silently ignored.
             */
            private void AddIndexableTask (Indexable indexable, string tag)
            {
                  if (indexable != null) {
                        Scheduler.Task add_task = queryable.NewAddTask (indexable);
                        add_task.Priority = Scheduler.Priority.Immediate;
                        add_task.Tag = tag;
                        queryable.ThisScheduler.Add (add_task);
                  }
            }     

            /**
             * Schedule an indexable generator, leaving the task priority at
             * whatever NewAddTask assigned. The task is tagged with tag;
             * a null generator is silently ignored.
             */
            private void AddIIndexableTask (IIndexableGenerator generator, string tag)
            {
                  if (generator != null) {
                        Scheduler.Task generator_task = queryable.NewAddTask (generator);
                        generator_task.Tag = tag;
                        queryable.ThisScheduler.Add (generator_task);
                  }
            }     

            /**
             * Schedule a generator task that indexes the given mbox file.
             * The task is tagged with the mbox path; if the scheduler already
             * holds a task with that tag, no second one is added.
             * initial_scan is passed through to the generator.
             */
            public void IndexMbox (string mbox_file, bool initial_scan)
            {
                  bool already_scheduled = queryable.ThisScheduler.ContainsByTag (mbox_file);
                  if (already_scheduled) {
                        Logger.Log.Debug ("Not adding task for already running task: {0}", mbox_file);
                  } else {
                        //Logger.Log.Debug ("Creating task to index mbox {0}", mbox_file);
                        AddIIndexableTask (new KMailMboxIndexableGenerator (this, mbox_file, initial_scan), mbox_file);
                  }
            }

            /**
             * Schedule the removal of a single maildir mail file from the index.
             * The remove task is keyed by the file uri of the mail and runs
             * with immediate priority.
             */
            private void RemoveMail (string file)
            {
                  Logger.Log.Debug ("Removing mail:" + file);

                  Scheduler.Task removal = queryable.NewRemoveTask (UriFu.PathToFileUri (file));
                  removal.Priority = Scheduler.Priority.Immediate;
                  removal.SubPriority = 0;
                  queryable.ThisScheduler.Add (removal);
            }

            /**
             * Build the indexable for one maildir message file.
             * The file uri of the message is both the uri and the content uri
             * of the indexable; the folder property is derived from the
             * location of the file (see GetFolderMaildir).
             */
            public Indexable MaildirMessageToIndexable (string filename)
            {
                  //Logger.Log.Debug ("+ indexing maildir mail:" + filename);
                  string folder_name = GetFolderMaildir (filename);
                  Uri mail_uri = UriFu.PathToFileUri (filename);

                  Indexable mail = new Indexable (mail_uri);
                  mail.HitType = "MailMessage";
                  mail.MimeType = "message/rfc822";
                  mail.CacheContent = false;

                  mail.AddProperty (Property.NewUnsearched ("fixme:client", "kmail"));
                  mail.AddProperty (Property.NewUnsearched ("fixme:account", account_name));
                  mail.AddProperty (Property.NewUnsearched ("fixme:folder", folder_name));

                  // the message is read straight from the maildir file
                  mail.ContentUri = mail_uri;

                  return mail;
            }
      
            /**
             * Create an indexable from an mbox message
             * Most of the code here is from Evo backend
             *
             * file_name   - path of the mbox file containing the message
             * uri         - uri identifying the message
             * message     - the parsed message
             * folder_name - name of the mail folder the message belongs to
             *
             * In the sent-mail folder the To and Cc addresses are stored as
             * "fixme:sentTo"; in every other folder the sender addresses are
             * stored as "fixme:gotFrom". Group addresses are skipped.
             */
            public Indexable MessageToIndexable (string file_name, System.Uri uri, GMime.Message message, string folder_name)
            {
                  //Logger.Log.Debug ("Indexing " + uri + " in folder " + folder_name);
                  Indexable indexable = new Indexable (uri);
                  // set parent uri to the filename so that when an mbox file
                  // is deleted, all the messages in that file can be deleted
                  indexable.ParentUri = UriFu.PathToFileUri (file_name);

                  indexable.Timestamp = message.Date.ToUniversalTime ();
                  indexable.HitType = "MailMessage";
                  indexable.MimeType = "message/rfc822";
                  indexable.CacheContent = false;

                  indexable.AddProperty (Property.NewUnsearched ("fixme:client", "kmail"));
                  indexable.AddProperty (Property.NewUnsearched ("fixme:account", account_name));
                  indexable.AddProperty (Property.NewUnsearched ("fixme:folder", folder_name));

                  // the folder does not change while the addresses are processed
                  bool in_sent_folder = (folder_name == Queryable.SentMailFolderName);

                  AddAddressKeywords (indexable,
                                      message.GetRecipients (GMime.Message.RecipientType.To),
                                      "fixme:sentTo", in_sent_folder);
                  AddAddressKeywords (indexable,
                                      message.GetRecipients (GMime.Message.RecipientType.Cc),
                                      "fixme:sentTo", in_sent_folder);
                  AddAddressKeywords (indexable,
                                      GMime.InternetAddressList.ParseString (GMime.Utils.HeaderDecodePhrase (message.Sender)),
                                      "fixme:gotFrom", !in_sent_folder);

                  if (in_sent_folder)
                        indexable.AddProperty (Property.NewFlag ("fixme:isSent"));
                  else {
                        string kmail_msg_sent = message.GetHeader ("X-KMail-Link-Type");
                        if (kmail_msg_sent == "reply")
                              indexable.AddProperty (Property.NewFlag ("fixme:isSent"));
                  }
                        
// no need to store date again, use the issent flag to determine if the date is sentdate or not             
#if false
                  if (folder_name == Queryable.SentMailFolderName)
                        indexable.AddProperty (Property.NewDate ("fixme:sentdate", message.Date.ToUniversalTime ()));
                  else
                        indexable.AddProperty (Property.NewDate ("fixme:received", message.Date.ToUniversalTime ()));
#endif

                  indexable.SetBinaryStream (message.Stream);

                  return indexable;
            }

            /**
             * Helper of MessageToIndexable: if wanted is true, add a keyword
             * property named key for every non-group address in addrs.
             * The list is always disposed, even when adding a property throws;
             * a null list is ignored.
             */
            private static void AddAddressKeywords (Indexable indexable, GMime.InternetAddressList addrs, string key, bool wanted)
            {
                  if (addrs == null)
                        return;

                  try {
                        if (!wanted)
                              return;
                        foreach (GMime.InternetAddress ia in addrs) {
                              if (ia.AddressType != GMime.InternetAddressType.Group)
                                    indexable.AddProperty (Property.NewKeyword (key, ia.Addr));
                        }
                  } finally {
                        addrs.Dispose ();
                  }
            }
            
            /**
             * Schedule the removal of a whole mbox file from the index.
             * Every message indexable of an mbox gets the file uri of the mbox
             * as its parent uri (see MessageToIndexable), so the remove task is
             * created for the uri of the mbox file itself; the idea is that this
             * deletes all documents whose parent uri is that uri.
             * The task runs with immediate priority.
             */
            public void RemoveMbox (string file)
            {
                  Logger.Log.Debug ("Removing mbox:" + file);

                  Scheduler.Task removal = queryable.NewRemoveTask (UriFu.PathToFileUri (file));
                  removal.Priority = Scheduler.Priority.Immediate;
                  removal.SubPriority = 0;
                  queryable.ThisScheduler.Add (removal);
            }

            ///////////////////////////////////////////////////////////

            // Helpers

            /**
             * Decide whether dirPath is the cur/ or new/ part of a known maildir.
             * A maildir has the layout
             *     some_dir_in_mail_directories/{cur,new,tmp}
             * tmp is not accepted - no point watching it, mails get moved out anyway.
             * The parent of the last accepted path is cached in lastGoodDirPath.
             * Open questions kept from the original author:
             * should we check with the kmail directory structure ?
             * presence of files like directory.index, directory.index.ids ?
             */
            public bool IsMailDir (string dirPath)
            {
                  if (dirPath == null)
                        return false;
                  if (!dirPath.EndsWith("cur") && !dirPath.EndsWith("new"))
                        return false;

                  string candidate = Directory.GetParent (dirPath).FullName;

                  // same maildir as last time: no lookup needed
                  if (candidate == lastGoodDirPath)
                        return true;

                  Logger.Log.Debug ("checking if " + candidate + " is a maildir ?");
                  if (!mail_directories.Contains (candidate))
                        return false;

                  lastGoodDirPath = candidate;
                  return true;
            }

            /**
             * how to decide if this filename denotes an mbox file ?
             * if its of the form .aaa.index, then aaa is the mbox file
             * if its of the form aaa (no .index) then there should be a .aaa.index
             *
             * dir      - directory containing the file
             * filename - name of the file inside dir
             * Returns the full path of the mbox file, or null if filename
             * neither is nor announces an existing mbox file.
             */
            public string GetMboxFile (string dir, string filename)
            {
                  const string index_suffix = ".index";

                  if (filename == null || filename.Length == 0)
                        return null;

                  // ".<non-empty name>.index"
                  bool is_index_name = filename.Length > index_suffix.Length + 1
                                       && filename [0] == '.'
                                       && filename.EndsWith (index_suffix);

                  if (is_index_name) {
                        // strip the leading dot and the trailing ".index"
                        string possible_mbox_name =
                              filename.Substring (1, filename.Length - index_suffix.Length - 1);
                        possible_mbox_name = Path.Combine (dir, possible_mbox_name);
                        if (File.Exists (possible_mbox_name))
                              return possible_mbox_name;
                  } else {
                        string possible_index_name = "." + filename + index_suffix;
                        possible_index_name = Path.Combine (dir, possible_index_name);
                        if (File.Exists (possible_index_name))
                              return Path.Combine (dir, filename);
                  }
                  
                  return null; // not found
            }
            
            /**
             * Called when a new directory is created
             * Decide what to do with this new directory:
             * - cur, new or tmp: the parent directory is a maildir
             * - a name ending in .directory: a folder holding sub-folders
             * - anything else: treated as a maildir itself
             * A directory is never recorded twice in the same list.
             */
            public void UpdateDirectories (string dirPath)
            {
                  DirectoryInfo dirinfo = new DirectoryInfo (dirPath);
                  string dirName = dirinfo.Name;
                  
                  if (dirName == "cur" || dirName == "new" || dirName == "tmp") {
                        // check and add the parentdir to mail_directories;
                        // the parent is only needed (and looked up) in this case
                        DirectoryInfo parent = Directory.GetParent (dirPath);
                        if (parent != null && !mail_directories.Contains (parent.FullName))
                              mail_directories.Add (parent.FullName);
                        return;
                  }
                  
                  // format .name.directory - in which case add it to folder_dir
                  // format name  - in which case add it to mail_dir
                  ArrayList target = dirName.EndsWith (".directory") ? folder_directories : mail_directories;
                  if (!target.Contains (dirPath))
                        target.Add (dirPath);
            }

            /**
             * FIXME: if we can parse the kmailrc file, then we might be
             * able to deduce the mail folder name.
             * Currently it is taken from the file name (mbox) or from the
             * parent.parent directory name (maildir).
             */

            // Folder name of an mbox: the file name of the mbox file
            public string GetFolderMbox (string mbox_file)
            {
                  return new FileInfo (mbox_file).Name;
            }
            
            // Folder name of a maildir mail: the name of the directory two
            // levels above the mail file (the mail lives in e.g. folder/cur/)
            public string GetFolderMaildir (string mailFile)
            {
                  DirectoryInfo sub_dir = Directory.GetParent (mailFile);
                  DirectoryInfo maildir = Directory.GetParent (sub_dir.FullName);
                  return maildir.Name;
            }

            // True if the lower-cased path ends with one of the names in excludes
            private bool IgnoreFolder (string path)
            {
                  string lowered = path.ToLower();

                  bool ignored = false;
                  foreach (string suffix in excludes) {
                        if (lowered.EndsWith (suffix)) {
                              ignored = true;
                              break;
                        }
                  }
                  return ignored;
            }
      }
}

// Generated by  Doxygen 1.6.0   Back to index