Logo Search packages:      
Sourcecode: beagle version File versions  Download package

LuceneCommon.cs

//
// LuceneCommon.cs
//
// Copyright (C) 2004-2005 Novell, Inc.
//

//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
//

using System;
using System.Collections;
using System.Diagnostics;
using System.Globalization;
using System.IO;
using System.Text;
using System.Threading;
using System.Xml;
using System.Xml.Serialization;

using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.QueryParsers;
using LNS = Lucene.Net.Search;

using Beagle.Util;

namespace Beagle.Daemon {

      public class LuceneCommon {

            // Callback that inspects a Hit and returns a bool.
            // NOTE(review): presumably 'true' means the hit passes the filter --
            // confirm against the callers, which are not visible here.
            public delegate bool HitFilter (Hit hit);

            // VERSION HISTORY
            // ---------------
            //
            //  1: Original
            //  2: Changed format of timestamp strings
            //  3: Schema changed to be more Dashboard-Match-like
            //  4: Schema changed for files to include _Directory property
            //  5: Changed analyzer to support stemming.  Bumped version # to
            //     force everyone to re-index.
            //  6: lots of schema changes as part of the general refactoring
            //  7: incremented to force a re-index after our upgrade to lucene 1.4
            //     (in theory the file formats are compatible, but we are seeing
            //     'term out of order' exceptions in some cases)
            //  8: another forced re-index, this time because of massive changes
            //     in the file system backend (it would be nice to have per-backend
            //     versioning so that we didn't have to purge all indexes just
            //     because one changed)
            //  9: changed the way properties are stored, changed in conjunction
            //     with sane handling of multiple properties on hits.
            // 10: changed to support typed and mutable properties
            // 11: moved mime type and hit type into properties
            // 12: added year-month and year-month-day resolutions for all
            //     date properties
            // 13: moved source into a property
            // 14: allow wildcard queries to also match keywords
            // 15: analyze PropertyKeyword field, and store all properties as
            //     lower case so that we're truly case insensitive.
            // 16: add inverted timestamp to make querying substantially faster
            // Schema version shared by every index.  Exists() treats an index
            // whose stored major version differs from this value as non-existent.
            private const int MAJOR_VERSION = 16;
            // Per-index minor version supplied to the constructor.  A negative
            // value makes Exists() skip the minor-version comparison; Create()
            // clamps a negative value to 0 before writing it out.
            private int minor_version = 0;

            // Name passed to the constructor; may be a rooted path.
            private string index_name;
            // Directory holding the version and fingerprint files, both index
            // directories and the lock directory.
            private string top_dir;

            // Generated in Create() or read back from the fingerprint file in Open().
            private string fingerprint;
            // Not referenced anywhere in the visible part of this file.
            private int last_item_count = -1;

            // This is the big index, containing document full-texts and
            // data that is expensive to index.
            private Lucene.Net.Store.Directory primary_store = null;

            // This is the small index, containing document info that we
            // expect to have change.  Canonical example: file names.
            private Lucene.Net.Store.Directory secondary_store = null;

            //////////////////////////////////////////////////////////////////////////////

            // Records the index name and minor version and resolves the top-level
            // directory: a rooted index name is used verbatim, anything else is
            // taken relative to PathFinder.IndexDir.
            protected LuceneCommon (string index_name, int minor_version)
            {
                  this.index_name = index_name;
                  this.minor_version = minor_version;

                  if (Path.IsPathRooted (index_name))
                        this.top_dir = index_name;
                  else
                        this.top_dir = Path.Combine (PathFinder.IndexDir, index_name);
            }

            //////////////////////////////////////////////////////////////////////////////

            // The index name exactly as it was handed to the constructor.
            protected string IndexName {
                  get { return index_name; }
            }

            // Store for the primary index; null until Create() or Open() has run.
            public Lucene.Net.Store.Directory PrimaryStore {
                  get { return primary_store; }
            }

            // Store for the secondary index; null until Create() or Open() has run.
            public Lucene.Net.Store.Directory SecondaryStore {
                  get { return secondary_store; }
            }

            // Fingerprint string set by Create() or Open().
            public string Fingerprint {
                  get { return fingerprint; }
            }

            // Directory under which all of this index's files live.
            public string TopDirectory {
                  get { return top_dir; }
            }

            //////////////////////////////////////////////////////////////////////////////

            // Optional TextCache instance.  Starts out null; in the visible code it
            // is only ever assigned through the TextCache property below.
            protected TextCache text_cache = null;

            // Plain read/write accessor for text_cache.
            public TextCache TextCache {
                  get { return text_cache; }
                  set { text_cache = value; }
            }

            //////////////////////////////////////////////////////////////////////////////

            // <top_dir>/version -- holds the "major.minor" version line.
            private string VersionFile {
                  get {
                        return Path.Combine (top_dir, "version");
                  }
            }

            // <top_dir>/fingerprint -- holds the index fingerprint line.
            private string FingerprintFile {
                  get {
                        return Path.Combine (top_dir, "fingerprint");
                  }
            }

            // <top_dir>/PrimaryIndex.  Shouldn't really be public.
            public string PrimaryIndexDirectory {
                  get {
                        return Path.Combine (top_dir, "PrimaryIndex");
                  }
            }

            // <top_dir>/SecondaryIndex.  Shouldn't really be public.
            public string SecondaryIndexDirectory {
                  get {
                        return Path.Combine (top_dir, "SecondaryIndex");
                  }
            }

            // <top_dir>/Locks -- handed to FSDirectory as the lock directory.
            public string LockDirectory {
                  get {
                        return Path.Combine (top_dir, "Locks");
                  }
            }

            //////////////////////////////////////////////////////////////////////////////

            // Deal with dangling locks

            // Decides whether a file found in the lock directory is a stale
            // ("dangling") lock.
            //
            // Returns false when the file is not a *.lock file, when it cannot be
            // read, or when the PID it names has a /proc/<pid>/cmdline mentioning
            // IndexHelper.exe.  Returns true for an empty lock file.  In every
            // other case the result is whether the lock file still exists on disk.
            private bool IsDanglingLock (FileInfo info)
            {
                  Log.Debug ("Checking for dangling locks...");

                  // It isn't even a lock file
                  if (! info.Name.EndsWith (".lock"))
                        return false;

                  string pid = null;

                  try {
                        // 'using' releases the file handle even if ReadLine throws.
                        using (StreamReader reader = new StreamReader (info.FullName)) {
                              pid = reader.ReadLine ();
                        }
                  } catch {
                        // We couldn't read the lockfile, so it probably went away.
                        return false;
                  }

                  if (pid == null) {
                        // Looks like the lock file was empty, which really
                        // shouldn't happen.  It should contain the PID of
                        // the process which locked it.  Lets be on the safe
                        // side and assume it's a dangling lock.
                        Log.Warn ("Found an empty lock file, that shouldn't happen: {0}", info.FullName);
                        return true;
                  }

                  string cmdline_file;
                  cmdline_file = String.Format ("/proc/{0}/cmdline", pid);

                  string cmdline = null;
                  try {
                        using (StreamReader reader = new StreamReader (cmdline_file)) {
                              cmdline = reader.ReadLine ();
                        }
                  } catch {
                        // If we can't open that file, either:
                        // (1) The process doesn't exist
                        // (2) It does exist, but it doesn't belong to us.
                        //     Thus it isn't an IndexHelper
                        // In either case, the lock is dangling --- if it
                        // still exists.
                        //
                        // FileInfo caches its state, so refresh it first;
                        // otherwise we would report a stale answer for a lock
                        // that was released while we were looking.
                        info.Refresh ();
                        return info.Exists;
                  }

                  // The process exists, but isn't an IndexHelper.
                  // If the lock file is still there, it is dangling.
                  // ReadLine returns null when the cmdline file is empty, so
                  // cmdline can legitimately be null here; that case is treated
                  // like "not an IndexHelper".
                  if (cmdline == null || cmdline.IndexOf ("IndexHelper.exe") == -1) {
                        info.Refresh ();
                        return info.Exists;
                  }

                  // If we reach this point, we know:
                  // (1) The process still exists
                  // (2) We own it
                  // (3) It is an IndexHelper process
                  // Thus it almost certainly isn't a dangling lock.
                  // The process might be wedged, but that is
                  // another issue...
                  return false;
            }
            
            // Reports whether a usable index is present under top_dir.
            //
            // Returns false when any expected file or directory is missing, when
            // the version file is empty or malformed, when the stored version does
            // not match (the minor version is only compared if minor_version >= 0),
            // or when the lock directory contains a dangling lock.
            protected bool Exists ()
            {
                  if (! (Directory.Exists (top_dir)
                         && File.Exists (VersionFile)
                         && File.Exists (FingerprintFile)
                         && Directory.Exists (PrimaryIndexDirectory)
                         && IndexReader.IndexExists (PrimaryIndexDirectory)
                         && Directory.Exists (SecondaryIndexDirectory)
                         && IndexReader.IndexExists (SecondaryIndexDirectory)
                         && Directory.Exists (LockDirectory)))
                        return false;

                  // Check the index's version number.  If it is wrong,
                  // declare the index non-existent.

                  string version_str;
                  // 'using' releases the file handle even if ReadLine throws.
                  using (StreamReader version_reader = new StreamReader (VersionFile)) {
                        version_str = version_reader.ReadLine ();
                  }

                  // An empty version file yields null; that is not a valid index.
                  if (version_str == null) {
                        Logger.Log.Debug ("Empty version file in {0}", index_name);
                        return false;
                  }

                  int current_major_version, current_minor_version;
                  int i = version_str.IndexOf ('.');

                  try {
                        if (i != -1) {
                              current_major_version = Convert.ToInt32 (version_str.Substring (0, i));
                              current_minor_version = Convert.ToInt32 (version_str.Substring (i+1));
                        } else {
                              // Old-style version file with a single number.
                              current_minor_version = Convert.ToInt32 (version_str);
                              current_major_version = 0;
                        }
                  } catch (FormatException) {
                        // A garbled version file means the index cannot be trusted.
                        Logger.Log.Debug ("Malformed version '{0}' in {1}", version_str, index_name);
                        return false;
                  } catch (OverflowException) {
                        Logger.Log.Debug ("Malformed version '{0}' in {1}", version_str, index_name);
                        return false;
                  }

                  if (current_major_version != MAJOR_VERSION
                      || (minor_version >= 0 && current_minor_version != minor_version)) {
                        Logger.Log.Debug ("Version mismatch in {0}", index_name);
                        Logger.Log.Debug ("Index has version {0}.{1}, expected {2}.{3}",
                                      current_major_version, current_minor_version,
                                      MAJOR_VERSION, minor_version);
                        return false;
                  }

                  // Check the lock directory: If there is a dangling write lock,
                  // assume that the index is corrupted and declare it non-existent.
                  DirectoryInfo lock_dir_info;
                  lock_dir_info = new DirectoryInfo (LockDirectory);
                  foreach (FileInfo info in lock_dir_info.GetFiles ()) {
                        if (IsDanglingLock (info)) {
                              Logger.Log.Warn ("Found a dangling index lock on {0}", info.FullName);
                              return false;
                        }
                  }

                  return true;
            }

            // Makes sure 'path' exists as a directory, opens an FSDirectory store on
            // it (with LockDirectory as the lock location) and writes an empty
            // index into that store.  Returns the store.
            private Lucene.Net.Store.Directory CreateIndex (string path)
            {
                  Directory.CreateDirectory (path);

                  Lucene.Net.Store.Directory new_store = Lucene.Net.Store.FSDirectory.GetDirectory (path, LockDirectory, true);

                  // Opening a writer and closing it straight away leaves an
                  // empty index behind in the store.
                  IndexWriter empty_index_writer = new IndexWriter (new_store, null, true);
                  empty_index_writer.Close ();

                  return new_store;
            }

            // Create will kill your index dead.  Use it with care.
            // You don't need to call Open after calling Create.
            //
            // Deletes top_dir if it exists, recreates it together with the lock
            // directory, builds empty primary and secondary indexes, then writes
            // a freshly generated fingerprint and the "major.minor" version file.
            // A negative minor_version is clamped to 0 first.
            protected void Create ()
            {
                  if (minor_version < 0)
                        minor_version = 0;

                  // Purge any existing directories.
                  if (Directory.Exists (top_dir)) {
                        Logger.Log.Debug ("Purging {0}", top_dir);
                        Directory.Delete (top_dir, true);
                  }

                  // Create any necessary directories.
                  Directory.CreateDirectory (top_dir);
                  Directory.CreateDirectory (LockDirectory);

                  // Create the indexes.
                  primary_store = CreateIndex (PrimaryIndexDirectory);
                  secondary_store = CreateIndex (SecondaryIndexDirectory);

                  // Generate and store the index fingerprint.
                  // 'using' closes the file even if the write throws.
                  fingerprint = GuidFu.ToShortString (Guid.NewGuid ());
                  using (TextWriter writer = new StreamWriter (FingerprintFile, false)) {
                        writer.WriteLine (fingerprint);
                  }

                  // Store our index version information.
                  using (TextWriter writer = new StreamWriter (VersionFile, false)) {
                        writer.WriteLine ("{0}.{1}", MAJOR_VERSION, minor_version);
                  }
            }

            // Convenience overload: opens the index with read_only_mode set to false.
            protected void Open ()
            {
                  Open (false);
            }

            // Loads the fingerprint from disk and opens (without creating) the
            // primary and secondary stores, passing read_only_mode through to
            // FSDirectory.GetDirectory.
            protected void Open (bool read_only_mode)
            {
                  // Read our index fingerprint.
                  // 'using' releases the file handle even if ReadLine throws.
                  using (TextReader reader = new StreamReader (FingerprintFile)) {
                        fingerprint = reader.ReadLine ();
                  }

                  // Create stores for our indexes.
                  primary_store = Lucene.Net.Store.FSDirectory.GetDirectory (PrimaryIndexDirectory, LockDirectory, false, read_only_mode);
                  secondary_store = Lucene.Net.Store.FSDirectory.GetDirectory (SecondaryIndexDirectory, LockDirectory, false, read_only_mode);
            }

            ////////////////////////////////////////////////////////////////

            //
            // Custom Analyzers
            //

            // A TokenStream that hands out exactly one token covering the whole
            // string it was constructed with, and null on every call after that.
            // A null string produces no token at all.
            private class SingletonTokenStream : TokenStream {

                  // The text still to be emitted; null once it has been handed out.
                  private string pending;

                  public SingletonTokenStream (string singleton_str)
                  {
                        pending = singleton_str;
                  }

                  override public Lucene.Net.Analysis.Token Next ()
                  {
                        string text = pending;
                        if (text == null)
                              return null;

                        Lucene.Net.Analysis.Token only_token = new Lucene.Net.Analysis.Token (text, 0, text.Length);
                        pending = null;

                        return only_token;
                  }
            }

            // Analyzer used for both indexing and querying.  Property fields
            // ("prop:...") and the "PropertyKeyword" field get special treatment;
            // everything else goes through StandardAnalyzer, with
            // NoiseEmailHostFilter and PorterStemFilter layered on top for the
            // text fields.
            //
            // FIXME: This assumes everything being indexed is in English!
            private class BeagleAnalyzer : StandardAnalyzer {

                  // When set, the "<flag>:" prefix in front of a property value is
                  // skipped before the value is analyzed.
                  private bool strip_extra_property_info = false;
                  // Passed straight through to NoiseEmailHostFilter.
                  private bool tokenize_email_hostname = false;

                  // Both options are switched on for the indexing analyzer and
                  // off for the query analyzer.
                  public BeagleAnalyzer (bool is_indexing_analyzer)
                  {
                        this.strip_extra_property_info = is_indexing_analyzer;
                        this.tokenize_email_hostname = is_indexing_analyzer;
                  }

                  // Builds the token stream for one field:
                  //  - non-text "prop:" fields yield a single token, lower-cased
                  //    unless the key is in the private namespace;
                  //  - "PropertyKeyword" yields a single lower-cased token;
                  //  - everything else is tokenized by StandardAnalyzer, and the
                  //    text fields are additionally filtered and stemmed.
                  public override TokenStream TokenStream (string fieldName, TextReader reader)
                  {
                        bool is_text_prop = false;

                        if (fieldName.StartsWith ("prop:")) {

                              // Property values carry a short prefix ending in ':'
                              // that we don't want to index.
                              if (strip_extra_property_info) {
                                    // Skip everything up to and including the first :
                                    int c;
                                    do {
                                          c = reader.Read ();
                                    } while (c != -1 && c != ':');
                              }

                              is_text_prop = fieldName.StartsWith ("prop:t");

                              // If this is non-text property, just return one token
                              // containing the entire string.  We do this to avoid
                              // tokenizing keywords.
                              if (! is_text_prop) {
                                    TokenStream singleton_stream = new SingletonTokenStream (reader.ReadToEnd ());

                                    // Keywords in the private namespace keep their
                                    // case; all others are lower-cased.
                                    if (fieldName.StartsWith ("prop:k:" + LuceneQueryingDriver.PrivateNamespace))
                                          return singleton_stream;
                                    else
                                          return new LowerCaseFilter (singleton_stream);
                              }
                        } else if (fieldName == "PropertyKeyword")
                              return new LowerCaseFilter (new SingletonTokenStream (reader.ReadToEnd ()));

                        TokenStream outstream;
                        outstream = base.TokenStream (fieldName, reader);

                        if (fieldName == "Text"
                            || fieldName == "HotText"
                            || fieldName == "PropertyText"
                            || is_text_prop) {
                              outstream = new NoiseEmailHostFilter (outstream, tokenize_email_hostname);
                              outstream = new PorterStemFilter (outstream);
                        }

                        return outstream;
                  }
            }

            // Two shared BeagleAnalyzer instances: the indexing one is built with
            // is_indexing_analyzer = true, the query one with false.
            static private Analyzer indexing_analyzer = new BeagleAnalyzer (true);
            static private Analyzer query_analyzer = new BeagleAnalyzer (false);

            // Read-only access to the shared analyzers for subclasses.
            static protected Analyzer IndexingAnalyzer { get { return indexing_analyzer; } }
            static protected Analyzer QueryAnalyzer { get { return query_analyzer; } }

            ////////////////////////////////////////////////////////////////

            //
            // Dealing with properties
            //

            // Maps a property type to the one-character code used in field names:
            // Text -> 't', Keyword -> 'k', Date -> 'd'.  Any other value throws.
            static private char TypeToCode (PropertyType type)
            {
                  if (type == PropertyType.Text)
                        return 't';
                  if (type == PropertyType.Keyword)
                        return 'k';
                  if (type == PropertyType.Date)
                        return 'd';

                  throw new Exception ("Bad property type: " + type);
            }

            // Inverse of TypeToCode: 't' -> Text, 'k' -> Keyword, 'd' -> Date.
            // Any other character throws.
            static private PropertyType CodeToType (char c)
            {
                  if (c == 't')
                        return PropertyType.Text;
                  if (c == 'k')
                        return PropertyType.Keyword;
                  if (c == 'd')
                        return PropertyType.Date;

                  throw new Exception ("Bad property code: " + c);
            }

            // Name of the catch-all field that collects every searched property
            // of the given type, or null when the type has no such field.
            static private string TypeToWildcardField (PropertyType type)
            {
                  string wildcard_name = null;

                  switch (type) {
                  case PropertyType.Text:
                        wildcard_name = "PropertyText";
                        break;
                  case PropertyType.Keyword:
                        wildcard_name = "PropertyKeyword";
                        break;
                  case PropertyType.Date:
                        wildcard_name = "PropertyDate";
                        break;
                  }

                  return wildcard_name;
            }

            // Exposing this is a little bit suspicious.
            // Builds the Lucene field name for a property: "prop:<code>:<key>",
            // where <code> is the character returned by TypeToCode.
            // Exposing this is a little bit suspicious.
            static protected string PropertyToFieldName (PropertyType type, string key)
            {
                  char code = TypeToCode (type);
                  return "prop:" + code + ":" + key;
            }

            // Adds two coarser-resolution companions for a date property to the
            // document: "YM:<field_name>" (year-month) and "D:<field_name>" (day).
            // Both are indexed, but neither stored nor tokenized.
            static private void AddDateFields (string field_name, Property prop, Document doc)
            {
                  DateTime when = StringFu.StringToDateTime (prop.Value);

                  // Field flags below are (store, index, tokenize).
                  string year_month = StringFu.DateTimeToYearMonthString (when);
                  doc.Add (new Field ("YM:" + field_name, year_month, false, true, false));

                  string day = StringFu.DateTimeToDayString (when);
                  doc.Add (new Field ("D:" + field_name, day, false, true, false));
            }

            // Writes one property into a Lucene document.
            //
            // Nothing is added for a null property, a null value, or a key in
            // StringFu.UnindexedNamespace.  Otherwise a searched property is first
            // copied into the wildcard field for its type, and then every property
            // is written to its own "prop:<code>:<key>" field with the value
            // prefixed by "s:" (searched) or "_:" (not searched).  Date properties
            // also get the year-month and day companion fields for each field added.
            static protected void AddPropertyToDocument (Property prop, Document doc)
            {
                  if (prop == null || prop.Value == null)
                        return;

                  // Don't actually put properties in the UnindexedNamespace
                  // in the document.  A horrible (and yet lovely!) hack.
                  if (prop.Key.StartsWith (StringFu.UnindexedNamespace))
                        return;

                  // Field flags below are (store, index, tokenize).
                  string wildcard_name = prop.IsSearched ? TypeToWildcardField (prop.Type) : null;
                  if (wildcard_name != null) {
                        // Tokenizing just lowercases keywords; text gets full analysis.
                        doc.Add (new Field (wildcard_name, prop.Value, false, true, true));

                        if (prop.Type == PropertyType.Date)
                              AddDateFields (wildcard_name, prop, doc);
                  }

                  string prefix = prop.IsSearched ? "s:" : "_:";
                  string coded = prefix + prop.Value;
                  string own_field = PropertyToFieldName (prop.Type, prop.Key);

                  // Tokenizing strips the prefix again for keywords and lowercases.
                  doc.Add (new Field (own_field, coded, prop.IsStored, true, true));

                  if (prop.Type == PropertyType.Date)
                        AddDateFields (own_field, prop, doc);
            }

            // Reconstructs a Property from a "prop:<code>:<key>" field whose value
            // has the form "<flag>:<value>".
            //
            // Returns null when the field is null, is not a property field, or has
            // a value too short to carry the two-character prefix.  Throws (via
            // CodeToType) if the type code in the field name is unknown.
            // The result is marked mutable unless from_primary_index is true.
            static protected Property GetPropertyFromDocument (Field f, Document doc, bool from_primary_index)
            {
                  // Note: we don't use the document that we pass in,
                  // but in theory we could.  At some later point we
                  // might need to split a property's data across two or
                  // more fields in the document.

                  if (f == null)
                        return null;

                  // "prop:" + code + ":" is seven characters, so anything
                  // shorter cannot be a property field.
                  string field_name;
                  field_name = f.Name ();
                  if (field_name.Length < 7
                      || ! field_name.StartsWith ("prop:"))
                        return null;

                  // Guard against a missing or truncated value: indexing into it
                  // below would otherwise throw.
                  string field_value;
                  field_value = f.StringValue ();
                  if (field_value == null || field_value.Length < 2)
                        return null;

                  Property prop;
                  prop = new Property ();
                  prop.Type = CodeToType (field_name [5]);
                  prop.Key = field_name.Substring (7);
                  prop.Value = field_value.Substring (2);
                  prop.IsSearched = (field_value [0] == 's');
                  prop.IsMutable = ! from_primary_index;
                  prop.IsStored = f.IsStored ();

                  return prop;
            }

            //////////////////////////////////////////////////////////////////////////////

            //
            // Dealing with documents
            //

            // Turns an Indexable into Lucene documents.
            //
            // primary_doc is always created and receives the Uri, the optional
            // ParentUri, the timestamp fields, the text readers (or the
            // beagle:NoContent marker), the HitType/MimeType/Source properties and
            // every immutable property.
            //
            // secondary_doc stays null unless the indexable has at least one
            // mutable property; in that case it holds the Uri plus all mutable
            // properties.
            static protected void BuildDocuments (Indexable indexable,
                                          out Document primary_doc,
                                          out Document secondary_doc)
            {
                  primary_doc = new Document ();
                  secondary_doc = null;

                  Field f;

                  f = Field.Keyword ("Uri", UriFu.UriToEscapedString (indexable.Uri));
                  primary_doc.Add (f);

                  if (indexable.ParentUri != null) {
                        f = Field.Keyword ("ParentUri", UriFu.UriToEscapedString (indexable.ParentUri));
                        primary_doc.Add (f);
                  }
                  
                  if (indexable.ValidTimestamp) {
                        // Note that we also want to search in the
                        // Timestamp field when we do a wildcard date
                        // query, so that's why we also add a wildcard
                        // field for each item here.

                        string wildcard_field = TypeToWildcardField (PropertyType.Date);

                        string str = StringFu.DateTimeToString (indexable.Timestamp);
                        f = Field.Keyword ("Timestamp", str);
                        primary_doc.Add (f);
                        f = Field.UnStored (wildcard_field, str);
                        primary_doc.Add (f);

                        // Create an inverted timestamp so that we can
                        // sort by timestamp at search-time.
                        // The value is Int64.MaxValue minus the timestamp, so a
                        // later timestamp gives a smaller number.
                        // NOTE(review): Convert.ToInt64 assumes that
                        // StringFu.DateTimeToString yields a purely numeric
                        // string -- confirm.
                        long timeval = Convert.ToInt64 (str);
                        f = Field.UnStored ("InvertedTimestamp", (Int64.MaxValue - timeval).ToString ());
                        primary_doc.Add (f);

                        // Year-month resolution.
                        str = StringFu.DateTimeToYearMonthString (indexable.Timestamp);
                        f = Field.Keyword ("YM:Timestamp", str);
                        primary_doc.Add (f);
                        f = Field.UnStored ("YM:" + wildcard_field, str);
                        primary_doc.Add (f);

                        // Day resolution.
                        str = StringFu.DateTimeToDayString (indexable.Timestamp);
                        f = Field.Keyword ("D:Timestamp", str);
                        primary_doc.Add (f);
                        f = Field.UnStored ("D:" + wildcard_field, str);
                        primary_doc.Add (f);
                  }

                  if (indexable.NoContent) {
                        // If there is no content, make a note of that
                        // in a special property.
                        Property prop;
                        prop = Property.NewBool ("beagle:NoContent", true);
                        AddPropertyToDocument (prop, primary_doc);
                        
                  } else {

                        // Since we might have content, add our text
                        // readers.

                        TextReader reader;
                        
                        reader = indexable.GetTextReader ();
                        if (reader != null) {
                              f = Field.Text ("Text", reader);
                              primary_doc.Add (f);
                        }
                  
                        reader = indexable.GetHotTextReader ();
                        if (reader != null) {
                              f = Field.Text ("HotText", reader);
                              primary_doc.Add (f);
                        }
                  }

                  // Store the Type and MimeType in special properties

                  if (indexable.HitType != null) {
                        Property prop;
                        prop = Property.NewUnsearched ("beagle:HitType", indexable.HitType);
                        AddPropertyToDocument (prop, primary_doc);
                  }

                  if (indexable.MimeType != null) {
                        Property prop;
                        prop = Property.NewUnsearched ("beagle:MimeType", indexable.MimeType);
                        AddPropertyToDocument (prop, primary_doc);
                  }

                  if (indexable.Source != null) {
                        Property prop;
                        prop = Property.NewUnsearched ("beagle:Source", indexable.Source);
                        AddPropertyToDocument (prop, primary_doc);
                  }

                  // Store the other properties
                        
                  foreach (Property prop in indexable.Properties) {
                        Document target_doc = primary_doc;
                        if (prop.IsMutable) {
                              // The secondary document is created lazily, the first
                              // time a mutable property shows up, and is keyed by
                              // the same Uri as the primary document.
                              if (secondary_doc == null) {
                                    secondary_doc = new Document ();
                                    f = Field.Keyword ("Uri", UriFu.UriToEscapedString (indexable.Uri));
                                    secondary_doc.Add (f);
                              }
                              target_doc = secondary_doc;
                        }
                              
                        AddPropertyToDocument (prop, target_doc);
                  }
            }

            // Builds a replacement secondary document for a property-only change.
            //
            // The new document carries the indexable's Uri, every property supplied
            // by the indexable, and every property of the old document whose key
            // the indexable did not mention.  old_secondary_doc may be null.
            static protected Document RewriteDocument (Document old_secondary_doc,
                                             Indexable prop_only_indexable)
            {
                  Hashtable replaced_keys = new Hashtable ();
                  Document rewritten = new Document ();

                  rewritten.Add (Field.Keyword ("Uri", UriFu.UriToEscapedString (prop_only_indexable.Uri)));

                  Logger.Log.Debug ("Rewriting {0}", prop_only_indexable.DisplayUri);

                  // A property whose Value is null acts as a deletion: its key is
                  // remembered here, so the old value is skipped further down,
                  // but AddPropertyToDocument adds nothing for it.
                  foreach (Property fresh in prop_only_indexable.Properties) {
                        replaced_keys [fresh.Key] = fresh;
                        AddPropertyToDocument (fresh, rewritten);
                        Logger.Log.Debug ("New prop '{0}' = '{1}'", fresh.Key, fresh.Value);
                  }

                  // With no previous document there is nothing to carry over.
                  if (old_secondary_doc == null)
                        return rewritten;

                  // Carry over whatever the indexable did not override.
                  foreach (Field old_field in old_secondary_doc.Fields ()) {
                        Property carried = GetPropertyFromDocument (old_field, old_secondary_doc, false);
                        if (carried == null || replaced_keys.Contains (carried.Key))
                              continue;

                        Logger.Log.Debug ("Old prop '{0}' = '{1}'", carried.Key, carried.Value);
                        AddPropertyToDocument (carried, rewritten);
                  }

                  return rewritten;
            }

            // Read the "Uri" field of a Lucene document and convert it back
            // into a Uri.  Throws an Exception if the document has no such field.
            static protected Uri GetUriFromDocument (Document doc)
            {
                  string escaped = doc.Get ("Uri");
                  if (escaped != null)
                        return UriFu.EscapedStringToUri (escaped);

                  throw new Exception ("Got document from Lucene w/o a URI!");
            }

            // Convert a document into a Hit: Uri, optional ParentUri, Timestamp
            // and all properties, with Type/MimeType/Source filled in from the
            // corresponding "beagle:" properties.
            static protected Hit DocumentToHit (Document doc)
            {
                  Hit result = new Hit ();

                  result.Uri = GetUriFromDocument (doc);

                  // ParentUri is optional; leave it unset when the field is absent.
                  string escaped_parent = doc.Get ("ParentUri");
                  if (escaped_parent != null)
                        result.ParentUri = UriFu.EscapedStringToUri (escaped_parent);

                  string timestamp = doc.Get ("Timestamp");
                  result.Timestamp = StringFu.StringToDateTime (timestamp);

                  // The properties are read with the from_primary_index flag set.
                  AddPropertiesToHit (result, doc, true);

                  // These three are derived from properties that were just added.
                  result.Type = result.GetFirstProperty ("beagle:HitType");
                  result.MimeType = result.GetFirstProperty ("beagle:MimeType");
                  result.Source = result.GetFirstProperty ("beagle:Source");

                  return result;
            }

            // Attach to the hit every field of doc that GetPropertyFromDocument
            // turns into a Property; fields for which it returns null are skipped.
            static protected void AddPropertiesToHit (Hit hit, Document doc, bool from_primary_index)
            {
                  foreach (Field field in doc.Fields ()) {
                        Property converted = GetPropertyFromDocument (field, doc, from_primary_index);
                        if (converted == null)
                              continue;
                        hit.AddProperty (converted);
                  }
            }


            //////////////////////////////////////////////////////////////////////////////

            //
            // Handle the index's item count
            //

            // Return the number of documents in the primary store.
            //
            // The count is cached in last_item_count; a negative cached value
            // means "not yet known", in which case a reader on PrimaryStore is
            // consulted once and the result remembered.
            public int GetItemCount ()
            {
                  if (last_item_count < 0) {
                        IndexReader reader;
                        reader = GetReader (PrimaryStore);
                        try {
                              last_item_count = reader.NumDocs ();
                        } finally {
                              // Always hand the reader back, even if NumDocs
                              // throws, so the acquisition is never leaked.
                              ReleaseReader (reader);
                        }
                  }
                  return last_item_count;
            }

            // We should set the cached count of index items when IndexReaders
            // are open and available, so calls to GetItemCount will return immediately.

            // True once a count has been cached (a negative value means none yet).
            protected bool HaveItemCount {
                  get { return ! (last_item_count < 0); }
            }
            
            // Cache the item count reported by an already-open reader.
            protected void SetItemCount (IndexReader reader)
            {
                  int doc_count = reader.NumDocs ();
                  last_item_count = doc_count;
            }

            // Overwrite the cached item count with a caller-supplied value.
            // No validation is done: a negative count makes GetItemCount
            // consult the index again on its next call.
            public void SetItemCount (int count)
            {
                  last_item_count = count;
            }

            // Shift the cached item count by delta.  Does nothing while no
            // count has been cached yet (cached value is negative).
            protected void AdjustItemCount (int delta)
            {
                  if (last_item_count < 0)
                        return;

                  last_item_count = last_item_count + delta;
            }

            //////////////////////////////////////////////////////////////////////////////

            //
            // Access to the stemmer and list of stop words
            //

            // Single shared stemmer instance used by Stem below.
            static PorterStemmer stemmer = new PorterStemmer ();

            // Return the stemmed form of str, as computed by the shared PorterStemmer.
            static public string Stem (string str)
            {
                  string stemmed = stemmer.Stem (str);
                  return stemmed;
            }

            // True if stemmed_word appears in Lucene's English stop word list.
            public static bool IsStopWord (string stemmed_word)
            {
                  int position = ArrayFu.IndexOfString (StopAnalyzer.ENGLISH_STOP_WORDS, stemmed_word);
                  return position != -1;
            }

            //////////////////////////////////////////////////////////////////////////////

            //
            // Special Hit Filtering classes
            //

            // A filter that accepts every hit.
            static private bool TrueHitFilter (Hit hit)
            {
                  return true;
            }

            // The one shared delegate wrapping TrueHitFilter.  OrHitFilter
            // compares against this exact instance to detect a filter that is
            // known to accept everything.
            static private HitFilter true_hit_filter = new HitFilter (TrueHitFilter);

            // Combines hit filters with logical OR: a hit passes if any of the
            // added filters accepts it.  With no filters added, nothing passes.
            public class OrHitFilter {

                  // Filters added so far, in insertion order.
                  private ArrayList filters = new ArrayList ();

                  // Set once the shared always-true filter has been added, which
                  // lets HitFilter answer without running any delegate.
                  private bool always_true = false;

                  // Register another alternative.
                  public void Add (HitFilter hit_filter)
                  {
                        filters.Add (hit_filter);
                        if (hit_filter == true_hit_filter)
                              always_true = true;
                  }

                  // Returns true as soon as one registered filter accepts the hit.
                  public bool HitFilter (Hit hit)
                  {
                        if (always_true)
                              return true;

                        foreach (HitFilter candidate in filters) {
                              if (candidate (hit))
                                    return true;
                        }

                        return false;
                  }
            }
            
            // Combines hit filters with logical AND: a hit passes only if every
            // added filter accepts it.  With no filters added, everything passes.
            public class AndHitFilter {

                  // Filters added so far, in insertion order.
                  private ArrayList filters = new ArrayList ();

                  // Register another condition that hits must satisfy.
                  public void Add (HitFilter hit_filter)
                  {
                        filters.Add (hit_filter);
                  }

                  // Returns false as soon as one registered filter rejects the hit.
                  public bool HitFilter (Hit hit)
                  {
                        foreach (HitFilter condition in filters) {
                              if (condition (hit))
                                    continue;
                              return false;
                        }

                        return true;
                  }
            }

            // Wraps a hit filter and inverts its verdict.
            public class NotHitFilter {

                  // The filter whose result is negated.
                  HitFilter wrapped;

                  public NotHitFilter (HitFilter original)
                  {
                        wrapped = original;
                  }

                  // Accepts exactly the hits that the wrapped filter rejects.
                  public bool HitFilter (Hit hit)
                  {
                        bool accepted = wrapped (hit);
                        return ! accepted;
                  }
            }

            //////////////////////////////////////////////////////////////////////////////

            //
            // Queries
            //

            // Turn free text into a phrase query against field_name.
            //
            // The text is run through QueryAnalyzer and the resulting tokens
            // become the terms of a PhraseQuery, in order.  Every term created
            // is also appended to term_list when term_list is non-null.
            //
            // Returns null when the text is null or yields no tokens.
            static private LNS.Query StringToQuery (string field_name,
                                          string text,
                                          ArrayList term_list)
            {
                  // Null text is treated like empty text instead of crashing
                  // inside StringReader.
                  if (text == null)
                        return null;

                  ArrayList tokens = new ArrayList ();

                  // Use the analyzer to extract the query's tokens.
                  // This code is taken from Lucene's query parser.
                  TokenStream source = QueryAnalyzer.TokenStream (field_name, new StringReader (text));
                  try {
                        while (true) {
                              Lucene.Net.Analysis.Token next_token;
                              try {
                                    next_token = source.Next ();
                              } catch (IOException) {
                                    // Best effort: keep whatever was tokenized so far.
                                    break;
                              }
                              if (next_token == null)
                                    break;
                              tokens.Add (next_token.TermText ());
                        }
                  } finally {
                        // Close the stream on every exit path, including
                        // exceptions other than IOException.
                        try {
                              source.Close ();
                        } catch (IOException) {
                              // ignore
                        }
                  }

                  if (tokens.Count == 0)
                        return null;

                  LNS.PhraseQuery query = new LNS.PhraseQuery ();

                  foreach (string token_text in tokens) {
                        Term term;
                        term = new Term (field_name, token_text);
                        query.Add (term);
                        if (term_list != null)
                              term_list.Add (term);
                  }

                  return query;
            }

            //
            // Date Range Handling
            //

            // This function will break down dates to discrete chunks of
            // time to avoid expanding RangeQuerys as much as possible.
            // For example, searching for
            //
            // YMD(5 May 2005, 16 Oct 2006)
            //
            // would break down into three queries:
            //
            // (YM(May 2005) AND D(5,31)) OR
            // YM(Jun 2005, Sep 2006) OR
            // (YM(Oct 2006) AND D(1,16))

            // Earliest date a date range query is allowed to start at;
            // GetDateRangeQuery raises earlier start dates to this value.
            static private DateTime lower_bound = new DateTime (1970, 1, 1);

            // Latest date a date range query is allowed to end at;
            // GetDateRangeQuery lowers later (or unset, i.e. DateTime.MinValue)
            // end dates to this value.
            // FIXME: we should probably boost this sometime around 2030.
            // Mark your calendar.
            static private DateTime upper_bound = new DateTime (2038, 12, 31);

            // Build the term for one calendar month: field "YM:<field_name>",
            // text "<year><two-digit month>".
            static private Term NewYearMonthTerm (string field_name, int y, int m)
            {
                  string ym_field = "YM:" + field_name;
                  string ym_text = String.Format ("{0}{1:00}", y, m);
                  return new Term (ym_field, ym_text);
            }

            // Query matching exactly one calendar month.
            static private LNS.Query NewYearMonthQuery (string field_name, int y, int m)
            {
                  Term month_term = NewYearMonthTerm (field_name, y, m);
                  return new LNS.TermQuery (month_term);
            }

            // Query matching every month from (y1, m1) through (y2, m2),
            // both end points included.
            static private LNS.Query NewYearMonthQuery (string field_name, int y1, int m1, int y2, int m2)
            {
                  Term first_month = NewYearMonthTerm (field_name, y1, m1);
                  Term last_month = NewYearMonthTerm (field_name, y2, m2);

                  // The final argument makes the range inclusive.
                  return new LNS.RangeQuery (first_month, last_month, true);
            }

            // Build the term for a day of the month: field "D:<field_name>",
            // text is the two-digit day.
            static private Term NewDayTerm (string field_name, int d)
            {
                  string day_field = "D:" + field_name;
                  string day_text = String.Format ("{0:00}", d);
                  return new Term (day_field, day_text);
            }

            // Query matching days d1 through d2 of a month, both included.
            static private LNS.Query NewDayQuery (string field_name, int d1, int d2)
            {
                  Term first_day = NewDayTerm (field_name, d1);
                  Term last_day = NewDayTerm (field_name, d2);

                  // The final argument makes the range inclusive.
                  return new LNS.RangeQuery (first_day, last_day, true);
            }

            // Post-filter for date range queries: accepts a hit when a relevant
            // date lies in [StartDate, EndDate] (both ends included).  Which
            // dates are relevant depends on Key:
            //   - QueryPart_DateRange.TimestampKey:     only the hit's Timestamp
            //   - QueryPart_DateRange.AllPropertiesKey: the Timestamp and every
            //                                           property of type Date
            //   - anything else:                        the values of the
            //                                           properties with that key
            private class DateRangeHitFilter {
                  public string Key;
                  public DateTime StartDate;
                  public DateTime EndDate;

                  // Inclusive range test shared by all of the cases below.
                  private bool InRange (DateTime dt)
                  {
                        return StartDate <= dt && dt <= EndDate;
                  }

                  public bool HitFilter (Hit hit)
                  {
                        // Timestamp-only: the Timestamp alone decides.
                        if (Key == QueryPart_DateRange.TimestampKey)
                              return InRange (hit.Timestamp);

                        if (Key == QueryPart_DateRange.AllPropertiesKey) {
                              // The Timestamp counts as one of the dates...
                              if (InRange (hit.Timestamp))
                                    return true;

                              // ...and so does every property typed as a date.
                              foreach (Property candidate in hit.Properties) {
                                    if (candidate.Type != PropertyType.Date)
                                          continue;
                                    if (InRange (StringFu.StringToDateTime (candidate.Value)))
                                          return true;
                              }
                              return false;
                        }

                        // A specific key: check each value stored under it.
                        string[] stored_values = hit.GetProperties (Key);
                        foreach (string stored in stored_values) {
                              if (InRange (StringFu.StringToDateTime (stored)))
                                    return true;
                        }
                        return false;
                  }
            }

            // Build the Lucene query for a date range part.
            //
            // part:       the date range to search for.  NOTE: its StartDate and
            //             EndDate are modified in place (clamped to
            //             [lower_bound, upper_bound] and swapped if reversed).
            // hit_filter: receives a DateRangeHitFilter delegate carrying the
            //             part's key and the normalized start and end dates.
            //
            // Returns a BooleanQuery combining year-month term/range queries
            // with day range queries for partially covered months.
            static private LNS.Query GetDateRangeQuery (QueryPart_DateRange part, out HitFilter hit_filter)
            {
                  // Pick the field family to search: all date properties, the
                  // Timestamp, or one named date property.
                  string field_name;
                  if (part.Key == QueryPart_DateRange.AllPropertiesKey)
                        field_name = TypeToWildcardField (PropertyType.Date);
                  else if (part.Key == QueryPart_DateRange.TimestampKey)
                        field_name = "Timestamp";
                  else
                        field_name = PropertyToFieldName (PropertyType.Date, part.Key);
            
                  // FIXME: We could optimize this and reduce the size of our range
                  // queries if we actually knew the min and max date that appear in
                  // any properties in the index.  We would need to inspect the index to
                  // determine that at start-up, and then track it as new documents
                  // get added to the index.
                  if (part.StartDate < lower_bound)
                        part.StartDate = lower_bound;
                  // An EndDate of DateTime.MinValue is treated as "no end date given".
                  if (part.EndDate > upper_bound || part.EndDate == DateTime.MinValue)
                        part.EndDate = upper_bound;

                  // Swap the start and end dates if they come in reversed.
                  if (part.StartDate > part.EndDate) {
                        DateTime swap;
                        swap = part.StartDate;
                        part.StartDate = part.EndDate;
                        part.EndDate = swap;
                  }

                  // Set up our hit filter to cull out the bad dates.
                  DateRangeHitFilter drhf;
                  drhf = new DateRangeHitFilter ();
                  drhf.Key = part.Key;
                  drhf.StartDate = part.StartDate;
                  drhf.EndDate = part.EndDate;
                  hit_filter = new HitFilter (drhf.HitFilter);

                  Logger.Log.Debug ("Building new date range query");
                  Logger.Log.Debug ("Start: {0}", part.StartDate);
                  Logger.Log.Debug ("End: {0}", part.EndDate);

                  // (y1, m1, d1) and (y2, m2, d2) start out as the range's end
                  // points; m1/y1 and m2/y2 are moved inward below once a
                  // partial month has been handled separately.
                  int y1, m1, d1, y2, m2, d2;
                  y1 = part.StartDate.Year;
                  m1 = part.StartDate.Month;
                  d1 = part.StartDate.Day;
                  y2 = part.EndDate.Year;
                  m2 = part.EndDate.Month;
                  d2 = part.EndDate.Day;

                  // In the Add calls below, the two bool arguments are Lucene's
                  // "required" and "prohibited" flags: (true, false) clauses must
                  // match, (false, false) clauses are alternatives.
                  LNS.BooleanQuery top_level_query;
                  top_level_query = new LNS.BooleanQuery ();

                  // A special case: both the start and the end of our range fall
                  // in the same month.
                  if (y1 == y2 && m1 == m2) {
                        LNS.Query ym_query;
                        ym_query = NewYearMonthQuery (field_name, y1, m1);

                        // If our range only covers a part of the month, do a range query on the days.
                        if (d1 != 1 || d2 != DateTime.DaysInMonth (y2, m2)) {
                              LNS.BooleanQuery sub_query;
                              sub_query = new LNS.BooleanQuery ();
                              sub_query.Add (ym_query, true, false);
                              sub_query.Add (NewDayQuery (field_name, d1, d2), true, false);
                              top_level_query.Add (sub_query, false, false);
                        } else {
                              top_level_query.Add (ym_query, false, false);
                        }

                  } else {

                        // Handle a partial month at the beginning of our range.
                        if (d1 > 1) {
                              LNS.BooleanQuery sub_query;
                              sub_query = new LNS.BooleanQuery ();
                              sub_query.Add (NewYearMonthQuery (field_name, y1, m1), true, false);
                              sub_query.Add (NewDayQuery (field_name, d1, DateTime.DaysInMonth (y1, m1)), true, false);
                              top_level_query.Add (sub_query, false, false);
                              
                              // The first month is now covered; start the
                              // "middle" at the following month.
                              ++m1;
                              if (m1 == 13) {
                                    m1 = 1;
                                    ++y1;
                              }
                        }

                        // And likewise, handle a partial month at the end of our range.
                        if (d2 < DateTime.DaysInMonth (y2, m2)) {
                              LNS.BooleanQuery sub_query;
                              sub_query = new LNS.BooleanQuery ();
                              sub_query.Add (NewYearMonthQuery (field_name, y2, m2), true, false);
                              sub_query.Add (NewDayQuery (field_name, 1, d2), true, false);
                              top_level_query.Add (sub_query, false, false);

                              // The last month is now covered; end the
                              // "middle" at the preceding month.
                              --m2;
                              if (m2 == 0) {
                                    m2 = 12;
                                    --y2;
                              }
                        }

                        // Generate the query for the "middle" of our period, if it is non-empty
                        if (y1 < y2 || ((y1 == y2) && m1 <= m2))
                              top_level_query.Add (NewYearMonthQuery (field_name, y1, m1, y2, m2),
                                               false, false);
                  }
                        
                  return top_level_query;
            }

            // NOTE(review): this comment originally described a
            // "search_subset_uris" list of Uris that the search should be
            // limited to; the method below has no such parameter.
            // Translate one QueryPart into Lucene queries plus a hit filter.
            //
            // abstract_part:            the part to translate.  A null part
            //                           produces null queries and the
            //                           always-true hit filter.
            // only_build_primary_query: when true, secondary_query is left null.
            // term_list:                if non-null, receives every Term that is
            //                           created while building the queries.
            // primary_query:            receives the query built for the part,
            //                           or null if none could be built.
            // secondary_query:          receives the property-only counterpart
            //                           of primary_query, or null.
            // hit_filter:               receives the filter to apply to hits
            //                           afterwards; null for prohibited parts
            //                           other than date ranges.
            //
            // Throws an Exception for a QueryPart subclass that is not handled.
            static protected void QueryPartToQuery (QueryPart     abstract_part,
                                          bool          only_build_primary_query,
                                          ArrayList     term_list,
                                          out LNS.Query primary_query,
                                          out LNS.Query secondary_query,
                                          out HitFilter hit_filter)
            {
                  primary_query = null;
                  secondary_query = null;

                  // By default, we assume that our lucene queries will return exactly the
                  // matching set of objects.  We need to set the hit filter if further
                  // refinement of the search results is required.  (As in the case of
                  // date range queries, for example.)  We essentially have to do this
                  // to make OR queries work correctly.
                  hit_filter = true_hit_filter;

                  // This check must come before anything dereferences the part.
                  if (abstract_part == null)
                        return;

                  // The exception is when dealing with a prohibited part.  Just return
                  // null for the hit filter in that case.  This works since
                  // prohibited parts are not allowed inside of OR queries.
                  if (abstract_part.Logic == QueryPartLogic.Prohibited)
                        hit_filter = null;

                  if (abstract_part is QueryPart_Text) {
                        QueryPart_Text part = (QueryPart_Text) abstract_part;

                        if (! (part.SearchFullText || part.SearchTextProperties))
                              return;

                        LNS.BooleanQuery p_query = new LNS.BooleanQuery ();
                        LNS.BooleanQuery s_query = new LNS.BooleanQuery ();

                        if (part.SearchFullText) {
                              LNS.Query subquery;
                              subquery = StringToQuery ("Text", part.Text, term_list);
                              if (subquery != null)
                                    p_query.Add (subquery, false, false);

                              // FIXME: HotText is ignored for now!
                              // subquery = StringToQuery ("HotText", part.Text);
                              // if (subquery != null) 
                              //    p_query.Add (subquery, false, false);
                        }

                        if (part.SearchTextProperties) {
                              LNS.Query subquery;
                              subquery = StringToQuery ("PropertyText", part.Text, term_list);
                              if (subquery != null) {
                                    p_query.Add (subquery, false, false);
                                    // Properties can live in either index
                                    if (! only_build_primary_query)
                                          s_query.Add (subquery.Clone () as LNS.Query, false, false);
                              }

                              Term term;
                              term = new Term ("PropertyKeyword", part.Text.ToLower ()); // make sure text is lowercased
                              // FIXME: terms are already added in term_list. But they may have been tokenized
                              // The term here is non-tokenized version. Should this be added to term_list ?
                              // term_list is used to calculate scores
                              if (term_list != null)
                                    term_list.Add (term);
                              subquery = new LNS.TermQuery (term);
                              p_query.Add (subquery, false, false);
                              // Properties can live in either index
                              if (! only_build_primary_query)
                                    s_query.Add (subquery.Clone () as LNS.Query, false, false);
                        }

                        primary_query = p_query;
                        if (! only_build_primary_query)
                              secondary_query = s_query;

                        return;
                  }

                  if (abstract_part is QueryPart_Wildcard) {
                        QueryPart_Wildcard part = (QueryPart_Wildcard) abstract_part;

                        LNS.BooleanQuery p_query = new LNS.BooleanQuery ();
                        LNS.BooleanQuery s_query = new LNS.BooleanQuery ();
                        
                        Term term;
                        LNS.Query subquery;

                        // Lower case the terms for searching
                        string query_string_lower = part.QueryString.ToLower ();

                        // Search text content
                        term = new Term ("Text", query_string_lower);
                        subquery = new LNS.WildcardQuery (term);
                        p_query.Add (subquery, false, false);
                        // term_list is optional here, as in every other branch.
                        if (term_list != null)
                              term_list.Add (term);

                        // Search text properties
                        term = new Term ("PropertyText", query_string_lower);
                        subquery = new LNS.WildcardQuery (term);
                        p_query.Add (subquery, false, false);
                        // Properties can live in either index
                        if (! only_build_primary_query)
                              s_query.Add (subquery.Clone () as LNS.Query, false, false);
                        if (term_list != null)
                              term_list.Add (term);

                        // Search property keywords
                        term = new Term ("PropertyKeyword", query_string_lower);
                        if (term_list != null)
                              term_list.Add (term);
                        subquery = new LNS.WildcardQuery (term);
                        p_query.Add (subquery, false, false);
                        // Properties can live in either index
                        if (! only_build_primary_query)
                              s_query.Add (subquery.Clone () as LNS.Query, false, false);

                        primary_query = p_query;
                        if (! only_build_primary_query)
                              secondary_query = s_query;

                        return;
                  }

                  if (abstract_part is QueryPart_Property) {
                        QueryPart_Property part = (QueryPart_Property) abstract_part;

                        string field_name;
                        if (part.Key == QueryPart_Property.AllProperties) {
                              field_name = TypeToWildcardField (part.Type);
                              // FIXME: probably shouldn't just return silently
                              if (field_name == null)
                                    return;
                        } else
                              field_name = PropertyToFieldName (part.Type, part.Key);

                        if (part.Type == PropertyType.Text)
                              primary_query = StringToQuery (field_name, part.Value, term_list);
                        else {
                              Term term;
                              term = new Term (field_name, part.Value.ToLower ());
                              if (term_list != null)
                                    term_list.Add (term);
                              primary_query = new LNS.TermQuery (term);
                        }

                        // Properties can live in either index
                        if (! only_build_primary_query && primary_query != null)
                              secondary_query = primary_query.Clone () as LNS.Query;

                        return;
                  }

                  if (abstract_part is QueryPart_DateRange) {

                        QueryPart_DateRange part = (QueryPart_DateRange) abstract_part;

                        // This always replaces hit_filter with a date range filter.
                        primary_query = GetDateRangeQuery (part, out hit_filter);
                        // Date properties can live in either index
                        if (! only_build_primary_query && primary_query != null)
                              secondary_query = primary_query.Clone () as LNS.Query;

                        // If this is a prohibited part, invert our hit filter.
                        if (part.Logic == QueryPartLogic.Prohibited) {
                              NotHitFilter nhf;
                              nhf = new NotHitFilter (hit_filter);
                              hit_filter = new HitFilter (nhf.HitFilter);
                        }
                        
                        return;
                  }

                  if (abstract_part is QueryPart_Or) {
                        QueryPart_Or part = (QueryPart_Or) abstract_part;
                        
                        // Assemble a new BooleanQuery combining all of the sub-parts.
                        LNS.BooleanQuery p_query;
                        p_query = new LNS.BooleanQuery ();

                        LNS.BooleanQuery s_query = null;
                        if (! only_build_primary_query)
                              s_query = new LNS.BooleanQuery ();

                        primary_query = p_query;
                        secondary_query = s_query;

                        OrHitFilter or_hit_filter = null;
                        
                        foreach (QueryPart  sub_part in part.SubParts) {
                              LNS.Query p_subq, s_subq;
                              HitFilter sub_hit_filter;
                              // FIXME: Any subpart in an OR which has a hit filter won't work
                              // correctly, because we can't tell which part of an OR we matched
                              // against to filter correctly.  This affects date range queries.
                              QueryPartToQuery (sub_part, only_build_primary_query,
                                            term_list,
                                            out p_subq, out s_subq, out sub_hit_filter);
                              if (p_subq != null)
                                    p_query.Add (p_subq, false, false);
                              // s_query only exists when secondary queries are being built.
                              if (s_subq != null && s_query != null)
                                    s_query.Add (s_subq, false, false);
                              if (sub_hit_filter != null) {
                                    if (or_hit_filter == null)
                                          or_hit_filter = new OrHitFilter ();
                                    or_hit_filter.Add (sub_hit_filter);
                              }
                        }

                        if (or_hit_filter != null)
                              hit_filter = new HitFilter (or_hit_filter.HitFilter);

                        return;
                  } 

                  throw new Exception ("Unhandled QueryPart type! " + abstract_part.ToString ());
            }

            // Query matching documents whose field_name holds exactly this Uri
            // (in its escaped string form).
            static protected LNS.Query UriQuery (string field_name, Uri uri)
            {
                  string escaped = UriFu.UriToEscapedString (uri);
                  Term uri_term = new Term (field_name, escaped);
                  return new LNS.TermQuery (uri_term);
            }

            // Query over a collection of Uris with no extra requirement;
            // forwards to the three-argument overload.
            static protected LNS.Query UriQuery (string field_name, ICollection uri_list)
            {
                  LNS.Query no_extra_requirement = null;
                  return UriQuery (field_name, uri_list, no_extra_requirement);
            }

            // Build a query over a collection of Uris, optionally combined with
            // an extra required clause.
            //
            // field_name:        the field the Uris are stored under.
            // uri_list:          the Uris to look for; every element must be a Uri.
            // extra_requirement: if non-null, added to the top-level query as a
            //                    required clause.
            //
            // Returns null for an empty uri_list.  Otherwise returns a
            // BooleanQuery holding one term clause per Uri.  When the clauses
            // would not fit in one BooleanQuery (see GetMaxClauseCount), they
            // are spread round-robin over several nested BooleanQuerys.
            //
            // NOTE(review): extra_requirement is added as a required clause while
            // the Uri clauses are added as optional ones; confirm against
            // Lucene's BooleanQuery semantics that this matches what callers expect.
            static protected LNS.Query UriQuery (string field_name, ICollection uri_list, LNS.Query extra_requirement)
            {
                  if (uri_list.Count == 0)
                        return null;

                  int max_clauses;
                  max_clauses = LNS.BooleanQuery.GetMaxClauseCount ();

                  // Number of clauses a single flat query would need.  The extra
                  // requirement occupies a clause too; leaving it out would let a
                  // flat query end up with max_clauses + 1 clauses.
                  int flat_clause_count;
                  flat_clause_count = uri_list.Count;
                  if (extra_requirement != null)
                        ++flat_clause_count;

                  // Number of buckets needed so that no bucket exceeds max_clauses.
                  int N;
                  N = 1 + (flat_clause_count - 1) / max_clauses;

                  LNS.BooleanQuery top_query;
                  top_query = new LNS.BooleanQuery ();

                  if (extra_requirement != null)
                        top_query.Add (extra_requirement, true, false);

                  ArrayList bottom_queries = null;

                  if (N > 1) {
                        bottom_queries = new ArrayList ();
                        for (int i = 0; i < N; ++i) {
                              LNS.BooleanQuery bq;
                              bq = new LNS.BooleanQuery ();
                              bottom_queries.Add (bq);
                              top_query.Add (bq, false, false);
                        }
                  }

                  // Index of the bucket that receives the next Uri (only used when N > 1).
                  int cursor = 0;

                  foreach (Uri uri in uri_list) {
                        LNS.Query subquery;
                        subquery = UriQuery (field_name, uri);

                        LNS.BooleanQuery target;
                        if (N == 1)
                              target = top_query;
                        else {
                              target = (LNS.BooleanQuery) bottom_queries [cursor];
                              ++cursor;
                              if (cursor >= N)
                                    cursor = 0;
                        }

                        target.Add (subquery, false, false);
                  }

                  return top_query;
            }

            ///////////////////////////////////////////////////////////////////////////////////

            // The larger of the two counts of ".cfs" files found directly in
            // the primary and the secondary index directories.
            public int SegmentCount {
                  get {
                        int primary_segments = 0;
                        FileInfo[] primary_files = new DirectoryInfo (PrimaryIndexDirectory).GetFiles ();
                        foreach (FileInfo candidate in primary_files) {
                              if (candidate.Extension == ".cfs")
                                    primary_segments++;
                        }

                        int secondary_segments = 0;
                        FileInfo[] secondary_files = new DirectoryInfo (SecondaryIndexDirectory).GetFiles ();
                        foreach (FileInfo candidate in secondary_files) {
                              if (candidate.Extension == ".cfs")
                                    secondary_segments++;
                        }

                        return Math.Max (primary_segments, secondary_segments);
                  }
            }

            ///////////////////////////////////////////////////////////////////////////////////

            // Cache IndexReaders on a per-Lucene index basis, since they
            // are extremely expensive to create.  Note that caching
            // only makes sense in situations where the index might
            // change from underneath us, but most of the time
            // won't.  This means it makes sense to do
            // this in LuceneQueryingDriver.cs, but it doesn't in
            // LuceneIndexingDriver.cs.

            // Cache entry: an IndexReader, the index version recorded for it, and
            // the number of references currently held on it.
            private class ReaderAndVersion {

                  public IndexReader Reader;
                  public long Version;

                  // A newly created entry starts out with a single reference.
                  public int Refcount = 1;

                  public ReaderAndVersion (IndexReader reader, long version)
                  {
                        Reader = reader;
                        Version = version;
                  }
            }

            // Maps a Lucene.Net.Store.Directory to the ReaderAndVersion entry
            // currently cached for that index.
            static private Hashtable directory_rav_map = new Hashtable ();
            // Maps an open IndexReader back to its ReaderAndVersion entry so that a
            // reader can be released given only the reader.  This table is also the
            // object that GetReader and ReleaseReader lock on, so it guards both maps.
            static private Hashtable reader_rav_map = new Hashtable ();

            // Builds a new IndexSearcher on top of the reader that GetReader hands
            // out for `directory`.  The reader reference taken here is given back
            // by ReleaseSearcher.
            static public LNS.IndexSearcher GetSearcher (Lucene.Net.Store.Directory directory)
            {
                  return new LNS.IndexSearcher (GetReader (directory));
            }

            // Returns an IndexReader for `directory`.
            //
            // If a reader is already cached for the directory and the version
            // reported by IndexReader.GetCurrentVersion still equals the version
            // recorded with it, that reader is reused.  Otherwise a new reader is
            // opened and cached in place of the old entry.
            //
            // Every call adds one reference on behalf of the caller, which must be
            // given back with ReleaseReader.  A cached entry carries one additional
            // reference (the initial count set by ReaderAndVersion) that is dropped
            // only when the entry is replaced here.
            //
            // Exceptions from IndexReader.GetCurrentVersion / IndexReader.Open
            // propagate to the caller; in that case the cache is left untouched.
            static public IndexReader GetReader (Lucene.Net.Store.Directory directory)
            {
                  lock (reader_rav_map) {
                        ReaderAndVersion cached = (ReaderAndVersion) directory_rav_map [directory];

                        long version;
                        version = IndexReader.GetCurrentVersion (directory);

                        // Cache hit: same index version, hand out the shared reader.
                        if (cached != null && cached.Version == version) {
                              cached.Refcount++;
                              return cached.Reader;
                        }

                        // Open the replacement BEFORE touching the stale entry.  If
                        // Open throws, the stale entry must keep the cache's
                        // reference; otherwise the next call would drop that
                        // reference a second time and could close a reader that
                        // another caller is still using.
                        IndexReader reader;
                        reader = IndexReader.Open (directory);

                        ReaderAndVersion rav;
                        rav = new ReaderAndVersion (reader, version);
                        rav.Refcount++; // the caller's reference, on top of the cache's

                        directory_rav_map [directory] = rav;
                        reader_rav_map [reader] = rav;

                        // The stale entry is no longer reachable through
                        // directory_rav_map, so give up the cache's reference on it.
                        // Its reader is closed once all outstanding users release it.
                        if (cached != null)
                              UnrefReaderAndVersion_Unlocked (cached);

                        return reader;
                  }
            }

            // Drops one reference from `rav`.  When the count reaches zero the
            // reader is closed and its entry is removed from reader_rav_map.
            // This method takes no lock itself; GetReader and ReleaseReader call
            // it while holding the lock on reader_rav_map.
            static private void UnrefReaderAndVersion_Unlocked (ReaderAndVersion rav)
            {
                  --rav.Refcount;
                  if (rav.Refcount != 0)
                        return;

                  // That was the last reference.
                  IndexReader finished = rav.Reader;
                  finished.Close ();
                  reader_rav_map.Remove (finished);
            }

            // Gives back one reference on a reader obtained from GetReader.
            //
            // A null reader, or a reader that has no entry in reader_rav_map (it was
            // never handed out by GetReader, or its last reference has already been
            // dropped), is ignored.  Callers in this file null-check readers before
            // use, so releasing must tolerate the same values instead of failing
            // with a null-key or null-reference exception.
            static public void ReleaseReader (IndexReader reader)
            {
                  // Hashtable lookups reject a null key, so bail out up front.
                  if (reader == null)
                        return;

                  lock (reader_rav_map) {
                        ReaderAndVersion rav = (ReaderAndVersion) reader_rav_map [reader];

                        // No entry means there is nothing left to release.
                        if (rav != null)
                              UnrefReaderAndVersion_Unlocked (rav);
                  }
            }

            // Closes `searcher` and then releases the reference on the IndexReader
            // it was searching.  The reader is fetched before the searcher is
            // closed.
            static public void ReleaseSearcher (LNS.IndexSearcher searcher)
            {
                  IndexReader underlying_reader;
                  underlying_reader = searcher.GetIndexReader ();

                  searcher.Close ();

                  ReleaseReader (underlying_reader);
            }

            ///////////////////////////////////////////////////////////////////////////////////

            //
            // Various ways to grab lots of hits at once.
            // These should never be used for querying, only for utility
            // functions.
            //

            // Fills block_of_hits with hits built from a run of consecutive
            // documents in the primary index and returns the document number at
            // which a following call should continue.
            //
            // cookie:        document number to start at.  A negative value picks
            //                a random starting point.  A value at or beyond the
            //                reader's MaxDoc() restarts at 0, the same way the scan
            //                itself wraps around.
            // block_of_hits: receives the hits; slots that are not filled are set
            //                to null.
            //
            // At most min(block_of_hits.Length, NumDocs()) document numbers are
            // visited.  Deleted documents are skipped but still count as visited,
            // so fewer hits than requested may be produced.
            //
            // The readers (and the temporary searcher) are released in finally
            // blocks so that an exception while reading the index does not leave
            // the shared reader reference counts permanently raised.
            public int GetBlockOfHits (int cookie,
                                 Hit [] block_of_hits)
            {
                  Hashtable primary_docs, secondary_docs;
                  primary_docs = UriFu.NewHashtable ();
                  secondary_docs = UriFu.NewHashtable ();

                  IndexReader primary_reader = null;
                  IndexReader secondary_reader = null;

                  try {
                        primary_reader = GetReader (PrimaryStore);
                        secondary_reader = GetReader (SecondaryStore);

                        int request_size;
                        request_size = block_of_hits.Length;
                        if (request_size > primary_reader.NumDocs ())
                              request_size = primary_reader.NumDocs ();

                        int max_doc;
                        max_doc = primary_reader.MaxDoc ();

                        if (cookie < 0) {
                              Random random;
                              random = new Random ();
                              cookie = random.Next (max_doc);
                        } else if (cookie >= max_doc) {
                              // Not a valid document number for this reader
                              // (presumably a cookie from an earlier call against
                              // a larger index), so start over from the beginning.
                              cookie = 0;
                        }

                        int original_cookie;
                        original_cookie = cookie;

                        // Load the primary documents
                        for (int i = 0; i < request_size; ++i) {

                              if (! primary_reader.IsDeleted (cookie)) {
                                    Document doc;
                                    doc = primary_reader.Document (cookie);
                                    primary_docs [GetUriFromDocument (doc)] = doc;
                              }

                              ++cookie;
                              if (cookie >= max_doc) // wrap around
                                    cookie = 0;

                              // If we somehow end up back where we started,
                              // give up.
                              if (cookie == original_cookie)
                                    break;
                        }

                        // If necessary, load the secondary documents
                        if (secondary_reader != null) {
                              LNS.IndexSearcher searcher;
                              searcher = new LNS.IndexSearcher (secondary_reader);

                              try {
                                    LNS.Query uri_query;
                                    uri_query = UriQuery ("Uri", primary_docs.Keys);

                                    LNS.Hits hits;
                                    hits = searcher.Search (uri_query);
                                    for (int i = 0; i < hits.Length (); ++i) {
                                          Document doc;
                                          doc = hits.Doc (i);
                                          secondary_docs [GetUriFromDocument (doc)] = doc;
                                    }
                              } finally {
                                    searcher.Close ();
                              }
                        }
                  } finally {
                        // Only release what was actually acquired.
                        if (primary_reader != null)
                              ReleaseReader (primary_reader);
                        if (secondary_reader != null)
                              ReleaseReader (secondary_reader);
                  }

                  // Now assemble the hits
                  int j = 0;
                  foreach (Uri uri in primary_docs.Keys) {
                        Document primary_doc, secondary_doc;
                        primary_doc = primary_docs [uri] as Document;
                        secondary_doc = secondary_docs [uri] as Document;

                        Hit hit;
                        hit = DocumentToHit (primary_doc);
                        if (secondary_doc != null)
                              AddPropertiesToHit (hit, secondary_doc, false);

                        block_of_hits [j] = hit;
                        ++j;
                  }

                  // null-pad the array, if necessary
                  for (; j < block_of_hits.Length; ++j)
                        block_of_hits [j] = null;

                  // Return the new cookie
                  return cookie;
            }

            // For a large index, this will be very slow and will consume
            // a lot of memory.  Don't call it without a good reason!
            // We return a hashtable indexed by Uri.
            // Builds a Hit for every non-deleted document in the primary index and
            // returns them in a hashtable keyed by Uri.  Properties from each
            // non-deleted secondary-index document are then added to the hit with
            // the same Uri; secondary documents with no matching primary hit are
            // ignored.
            //
            // The readers are released in a finally block so that an exception
            // while walking the index does not leave the shared reader reference
            // counts permanently raised.
            public Hashtable GetAllHitsByUri ()
            {
                  Hashtable all_hits;
                  all_hits = UriFu.NewHashtable ();

                  IndexReader primary_reader = null;
                  IndexReader secondary_reader = null;

                  try {
                        primary_reader = GetReader (PrimaryStore);
                        secondary_reader = GetReader (SecondaryStore);

                        // Load everything from the primary index
                        int max_doc;
                        max_doc = primary_reader.MaxDoc ();
                        for (int i = 0; i < max_doc; ++i) {

                              if (primary_reader.IsDeleted (i))
                                    continue;

                              Document doc;
                              doc = primary_reader.Document (i);

                              Hit hit;
                              hit = DocumentToHit (doc);
                              all_hits [hit.Uri] = hit;
                        }

                        // Now add in everything from the secondary index, if it exists
                        if (secondary_reader != null) {
                              max_doc = secondary_reader.MaxDoc ();
                              for (int i = 0; i < max_doc; ++i) {

                                    if (secondary_reader.IsDeleted (i))
                                          continue;

                                    Document doc;
                                    doc = secondary_reader.Document (i);

                                    Uri uri;
                                    uri = GetUriFromDocument (doc);

                                    Hit hit;
                                    hit = (Hit) all_hits [uri];
                                    if (hit != null)
                                          AddPropertiesToHit (hit, doc, false);
                              }
                        }
                  } finally {
                        // Only release what was actually acquired.
                        if (primary_reader != null)
                              ReleaseReader (primary_reader);
                        if (secondary_reader != null)
                              ReleaseReader (secondary_reader);
                  }

                  return all_hits;
            }
      }
}

Generated by  Doxygen 1.6.0   Back to index