Logo Search packages:      
Sourcecode: beagle version File versions  Download package

SegmentReader.cs

/*
 * Copyright 2004 The Apache Software Foundation
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using System;
using Document = Lucene.Net.Documents.Document;
using Field = Lucene.Net.Documents.Field;
using DefaultSimilarity = Lucene.Net.Search.DefaultSimilarity;
using Directory = Lucene.Net.Store.Directory;
using IndexInput = Lucene.Net.Store.IndexInput;
using IndexOutput = Lucene.Net.Store.IndexOutput;
using BitVector = Lucene.Net.Util.BitVector;

namespace Lucene.Net.Index
{
      
      /// <version>  $Id: SegmentReader.cs,v 1.7 2006/10/02 17:09:00 joeshaw Exp $
      /// </version>
00031       public class SegmentReader : IndexReader
      {
            private System.String segment;
            
            internal FieldInfos fieldInfos;
            private FieldsReader fieldsReader;
            
            internal TermInfosReader tis;
            internal TermVectorsReader termVectorsReaderOrig = null;
            internal System.LocalDataStoreSlot termVectorsLocal = System.Threading.Thread.AllocateDataSlot();
            
            internal BitVector deletedDocs = null;
            private bool deletedDocsDirty = false;
            private bool normsDirty = false;
            private bool undeleteAll = false;
            
            internal IndexInput freqStream;
            internal IndexInput proxStream;
            
            // Compound File Reader when based on a compound file segment
            internal CompoundFileReader cfsReader = null;
            
        public FieldInfos FieldInfos
        {
            get {   return fieldInfos;  }
        }

            private class Norm
            {
                  private void  InitBlock(SegmentReader enclosingInstance)
                  {
                        this.enclosingInstance = enclosingInstance;
                  }
                  private SegmentReader enclosingInstance;
                  public SegmentReader Enclosing_Instance
                  {
                        get
                        {
                              return enclosingInstance;
                        }
                        
                  }
                  public Norm(SegmentReader enclosingInstance, IndexInput in_Renamed, int number)
                  {
                        InitBlock(enclosingInstance);
                        this.in_Renamed = in_Renamed;
                        this.number = number;
                  }
                  
                  public IndexInput in_Renamed;
                  public byte[] bytes;
                  public bool dirty;
                  public int number;
                  
                  public void  ReWrite()
                  {
                        // NOTE: norms are re-written in regular directory, not cfs
                        IndexOutput out_Renamed = Enclosing_Instance.Directory().CreateOutput(Enclosing_Instance.segment + ".tmp");
                        try
                        {
                              out_Renamed.WriteBytes(bytes, Enclosing_Instance.MaxDoc());
                        }
                        finally
                        {
                              out_Renamed.Close();
                        }
                        System.String fileName;
                        if (Enclosing_Instance.cfsReader == null)
                              fileName = Enclosing_Instance.segment + ".f" + number;
                        else
                        {
                              // use a different file name if we have compound format
                              fileName = Enclosing_Instance.segment + ".s" + number;
                        }
                        Enclosing_Instance.Directory().RenameFile(Enclosing_Instance.segment + ".tmp", fileName);
                        this.dirty = false;
                  }
            }
            
            private System.Collections.Hashtable norms = System.Collections.Hashtable.Synchronized(new System.Collections.Hashtable());
            
            /// <summary>The class which implements SegmentReader. </summary>
00113             private static System.Type IMPL;
            
            public SegmentReader() : base(null)
            {
            }
            
            public static SegmentReader Get(SegmentInfo si)
            {
                  return Get(si.dir, si, null, false, false);
            }
            
            public static SegmentReader Get(SegmentInfos sis, SegmentInfo si, bool closeDir)
            {
                  return Get(si.dir, si, sis, closeDir, true);
            }
            
            public static SegmentReader Get(Directory dir, SegmentInfo si, SegmentInfos sis, bool closeDir, bool ownDir)
            {
                  SegmentReader instance;
                  try
                  {
                        instance = (SegmentReader) System.Activator.CreateInstance(IMPL);
                  }
                  catch (System.Exception e)
                  {
                        throw new System.SystemException("cannot load SegmentReader class: " + e);
                  }
                  instance.Init(dir, sis, closeDir, ownDir);
                  instance.Initialize(si);
                  return instance;
            }
            
            private void  Initialize(SegmentInfo si)
            {
                  segment = si.name;
                  
                  // Use compound file directory for some files, if it exists
                  Directory cfsDir = Directory();
                  if (Directory().FileExists(segment + ".cfs"))
                  {
                        cfsReader = new CompoundFileReader(Directory(), segment + ".cfs");
                        cfsDir = cfsReader;
                  }
                  
                  // No compound file exists - use the multi-file format
                  fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
                  fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos);
                  
                  tis = new TermInfosReader(cfsDir, segment, fieldInfos);
                  
                  // NOTE: the bitvector is stored using the regular directory, not cfs
                  if (HasDeletions(si))
                        deletedDocs = new BitVector(Directory(), segment + ".del");
                  
                  // make sure that all index files have been read or are kept open
                  // so that if an index update removes them we'll still have them
                  freqStream = cfsDir.OpenInput(segment + ".frq");
                  proxStream = cfsDir.OpenInput(segment + ".prx");
                  OpenNorms(cfsDir);
                  
                  if (fieldInfos.HasVectors())
                  {
                        // open term vector files only as needed
                        termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos);
                  }
            }
            
        /*  Leaving this here will cause a memory leak under .NET 1.1
        ~SegmentReader()
            {
                  // patch for pre-1.4.2 JVMs, whose ThreadLocals leak
                  //System.Threading.Thread.SetData(termVectorsLocal, null);
            }
            */
            
00188             protected internal override void  DoCommit()
            {
                  if (deletedDocsDirty)
                  {
                        // re-write deleted
                        deletedDocs.Write(Directory(), segment + ".tmp");
                        Directory().RenameFile(segment + ".tmp", segment + ".del");
                  }
                  if (undeleteAll && Directory().FileExists(segment + ".del"))
                  {
                        Directory().DeleteFile(segment + ".del");
                  }
                  if (normsDirty)
                  {
                        // re-write norms
                        System.Collections.IEnumerator values = norms.Values.GetEnumerator();
                        while (values.MoveNext())
                        {
                              Norm norm = (Norm) values.Current;
                              if (norm.dirty)
                              {
                                    norm.ReWrite();
                              }
                        }
                  }
                  deletedDocsDirty = false;
                  normsDirty = false;
                  undeleteAll = false;
            }
            
00218             protected internal override void  DoClose()
            {
                  fieldsReader.Close();
                  tis.Close();
                  
                  if (freqStream != null)
                        freqStream.Close();
                  if (proxStream != null)
                        proxStream.Close();
                  
                  CloseNorms();
                  
                  if (termVectorsReaderOrig != null)
                        termVectorsReaderOrig.Close();
                  
                  if (cfsReader != null)
                        cfsReader.Close();
            }
            
            internal static bool HasDeletions(SegmentInfo si)
            {
                  return si.dir.FileExists(si.name + ".del");
            }
            
00242             public override bool HasDeletions()
            {
                  return deletedDocs != null;
            }
            
            
            internal static bool UsesCompoundFile(SegmentInfo si)
            {
                  return si.dir.FileExists(si.name + ".cfs");
            }
            
            internal static bool HasSeparateNorms(SegmentInfo si)
            {
                  System.String[] result = si.dir.List();
                  System.String pattern = si.name + ".s";
                  int patternLength = pattern.Length;
                  for (int i = 0; i < result.Length; i++)
                  {
                        if (result[i].StartsWith(pattern) && System.Char.IsDigit(result[i][patternLength]))
                              return true;
                  }
                  return false;
            }
            
00266             protected internal override void  DoDelete(int docNum)
            {
                  if (deletedDocs == null)
                        deletedDocs = new BitVector(MaxDoc());
                  deletedDocsDirty = true;
                  undeleteAll = false;
                  deletedDocs.Set(docNum);
            }
            
00275             protected internal override void  DoUndeleteAll()
            {
                  deletedDocs = null;
                  deletedDocsDirty = false;
                  undeleteAll = true;
            }
            
            internal virtual System.Collections.ArrayList Files()
            {
                  System.Collections.ArrayList files = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(16));
                  
                  for (int i = 0; i < IndexFileNames.INDEX_EXTENSIONS.Length; i++)
                  {
                        System.String name = segment + "." + IndexFileNames.INDEX_EXTENSIONS[i];
                        if (Directory().FileExists(name))
                              files.Add(name);
                  }
                  
                  for (int i = 0; i < fieldInfos.Size(); i++)
                  {
                        FieldInfo fi = fieldInfos.FieldInfo(i);
                        if (fi.isIndexed && !fi.omitNorms)
                        {
                              System.String name;
                              if (cfsReader == null)
                                    name = segment + ".f" + i;
                              else
                                    name = segment + ".s" + i;
                              if (Directory().FileExists(name))
                                    files.Add(name);
                        }
                  }
                  return files;
            }
            
00310             public override TermEnum Terms()
            {
                  return tis.Terms();
            }
            
00315             public override TermEnum Terms(Term t)
            {
                  return tis.Terms(t);
            }
            
00320             public override Document Document(int n)
            {
                  lock (this)
                  {
                        if (IsDeleted(n))
                              throw new System.ArgumentException("attempt to access a deleted document");
                        return fieldsReader.Doc(n);
                  }
            }
            
00330             public override bool IsDeleted(int n)
            {
                  lock (this)
                  {
                        return (deletedDocs != null && deletedDocs.Get(n));
                  }
            }
            
00338             public override TermDocs TermDocs()
            {
                  return new SegmentTermDocs(this);
            }
            
00343             public override TermPositions TermPositions()
            {
                  return new SegmentTermPositions(this);
            }
            
00348             public override int DocFreq(Term t)
            {
                  TermInfo ti = tis.Get(t);
                  if (ti != null)
                        return ti.docFreq;
                  else
                        return 0;
            }
            
00357             public override int NumDocs()
            {
                  int n = MaxDoc();
                  if (deletedDocs != null)
                        n -= deletedDocs.Count();
                  return n;
            }
            
00365             public override int MaxDoc()
            {
                  return fieldsReader.Size();
            }
            
            /// <seealso cref="IndexReader.GetFieldNames()">
            /// </seealso>
            /// <deprecated>  Replaced by {@link #GetFieldNames (IndexReader.FieldOption fldOption)}
            /// </deprecated>
00374             public override System.Collections.ICollection GetFieldNames()
            {
                  // maintain a unique set of field names
                  System.Collections.Hashtable fieldSet = new System.Collections.Hashtable();
                  for (int i = 0; i < fieldInfos.Size(); i++)
                  {
                        FieldInfo fi = fieldInfos.FieldInfo(i);
                        fieldSet.Add(fi.name, fi.name);
                  }
                  return fieldSet;
            }
            
            /// <seealso cref="IndexReader.GetFieldNames(boolean)">
            /// </seealso>
            /// <deprecated>  Replaced by {@link #GetFieldNames (IndexReader.FieldOption fldOption)}
            /// </deprecated>
00390             public override System.Collections.ICollection GetFieldNames(bool indexed)
            {
                  // maintain a unique set of field names
                  System.Collections.Hashtable fieldSet = new System.Collections.Hashtable();
                  for (int i = 0; i < fieldInfos.Size(); i++)
                  {
                        FieldInfo fi = fieldInfos.FieldInfo(i);
                        if (fi.isIndexed == indexed)
                              fieldSet.Add(fi.name, fi.name);
                  }
                  return fieldSet;
            }
            
            /// <seealso cref="IndexReader.GetIndexedFieldNames(Field.TermVector tvSpec)">
            /// </seealso>
            /// <deprecated>  Replaced by {@link #GetFieldNames (IndexReader.FieldOption fldOption)}
            /// </deprecated>
00407             public override System.Collections.ICollection GetIndexedFieldNames(Field.TermVector tvSpec)
            {
                  bool storedTermVector;
                  bool storePositionWithTermVector;
                  bool storeOffsetWithTermVector;
                  
                  if (tvSpec == Field.TermVector.NO)
                  {
                        storedTermVector = false;
                        storePositionWithTermVector = false;
                        storeOffsetWithTermVector = false;
                  }
                  else if (tvSpec == Field.TermVector.YES)
                  {
                        storedTermVector = true;
                        storePositionWithTermVector = false;
                        storeOffsetWithTermVector = false;
                  }
                  else if (tvSpec == Field.TermVector.WITH_POSITIONS)
                  {
                        storedTermVector = true;
                        storePositionWithTermVector = true;
                        storeOffsetWithTermVector = false;
                  }
                  else if (tvSpec == Field.TermVector.WITH_OFFSETS)
                  {                                                                           
                        storedTermVector = true;
                        storePositionWithTermVector = false;
                        storeOffsetWithTermVector = true;
                  }
                  else if (tvSpec == Field.TermVector.WITH_POSITIONS_OFFSETS)
                  {
                        storedTermVector = true;
                        storePositionWithTermVector = true;
                        storeOffsetWithTermVector = true;
                  }
                  else
                  {
                        throw new System.ArgumentException("unknown termVector parameter " + tvSpec);
                  }
                  
                  // maintain a unique set of field names
                  System.Collections.Hashtable fieldSet = new System.Collections.Hashtable();
                  for (int i = 0; i < fieldInfos.Size(); i++)
                  {
                        FieldInfo fi = fieldInfos.FieldInfo(i);
                        if (fi.isIndexed && fi.storeTermVector == storedTermVector && fi.storePositionWithTermVector == storePositionWithTermVector && fi.storeOffsetWithTermVector == storeOffsetWithTermVector)
                        {
                              fieldSet.Add(fi.name, fi.name);
                        }
                  }
                  return fieldSet;
            }
            
            /// <seealso cref="IndexReader.GetFieldNames(IndexReader.FieldOption fldOption)">
            /// </seealso>
00463             public override System.Collections.ICollection GetFieldNames(IndexReader.FieldOption fieldOption)
            {
                  System.Collections.Hashtable fieldSet = new System.Collections.Hashtable();
                  for (int i = 0; i < fieldInfos.Size(); i++)
                  {
                        FieldInfo fi = fieldInfos.FieldInfo(i);
                        if (fieldOption == IndexReader.FieldOption.ALL)
                        {
                              fieldSet.Add(fi.name, fi.name);
                        }
                        else if (!fi.isIndexed && fieldOption == IndexReader.FieldOption.UNINDEXED)
                        {
                              fieldSet.Add(fi.name, fi.name);
                        }
                        else if (fi.isIndexed && fieldOption == IndexReader.FieldOption.INDEXED)
                        {
                              fieldSet.Add(fi.name, fi.name);
                        }
                        else if (fi.isIndexed && fi.storeTermVector == false && fieldOption == IndexReader.FieldOption.INDEXED_NO_TERMVECTOR)
                        {
                              fieldSet.Add(fi.name, fi.name);
                        }
                        else if (fi.storeTermVector == true && fi.storePositionWithTermVector == false && fi.storeOffsetWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR)
                        {
                              fieldSet.Add(fi.name, fi.name);
                        }
                        else if (fi.isIndexed && fi.storeTermVector && fieldOption == IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR)
                        {
                              fieldSet.Add(fi.name, fi.name);
                        }
                        else if (fi.storePositionWithTermVector && fi.storeOffsetWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION)
                        {
                              fieldSet.Add(fi.name, fi.name);
                        }
                        else if (fi.storeOffsetWithTermVector && fi.storePositionWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET)
                        {
                              fieldSet.Add(fi.name, fi.name);
                        }
                        else if ((fi.storeOffsetWithTermVector && fi.storePositionWithTermVector) && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET)
                        {
                              fieldSet.Add(fi.name, fi.name);
                        }
                  }
                  return fieldSet;
            }
            
            
00510             public override bool HasNorms(System.String field)
            {
                  lock (this)
                  {
                        return norms.ContainsKey(field);
                  }
            }
            
            internal static byte[] CreateFakeNorms(int size)
            {
            byte[] ones = new byte[size];
            byte val = DefaultSimilarity.EncodeNorm(1.0f);
            for (int index = 0; index < size; index++)
                ones.SetValue(val, index);

            return ones;
            }
            
            private byte[] ones;
            private byte[] FakeNorms()
            {
                  if (ones == null)
                        ones = CreateFakeNorms(MaxDoc());
                  return ones;
            }
            
            // can return null if norms aren't stored
            protected internal virtual byte[] GetNorms(System.String field)
            {
                  lock (this)
                  {
                        Norm norm = (Norm) norms[field];
                        if (norm == null)
                              return null; // not indexed, or norms not stored
                        
                        if (norm.bytes == null)
                        {
                              // value not yet read
                              byte[] bytes = new byte[MaxDoc()];
                              Norms(field, bytes, 0);
                              norm.bytes = bytes; // cache it
                        }
                        return norm.bytes;
                  }
            }
            
            // returns fake norms if norms aren't available
00557             public override byte[] Norms(System.String field)
            {
                  lock (this)
                  {
                        byte[] bytes = GetNorms(field);
                        if (bytes == null)
                              bytes = FakeNorms();
                        return bytes;
                  }
            }
            
00568             protected internal override void  DoSetNorm(int doc, System.String field, byte value_Renamed)
            {
                  Norm norm = (Norm) norms[field];
                  if (norm == null)
                  // not an indexed field
                        return ;
                  norm.dirty = true; // mark it dirty
                  normsDirty = true;
                  
                  Norms(field)[doc] = value_Renamed; // set the value
            }
            
            /// <summary>Read norms into a pre-allocated array. </summary>
00581             public override void  Norms(System.String field, byte[] bytes, int offset)
            {
                  lock (this)
                  {
                        
                        Norm norm = (Norm) norms[field];
                        if (norm == null)
                        {
                              Array.Copy(FakeNorms(), 0, bytes, offset, MaxDoc());
                              return ;
                        }
                        
                        if (norm.bytes != null)
                        {
                              // can copy from cache
                              Array.Copy(norm.bytes, 0, bytes, offset, MaxDoc());
                              return ;
                        }
                        
                        IndexInput normStream = (IndexInput) norm.in_Renamed.Clone();
                        try
                        {
                              // read from disk
                              normStream.Seek(0);
                              normStream.ReadBytes(bytes, offset, MaxDoc());
                        }
                        finally
                        {
                              normStream.Close();
                        }
                  }
            }
            
            
            private void  OpenNorms(Directory cfsDir)
            {
                  for (int i = 0; i < fieldInfos.Size(); i++)
                  {
                        FieldInfo fi = fieldInfos.FieldInfo(i);
                        if (fi.isIndexed && !fi.omitNorms)
                        {
                              // look first if there are separate norms in compound format
                              System.String fileName = segment + ".s" + fi.number;
                              Directory d = Directory();
                              if (!d.FileExists(fileName))
                              {
                                    fileName = segment + ".f" + fi.number;
                                    d = cfsDir;
                              }
                              norms[fi.name] = new Norm(this, d.OpenInput(fileName), fi.number);
                        }
                  }
            }
            
            private void  CloseNorms()
            {
                  lock (norms.SyncRoot)
                  {
                        System.Collections.IEnumerator enumerator = norms.Values.GetEnumerator();
                        while (enumerator.MoveNext())
                        {
                              Norm norm = (Norm) enumerator.Current;
                              norm.in_Renamed.Close();
                        }
                  }
            }
            
            /// <summary> Create a clone from the initial TermVectorsReader and store it in the ThreadLocal.</summary>
            /// <returns> TermVectorsReader
            /// </returns>
00651             private TermVectorsReader GetTermVectorsReader()
            {
                  TermVectorsReader tvReader = (TermVectorsReader) System.Threading.Thread.GetData(termVectorsLocal);
                  if (tvReader == null)
                  {
                        tvReader = (TermVectorsReader) termVectorsReaderOrig.Clone();
                        System.Threading.Thread.SetData(termVectorsLocal, tvReader);
                  }
                  return tvReader;
            }
            
            /// <summary>Return a term frequency vector for the specified document and field. The
            /// vector returned contains term numbers and frequencies for all terms in
            /// the specified field of this document, if the field had storeTermVector
            /// flag set.  If the flag was not set, the method returns null.
            /// </summary>
            /// <throws>  IOException </throws>
00668             public override TermFreqVector GetTermFreqVector(int docNumber, System.String field)
            {
                  // Check if this field is invalid or has no stored term vector
                  FieldInfo fi = fieldInfos.FieldInfo(field);
                  if (fi == null || !fi.storeTermVector || termVectorsReaderOrig == null)
                        return null;
                  
                  TermVectorsReader termVectorsReader = GetTermVectorsReader();
                  if (termVectorsReader == null)
                        return null;
                  
                  return termVectorsReader.Get(docNumber, field);
            }
            
            
            /// <summary>Return an array of term frequency vectors for the specified document.
            /// The array contains a vector for each vectorized field in the document.
            /// Each vector vector contains term numbers and frequencies for all terms
            /// in a given vectorized field.
            /// If no such fields existed, the method returns null.
            /// </summary>
            /// <throws>  IOException </throws>
00690             public override TermFreqVector[] GetTermFreqVectors(int docNumber)
            {
                  if (termVectorsReaderOrig == null)
                        return null;
                  
                  TermVectorsReader termVectorsReader = GetTermVectorsReader();
                  if (termVectorsReader == null)
                        return null;
                  
                  return termVectorsReader.Get(docNumber);
            }

        static SegmentReader()
            {
                  {
                        try
                        {
                    System.String name = SupportClass.AppSettings.Get("Lucene.Net.SegmentReader.class", typeof(SegmentReader).FullName);
                              IMPL = System.Type.GetType(name);
                        }
                        catch (System.Security.SecurityException)
                        {
                              try
                              {
                                    IMPL = System.Type.GetType(typeof(SegmentReader).FullName);
                              }
                              catch (System.Exception e)
                              {
                                    throw new System.SystemException("cannot load default SegmentReader class: " + e);
                              }
                        }
                catch (System.Exception e)
                {
                    throw new System.SystemException("cannot load SegmentReader class: " + e);
                }
            }
            }
      }
}

Generated by  Doxygen 1.6.0   Back to index