본문 바로가기

IT-Consultant

색인구조

  private final void writePostings(Posting[] postings, String segment)
          throws IOException {
    IndexOutput freq = null, prox = null;
    TermInfosWriter tis = null;
    TermVectorsWriter termVectorWriter = null;
    try {
      //open files for inverse index storage
      freq = directory.createOutput(segment + ".frq");
      prox = directory.createOutput(segment + ".prx");
     
      tis = new TermInfosWriter(directory, segment, fieldInfos,
                                termIndexInterval);
      TermInfo ti = new TermInfo();
      String currentField = null;

      for (int i = 0; i < postings.length; i++) {
        Posting posting = postings[i];
       

        // add an entry to the dictionary with pointers to prox and freq files
        ti.set(1, freq.getFilePointer(), prox.getFilePointer(), -1);
        tis.add(posting.term, ti);

        // add an entry to the freq file
        int postingFreq = posting.freq;
        if (postingFreq == 1)      // optimize freq=1
          freq.writeVInt(1);     // set low bit of doc num.
        else {
          freq.writeVInt(0);     // the document number
          freq.writeVInt(postingFreq);     // frequency in doc
        }

        int lastPosition = 0;     // write positions
        int[] positions = posting.positions;
        for (int j = 0; j < postingFreq; j++) {    // use delta-encoding
          int position = positions[j];
          prox.writeVInt(position - lastPosition);
          lastPosition = position;
        }
        // check to see if we switched to a new field
        String termField = posting.term.field();
        if (currentField != termField) {
          // changing field - see if there is something to save
          currentField = termField;
          FieldInfo fi = fieldInfos.fieldInfo(currentField);
          if (fi.storeTermVector) {
            if (termVectorWriter == null) {
              termVectorWriter =
                new TermVectorsWriter(directory, segment, fieldInfos);
              termVectorWriter.openDocument();
            }
            termVectorWriter.openField(currentField);

          } else if (termVectorWriter != null) {
            termVectorWriter.closeField();
          }
        }
        if (termVectorWriter != null && termVectorWriter.isFieldOpen()) {
            termVectorWriter.addTerm(posting.term.text(), postingFreq, posting.positions, posting.offsets);
        }
      }
      if (termVectorWriter != null)
        termVectorWriter.closeDocument();
    } finally {
      // make an effort to close all streams we can but remember and re-throw
      // the first exception encountered in this process
      IOException keep = null;
      if (freq != null) try { freq.close(); } catch (IOException e) { if (keep == null) keep = e; }
      if (prox != null) try { prox.close(); } catch (IOException e) { if (keep == null) keep = e; }
      if (tis  != null) try {  tis.close(); } catch (IOException e) { if (keep == null) keep = e; }
      if (termVectorWriter  != null) try {  termVectorWriter.close(); } catch (IOException e) { if (keep == null) keep = e; }
      if (keep != null) throw (IOException) keep.fillInStackTrace();
    }
  }

  private final void writeNorms(String segment) throws IOException {
    for(int n = 0; n < fieldInfos.size(); n++){
      FieldInfo fi = fieldInfos.fieldInfo(n);
      if(fi.isIndexed && !fi.omitNorms){
        float norm = fieldBoosts[n] * similarity.lengthNorm(fi.name, fieldLengths[n]);
        IndexOutput norms = directory.createOutput(segment + ".f" + n);
        try {
          norms.writeByte(Similarity.encodeNorm(norm));
        } finally {
          norms.close();
        }
      }
    }
  }