/**
 * Writes the given postings to the inverted-index files of a segment.
 *
 * <p>Three outputs are always created: <code>segment + ".frq"</code>
 * (frequencies), <code>segment + ".prx"</code> (positions) and a
 * <code>TermInfosWriter</code> (term dictionary). A
 * <code>TermVectorsWriter</code> is created lazily, the first time a posting
 * belongs to a field whose <code>FieldInfo.storeTermVector</code> is set.
 *
 * <p>For every posting, in array order, the method
 * <ol>
 *   <li>adds a dictionary entry holding the current file pointers of the
 *       frequency and position outputs,</li>
 *   <li>appends the frequency entry,</li>
 *   <li>appends the delta-encoded positions, and</li>
 *   <li>adds the term to the term vector writer if a term vector field is
 *       currently open.</li>
 * </ol>
 *
 * <p>All outputs that were opened are closed before the method returns,
 * whether or not writing succeeded. If writing succeeded but closing failed,
 * the first close failure is thrown with its original stack trace. If writing
 * itself failed, that primary exception propagates and any close failure is
 * dropped so that it cannot hide the real cause.
 *
 * @param postings the postings to write; processed in array order
 *                 (NOTE(review): presumably already sorted by term, as the
 *                 field-change detection below assumes postings of one field
 *                 are contiguous - confirm against the caller)
 * @param segment  base name used for the files created in the directory
 * @throws IOException if an output cannot be created or written, or if
 *                     writing succeeded and closing one of the outputs failed
 */
private final void writePostings(Posting[] postings, String segment)
    throws IOException {
  IndexOutput freq = null, prox = null;
  TermInfosWriter tis = null;
  TermVectorsWriter termVectorWriter = null;
  // Set only once the whole body has completed normally; consulted in the
  // finally block to decide whether a close failure may be thrown.
  boolean success = false;
  try {
    // open files for inverse index storage
    freq = directory.createOutput(segment + ".frq");
    prox = directory.createOutput(segment + ".prx");
    tis = new TermInfosWriter(directory, segment, fieldInfos,
                              termIndexInterval);
    TermInfo ti = new TermInfo();
    String currentField = null;

    for (int i = 0; i < postings.length; i++) {
      Posting posting = postings[i];

      // Add an entry to the dictionary with pointers to the prox and freq
      // files. The pointers are read BEFORE anything is written for this
      // term, so they mark where the term's data starts.
      // NOTE(review): the leading 1 is presumably the document frequency and
      // the trailing -1 presumably "no skip data" - confirm in TermInfo.set.
      ti.set(1, freq.getFilePointer(), prox.getFilePointer(), -1);
      tis.add(posting.term, ti);

      // add an entry to the freq file
      int postingFreq = posting.freq;
      if (postingFreq == 1) {          // optimize freq=1
        freq.writeVInt(1);             // set low bit of doc num.
      } else {
        freq.writeVInt(0);             // the document number
        freq.writeVInt(postingFreq);   // frequency in doc
      }

      // write positions, delta-encoded against the previous position
      int lastPosition = 0;
      int[] positions = posting.positions;
      for (int j = 0; j < postingFreq; j++) {
        int position = positions[j];
        prox.writeVInt(position - lastPosition);
        lastPosition = position;
      }

      // Check to see if we switched to a new field.
      // NOTE(review): this is a reference comparison, not equals(); it is
      // only correct if Term.field() returns interned strings - confirm.
      String termField = posting.term.field();
      if (currentField != termField) {
        // changing field - see if there is something to save
        currentField = termField;
        FieldInfo fi = fieldInfos.fieldInfo(currentField);
        if (fi.storeTermVector) {
          if (termVectorWriter == null) {
            // first term vector field encountered: create the writer lazily
            termVectorWriter =
                new TermVectorsWriter(directory, segment, fieldInfos);
            termVectorWriter.openDocument();
          }
          termVectorWriter.openField(currentField);
        } else if (termVectorWriter != null) {
          // new field stores no term vector: stop collecting terms
          termVectorWriter.closeField();
        }
      }
      if (termVectorWriter != null && termVectorWriter.isFieldOpen()) {
        termVectorWriter.addTerm(posting.term.text(), postingFreq, posting.positions, posting.offsets);
      }
    }
    if (termVectorWriter != null) {
      termVectorWriter.closeDocument();
    }
    success = true;
  } finally {
    // Make an effort to close all streams we can, remembering the first
    // exception encountered while closing.
    IOException keep = null;
    if (freq != null) {
      try {
        freq.close();
      } catch (IOException e) {
        if (keep == null) {
          keep = e;
        }
      }
    }
    if (prox != null) {
      try {
        prox.close();
      } catch (IOException e) {
        if (keep == null) {
          keep = e;
        }
      }
    }
    if (tis != null) {
      try {
        tis.close();
      } catch (IOException e) {
        if (keep == null) {
          keep = e;
        }
      }
    }
    if (termVectorWriter != null) {
      try {
        termVectorWriter.close();
      } catch (IOException e) {
        if (keep == null) {
          keep = e;
        }
      }
    }
    // Throw the close failure only when the body succeeded: throwing from a
    // finally block while another exception is in flight would discard that
    // primary exception. The exception is thrown as-is (no
    // fillInStackTrace()) so its trace still points at the failing close().
    if (success && keep != null) {
      throw keep;
    }
  }
}
'IT-Consultant' 카테고리의 다른 글
invertDocument(Tokenizes the fields of a document into Postings) (0) | 2008.10.29 |
---|---|
최종적으로 만들어진 Posting List를 어떻게 파일에 쓸까? (0) | 2008.10.29 |
Inverted Index Strategies (0) | 2008.10.29 |
Inverted Index Strategies (0) | 2008.10.29 |
TF, IDF 구현 (0) | 2008.10.29 |