首页 > 其他分享> > TermQuery打分过程

TermQuery打分过程

2021-11-16 14:31:52 作者：互联网

org.apache.lucene.search.IndexSearcher

  protected void search(List<LeafReaderContext> leaves, Weight weight, Collector collector)
      throws IOException {

    // TODO: should we make this
    // threaded...?  the Collector could be sync'd?
    // always use single thread:
    for (LeafReaderContext ctx : leaves) { // search each subreader
      final LeafCollector leafCollector;
      try {
        leafCollector = collector.getLeafCollector(ctx);
      } catch (CollectionTerminatedException e) {
        // there is no doc of interest in this reader context
        // continue with the following leaf
        continue;
      }
      BulkScorer scorer = weight.bulkScorer(ctx);
      if (scorer != null) {
        try {
          scorer.score(leafCollector, ctx.reader().getLiveDocs());
        } catch (CollectionTerminatedException e) {
          // collection was terminated prematurely
          // continue with the following leaf
        }
      }
    }
  }

org.apache.lucene.search.Weight bulkScorer

  public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {

    Scorer scorer = scorer(context);
    if (scorer == null) {
      // No docs match
      return null;
    }

    // This impl always scores docs in order, so we can
    // ignore scoreDocsInOrder:
    return new DefaultBulkScorer(scorer);
  }

org.apache.lucene.search.TermQuery TermWeight scorer

    @Override
    public Scorer scorer(LeafReaderContext context) throws IOException {
      assert termStates == null || termStates.wasBuiltFor(ReaderUtil.getTopLevelContext(context)) : "The top-reader used to create Weight is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context);;
      final TermsEnum termsEnum = getTermsEnum(context);
      if (termsEnum == null) {
        return null;
      }
      LeafSimScorer scorer = new LeafSimScorer(simScorer, context.reader(), term.field(), scoreMode.needsScores());
      if (scoreMode == ScoreMode.TOP_SCORES) {
        return new TermScorer(this, termsEnum.impacts(PostingsEnum.FREQS), scorer);
      } else {
        return new TermScorer(this, termsEnum.postings(null, scoreMode.needsScores() ? PostingsEnum.FREQS : PostingsEnum.NONE), scorer);
      }
    }

protected static class DefaultBulkScorer extends BulkScorer {
    private final Scorer scorer;
    private final DocIdSetIterator iterator;
    private final TwoPhaseIterator twoPhase;

    /** Sole constructor. */
    public DefaultBulkScorer(Scorer scorer) {
      if (scorer == null) {
        throw new NullPointerException();
      }
      this.scorer = scorer;
      this.iterator = scorer.iterator();
      this.twoPhase = scorer.twoPhaseIterator();
    }

    @Override
    public long cost() {
      return iterator.cost();
    }

    @Override
    public int score(LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException {
      collector.setScorer(scorer);
      if (scorer.docID() == -1 && min == 0 && max == DocIdSetIterator.NO_MORE_DOCS) {
        scoreAll(collector, iterator, twoPhase, acceptDocs);
        return DocIdSetIterator.NO_MORE_DOCS;
      } else {
        int doc = scorer.docID();
        if (doc < min) {
          if (twoPhase == null) {
            doc = iterator.advance(min);
          } else {
            doc = twoPhase.approximation().advance(min);
          }
        }
        return scoreRange(collector, iterator, twoPhase, acceptDocs, doc, max);
      }
    }

    /** Specialized method to bulk-score a range of hits; we
     *  separate this from {@link #scoreAll} to help out
     *  hotspot.
     *  See <a href="https://issues.apache.org/jira/browse/LUCENE-5487">LUCENE-5487</a> */
    static int scoreRange(LeafCollector collector, DocIdSetIterator iterator, TwoPhaseIterator twoPhase,
        Bits acceptDocs, int currentDoc, int end) throws IOException {
      if (twoPhase == null) {
        while (currentDoc < end) {
          if (acceptDocs == null || acceptDocs.get(currentDoc)) {
            collector.collect(currentDoc);
          }
          currentDoc = iterator.nextDoc();
        }
        return currentDoc;
      } else {
        final DocIdSetIterator approximation = twoPhase.approximation();
        while (currentDoc < end) {
          if ((acceptDocs == null || acceptDocs.get(currentDoc)) && twoPhase.matches()) {
            collector.collect(currentDoc);
          }
          currentDoc = approximation.nextDoc();
        }
        return currentDoc;
      }
    }
    
    /** Specialized method to bulk-score all hits; we
     *  separate this from {@link #scoreRange} to help out
     *  hotspot.
     *  See <a href="https://issues.apache.org/jira/browse/LUCENE-5487">LUCENE-5487</a> */
    static void scoreAll(LeafCollector collector, DocIdSetIterator iterator, TwoPhaseIterator twoPhase, Bits acceptDocs) throws IOException {
      if (twoPhase == null) {
        for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) {
          if (acceptDocs == null || acceptDocs.get(doc)) {
            collector.collect(doc);
          }
        }
      } else {
        // The scorer has an approximation, so run the approximation first, then check acceptDocs, then confirm
        final DocIdSetIterator approximation = twoPhase.approximation();
        for (int doc = approximation.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = approximation.nextDoc()) {
          if ((acceptDocs == null || acceptDocs.get(doc)) && twoPhase.matches()) {
            collector.collect(doc);
          }
        }
      }
    }
  }

注意区分simScore 和 Score

simScore是Score的一个属性

Score是为一个倒排链打分的一个工具类

public final class LeafSimScorer {

  private final SimScorer scorer;
  private final NumericDocValues norms;

  /**
   * Sole constructor: Score documents of {@code reader} with {@code scorer}.
   */
  public LeafSimScorer(SimScorer scorer, LeafReader reader, String field, boolean needsScores) throws IOException {
    this.scorer = Objects.requireNonNull(scorer);
    norms = needsScores ? reader.getNormValues(field) : null;
  }

  /** Return the wrapped {@link SimScorer}. */
  public SimScorer getSimScorer() {
    return scorer;
  }

  private long getNormValue(int doc) throws IOException {
    if (norms != null) {
      boolean found = norms.advanceExact(doc);
      assert found;
      return norms.longValue();
    } else {
      return 1L; // default norm
    }
  }

  /** Score the provided document assuming the given term document frequency.
   *  This method must be called on non-decreasing sequences of doc ids.
   *  @see SimScorer#score(float, long) */
  public float score(int doc, float freq) throws IOException {
    return scorer.score(freq, getNormValue(doc));
  }

  /** Explain the score for the provided document assuming the given term document frequency.
   *  This method must be called on non-decreasing sequences of doc ids.
   *  @see SimScorer#explain(Explanation, long) */
  public Explanation explain(int doc, Explanation freqExpl) throws IOException {
    return scorer.explain(freqExpl, getNormValue(doc));
  }

}


/** Expert: A <code>Scorer</code> for documents matching a <code>Term</code>.
 */
final class TermScorer extends Scorer {
  private final PostingsEnum postingsEnum;
  private final ImpactsEnum impactsEnum;
  private final DocIdSetIterator iterator;
  private final LeafSimScorer docScorer;
  private final ImpactsDISI impactsDisi;

  /**
   * Construct a {@link TermScorer} that will iterate all documents.
   */
  TermScorer(Weight weight, PostingsEnum postingsEnum, LeafSimScorer docScorer) {
    super(weight);
    iterator = this.postingsEnum = postingsEnum;
    impactsEnum = new SlowImpactsEnum(postingsEnum);
    impactsDisi = new ImpactsDISI(impactsEnum, impactsEnum, docScorer.getSimScorer());
    this.docScorer = docScorer;
  }

  /**
   * Construct a {@link TermScorer} that will use impacts to skip blocks of
   * non-competitive documents.
   */
  TermScorer(Weight weight, ImpactsEnum impactsEnum, LeafSimScorer docScorer) {
    super(weight);
    postingsEnum = this.impactsEnum = impactsEnum;
    impactsDisi = new ImpactsDISI(impactsEnum, impactsEnum, docScorer.getSimScorer());
    iterator = impactsDisi;
    this.docScorer = docScorer;
  }

  @Override
  public int docID() {
    return postingsEnum.docID();
  }

  final int freq() throws IOException {
    return postingsEnum.freq();
  }

  @Override
  public DocIdSetIterator iterator() {
    return iterator;
  }

  @Override
  public float score() throws IOException {
    assert docID() != DocIdSetIterator.NO_MORE_DOCS;
    return docScorer.score(postingsEnum.docID(), postingsEnum.freq());
  }

  @Override
  public int advanceShallow(int target) throws IOException {
    return impactsDisi.advanceShallow(target);
  }

  @Override
  public float getMaxScore(int upTo) throws IOException {
    return impactsDisi.getMaxScore(upTo);
  }

  @Override
  public void setMinCompetitiveScore(float minScore) {
    impactsDisi.setMinCompetitiveScore(minScore);
  }

  /** Returns a string representation of this <code>TermScorer</code>. */
  @Override
  public String toString() { return "scorer(" + weight + ")[" + super.toString() + "]"; }

}

标签：return,final,iterator,doc,public,TermQuery,scorer,过程,打分
来源： https://blog.csdn.net/chuanyangwang/article/details/121354874