|
|
(相同用户的9个中间修订版本未显示) |
第1行: |
第1行: |
− | =check the detail of Lucene score= | + | =query = |
− | ==data==
| + | *[[multi query in multi field]] |
− | d0 [{如何,怎么}] {办理,办} {户口,户口本} # 到当地派出所办理 # 如何办理户口
| + | * [[improved search method by rewrited query]] |
− | d1 {办理,办} {户口,户口本} [{流程,步骤}] # 到当地派出所办理 # 如何办理户口
| + | |
− | d2 [{如何,怎么}] {办理,办} {身份证,身份} # 到当地派出所办理 # 如何办理身份证
| + | |
− | d3 {办理,办} {身份证} [{流程,步骤}] # 到当地派出所办理 # 如何办理身份证
| + | |
− | | + | |
− | ==搜索==
| + | |
− | query:"如何办理户口" => question:如何 question:办理户口
| + | |
− | ==result==
| + | |
− | doc=0 score=0.114656925 shardIndex=-1|0.114656925 = (MATCH) product of:
| + | |
− | 0.22931385 = (MATCH) sum of:
| + | |
− | 0.22931385 = (MATCH) weight(question:如何 in 0) [DefaultSimilarity], result of:
| + | |
− | 0.22931385 = score(doc=0,freq=1.0 = termFreq=1.0
| + | |
− | ), product of:
| + | |
− | 0.4748871 = queryWeight, product of:
| + | |
− | 1.287682 = idf(docFreq=2, maxDocs=4)
| + | |
− | 0.3687922 = queryNorm
| + | |
− | 0.48288077 = fieldWeight in 0, product of:
| + | |
− | 1.0 = tf(freq=1.0), with freq of:
| + | |
− | 1.0 = termFreq=1.0
| + | |
− | 1.287682 = idf(docFreq=2, maxDocs=4)
| + | |
− | 0.375 = fieldNorm(doc=0)
| + | |
− | 0.5 = coord(1/2)
| + | |
− | *详细计算流程 score(query,d0)
| + | |
− | *参考公式:[http://lucene.apache.org/core/4_0_0/core/org/apache/lucene/search/similarities/TFIDFSimilarity.html]
| + | |
− | [[文件:QQ截图20141128164958.png]]
| + | |
− | :* tf("如何" in d0)=sqrt{frequency}= sqrt{1}=1
| + | |
− | :* idf("如何")=<math>1+\ln\left(\frac{numDocs}{docFreq+1}\right)=1+\ln\left(\frac{4}{2+1}\right)=1.287682</math>
| + | |
− | :* "如何".getBoost()=1
| + | |
− | :* coord(如何,d0) : 0.5 = coord(1/2)
| + | |
− | coord(t,d)=overlap /maxOverlap .
| + | |
− | overlap - the number of query terms matched in the document
| + | |
− | maxOverlap - the total number of terms in the query
| + | |
− | :* queryNorm(q)= 1/sqrt(sumOfSquaredWeights)=1/sqrt(1*(idf("如何")^2+idf("办理户口")^2))=1/sqrt(1*(1.287682*1.287682+2.386*2.386))=0.3687.
| + | |
− | sumOfSquaredWeights = q.getBoost()*q.getBoost()*∑( idf(t) *t.getBoost() )^2
| + | |
− | | + | |
− | =multi =
| + | |
− | ==data==
| + | |
− | d0 [{如何,怎么}] {办理,办} {户口,户口本} # 到当地派出所办理 # 如何办理户口
| + | |
− | d1 {办理,办} {户口,户口本} [{流程,步骤}] # 到当地派出所办理 # 如何办理户口
| + | |
− | d2 [{如何,怎么}] {办理,办} {身份证,身份} # 到当地派出所办理 # 如何办理身份证
| + | |
− | d3 {办理,办} {身份证} [{流程,步骤}] # 到当地派出所办理 # 如何办理身份证
| + | |
− | ==搜索==
| + | |
− | code
| + | |
− | BooleanQuery query = new BooleanQuery();
| + | |
− | query.add(paternQuery, Occur.MUST); // or Occur.SHOULD if this clause is optional
| + | |
− | query.add(ansQuery, Occur.SHOULD); // or Occur.MUST if this clause is required
| + | |
− | query.add(sqQuery, Occur.SHOULD);
| + | |
− | search:
| + | |
− | +((question:如何 question:办理户口)^0.8) ((answer:如何 answer:办理户口)^0.2) ((standardq:如何 standardq:办理户口)^0.2)
| + | |
− | | + | |
− | ==result==
| + | |
− | doc=0 score=0.15459718 shardIndex=-1|0.1545972 = (MATCH) product of:
| + | |
− | 0.23189577 = (MATCH) sum of:[all]
| + | |
− | 0.108532876 = (MATCH) product of:[field:pattern]
| + | |
− | 0.21706575 = (MATCH) sum of:
| + | |
− | 0.21706575 = (MATCH) weight(question:如何 in 0) [DefaultSimilarity], result of:
| + | |
− | 0.21706575 = score(doc=0,freq=1.0 = termFreq=1.0
| + | |
− | ), product of:
| + | |
− | 0.44952247 = queryWeight, product of:
| + | |
− | 1.287682 = idf(docFreq=2, maxDocs=4)
| + | |
− | 0.3490943 = queryNorm
| + | |
− | 0.48288077 = fieldWeight in 0, product of:
| + | |
− | 1.0 = tf(freq=1.0), with freq of:
| + | |
− | 1.0 = termFreq=1.0
| + | |
− | 1.287682 = idf(docFreq=2, maxDocs=4)
| + | |
− | 0.375 = fieldNorm(doc=0)
| + | |
− | 0.5 = coord(1/2)
| + | |
− | 0.12336289 = (MATCH) sum of:[field:answer]
| + | |
− | 0.032918826 = (MATCH) weight(answer:如何 in 0) [DefaultSimilarity], result of:
| + | |
− | 0.032918826 = score(doc=0,freq=1.0 = termFreq=1.0
| + | |
− | ), product of:
| + | |
− | 0.06779904 = queryWeight, product of:
| + | |
− | 0.7768564 = idf(docFreq=4, maxDocs=4)
| + | |
− | 0.087273575 = queryNorm
| + | |
− | 0.48553526 = fieldWeight in 0, product of:
| + | |
− | 1.0 = tf(freq=1.0), with freq of:
| + | |
− | 1.0 = termFreq=1.0
| + | |
− | 0.7768564 = idf(docFreq=4, maxDocs=4)
| + | |
− | 0.625 = fieldNorm(doc=0)
| + | |
− | 0.090444066 = (MATCH) weight(answer:办理户口 in 0) [DefaultSimilarity], result of:
| + | |
− | 0.090444066 = score(doc=0,freq=1.0 = termFreq=1.0
| + | |
− | ), product of:
| + | |
− | 0.11238062 = queryWeight, product of:
| + | |
− | 1.287682 = idf(docFreq=2, maxDocs=4)
| + | |
− | 0.087273575 = queryNorm
| + | |
− | 0.8048013 = fieldWeight in 0, product of:
| + | |
− | 1.0 = tf(freq=1.0), with freq of:
| + | |
− | 1.0 = termFreq=1.0
| + | |
− | 1.287682 = idf(docFreq=2, maxDocs=4)
| + | |
− | 0.625 = fieldNorm(doc=0)
| + | |
− | 0.6666667 = coord(2/3)
| + | |