Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

independent remote dictionary support. #678

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ public class IkAnalyzerProvider extends AbstractIndexAnalyzerProvider<IKAnalyzer
/**
 * Creates the provider and builds the {@code IKAnalyzer} it holds.
 *
 * @param indexSettings settings of the owning index; forwarded to the superclass, and
 *                      (in the three-argument call below) the source of the index name
 * @param env           environment handed to the {@code Configuration}
 * @param name          analyzer name, forwarded to the superclass
 * @param settings      analyzer settings, forwarded to the superclass and the {@code Configuration}
 * @param useSmart      value applied to the {@code Configuration} through {@code setUseSmart}
 */
public IkAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings,boolean useSmart) {
super(indexSettings, name, settings);

// NOTE(review): this is a diff view, so both sides of the change appear here.
// The two-argument call looks like the pre-change line; the three-argument call,
// which also passes the index name, looks like its replacement — confirm against the PR.
Configuration configuration=new Configuration(env,settings).setUseSmart(useSmart);
Configuration configuration = new Configuration(env, settings, indexSettings.getIndex().getName()).setUseSmart(useSmart);

// The analyzer is built from the configuration created above.
analyzer=new IKAnalyzer(configuration);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ public class IkTokenizerFactory extends AbstractTokenizerFactory {

/**
 * Creates the tokenizer factory and the {@code Configuration} it keeps.
 *
 * @param indexSettings settings of the owning index; forwarded to the superclass, and
 *                      (in the three-argument call below) the source of the index name
 * @param env           environment handed to the {@code Configuration}
 * @param name          tokenizer name; not used in the visible constructor body
 * @param settings      tokenizer settings, forwarded to the superclass and the {@code Configuration}
 */
public IkTokenizerFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, settings);
// NOTE(review): this is a diff view, so both sides of the change appear here.
// The two-argument call looks like the pre-change line; the three-argument call,
// which also passes the index name, looks like its replacement — confirm against the PR.
configuration=new Configuration(env,settings);
configuration=new Configuration(env,settings, indexSettings.getIndex().getName());
}

public static IkTokenizerFactory getIkTokenizerFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
Expand Down
19 changes: 15 additions & 4 deletions src/main/java/org/wltea/analyzer/cfg/Configuration.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,22 @@ public class Configuration {
// Whether lowercase processing is enabled
private boolean enableLowercase=true;

private String indexName;

private Dictionary dictionary;

/**
 * Builds a configuration from the environment and settings.
 *
 * Reads three string-valued flags from {@code settings} and treats each as enabled
 * only when its value equals "true": {@code use_smart} (default "false"),
 * {@code enable_lowercase} (default "true") and {@code enable_remote_dict} (default "true").
 *
 * NOTE(review): this is a diff view, so both versions of the constructor are interleaved
 * below. The two-argument signature ending in {@code Dictionary.initial(this)} looks like
 * the pre-change version; the three-argument signature that stores {@code indexName} and
 * creates a {@code Dictionary} looks like its replacement — confirm against the PR.
 *
 * @param env       environment stored on this configuration
 * @param settings  settings stored on this configuration and used to read the flags above
 * @param indexName index name stored on this configuration (three-argument version only)
 */
@Inject
public Configuration(Environment env,Settings settings) {
public Configuration(Environment env,Settings settings, String indexName) {
this.environment = env;
this.settings=settings;

this.useSmart = settings.get("use_smart", "false").equals("true");
this.enableLowercase = settings.get("enable_lowercase", "true").equals("true");
this.enableRemoteDict = settings.get("enable_remote_dict", "true").equals("true");

Dictionary.initial(this);

}
// indexName is assigned before the Dictionary is constructed with this configuration.
this.indexName = indexName;
// NOTE(review): one Dictionary is created per Configuration instance here; confirm the
// loading cost and memory footprint are acceptable when many configurations exist.
this.dictionary = new Dictionary(this);
}

public Path getConfigInPluginDir() {
return PathUtils
Expand Down Expand Up @@ -72,4 +75,12 @@ public boolean isEnableRemoteDict() {
public boolean isEnableLowercase() {
return enableLowercase;
}

/**
 * Returns the index name stored on this configuration.
 *
 * @return the value of the {@code indexName} field
 */
public String getIndexName() {
return indexName;
}

/**
 * Returns the dictionary held by this configuration.
 *
 * @return the value of the {@code dictionary} field
 */
public Dictionary getDictionary() {
return dictionary;
}
}
2 changes: 1 addition & 1 deletion src/main/java/org/wltea/analyzer/core/AnalyzeContext.java
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ Lexeme getNextLexeme(){
while(result != null){
// Merge numeral and quantifier lexemes
this.compound(result);
if(Dictionary.getSingleton().isStopWord(this.segmentBuff , result.getBegin() , result.getLength())){
if(cfg.getDictionary().isStopWord(this.segmentBuff , result.getBegin() , result.getLength())){
// It is a stop word, so move on to the next entry in the list
result = this.results.pollFirst();
}else{
Expand Down
10 changes: 6 additions & 4 deletions src/main/java/org/wltea/analyzer/core/CJKSegmenter.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
*/
package org.wltea.analyzer.core;

import org.wltea.analyzer.cfg.Configuration;
import org.wltea.analyzer.dic.Dictionary;
import org.wltea.analyzer.dic.Hit;

Expand All @@ -41,10 +42,11 @@ class CJKSegmenter implements ISegmenter {
static final String SEGMENTER_NAME = "CJK_SEGMENTER";
// Queue of segmentation hits awaiting processing
private List<Hit> tmpHits;
private Configuration configuration;


// Creates the segmenter with an empty list of pending hits.
// NOTE(review): this is a diff view, so both constructor headers appear here. The no-argument
// header looks like the pre-change line; the header taking a Configuration, which is then
// stored in the configuration field, looks like its replacement — confirm against the PR.
CJKSegmenter(){
CJKSegmenter(Configuration configuration){
this.tmpHits = new LinkedList<Hit>();
this.configuration = configuration;
}

/* (non-Javadoc)
Expand All @@ -58,7 +60,7 @@ public void analyze(AnalyzeContext context) {
// Process the queue of pending word-segment hits
Hit[] tmpArray = this.tmpHits.toArray(new Hit[this.tmpHits.size()]);
for(Hit hit : tmpArray){
hit = Dictionary.getSingleton().matchWithHit(context.getSegmentBuff(), context.getCursor() , hit);
hit = configuration.getDictionary().matchWithHit(context.getSegmentBuff(), context.getCursor() , hit);
if(hit.isMatch()){
// Output the current word
Lexeme newLexeme = new Lexeme(context.getBufferOffset() , hit.getBegin() , context.getCursor() - hit.getBegin() + 1 , Lexeme.TYPE_CNWORD);
Expand All @@ -77,7 +79,7 @@ public void analyze(AnalyzeContext context) {

//*********************************
// Then run a single-character match on the character at the current cursor position
Hit singleCharHit = Dictionary.getSingleton().matchInMainDict(context.getSegmentBuff(), context.getCursor(), 1);
Hit singleCharHit = configuration.getDictionary().matchInMainDict(context.getSegmentBuff(), context.getCursor(), 1);
if(singleCharHit.isMatch()){//首字成词
// Output the current word
Lexeme newLexeme = new Lexeme(context.getBufferOffset() , context.getCursor() , 1 , Lexeme.TYPE_CNWORD);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import java.util.List;
import java.util.Set;

import org.wltea.analyzer.cfg.Configuration;
import org.wltea.analyzer.dic.Dictionary;
import org.wltea.analyzer.dic.Hit;

Expand Down Expand Up @@ -65,12 +66,15 @@ class CN_QuantifierSegmenter implements ISegmenter{

// Queue of quantifier hits awaiting processing
private List<Hit> countHits;

private Configuration configuration;


// Creates the segmenter: sets nStart and nEnd to -1 and starts with an empty list of
// pending quantifier hits.
// NOTE(review): this is a diff view, so both constructor headers appear here. The no-argument
// header looks like the pre-change line; the header taking a Configuration, which is then
// stored in the configuration field, looks like its replacement — confirm against the PR.
CN_QuantifierSegmenter(){
CN_QuantifierSegmenter(Configuration configuration){
nStart = -1;
nEnd = -1;
this.countHits = new LinkedList<Hit>();
this.configuration = configuration;
}

/**
Expand Down Expand Up @@ -153,7 +157,7 @@ private void processCount(AnalyzeContext context){
// Process the queue of pending word-segment hits
Hit[] tmpArray = this.countHits.toArray(new Hit[this.countHits.size()]);
for(Hit hit : tmpArray){
hit = Dictionary.getSingleton().matchWithHit(context.getSegmentBuff(), context.getCursor() , hit);
hit = configuration.getDictionary().matchWithHit(context.getSegmentBuff(), context.getCursor() , hit);
if(hit.isMatch()){
// Output the current word
Lexeme newLexeme = new Lexeme(context.getBufferOffset() , hit.getBegin() , context.getCursor() - hit.getBegin() + 1 , Lexeme.TYPE_COUNT);
Expand All @@ -172,7 +176,7 @@ private void processCount(AnalyzeContext context){

//*********************************
// Run a single-character match on the character at the current cursor position
Hit singleCharHit = Dictionary.getSingleton().matchInQuantifierDict(context.getSegmentBuff(), context.getCursor(), 1);
Hit singleCharHit = configuration.getDictionary().matchInQuantifierDict(context.getSegmentBuff(), context.getCursor(), 1);
if(singleCharHit.isMatch()){//首字成量词词
// Output the current word
Lexeme newLexeme = new Lexeme(context.getBufferOffset() , context.getCursor() , 1 , Lexeme.TYPE_COUNT);
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/org/wltea/analyzer/core/IKSegmenter.java
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,9 @@ private List<ISegmenter> loadSegmenters(){
// Sub-segmenter that handles letters
segmenters.add(new LetterSegmenter());
// Sub-segmenter that handles Chinese numerals and quantifiers
segmenters.add(new CN_QuantifierSegmenter());
segmenters.add(new CN_QuantifierSegmenter(configuration));
// Sub-segmenter that handles Chinese words
segmenters.add(new CJKSegmenter());
segmenters.add(new CJKSegmenter(configuration));
return segmenters;
}

Expand Down
Loading