infinilabs · Serendo · Apr 29, 2020
diff --git a/src/main/java/org/wltea/analyzer/cfg/Configuration.java b/src/main/java/org/wltea/analyzer/cfg/Configuration.java
@@ -23,6 +23,8 @@ public class Configuration {
 
 	//是否启用远程词典加载
 	private boolean enableRemoteDict=false;
+	// Name of the custom dictionary, read from the "custom_dict_name" setting; empty when not configured
+	private String remoteDictName="";
 
 	//是否启用小写处理
 	private boolean enableLowercase=true;
@@ -36,6 +38,7 @@ public Configuration(Environment env,Settings settings) {
 		this.useSmart = settings.get("use_smart", "false").equals("true");
 		this.enableLowercase = settings.get("enable_lowercase", "true").equals("true");
 		this.enableRemoteDict = settings.get("enable_remote_dict", "true").equals("true");
+		this.remoteDictName = settings.get("custom_dict_name", "");
 
 		Dictionary.initial(this);
 
@@ -68,6 +71,9 @@ public Settings getSettings() {
 	public boolean isEnableRemoteDict() {
 		return enableRemoteDict;
 	}
+	public String getRemoteDictName() {
+		return remoteDictName;
+	}
 
 	public boolean isEnableLowercase() {
 		return enableLowercase;

diff --git a/src/main/java/org/wltea/analyzer/core/AnalyzeContext.java b/src/main/java/org/wltea/analyzer/core/AnalyzeContext.java
@@ -322,7 +322,7 @@ Lexeme getNextLexeme(){
 		while(result != null){
     		//数量词合并
     		this.compound(result);
-    		if(Dictionary.getSingleton().isStopWord(this.segmentBuff ,  result.getBegin() , result.getLength())){
+    		if(Dictionary.getDictionary(cfg.getRemoteDictName()).isStopWord(this.segmentBuff ,  result.getBegin() , result.getLength())){
        			//是停止词继续取列表的下一个
     			result = this.results.pollFirst(); 				
     		}else{

diff --git a/src/main/java/org/wltea/analyzer/core/CJKSegmenter.java b/src/main/java/org/wltea/analyzer/core/CJKSegmenter.java
@@ -25,6 +25,7 @@
  */
 package org.wltea.analyzer.core;
 
+import org.wltea.analyzer.cfg.Configuration;
 import org.wltea.analyzer.dic.Dictionary;
 import org.wltea.analyzer.dic.Hit;
 
@@ -39,12 +40,14 @@ class CJKSegmenter implements ISegmenter {
 
 	//子分词器标签
 	static final String SEGMENTER_NAME = "CJK_SEGMENTER";
+	private final Configuration cfg;
 	//待处理的分词hit队列
 	private List<Hit> tmpHits;
 
 
-	CJKSegmenter(){
+	CJKSegmenter(Configuration cfg){
 		this.tmpHits = new LinkedList<Hit>();
+		this.cfg = cfg;
 	}
 
 	/* (non-Javadoc)
@@ -58,7 +61,7 @@ public void analyze(AnalyzeContext context) {
 				//处理词段队列
 				Hit[] tmpArray = this.tmpHits.toArray(new Hit[this.tmpHits.size()]);
 				for(Hit hit : tmpArray){
-					hit = Dictionary.getSingleton().matchWithHit(context.getSegmentBuff(), context.getCursor() , hit);
+					hit = Dictionary.getDictionary(cfg.getRemoteDictName()).matchWithHit(context.getSegmentBuff(), context.getCursor() , hit);
 					if(hit.isMatch()){
 						//输出当前的词
 						Lexeme newLexeme = new Lexeme(context.getBufferOffset() , hit.getBegin() , context.getCursor() - hit.getBegin() + 1 , Lexeme.TYPE_CNWORD);
@@ -77,7 +80,7 @@ public void analyze(AnalyzeContext context) {
 
 			//*********************************
 			//再对当前指针位置的字符进行单字匹配
-			Hit singleCharHit = Dictionary.getSingleton().matchInMainDict(context.getSegmentBuff(), context.getCursor(), 1);
+			Hit singleCharHit = Dictionary.getDictionary(cfg.getRemoteDictName()).matchInMainDict(context.getSegmentBuff(), context.getCursor(), 1);
 			if(singleCharHit.isMatch()){//首字成词
 				//输出当前的词
 				Lexeme newLexeme = new Lexeme(context.getBufferOffset() , context.getCursor() , 1 , Lexeme.TYPE_CNWORD);

diff --git a/src/main/java/org/wltea/analyzer/core/CN_QuantifierSegmenter.java b/src/main/java/org/wltea/analyzer/core/CN_QuantifierSegmenter.java
@@ -29,6 +29,7 @@
 import java.util.List;
 import java.util.Set;
 
+import org.wltea.analyzer.cfg.Configuration;
 import org.wltea.analyzer.dic.Dictionary;
 import org.wltea.analyzer.dic.Hit;
 
@@ -50,7 +51,9 @@ class CN_QuantifierSegmenter implements ISegmenter{
 			ChnNumberChars.add(nChar);
 		}
 	}
-
+
+	private final Configuration cfg;
+
 	/*
 	 * 词元的开始位置，
 	 * 同时作为子分词器状态标识
@@ -67,10 +70,11 @@ class CN_QuantifierSegmenter implements ISegmenter{
 	private List<Hit> countHits;
 
 
-	CN_QuantifierSegmenter(){
+	CN_QuantifierSegmenter(Configuration cfg){
 		nStart = -1;
 		nEnd = -1;
 		this.countHits  = new LinkedList<Hit>();
+		this.cfg = cfg;
 	}
 
 	/**
@@ -153,7 +157,7 @@ private void processCount(AnalyzeContext context){
 				//处理词段队列
 				Hit[] tmpArray = this.countHits.toArray(new Hit[this.countHits.size()]);
 				for(Hit hit : tmpArray){
-					hit = Dictionary.getSingleton().matchWithHit(context.getSegmentBuff(), context.getCursor() , hit);
+					hit = Dictionary.getDictionary(cfg.getRemoteDictName()).matchWithHit(context.getSegmentBuff(), context.getCursor() , hit);
 					if(hit.isMatch()){
 						//输出当前的词
 						Lexeme newLexeme = new Lexeme(context.getBufferOffset() , hit.getBegin() , context.getCursor() - hit.getBegin() + 1 , Lexeme.TYPE_COUNT);
@@ -172,7 +176,7 @@ private void processCount(AnalyzeContext context){
 
 			//*********************************
 			//对当前指针位置的字符进行单字匹配
-			Hit singleCharHit = Dictionary.getSingleton().matchInQuantifierDict(context.getSegmentBuff(), context.getCursor(), 1);
+			Hit singleCharHit = Dictionary.getDictionary(cfg.getRemoteDictName()).matchInQuantifierDict(context.getSegmentBuff(), context.getCursor(), 1);
 			if(singleCharHit.isMatch()){//首字成量词词
 				//输出当前的词
 				Lexeme newLexeme = new Lexeme(context.getBufferOffset() , context.getCursor() , 1 , Lexeme.TYPE_COUNT);

diff --git a/src/main/java/org/wltea/analyzer/core/IKSegmenter.java b/src/main/java/org/wltea/analyzer/core/IKSegmenter.java
@@ -79,9 +79,9 @@ private List<ISegmenter> loadSegmenters(){
 		//处理字母的子分词器
 		segmenters.add(new LetterSegmenter()); 
 		//处理中文数量词的子分词器
-		segmenters.add(new CN_QuantifierSegmenter());
+		segmenters.add(new CN_QuantifierSegmenter(this.configuration));
 		//处理中文词的子分词器
-		segmenters.add(new CJKSegmenter());
+		segmenters.add(new CJKSegmenter(this.configuration));
 		return segmenters;
 	}
 

diff --git a/src/main/java/org/wltea/analyzer/dic/Dictionary.java b/src/main/java/org/wltea/analyzer/dic/Dictionary.java
@@ -67,7 +67,7 @@ public class Dictionary {
 	/*
 	 * 词典单子实例
 	 */
-	private static Dictionary singleton;
+	private static Map<String, Dictionary> dictionaryMap = new java.util.concurrent.ConcurrentHashMap<String, Dictionary>(); // read without a lock by getDictionary() while initial() inserts
 
 	private DictSegment _MainDict;
 
@@ -143,26 +143,25 @@ private String getProperty(String key){
 	 * @return Dictionary
 	 */
 	public static synchronized void initial(Configuration cfg) {
-		if (singleton == null) {
+		if (!dictionaryMap.containsKey(cfg.getRemoteDictName())) {
 			synchronized (Dictionary.class) {
-				if (singleton == null) {
-
-					singleton = new Dictionary(cfg);
-					singleton.loadMainDict();
-					singleton.loadSurnameDict();
-					singleton.loadQuantifierDict();
-					singleton.loadSuffixDict();
-					singleton.loadPrepDict();
-					singleton.loadStopWordDict();
-
+				if (!dictionaryMap.containsKey(cfg.getRemoteDictName())) {
+					Dictionary newDict = new Dictionary(cfg);
+					newDict.loadMainDict();
+					newDict.loadSurnameDict();
+					newDict.loadQuantifierDict();
+					newDict.loadSuffixDict();
+					newDict.loadPrepDict();
+					newDict.loadStopWordDict();
+					dictionaryMap.put(cfg.getRemoteDictName(), newDict);
 					if(cfg.isEnableRemoteDict()){
 						// 建立监控线程
-						for (String location : singleton.getRemoteExtDictionarys()) {
+						for (String location : dictionaryMap.get(cfg.getRemoteDictName()).getRemoteExtDictionarys()) {
 							// 10 秒是初始延迟可以修改的 60是间隔时间 单位秒
-							pool.scheduleAtFixedRate(new Monitor(location), 10, 60, TimeUnit.SECONDS);
+							pool.scheduleAtFixedRate(new Monitor(location, cfg.getRemoteDictName()), 10, 60, TimeUnit.SECONDS);
 						}
-						for (String location : singleton.getRemoteExtStopWordDictionarys()) {
-							pool.scheduleAtFixedRate(new Monitor(location), 10, 60, TimeUnit.SECONDS);
+						for (String location : dictionaryMap.get(cfg.getRemoteDictName()).getRemoteExtStopWordDictionarys()) {
+							pool.scheduleAtFixedRate(new Monitor(location, cfg.getRemoteDictName()), 10, 60, TimeUnit.SECONDS);
 						}
 					}
 
@@ -241,6 +240,9 @@ private List<String> getRemoteExtDictionarys() {
 			String[] filePaths = remoteExtDictCfg.split(";");
 			for (String filePath : filePaths) {
 				if (filePath != null && !"".equals(filePath.trim())) {
+					if (!this.configuration.getRemoteDictName().isEmpty()) { // compare content; != on String compares references
+						filePath += "/" + this.configuration.getRemoteDictName();
+					}
 					remoteExtDictFiles.add(filePath);
 
 				}
@@ -274,6 +276,9 @@ private List<String> getRemoteExtStopWordDictionarys() {
 			String[] filePaths = remoteExtStopWordDictCfg.split(";");
 			for (String filePath : filePaths) {
 				if (filePath != null && !"".equals(filePath.trim())) {
+					if (!this.configuration.getRemoteDictName().isEmpty()) { // compare content; != on String compares references
+						filePath += "/" + this.configuration.getRemoteDictName();
+					}
 					remoteExtStopWordDictFiles.add(filePath);
 
 				}
@@ -287,16 +292,27 @@ private String getDictRoot() {
 	}
 
 
+//	/**
+//	 * 获取词典单子实例
+//	 *
+//	 * @return Dictionary 单例对象
+//	 */
+//	public static Dictionary getSingleton() {
+//		if (singleton == null) {
+//			throw new IllegalStateException("词典尚未初始化，请先调用initial方法");
+//		}
+//		return singleton;
+//	}
 	/**
-	 * 获取词典单子实例
-	 * 
-	 * @return Dictionary 单例对象
+	 * Returns the dictionary registered under the given name.
+	 * @param dictName key under which initial() stored the dictionary (the configuration's remote dict name)
+	 * @return the registered Dictionary; an IllegalStateException is thrown when no entry exists for dictName
 	 */
-	public static Dictionary getSingleton() {
-		if (singleton == null) {
+	public static Dictionary getDictionary(String dictName) {
+		if (!dictionaryMap.containsKey(dictName)) {
 			throw new IllegalStateException("ik dict has not been initialized yet, please call initial method first.");
 		}
-		return singleton;
+		return dictionaryMap.get(dictName);
 	}
 
 
@@ -311,7 +327,7 @@ public void addWords(Collection<String> words) {
 			for (String word : words) {
 				if (word != null) {
 					// 批量加载词条到主内存词典中
-					singleton._MainDict.fillSegment(word.trim().toCharArray());
+					this._MainDict.fillSegment(word.trim().toCharArray());
 				}
 			}
 		}
@@ -325,7 +341,7 @@ public void disableWords(Collection<String> words) {
 			for (String word : words) {
 				if (word != null) {
 					// 批量屏蔽词条
-					singleton._MainDict.disableSegment(word.trim().toCharArray());
+					this._MainDict.disableSegment(word.trim().toCharArray());
 				}
 			}
 		}
@@ -337,7 +353,7 @@ public void disableWords(Collection<String> words) {
 	 * @return Hit 匹配结果描述
 	 */
 	public Hit matchInMainDict(char[] charArray) {
-		return singleton._MainDict.match(charArray);
+		return this._MainDict.match(charArray);
 	}
 
 	/**
@@ -346,7 +362,7 @@ public Hit matchInMainDict(char[] charArray) {
 	 * @return Hit 匹配结果描述
 	 */
 	public Hit matchInMainDict(char[] charArray, int begin, int length) {
-		return singleton._MainDict.match(charArray, begin, length);
+		return this._MainDict.match(charArray, begin, length);
 	}
 
 	/**
@@ -355,7 +371,7 @@ public Hit matchInMainDict(char[] charArray, int begin, int length) {
 	 * @return Hit 匹配结果描述
 	 */
 	public Hit matchInQuantifierDict(char[] charArray, int begin, int length) {
-		return singleton._QuantifierDict.match(charArray, begin, length);
+		return this._QuantifierDict.match(charArray, begin, length);
 	}
 
 	/**
@@ -374,7 +390,7 @@ public Hit matchWithHit(char[] charArray, int currentIndex, Hit matchedHit) {
 	 * @return boolean
 	 */
 	public boolean isStopWord(char[] charArray, int begin, int length) {
-		return singleton._StopWords.match(charArray, begin, length).isMatch();
+		return this._StopWords.match(charArray, begin, length).isMatch();
 	}
 
 	/**
@@ -565,7 +581,7 @@ void reLoadMainDict() {
 		logger.info("start to reload ik dict.");
 		// 新开一个实例加载词典，减少加载过程对当前词典使用的影响
 		Dictionary tmpDict = new Dictionary(configuration);
-		tmpDict.configuration = getSingleton().configuration;
+		tmpDict.configuration = this.configuration;
 		tmpDict.loadMainDict();
 		tmpDict.loadStopWordDict();
 		_MainDict = tmpDict._MainDict;

diff --git a/src/main/java/org/wltea/analyzer/dic/Monitor.java b/src/main/java/org/wltea/analyzer/dic/Monitor.java
@@ -18,6 +18,10 @@ public class Monitor implements Runnable {
 	private static final Logger logger = ESPluginLoggerFactory.getLogger(Monitor.class.getName());
 
 	private static CloseableHttpClient httpclient = HttpClients.createDefault();
+	/*
+	 * Name of the custom dictionary this monitor reloads; passed to Dictionary.getDictionary as the lookup key
+	 */
+	private final String customRemoteDictName;
 	/*
 	 * 上次更改时间
 	 */
@@ -32,10 +36,11 @@ public class Monitor implements Runnable {
 	 */
 	private String location;
 
-	public Monitor(String location) {
+	public Monitor(String location, String customRemoteDictName) {
 		this.location = location;
 		this.last_modified = null;
 		this.eTags = null;
+		this.customRemoteDictName = customRemoteDictName;
 	}
 
 	public void run() {
@@ -84,7 +89,7 @@ public void runUnprivileged() {
 						||((response.getLastHeader("ETag")!=null) && !response.getLastHeader("ETag").getValue().equalsIgnoreCase(eTags))) {
 
 					// 远程词库有更新,需要重新加载词典，并修改last_modified,eTags
-					Dictionary.getSingleton().reLoadMainDict();
+					Dictionary.getDictionary(customRemoteDictName).reLoadMainDict();
 					last_modified = response.getLastHeader("Last-Modified")==null?null:response.getLastHeader("Last-Modified").getValue();
 					eTags = response.getLastHeader("ETag")==null?null:response.getLastHeader("ETag").getValue();
 				}