2000字范文,分享全网优秀范文,学习好帮手!
2000字范文 > ElasticSearch最新版(8.4.3) IK分词器基于mysql实现热更新词库

ElasticSearch最新版(8.4.3) IK分词器基于mysql实现热更新词库

时间:2021-10-09 14:33:51

相关推荐

ElasticSearch最新版(8.4.3) IK分词器基于mysql实现热更新词库

1 源码阅读思路

阅读Dictionary类下的initial方法,基于该方法进行改造

2 下载源码

https://github.com/medcl/elasticsearch-analysis-ik/releases/tag/v8.4.3

3 导入idea

4 创建jdbc配置文件(文件名必须为 jdbc-reload.properties,放在 getDictRoot() 返回的词典根目录下,代码会从该路径读取)

# JDBC connection settings read by WordMysqlRunnable (one property per line).
jdbc.url=jdbc:mysql://192.168.38.80:3306/word_db?useUnicode=true&characterEncoding=UTF-8&useSSL=false&serverTimezone=Asia/Shanghai
jdbc.user=root
jdbc.password=123456
# Queries executed on every reload; each must return a column named "word".
jdbc.reload.word.extend.sql=select word from t_extend_word
jdbc.reload.word.stop.sql=select word from t_stop_word
# Driver class loaded with Class.forName when the reload task is created.
jdbc.driver.class.name=com.mysql.jdbc.Driver

5 防止初始化数据库驱动报错ExceptionInInitializerError 修改文件resource/plugin-security.policy

添加权限 permission java.lang.RuntimePermission "setContextClassLoader";

grant {
  // needed because of the hot reload functionality
  permission java.net.SocketPermission "*", "connect,resolve";
  // added so that initialising the database driver does not fail with ExceptionInInitializerError
  permission java.lang.RuntimePermission "setContextClassLoader";
};

6 Dictionary 新增方法

/**
 * Replaces the stop-word dictionary held by this instance with the given one.
 *
 * @param _StopWords the freshly built stop-word dictionary to switch to
 */
void reloadMysqlStopWord(DictSegment _StopWords) {
    logger.info("start to reload mysql stopWord.");
    this._StopWords = _StopWords;
}

/**
 * Replaces the main dictionary held by this instance with the given one.
 *
 * <p>NOTE(review): the reference is swapped wholesale, so any words that were in the
 * previous {@code _MainDict} but are not in the argument are no longer present after
 * this call - confirm this is the intended behaviour for "extend" words.
 *
 * @param _MainDict the freshly built dictionary to switch to
 */
void reloadMysqlExtendWord(DictSegment _MainDict) {
    logger.info("start to reload mysql extendWord.");
    this._MainDict = _MainDict;
}

7 创建WordMysqlRunnable

package org.wltea.analyzer.dic;

import org.apache.logging.log4j.Logger;
import org.elasticsearch.SpecialPermission;
import org.wltea.analyzer.help.ESPluginLoggerFactory;

import java.security.AccessController;
import java.security.PrivilegedAction;
import java.sql.*;
import java.util.List;
import java.util.Locale;
import java.util.Properties;

/**
 * Task that reads words from MySQL and hands a freshly built dictionary to
 * {@link Dictionary}. One instance handles exactly one word type
 * ({@link #EXTEND} or {@link #STOP}).
 *
 * <p>Every failure inside {@link #run()} is caught and logged, so a broken database
 * connection never propagates an exception to the caller that executes the task.
 */
public class WordMysqlRunnable implements Runnable {

    private static final Logger logger = ESPluginLoggerFactory.getLogger(WordMysqlRunnable.class.getName());

    /** Column that every reload query must return. */
    private static final String WORD_COLUMN = "word";

    /** MySQL configuration (connection settings and reload queries). */
    private final Properties properties;

    /** Word type handled by this task: {@link #EXTEND} or {@link #STOP}. */
    private final String type;

    public static final String EXTEND = "extend";

    public static final String STOP = "stop";

    /**
     * Creates the task and eagerly loads the JDBC driver class.
     *
     * @param properties configuration; must contain {@code jdbc.driver.class.name},
     *                   {@code jdbc.url}, {@code jdbc.user}, {@code jdbc.password} and the
     *                   reload query for the chosen type
     * @param type       {@link #EXTEND} or {@link #STOP}
     * @throws RuntimeException wrapping {@link ClassNotFoundException} when the configured
     *                          driver class cannot be loaded
     */
    public WordMysqlRunnable(Properties properties, String type) {
        AccessController.doPrivileged((PrivilegedAction<Void>) () -> {
            try {
                Class.forName(properties.getProperty("jdbc.driver.class.name"));
            } catch (ClassNotFoundException e) {
                throw new RuntimeException(e);
            }
            return null;
        });
        this.properties = properties;
        this.type = type;
    }

    /** Runs one reload inside a privileged block. */
    @Override
    public void run() {
        SpecialPermission.check();
        AccessController.doPrivileged((PrivilegedAction<Void>) () -> {
            this.loadWord();
            return null;
        });
    }

    /** Opens a connection, dispatches on the word type and always releases JDBC resources. */
    private void loadWord() {
        logger.info("WordMysqlRunnable invoke run");
        try (Connection conn = DriverManager.getConnection(
                properties.getProperty("jdbc.url"),
                properties.getProperty("jdbc.user"),
                properties.getProperty("jdbc.password"));
             Statement stmt = conn.createStatement()) {
            // Constant-first comparison: a null type must not raise a NullPointerException.
            if (EXTEND.equals(type)) {
                loadExtendWord(stmt);
            } else if (STOP.equals(type)) {
                loadStopWord(stmt);
            } else {
                logger.warn("unknown mysql word type [" + type + "], nothing reloaded");
            }
        } catch (Exception e) {
            // Boundary catch: log with context and stack trace instead of letting the task die.
            logger.error("failed to reload [" + type + "] words from mysql", e);
        }
    }

    /** Builds a stop-word dictionary from MySQL and hands it to {@link Dictionary}. */
    private void loadStopWord(Statement stmt) throws SQLException {
        logger.info("WordMysqlRunnable invoke loadStopWord");
        DictSegment _StopWords = readWords(stmt, "jdbc.reload.word.stop.sql");
        Dictionary.getSingleton().reloadMysqlStopWord(_StopWords);
    }

    /** Builds an extend-word dictionary from MySQL and hands it to {@link Dictionary}. */
    private void loadExtendWord(Statement stmt) throws SQLException {
        logger.info("WordMysqlRunnable invoke loadExtendWord");
        DictSegment _MainDict = readWords(stmt, "jdbc.reload.word.extend.sql");
        Dictionary.getSingleton().reloadMysqlExtendWord(_MainDict);
    }

    /**
     * Executes the query stored under {@code sqlKey} and fills a new dictionary with the
     * trimmed, lower-cased content of the {@code word} column.
     *
     * @param stmt   open statement used to run the query
     * @param sqlKey property key whose value is the SQL to execute
     * @return a new dictionary containing every non-blank word returned by the query
     * @throws SQLException when the query fails or the {@code word} column is missing
     */
    private DictSegment readWords(Statement stmt, String sqlKey) throws SQLException {
        DictSegment dict = new DictSegment((char) 0);
        String sql = properties.getProperty(sqlKey);
        // The result set is closed here even when reading a row fails.
        try (ResultSet resultSet = stmt.executeQuery(sql)) {
            while (resultSet.next()) {
                String word = resultSet.getString(WORD_COLUMN);
                if (word == null) {
                    // A NULL cell must not abort the whole reload.
                    continue;
                }
                // Locale.ROOT keeps lower-casing independent of the JVM default locale.
                String normalized = word.trim().toLowerCase(Locale.ROOT);
                if (normalized.isEmpty()) {
                    // A blank row carries no word to index.
                    continue;
                }
                dict.fillSegment(normalized.toCharArray());
            }
        }
        return dict;
    }
}

8 修改方法Dictionary initial 方法

public static synchronized void initial(Configuration cfg) {if (singleton == null) {synchronized (Dictionary.class) {if (singleton == null) {singleton = new Dictionary(cfg);singleton.loadMainDict();singleton.loadSurnameDict();singleton.loadQuantifierDict();singleton.loadSuffixDict();singleton.loadPrepDict();singleton.loadStopWordDict();//自定义mysql词汇singleton.loadMysqlWord();if (cfg.isEnableRemoteDict()) {// 建立监控线程for (String location : singleton.getRemoteExtDictionarys()) {// 10 秒是初始延迟可以修改的 60是间隔时间 单位秒pool.scheduleAtFixedRate(new Monitor(location), 10, 60, TimeUnit.SECONDS);}for (String location : singleton.getRemoteExtStopWordDictionarys()) {pool.scheduleAtFixedRate(new Monitor(location), 10, 60, TimeUnit.SECONDS);}}}}}}private void loadMysqlWord() {try {Properties properties = new Properties();Path path = PathUtils.get(getDictRoot(), "jdbc-reload.properties");properties.load(Files.newInputStream(path.toFile().toPath()));pool.scheduleAtFixedRate(new WordMysqlRunnable(properties, WordMysqlRunnable.EXTEND), 10, 60, TimeUnit.SECONDS);pool.scheduleAtFixedRate(new WordMysqlRunnable(properties, WordMysqlRunnable.STOP), 10, 60, TimeUnit.SECONDS);} catch (IOException e) {logger.error("e", e);}}

9 pom版本需要跟ik分词版本一致

10 pom中新增mysql依赖

<!-- MySql -->
<!-- JDBC driver used by the MySQL hot-reload task; goes inside the pom's <dependencies> section. -->
<dependency>
    <groupId>mysql</groupId>
    <artifactId>mysql-connector-java</artifactId>
    <version>5.1.47</version>
</dependency>

11 修改plugin.xml 添加mysql配置

<!-- Package the mysql-connector-java artifact into the root of the plugin archive. -->
<dependencySet>
    <outputDirectory>/</outputDirectory>
    <useProjectArtifact>true</useProjectArtifact>
    <useTransitiveFiltering>true</useTransitiveFiltering>
    <includes>
        <include>mysql:mysql-connector-java</include>
    </includes>
</dependencySet>

12 打包

13 替换、重启

# Unpack the rebuilt plugin into ik-analyzer/ and remove the archive.
unzip elasticsearch-analysis-ik-8.4.3.zip -d ik-analyzer
rm -rf elasticsearch-analysis-ik-8.4.3.zip
# NOTE(review): +777 makes the directory world-writable - confirm a narrower mode is not sufficient.
chmod +777 ik-analyzer/
# Restart the container so Elasticsearch loads the replaced plugin.
docker restart elasticsearch

14 测试

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。