ubuntu 16.04 64位 coreseek
环境
操作系统:ubuntu 16.04 64位
数据库:mysql
1.安装升级autoconf
因为coreseek需要autoconf 2.65以上版本,所以需要升级autoconf,不然会报错
./bootstrap: 25: ./bootstrap: autoheader: not found
- automake --add-missing --copy
./bootstrap: 26: ./bootstrap: automake: not found
- autoconf
./bootstrap: 27: ./bootstrap: autoconf: not found
从 https://lists.gnu.org/archive/html/autotools-announce/2012-04/msg00004.html 下载 autoconf-2.69.tar.gz,下载完成后,
安装如下:
tar -zxvf autoconf-2.69.tar.gz
cd autoconf-2.69
./configure
make
sudo make install
下载coreseek
新版本的coreseek将词典和sphinx源程序放在了一个包中,因此只需要下载coreseek包就可以了,这里我用的是coreseek 4.1版本
wget http://www.coreseek.cn/uploads/csft/4.0/coreseek-4.1-beta.tar.gz
或者
wget http://files.opstool.com/man/coreseek-4.1-beta.tar.gz
2.安装mmseg(coreseek所使用的词典)
#先解压下载好的源码包并进入目录
tar -zxvf coreseek-4.1-beta.tar.gz
cd coreseek-4.1-beta
cd mmseg-3.2.14
./bootstrap
./configure --prefix=/usr/local/mmseg3
make
sudo make install
cd ..
安装coreseek(sphinx)
ubuntu 12.04及以上版本需要打补丁,否则下面编译的时候会没有configure文件。补丁解决方案如下:
在 csft-4.1/buildconf.sh 文件中,查找
&& aclocal
后加上
&& automake --add-missing
在 csft-4.1/configure.ac 文件中,查找:
AM_INIT_AUTOMAKE([-Wall -Werror foreign])
改为:
AM_INIT_AUTOMAKE([-Wall foreign])
查找:
AC_PROG_RANLIB
后面加上
AM_PROG_AR
在 csft-4.1/src/sphinxexpr.cpp 文件中, 替换所有:
T val = ExprEval ( this->m_pArg, tMatch );
成为:
T val = this->ExprEval ( this->m_pArg, tMatch );
安装libmysqlclient15-dev,否则会报找不到mysql
sudo apt-get install libmysqlclient15-dev
#如果提示找不到该软件包(较新的ubuntu版本中可能已不提供),可以改为安装 libmysqlclient-dev
安装coreseek
cd csft-4.1
sh buildconf.sh
#输出的warning信息可以忽略,如果出现error则需要解决
./configure --prefix=/usr/local/coreseek --without-unixodbc --with-mmseg --with-mmseg-includes=/usr/local/mmseg3/include/mmseg/ --with-mmseg-libs=/usr/local/mmseg3/lib/ --with-mysql
make
sudo make install
cd ..
测试mmseg分词及coreseek搜索
cd testpack
cat var/test/test.xml
#此时应该正确显示中文
/usr/local/mmseg3/bin/mmseg -d /usr/local/mmseg3/etc var/test/test.xml
/usr/local/coreseek/bin/indexer -c etc/csft.conf --all
/usr/local/coreseek/bin/search -c etc/csft.conf 网络搜索
此时正确的应该返回
words:
1. 网络: 1 documents, 1 hits
2. 搜索: 2 documents, 5 hits
配置coreseek
创建配置文件 /usr/local/coreseek/etc/sphinx.conf,内容示例如下:
source shandian
{
type = mysql
sql_host = 127.0.0.1
sql_user = root
sql_pass = ******
sql_db = map
sql_port = 3306
sql_query_pre = SET NAMES utf8
sql_query = SELECT `id`, 1 AS table_id, `name`, `phone`,`email`, `idcard` FROM shandian
sql_attr_uint = table_id #从SQL读取到的值必须为整数
#sql_attr_timestamp = date_added #从SQL读取到的值必须为整数,作为时间属性
#sql_query_info_pre = SET NAMES utf8 #命令行查询时,设置正确的字符集
#sql_query_info = SELECT * WHERE ID=$id #命令行查询时,从数据库读取原始数据信息
}
source diyi : shandian
{
sql_query = SELECT `id`, 2 AS table_id, `name`, `phone`,`email`, `idcard` FROM diyi
}
source carinfo : shandian
{
sql_query = SELECT `id`, 3 AS table_id, `name`, `phone`,`email`, `idcard` FROM carinfo
}
source customer : shandian
{
sql_query = SELECT `id`, 4 AS table_id, `name`, `phone`,`email`, `idcard` FROM customer
}
source fangzhu : shandian
{
sql_query = SELECT `id`, 5 AS table_id, `name`, `phone`,`email`, `idcard` FROM fangzhu
}
source gimfch : shandian
{
sql_query = SELECT `id`, 6 AS table_id, `name`, `phone`,`email`, `idcard` FROM gimfch
}
source glo : shandian
{
sql_query = SELECT `id`, 7 AS table_id, `name`, `phone`,`email`, `idcard` FROM glo
}
source wubbs : shandian
{
sql_query = SELECT `id`, 8 AS table_id, `name`, `phone`,`email`, `idcard` FROM wubbs
}
source contact : shandian
{
sql_query = SELECT `id`, 9 AS table_id, `name`, `phone`,`email`, `idcard` FROM contact
}
source huanghe : shandian
{
sql_query = SELECT `id`, 10 AS table_id, `name`, `phone`,`email`, `idcard` FROM huanghe
}
source xianjin : shandian
{
sql_query = SELECT `id`, 11 AS table_id, `name`, `phone`,`email`, `idcard` FROM xianjin
}
#index定义
index shandian
{
source = shandian #对应的source名称
path = /usr/local/coreseek/var/log/shandian #请修改为实际使用的绝对路径例如:/usr/local/coreseek/var/...
docinfo = extern
mlock = 0
morphology = none
min_word_len = 1
ondisk_dict = 1
html_strip = 0
#中文分词配置,详情请查看:http://www.coreseek.cn/products-install/coreseek_mmseg/
charset_dictpath = /usr/local/mmseg3/etc/ #BSD、Linux环境下设置,/符号结尾,最好给出绝对路径,例如:/usr/local/mmseg3/etc/
charset_type = zh_cn.utf-8
enable_star = 0 #不使用通配符,默认不启用,可以不写
min_infix_len=1 #使用中缀索引,并且最小索引为1,关于该项作用不知者可以查询手册
infix_fields= name,phone,email,idcard #因为中缀索引会使索引量急剧膨胀,所以最好选择你认为最主要的少量几个字段做中缀索引。(模糊查询)
}
index diyi : shandian
{
source = diyi
path = /usr/local/coreseek/var/log/diyi
}
index carinfo : shandian
{
source = carinfo
path = /usr/local/coreseek/var/log/carinfo
}
index customer : shandian
{
source = customer
path = /usr/local/coreseek/var/log/customer
}
index fangzhu : shandian
{
source = fangzhu
path = /usr/local/coreseek/var/log/fangzhu
}
index gimfch : shandian
{
source = gimfch
path = /usr/local/coreseek/var/log/gimfch
}
index glo : shandian
{
source = glo
path = /usr/local/coreseek/var/log/glo
}
index wubbs : shandian
{
source = wubbs
path = /usr/local/coreseek/var/log/wubbs
}
index contact : shandian
{
source = contact
path = /usr/local/coreseek/var/log/contact
}
index huanghe : shandian
{
source = huanghe
path = /usr/local/coreseek/var/log/huanghe
}
index xianjin : shandian
{
source = xianjin
path = /usr/local/coreseek/var/log/xianjin
}
#全局index定义
indexer
{
mem_limit = 512M #默认32M,不要太大,否则会报错
}
#searchd服务定义
searchd
{
listen = 9312 #查询服务监听端口,开启了才会工作
read_timeout = 5 #超时
max_children = 30 #最大进程
max_matches = 1000 #返回1000条
seamless_rotate = 0
preopen_indexes = 0
unlink_old = 1
pid_file = /usr/local/coreseek/var/log/searchd.pid #请修改为实际使用的绝对路径,例如:/usr/local/coreseek/var/...
log = /usr/local/coreseek/var/log/searchd.log #请修改为实际使用的绝对路径,例如:/usr/local/coreseek/var/...
query_log = /usr/local/coreseek/var/log/query.log #请修改为实际使用的绝对路径,例如:/usr/local/coreseek/var/...
binlog_path = #关闭binlog日志
compat_sphinxql_magics = 0
}
启动和管理coreseek
生成索引
sudo /usr/local/coreseek/bin/indexer --config /usr/local/coreseek/etc/sphinx.conf --all
#重新生成索引
sudo /usr/local/coreseek/bin/indexer --config /usr/local/coreseek/etc/sphinx.conf --all --rotate
#启动守护进程,启动后就不需要关闭
sudo /usr/local/coreseek/bin/searchd --config /usr/local/coreseek/etc/sphinx.conf
#关闭守护进程
sudo /usr/local/coreseek/bin/searchd --config /usr/local/coreseek/etc/sphinx.conf --stop
加入开机启动
打开 /etc/rc.local ,添加启动代码
/usr/local/coreseek/bin/searchd --config /usr/local/coreseek/etc/sphinx.conf
定时更新索引
加入计划任务,半小时重建一次索引。
crontab -e
加入下面这句代码
*/30 * * * * /usr/local/coreseek/bin/indexer --config /usr/local/coreseek/etc/sphinx.conf --all --rotate