The previous post covered how to sign up for bonsai.io's free Elasticsearch service. The trouble is that it has no Chinese word segmentation and the trial period is limited, so there is nothing for it but to set up our own Elasticsearch server to power Ghost search.
So, how do we build an Elasticsearch search engine from a Ghost blog?
Step 1: Set up an Elasticsearch server and install the ik analyzer
wget https://download.elastic.co/elasticsearch/release/org/elasticsearch/distribution/tar/elasticsearch/2.3.3/elasticsearch-2.3.3.tar.gz
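The tarball then needs to be unpacked before the config file can be edited (a quick sketch, assuming the default directory name inside the archive):
tar zxvf elasticsearch-2.3.3.tar.gz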
vi elasticsearch-2.3.3/config/elasticsearch.yml
...
network.host: 162.216.122.100,127.0.0.1
http.port: 12530
...
cd elasticsearch-2.3.3/bin
./elasticsearch -Des.insecure.allow.root=true -d
./plugin install mobz/elasticsearch-head
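To confirm the node actually came up, hit its HTTP endpoint with the host and port configured above (swap in your own values); it should answer with a small JSON blob reporting the cluster name and version 2.3.3:
curl http://162.216.122.100:12530/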
Installing ik: just build ik version 1.9.3 directly, which produces a zip package. Unzip it into Elasticsearch's plugins/ik directory and you are done, with no further changes needed.
Download: elasticsearch-analysis-ik-1.9.3.zip
Note that the ik 1.9.3 above is built with Java 1.8.
Then:
mkdir -p elasticsearch-2.3.3/plugins/ik
unzip -x elasticsearch-analysis-ik-1.9.3.zip -d elasticsearch-2.3.3/plugins/ik
That's it; elasticsearch.yml needs no changes at all!
Now test it by opening the following address in a browser:
http://162.216.122.100:12530/_analyze?analyzer=ik&pretty=true&text=我爱北京天安门
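If ik is installed correctly, the response contains a tokens array in which the sentence is segmented into Chinese words rather than single characters, roughly like the sketch below (the exact tokens, offsets, and extra fields depend on your ik dictionary and version):
{
  "tokens" : [
    { "token" : "我",     "start_offset" : 0, "end_offset" : 1, "position" : 0, ... },
    { "token" : "爱",     "start_offset" : 1, "end_offset" : 2, "position" : 1, ... },
    { "token" : "北京",   "start_offset" : 2, "end_offset" : 4, "position" : 2, ... },
    { "token" : "天安门", "start_offset" : 4, "end_offset" : 7, "position" : 3, ... }
  ]
}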
Step 2: Add a blog URL that lists all posts
First, add a route to Ghost so that all posts can be retrieved:
# vi core/server/routes/frontend.js
...
// redirect to /ghost and let that do the authentication to prevent redirects to /ghost//admin etc.
router.get(/^\/((ghost-admin|admin|wp-admin|dashboard|signin|login)\/?)$/, function redirectToAdmin(req, res) {
utils.redirect301(res, subdir + '/ghost/');
});
// add the following line
router.get('/all_posts/', frontend.all_posts);
Then edit the controllers and add an all_posts function before preview:
# vi core/server/controllers/frontend/index.js
...
frontendControllers = {
// added
all_posts: function all_posts(req, res, next) {
api.posts.browse({
include: 'title,markdown,slug,tags',
page: 1,
limit: 1000
}).then(function (posts) {
if (!posts || !posts.posts || posts.posts.length === 0) {
res.end("[]");
} else {
var output = [];
posts.posts.forEach(function (p) {
console.log(p.status)
output.push({
title: p.title,
content: p.markdown,
slug: p.slug,
tags: p.tags.map(function (tag) { return tag.name; }),
updated_at: new Date(p.updated_at)
});
});
res.end(JSON.stringify(output));
}
}).catch(handleError(next));
},
// below is the original preview function
preview: function preview(req, res, next) {
OK, restart Ghost. Now opening http://xxx.xxx.xxx.xxx/all_posts/ shows all of the blog's posts in JSON format.
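Each element carries exactly the fields pushed by the all_posts controller above, so the body looks roughly like this (a sketch with placeholder values):
[
  {
    "title": "...",
    "content": "...",
    "slug": "...",
    "tags": ["..."],
    "updated_at": "2016-06-01T00:00:00.000Z"
  },
  ...
]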
Step 3: Write a program to export the data to our self-hosted Elasticsearch
Note two differences from bonsai.io here: first, the data we submit now contains Chinese; second, we have not touched Elasticsearch's default configuration, so the index must be explicitly told to use the ik analyzer.
First, send a request to create the posts index on Elasticsearch:
curl -XPUT http://162.216.122.100:12530/posts
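A plain GET against the same URL confirms the index now exists (again, substitute your own host and port):
curl -XGET 'http://162.216.122.100:12530/posts?pretty'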
Then, back on the Ghost server, create a directory anywhere outside the main Ghost installation, for example /home/ex/, and put a run.js in it:
# cat /home/ex/run.js
var httpRequest = require('request'),
getMetaForPosts = function (posts) {
var requestString = '',
meta;
posts.forEach(function (post) {
meta = {
index: {
_index: "posts",
_type: "post",
_id: post.slug // use the slug as a stable id; random ids can collide and silently overwrite posts
}
};
requestString += JSON.stringify(meta) + '\n' + JSON.stringify(post) + '\n';
});
//console.log(requestString);
return requestString;
},
validate = function (err, response, body) {
if (err) throw err;
if (response.statusCode != 200) throw body;
};
// callback hell!!
httpRequest({
uri: 'http://162.216.122.100:12530'
}, function (err, response, body) {
validate(err, response, body);
console.log("Elastic Search running, deleting posts index...");
httpRequest.del({
uri: 'http://162.216.122.100:12530/posts/'
}, function (err, response, body) {
validate(err, response, body)
console.log("Deleted index successfully, recreating 'posts' index...");
httpRequest.put({
uri: 'http://162.216.122.100:12530/posts/'
}, function (err, response, body) {
validate(err, response, body);
console.log("Create mapping...");
httpRequest.post({
uri: 'http://162.216.122.100:12530/posts/post/_mapping',
body: '{ "post": { "_all": { "analyzer": "ik_max_word", "search_analyzer": "ik_max_word", "term_vector": "no", "store": "false" }, "properties": { "content": { "type": "string", "store": "no", "term_vector": "with_positions_offsets", "analyzer": "ik_max_word", "search_analyzer": "ik_max_word", "include_in_all": "true", "boost": 8 } } } }'
}, function (err, response, body) {
validate(err, response, body);
console.log("Fetch posts...")
httpRequest({
uri: 'http://127.0.0.1:2368/all_posts/'
}, function (err, response, body) {
validate(err, response, body);
var posts = JSON.parse(body);
if (!posts) throw new Error("Could not fetch posts!");
console.log("Fetched " + posts.length + " posts, Bulk Indexing posts...");
httpRequest.post({
uri: 'http://162.216.122.100:12530/posts/post/_bulk',
body: getMetaForPosts(posts)
}, function (err, response, body) {
validate(err, response, body);
console.log("Done indexing")
});
});
});
});
});
});
Note: the URIs appear in five places above; replace them with your own.
Two further differences from bonsai:
First, indexing uses the new bulk syntax:
/posts/post/_bulk
{"index":{"_index":"posts","_type":"post","_id":634}}
{"title":"","content":"","slug"......}
{"index":{"_index":"posts","_type":"post","_id":635}}
{"title":"","content":"","slug"......}
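For debugging, the same bulk body can be submitted by hand. The bulk API expects newline-delimited JSON and the body must end with a newline, so use --data-binary rather than -d, which would strip the line breaks (a sketch, assuming the action/document lines are saved to a file named bulk.json):
curl -XPOST 'http://162.216.122.100:12530/posts/post/_bulk' --data-binary @bulk.json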
Second, /posts/post/_mapping is changed to use the ik analyzer:
{
"post": {
"_all": {
"analyzer": "ik_max_word",
"search_analyzer": "ik_max_word",
"term_vector": "no",
"store": "false"
},
"properties": {
"content": {
"type": "string",
"store": "no",
"term_vector": "with_positions_offsets",
"analyzer": "ik_max_word",
"search_analyzer": "ik_max_word",
"include_in_all": "true",
"boost": 8
}
}
}
}
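Once run.js has been executed (see below), you can double-check that this mapping took effect and that content is indeed analyzed with ik_max_word:
curl -XGET 'http://162.216.122.100:12530/posts/_mapping?pretty'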
The program above uses the request library, so install it first:
# cd /home/ex
# npm install request
Then run the script to import the data into the Elasticsearch server:
# cd /home/ex
# node run.js
Elastic Search running, deleting posts index...
Deleted index successfully, recreating 'posts' index...
Fetching posts...
Fetched 164 posts, Bulk Indexing posts...
Done indexing
OK, the index has been built. Let's browse the data in the elasticsearch-head console:
If Chinese content shows up in the documents, everything is working.
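Before wiring anything into Ghost, it is worth confirming from the command line that a Chinese query actually hits the ik-analyzed index (a sketch; replace 北京 with a word that appears in your own posts):
curl -XPOST 'http://162.216.122.100:12530/posts/post/_search?pretty' -d '
{
  "query": { "match": { "_all": "北京" } }
}'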
Step 4: Modify Ghost directly so that it supports Elasticsearch
An API layer could have been added in between, but the VPS is too weak and probably would not start with anything more piled on, so we go straight at it. In a serious production environment at a large company, an intermediate layer is a must.
First, add a search route just below the all_posts route:
# vi core/server/routes/frontend.js
...
router.get('/all_posts/', frontend.all_posts);
router.get('/search/', frontend.search_results);
...
Then edit the controllers and add a search_results function before all_posts:
# vi core/server/controllers/frontend/index.js
...
search_results: function search_results(req, res, next) {
// Build up the search request
var request_data = {
"fields": ["slug", "title", "tags", "updated_at"],
"query": {
"wildcard": {
"_all": {
"wildcard": req.query.q + "*"
}
}
},
"highlight": {
"fields": {
"title": {},
"tags": {},
"content": {}
}
},
"suggest": {
"suggestions": {
"text": req.query.q,
"term": {
"field": "_all",
"suggest_mode": "always"
}
}
}
};
// create the elastic search request
request_data = JSON.stringify(request_data);
var esRequest = require('http').request({
host: '162.216.122.100',
path: '/posts/post/_search',
port: 12530,
method: "POST",
headers: {
'Content-Type': 'application/json',
//'Content-Length': request_data.length
'Content-Length': Buffer.byteLength(request_data, 'utf8')
}
}, function (esRes) {
var result = '';
esRes.on('data', function (chunk) {
result += chunk;
});
esRes.on('end', function () {
var response = JSON.parse(result);
// render the results
res.render('results', {
results: response,
resultsJSON: JSON.stringify(response, null, 2),
query: req.query.q
});
});
});
// search!
esRequest.write(request_data);
esRequest.end();
},
...
Note the difference from bonsai.io: because the request body now contains Chinese, the content length has to be computed differently!
//'Content-Length': request_data.length
'Content-Length': Buffer.byteLength(request_data, 'utf8')
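The difference matters because a JavaScript string's .length counts UTF-16 code units, while Content-Length must count bytes, and for Chinese text the two are not equal. A quick check in node:
var s = '中文';
console.log(s.length);                      // 2  (UTF-16 code units)
console.log(Buffer.byteLength(s, 'utf8'));  // 6  (bytes actually sent in the request body)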
As you can see, res.render('results', ...) renders to the results template, so we also need to create a results.hbs template in the Ghost theme directory:
{{!< default}}
{{> "header"}}
<main id="content" class="content" role="main">
<div id="article" class="box">
<div class="category-all-page">
<div class="category-all-title">
搜索结果:{{query}}
</div>
<ul class="category-list">
<li class="category-item">
<h3 class="category-name" id="category1"><i class="fa fa-coffee"></i>{{query}}</h3>
<ul class="post-list">
{{#foreach results.hits.hits}}
<li><a href="/{{fields.slug}}/">{{fields.title}}</a></li>
{{/foreach}}
</ul>
</li>
</ul>
</div>
</div>
</main>
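The search request also asks Elasticsearch for highlights, which this template simply ignores. If you want a snippet under each result, something along these lines could go inside the foreach loop (a sketch: it assumes the standard Handlebars each helper is available in the theme, and search-snippet is just a placeholder class name):
{{#foreach results.hits.hits}}
<li>
<a href="/{{fields.slug}}/">{{fields.title}}</a>
{{#each highlight.content}}<p class="search-snippet">{{{this}}}</p>{{/each}}
</li>
{{/foreach}}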
Then restart Ghost and open a link such as:
http://xxx.xxx.xxx.xxx/search?q=vpn
If the page displays the search results, everything is OK.
Finally, add a search box to the theme template:
<div class="about-me">
<h5>搜索</h5>
<form action="/search">
<input type="text" name="q"></input>
</form>
</div>
That is exactly the search this blog now uses. Taking the idea further, you could write a crawler, feed what it collects into the index, and end up with a search engine for whatever topics you care about.