这是一篇长文......
前面写了好几篇elasticsearch的文章,这跟linux系统管理看起来毫不沾边啊。理由很朴素,想给自己的Ghost blog加个搜索引擎。试了几个都不如意,于是干脆搭建一个elasticsearch自用。
鉴于生产环境的主程序和elasticsearch通常是分开的,不可能让主程序直接调用elasticsearch的9200端口进行查询,通常会在中间加一层proxy代理,通过api查询,所以有了之前的flask代理api。
现在我们已经有了免费的elasticsearch服务器,那么怎么在Ghost blog上建立Elasticsearch的搜索引擎呢?
第一步:添加个博客的url,列出所有文章:
首先添加ghost的routes,使得能看到所有文章
# vi core/server/routes/frontend.js
...
// redirect to /ghost and let that do the authentication to prevent redirects to /ghost//admin etc.
router.get(/^\/((ghost-admin|admin|wp-admin|dashboard|signin|login)\/?)$/, function redirectToAdmin(req, res) {
utils.redirect301(res, subdir + '/ghost/');
});
// Add the following line: it maps GET /all_posts/ to the frontend.all_posts controller
router.get('/all_posts/', frontend.all_posts);
然后编辑controllers,在preview前增加all_posts函数
# vi core/server/controllers/frontend/index.js
...
frontendControllers = {
// 增加
all_posts: function all_posts(req, res, next) {
api.posts.browse({
include: 'title,markdown,slug,tags',
page: 1,
limit: 1000
}).then(function (posts) {
if (!posts || posts.length == 0) {
res.end("[]");
} else {
var output = [];
posts.posts.forEach(function (p) {
console.log(p.status)
output.push({
title: p.title,
content: p.markdown,
slug: p.slug,
tags: p.tags.map(function (tag) { return tag.name; }),
updated_at: new Date(p.updated_at)
});
});
res.end(JSON.stringify(output));
}
}).catch(handleError(next));
},
//下面是原来的preview函数
preview: function preview(req, res, next) {
ok,重启。这样我们打开 http://xxx.xxx.xxx.xxx/all_posts/ 就会看到自己博客的所有文章的json格式。
第二步:我们需要写个程序导出数据到bonsai.io的elasticsearch去
首先登陆bonsai.io,打开Interactive Console:
发个请求,建立新索引:
POST /posts
显示true就ok了
然后打开Manage,记下来那个https打头的地址:
然后回到Ghost服务器上随便建个目录(当然不能放到Ghost的主程序目录下),例如/home/ex/,在里面生成一个run.js:
# cat /home/ex/run.js
var httpRequest = require('request'),
    /**
     * Build the request body for the Elasticsearch _bulk endpoint.
     *
     * For every post two lines are produced: an action line
     * ({"create": {...}}) followed by the post itself as JSON. Each line,
     * including the last one, is terminated by a newline character.
     *
     * @param {Array<Object>} posts - posts to index
     * @returns {string} newline-delimited bulk body ('' for an empty list)
     */
    getMetaForPosts = function (posts) {
        var lines = [];
        posts.forEach(function (post, index) {
            var meta = {
                create: {
                    _index: "posts",
                    _type: "post",
                    // Position-based id: unique within one run. A random id
                    // in 0..999 collides often, and "create" rejects
                    // documents whose id already exists.
                    _id: index + 1
                }
            };
            lines.push(JSON.stringify(meta), JSON.stringify(post));
        });
        // A real '\n' is required here; a backslash followed by a line break
        // inside a string literal is a line continuation and adds nothing.
        return lines.length ? lines.join('\n') + '\n' : '';
    },
    /**
     * Abort the script when a request failed.
     *
     * @param {Error|null} err - transport error reported by `request`
     * @param {Object} response - response object (statusCode is read)
     * @param {string} body - response body, included in the error message
     * @throws {Error} on a transport error or a non-2xx status code
     */
    validate = function (err, response, body) {
        if (err) throw err;
        if (response.statusCode < 200 || response.statusCode >= 300) {
            throw new Error('Unexpected HTTP status ' + response.statusCode + ': ' + body);
        }
    };
// Re-index pipeline, one named function per step. Each step checks its
// response with validate() and then starts the next one:
// ping cluster -> delete index -> recreate index -> fetch posts -> bulk index.

// Step 1: make sure the cluster answers at all.
function checkCluster() {
    httpRequest({
        uri: 'https://xxxxx.bonsai.io'
    }, function (err, response, body) {
        validate(err, response, body);
        console.log("Elastic Search running, deleting posts index...");
        deleteIndex();
    });
}

// Step 2: drop the old 'posts' index.
function deleteIndex() {
    httpRequest.del({
        uri: 'https://xxxxx.bonsai.io/posts/'
    }, function (err, response, body) {
        validate(err, response, body);
        console.log("Deleted index successfully, recreating 'posts' index...");
        createIndex();
    });
}

// Step 3: create an empty 'posts' index again.
function createIndex() {
    httpRequest.put({
        uri: 'https://xxxxx.bonsai.io/posts/'
    }, function (err, response, body) {
        validate(err, response, body);
        console.log("Fetching posts...");
        fetchPosts();
    });
}

// Step 4: read every post from the local Ghost instance.
function fetchPosts() {
    httpRequest({
        uri: 'http://127.0.0.1:2368/all_posts/'
    }, function (err, response, body) {
        validate(err, response, body);
        var posts = JSON.parse(body);
        if (!posts) throw new Error("Could not fetch posts!");
        console.log("Fetched " + posts.length + " posts, Bulk Indexing posts...");
        bulkIndex(posts);
    });
}

// Step 5: push all posts to Elasticsearch in a single bulk request.
function bulkIndex(posts) {
    httpRequest.post({
        uri: 'https://xxxxx.bonsai.io/posts/post/_bulk',
        body: getMetaForPosts(posts)
    }, function (err, response, body) {
        validate(err, response, body);
        console.log("Done indexing");
    });
}

checkCluster();
注意上面四个地方的https地址,换成你自己的哦。
上面的程序用到了request库,所以我们需要装一下:
# cd /home/ex
# npm install request
然后运行这个js,把数据导入bonsai.io:
# cd /home/ex
# node run.js
Elastic Search running, deleting posts index...
Deleted index successfully, recreating 'posts' index...
Fetching posts...
Fetched 115 posts, Bulk Indexing posts...
Done indexing
ok,建立成功,我们去bonsai的控制台发个请求看看:
GET /posts/_search
{
"fields": ["slug", "title", "tags", "updated_at"],
"query": {
"wildcard": {
"_all": {
"wildcard": "vpn*"
}
}
},
"highlight": {
"fields": {
"title": {},
"tags": {},
"content": {}
}
},
"suggest": {
"suggestions": {
"text": "query_text",
"term": {
"field": "_all",
"suggest_mode": "always"
}
}
}
}
显示有数据就对了:
第三步:直接改造ghost,让它支持elasticsearch
本来是可以中间加个api层的,但是vps太弱,再加东西恐怕起不来,所以直接来,如果是正式大公司的生产环境,中间层是必须的。
首先是添加search路由,在all_posts之下再加个search路由:
# vi core/server/routes/frontend.js
...
// Route added in step one: GET /all_posts/ is handled by frontend.all_posts
router.get('/all_posts/', frontend.all_posts);
// New route: GET /search/ is handled by frontend.search_results
router.get('/search/', frontend.search_results);
...
然后编辑controllers,在all_posts前增加search_results函数
# vi core/server/controllers/frontend/index.js
...
search_results: function search_results(req, res, next) {
// Build up the search request
var request_data = {
"fields": ["slug", "title", "tags", "updated_at"],
"query": {
"wildcard": {
"_all": {
"wildcard": req.query.q + "*"
}
}
},
"highlight": {
"fields": {
"title": {},
"tags": {},
"content": {}
}
},
"suggest": {
"suggestions": {
"text": req.query.q,
"term": {
"field": "_all",
"suggest_mode": "always"
}
}
}
};
// create the elastic search request
request_data = JSON.stringify(request_data);
var esRequest = require('https').request({
host: 'xxxxxx.bonsai.io',
path: '/posts/_search',
port: 443,
auth: 'xxxxxx:xxxxxx',
method: "POST",
headers: {
'Content-Type': 'application/json',
'Content-Length': request_data.length
}
}, function (esRes) {
var result = '';
esRes.on('data', function (chunk) {
result += chunk;
});
esRes.on('end', function () {
var response = JSON.parse(result);
// render the results
res.render('results', {
results: response,
resultsJSON: JSON.stringify(response, null, 2),
query: req.query.q
});
});
});
// search!
esRequest.write(request_data);
esRequest.end();
},
...
注意上面填写bonsai.io的request方式:bonsai的url其实可以拆成好几部分,比如 [https://aaa:bbb@ccc.bonsai.io],那么auth就是aaa:bbb,host就是ccc.bonsai.io,port就是https默认的443,要注意。
大家看到res.render('results',是渲染到了results的模板去,所以我们再在Ghost的主题目录下,建立一个results.hbs模板:
{{!< default}}
{{!-- Search results page. Rendered by the search_results controller, which passes `query` (the search term) and `results` (the parsed Elasticsearch response). --}}
{{> "header"}}
<main id="content" class="content" role="main">
<div id="article" class="box">
<div class="category-all-page">
<div class="category-all-title">
搜索结果:{{query}}
</div>
<ul class="category-list">
<li class="category-item">
<h3 class="category-name" id="category1"><i class="fa fa-coffee"></i>{{query}}</h3>
<ul class="post-list">
{{!-- One link per hit; slug and title are among the "fields" requested in the search body --}}
{{#foreach results.hits.hits}}
<li><a href="/{{fields.slug}}/">{{fields.title}}</a></li>
{{/foreach}}
</ul>
</li>
</ul>
</div>
</div>
</main>
然后重启ghost,访问下面的链接(q参数就是要搜索的关键词):
http://xxx.xxx.xxx.xxx/search/?q=vpn
网页显示如下就ok了:
最后再在模板上加上搜索框就可以了。但是啊,这个还有问题,没有中文分词,哈哈。