全文搜索分析引擎 Elasticsearch
Elasticsearch 是一个分布式的搜索和分析引擎,可以用于全文检索、结构化检索和分析,并能将这三者结合起来。
安装环境
https://www.elastic.co/cn/downloads/elasticsearch
https://www.elastic.co/cn/downloads/kibana
Kibana: 用于可视化管理 Elasticsearch 的工具
https://github.com/medcl/elasticsearch-analysis-ik
https://github.com/siddontang/go-mysql-elasticsearch
快速上手
安装客户端
1
| composer require "elasticsearch/elasticsearch:^6.7"
|
实例化客户端
1 2 3 4
| $client = ClientBuilder::create() ->setHosts(["localhost:9200"]) ->setRetries(0) ->build();
|
索引一个文档
索引一个文档: 写 document 到 ElasticSearch 中,如果不存在,就创建,如果存在,就用新的取代旧的。
1 2 3 4 5 6 7 8 9 10 11 12 13
| $params = [ 'index' => 'test_index_1', 'type' => 'test_type_1', 'id' => 'test_id_1', 'body' => [ 'testField' => 'abc', 'testField2' => 'caoxl', 'testField3' => 'caoxl_123' ], ];
$response = $client->index($params); dd($response);
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
| array:8 [ "_index" => "test_index_1" "_type" => "test_type_1" "_id" => "test_id_1" "_version" => 1 "result" => "created" "_shards" => array:3 [ "total" => 2 "successful" => 1 "failed" => 0 ] "_seq_no" => 0 "_primary_term" => 1 ]
array:8 [ "_index" => "test_index_1" "_type" => "test_type_1" "_id" => "test_id_1" "_version" => 2 "result" => "updated" "_shards" => array:3 [ "total" => 1 "successful" => 1 "failed" => 0 ] "_seq_no" => 1 "_primary_term" => 6 ]
|
获取一个文档
现在获取刚才索引的文档:
1 2 3 4 5 6 7 8
| $_params = [ 'index' => 'test_index_1', 'type' => 'test_type_1', 'id' => 'test_id_1', ];
$_response = $client->get($_params); dd($_response);
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14
| array:8 [ "_index" => "test_index_1" "_type" => "test_type_1" "_id" => "test_id_1" "_version" => 2 "_seq_no" => 1 "_primary_term" => 6 "found" => true "_source" => array:3 [ "testField" => "abc" "testField2" => "caoxl" "testField3" => "caoxl_123" ] ]
|
搜索一个文档
1 2 3 4 5 6 7 8 9 10 11 12 13 14
| $_params_1 = [ 'index' => 'test_index_1', 'type' => 'test_type_1', 'body' => [ 'query' => [ 'match' => [ 'testField' => 'abc' ] ] ], ];
$_response_1 = $client->search($_params_1); dd($_response_1);
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
| array:4 [ "took" => 25 "timed_out" => false "_shards" => array:4 [ "total" => 2 "successful" => 2 "skipped" => 0 "failed" => 0 ] "hits" => array:3 [ "total" => array:2 [ "value" => 1 "relation" => "eq" ] "max_score" => 0.2876821 "hits" => array:1 [ 0 => array:5 [ "_index" => "test_index_1" "_type" => "test_type_1" "_id" => "test_id_1" "_score" => 0.2876821 "_source" => array:3 [ "testField" => "abc" "testField2" => "caoxl" "testField3" => "caoxl_123" ] ] ] ] ]
|
参数说明
match_all
- 表示取出所有documents,在与filter结合使用时,会经常使用match_all。
match
- 一般在全文检索时使用,首先利用analyzer 对具体查询字符串进行分析,然后进行查询;
term
- 用于精确查找,可用于数值、date、boolean 值或 not_analyzed string(5.x 及之后版本对应 keyword 类型字段)。使用 term 时,不会对查询字符串进行分析,进行的是精确匹配。
terms
- terms 和 term 类似,但是,terms 里可以指定多个值,只要doc满足terms 里的任意值,就是满足查询条件的。与term 相同,terms 也是用于精确查找。
range
- 类比数据库查找的范围查找
exists
- 用于查找字段含有一个或多个值的document
missing
- 用于查找某字段不存在值的 document,可类比关系数据库里的 is not null (exists) 和 is null (missing)。注意: missing 查询自 Elasticsearch 5.0 起已被移除,新版本中应在 bool 查询的 must_not 子句里嵌套 exists 查询来实现相同效果。
bool
- ElasticSearch 使用bool 子句来将各种子查询关联起来,组成布尔表达式,bool 子句可以随意组合、嵌套。
删除一个文档
删除 document。Elasticsearch 不会立即物理删除该 document,而是先将其标记为已删除;之后 Lucene 在底层进行段合并(merge)时,才会真正清除这些被标记删除的 document。
1 2 3 4 5 6 7 8
| $_params_2 = [ 'index' => 'test_index_1', 'type' => 'test_type_1', 'id' => 'test_id_1' ];
$_response_2 = $client->delete($_params_2); dd($_response_2);
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14
| array:8 [ "_index" => "test_index_1" "_type" => "test_type_1" "_id" => "test_id_1" "_version" => 3 "result" => "deleted" "_shards" => array:3 [ "total" => 1 "successful" => 1 "failed" => 0 ] "_seq_no" => 2 "_primary_term" => 6 ]
|
删除索引
1 2 3 4 5 6 7
| $delete_index = [ 'index' => 'test_index_1' ];
$response_delete = $client->indices()->delete($delete_index); dd($response_delete);
|
1 2 3
| array:1 [ "acknowledged" => true ]
|
创建索引
1 2 3 4 5 6 7 8 9 10 11 12 13
| $create_index = [ 'index' => 'test_index_1', 'body' => [ 'settings' => [ 'number_of_shards' => 2, 'number_of_replicas' => 0 ] ], ];
$_response_create = $client->indices()->create($create_index); dd($_response_create);
|
1 2 3 4 5
| array:3 [ "acknowledged" => true "shards_acknowledged" => true "index" => "test_index_1" ]
|
管理索引
设置API
1 2 3 4 5 6 7 8 9 10 11 12
| $set_api = [ 'index' => 'test_index_1', 'body' => [ 'settings' => [ 'number_of_replicas' => 0, 'refresh_interval' => -1, ], ], ];
$response_api = $client->indices()->putSettings($set_api); dd($response_api);
|
1 2 3
| array:1 [ "acknowledged" => true ]
|
获取设置API
1 2 3 4 5 6 7 8 9
| $get_api = ['index' => 'test_index_1'];
$get_api_2 = [ 'index' => ['test_index_1', 'index_help'] ];
$response_api = $client->indices()->getSettings($get_api_2); dd($response_api);
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
| array:2 [ "index_help" => array:1 [ "settings" => array:1 [ "index" => array:6 [ "creation_date" => "1562135548539" "number_of_shards" => "1" "number_of_replicas" => "1" "uuid" => "eZ_xGOcOTCmqvozVLMUNDA" "version" => array:1 [ "created" => "7010199" ] "provided_name" => "index_help" ] ] ] "test_index_1" => array:1 [ "settings" => array:1 [ "index" => array:7 [ "refresh_interval" => "-1" "number_of_shards" => "2" "provided_name" => "test_index_1" "creation_date" => "1562643744343" "number_of_replicas" => "0" "uuid" => "HUyT8pFISH6n1yf9cSgbrQ" "version" => array:1 [ "created" => "7010199" ] ] ] ] ]
|
映射API
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
| $mapping_params = [ 'index' => 'test_index_1', 'type' => 'test_type_1', 'body' => [ 'test_type_1' => [ '_source' => [ 'enabled' => true ], 'properties' => [ 'testField' => [ 'type' => 'text', ], 'testField2' => [ 'type' => 'text' ], 'testField3' => [ 'type' => 'text' ] ] ] ], 'custom' => [ 'include_type_name' => true ] ];
$response_map = $client->indices()->putMapping($mapping_params); dd($response_map);
|
1 2 3
| array:1 [ "acknowledged" => true ]
|
获取映射API
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
| $response_map = $client->indices()->getMapping();
$map_index = ['index' => 'test_index_1']; $response_map = $client->indices()->getMapping($map_index);
$map_type = [ 'type' => 'test_type_1', 'custom' => [ 'include_type_name' => true ] ]; $response_map = $client->indices()->getMapping($map_type);
$map_params = [ 'index' => 'test_index_1', 'type' => 'test_type_1', 'custom' => [ 'include_type_name' => true ] ]; $response_map = $client->indices()->getMapping($map_params);
$map_2index = [ 'index' => ['test_index_1', 'index_help', 'index_app'] ];
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37
| array:1 [ "test_index_1" => array:1 [ "mappings" => array:1 [ "test_type_1" => array:1 [ "properties" => array:3 [ "testField" => array:2 [ "type" => "text" "fields" => array:1 [ "keyword" => array:2 [ "type" => "keyword" "ignore_above" => 256 ] ] ] "testField2" => array:2 [ "type" => "text" "fields" => array:1 [ "keyword" => array:2 [ "type" => "keyword" "ignore_above" => 256 ] ] ] "testField3" => array:2 [ "type" => "text" "fields" => array:1 [ "keyword" => array:2 [ "type" => "keyword" "ignore_above" => 256 ] ] ] ] ] ] ] ]
|
执行搜索
Match 查询
1 2 3 4 5 6 7 8 9 10 11 12 13 14
| $search_params = [ 'index' => 'test_index_1', 'type' => 'test_type_1', 'body' => [ 'query' => [ 'match' => [ 'testField' => 'caoxl', ], ], ], ];
$response_search = $client->search($search_params); dd($response_search);
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
| array:4 [ "took" => 79 "timed_out" => false "_shards" => array:4 [ "total" => 2 "successful" => 2 "skipped" => 0 "failed" => 0 ] "hits" => array:3 [ "total" => array:2 [ "value" => 1 "relation" => "eq" ] "max_score" => 0.2876821 "hits" => array:1 [ 0 => array:5 [ "_index" => "test_index_1" "_type" => "test_type_1" "_id" => "test_id_1" "_score" => 0.2876821 "_source" => array:3 [ "testField" => "caoxl" "testField2" => "caoxl2" "testField3" => "caoxl3" ] ] ] ] ]
|
Bool查询
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
| $search_params = [ 'index' => 'test_index_1', 'type' => 'test_type_1', 'body' => [ 'query' => [ 'bool' => [ 'must' => [ ['match' => ['testField' => 'caoxl']], ['match' => ['testField2' => 'caoxl2']], ], ], ], ], ];
$response_search = $client->search($search_params); dd($response_search);
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
| array:4 [ "took" => 191 "timed_out" => false "_shards" => array:4 [ "total" => 2 "successful" => 2 "skipped" => 0 "failed" => 0 ] "hits" => array:3 [ "total" => array:2 [ "value" => 1 "relation" => "eq" ] "max_score" => 0.5753642 "hits" => array:1 [ 0 => array:5 [ "_index" => "test_index_1" "_type" => "test_type_1" "_id" => "test_id_1" "_score" => 0.5753642 "_source" => array:3 [ "testField" => "caoxl" "testField2" => "caoxl2" "testField3" => "caoxl3" ] ] ] ] ]
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
| $search_params = [ 'index' => 'test_index_1', 'type' => 'test_type_1', 'body' => [ 'query' => [ 'bool' => [ 'filter' => [ 'term' => ['testField' => 'caoxl'] ], 'should' => [ 'match' => ['testField2' => 'caoxl2'] ], ] ], ], ];
$response_search = $client->search($search_params); dd($response_search);
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
| array:4 [ "took" => 1 "timed_out" => false "_shards" => array:4 [ "total" => 2 "successful" => 2 "skipped" => 0 "failed" => 0 ] "hits" => array:3 [ "total" => array:2 [ "value" => 1 "relation" => "eq" ] "max_score" => 0.2876821 "hits" => array:1 [ 0 => array:5 [ "_index" => "test_index_1" "_type" => "test_type_1" "_id" => "test_id_1" "_score" => 0.2876821 "_source" => array:3 [ "testField" => "caoxl" "testField2" => "caoxl2" "testField3" => "caoxl3" ] ] ] ] ]
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
| $search_params = [ 'scroll' => '30s', 'size' => 50, 'index' => 'test_index_1', 'body' => [ 'query' => [ 'match_all' => new \stdClass() ], ], ];
$response_search = $client->search($search_params); dd($response_search);
while(isset($response_search['hits']['hits']) && count($response_search['hits']['hits']) > 0 ) { $scroll_id = $response_search['_scroll_id'];
$response_search = $client->scroll([ 'scroll_id' => $scroll_id, 'scroll' => '30s' ]); }
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
| array:5 [ "_scroll_id" => "DnF1ZXJ5VGhlbkZldGNoAgAAAAAAAAAVFlJxaEFQVFFEVDd5RVVqUFVvRTB4V1EAAAAAAAAAFhZScWhBUFRRRFQ3eUVValBVb0UweFdR" "took" => 39 "timed_out" => false "_shards" => array:4 [ "total" => 2 "successful" => 2 "skipped" => 0 "failed" => 0 ] "hits" => array:3 [ "total" => array:2 [ "value" => 1 "relation" => "eq" ] "max_score" => 1.0 "hits" => array:1 [ 0 => array:5 [ "_index" => "test_index_1" "_type" => "test_type_1" "_id" => "test_id_1" "_score" => 1.0 "_source" => array:3 [ "testField" => "caoxl" "testField2" => "caoxl2" "testField3" => "caoxl3" ] ] ] ] ]
|
Go-MySQL-Elasticsearch
参考: 使用 go-mysql-elasticsearch 把 MySQL 中的业务日志导入 Elasticsearch
注意: 需要把 MySQL 的 binlog 格式(binlog_format)修改成 ROW
其他操作
1 2 3 4 5 6 7 8 9 10 11 12 13
| $alias_params['body'] = [ 'actions' => [ [ 'add' => [ 'index' => 'test_index_1', 'alias' => 'alias_index_1' ] ] ], ];
$response_alias = $client->indices()->updateAliases($alias_params); dd($response_alias);
|
1 2 3
| array:1 [ "acknowledged" => true ]
|
常见问题
Types cannot be provided in put mapping requests, unless the include_type_name parameter is set to true
解决方法: 参考上面的「映射API」和「获取映射API」两节,在请求参数中加入如下参数:
1 2 3
| 'custom' => [ 'include_type_name' => true ]
|
参考