ElasticSearchノート整理(三):Java APIによるESの操作と中国語分詞の利用.
pom.xml
Mavenプロジェクトを使ってES Java APIのテストプロジェクトを構築します。利用する依存関係は以下の通りです.
org.elasticsearch
elasticsearch
2.3.0
com.fasterxml.jackson.core
jackson-databind
2.7.0
org.dom4j
dom4j
2.0.0
org.projectlombok
lombok
1.16.10
ES APIの基本的なCRUD操作(追加・取得・更新・削除)をJUnitを使ってテストします.グローバル変数とsetUp関数は以下の通りです.
private TransportClient client;
private String index = "bigdata"; // "bigdata"
private String type = "product"; // "product"
@Before
public void setup() throws UnknownHostException {
// ES , ,
Settings settings = Settings.builder().put("cluster.name", "bigdata-08-28").build();
client = TransportClient.builder().settings(settings).build();
TransportAddress ta1 = new InetSocketTransportAddress(InetAddress.getByName("uplooking01"), 9300);
TransportAddress ta2 = new InetSocketTransportAddress(InetAddress.getByName("uplooking02"), 9300);
TransportAddress ta3 = new InetSocketTransportAddress(InetAddress.getByName("uplooking03"), 9300);
client.addTransportAddresses(ta1, ta2, ta3);
/*settings = client.settings();
Map asMap = settings.getAsMap();
for(Map.Entry setting : asMap.entrySet()) {
System.out.println(setting.getKey() + "::" + setting.getValue());
}*/
}
インデックス追加:JSON方式/**
* : es 4
* 1.JSON
* 2.Map
* 3.Java Bean
* 4.XContentBuilder
*
* 1.JSON
*/
@Test
public void testAddJSON() {
String source = "{\"name\":\"sqoop\", \"author\": \"apache\", \"version\": \"1.4.6\"}";
IndexResponse response = client.prepareIndex(index, type, "4").setSource(source).get();
System.out.println(response.isCreated());
}
索引の追加:Map方式/**
* :
* 2.Map
*/
@Test
public void testAddMap() {
Map source = new HashMap();
source.put("name", "flume");
source.put("author", "Cloudera");
source.put("version", "1.8.0");
IndexResponse response = client.prepareIndex(index, type, "5").setSource(source).get();
System.out.println(response.isCreated());
}
インデックス追加:Java Bean方式/**
* :
* 3.Java Bean
*
* json , :
* The number of object passed must be even but was [1]
*/
@Test
public void testAddObj() throws JsonProcessingException {
Product product = new Product("kafka", "linkedIn", "0.10.0.1", "kafka.apache.org");
ObjectMapper objectMapper = new ObjectMapper();
String json = objectMapper.writeValueAsString(product);
System.out.println(json);
IndexResponse response = client.prepareIndex(index, type, "6").setSource(json).get();
System.out.println(response.isCreated());
}
インデックス追加:XContentBuilder方式/**
* :
* 4.XContentBuilder
*/
@Test
public void testAddXContentBuilder() throws IOException {
XContentBuilder source = XContentFactory.jsonBuilder();
source.startObject()
.field("name", "redis")
.field("author", "redis")
.field("version", "3.2.0")
.field("url", "redis.cn")
.endObject();
IndexResponse response = client.prepareIndex(index, type, "7").setSource(source).get();
System.out.println(response.isCreated());
}
インデックスクエリ/**
*
*/
@Test
public void testGet() {
GetResponse response = client.prepareGet(index, type, "6").get();
Map map = response.getSource();
/*for(Map.Entry me : map.entrySet()) {
System.out.println(me.getKey() + "=" + me.getValue());
}*/
// lambda ,jdk 1.8
map.forEach((k, v) -> System.out.println(k + "=" + v));
// map.keySet().forEach(key -> System.out.println(key + "xxx"));
}
インデックス更新/**
* curl
* curl -XPOST http://uplooking01:9200/bigdata/product/AWA184kojrSrzszxL-Zs/_update -d' {"doc":{"name":"sqoop", "author":"apache"}}'
*
* , prepareUpdate, prepareIndex
*/
@Test
public void testUpdate() throws Exception {
/*String source = "{\"doc\":{\"url\": \"http://flume.apache.org\"}}";
UpdateResponse response = client.prepareUpdate(index, type, "4").setSource(source.getBytes()).get();*/
//
String source = "{\"url\": \"http://flume.apache.org\"}";
UpdateResponse response = client.prepareUpdate(index, type, "4").setDoc(source.getBytes()).get();
System.out.println(response.getVersion());
}
インデックスの削除/**
*
*/
@Test
public void testDelete() {
DeleteResponse response = client.prepareDelete(index, type, "5").get();
System.out.println(response.getVersion());
}
一括操作/**
*
*/
@Test
public void testBulk() {
IndexRequestBuilder indexRequestBuilder = client.prepareIndex(index, type, "8")
.setSource("{\"name\":\"elasticsearch\", \"url\":\"http://www.elastic.co\"}");
UpdateRequestBuilder updateRequestBuilder = client.prepareUpdate(index, type, "1").setDoc("{\"url\":\"http://hadoop.apache.org\"}");
BulkRequestBuilder bulk = client.prepareBulk();
BulkResponse bulkResponse = bulk.add(indexRequestBuilder).add(updateRequestBuilder).get();
Iterator it = bulkResponse.iterator();
while(it.hasNext()) {
BulkItemResponse response = it.next();
System.out.println(response.getId() + "" + response.getVersion());
}
}
インデックスレコードの取得数/**
*
*/
@Test
public void testCount() {
CountResponse response = client.prepareCount(index).get();
System.out.println(" :" + response.getCount());
}
ES APIの高度なクエリをJUnitに基づいてテストします.setUp関数とshowResult関数は以下の通りです.
グローバル変数とsetUp:
private TransportClient client;
private String index = "bigdata";
private String type = "product";
private String[] indics = {"bigdata", "bank"};
@Before
public void setUp() throws UnknownHostException {
Settings settings = Settings.builder().put("cluster.name", "bigdata-08-28").build();
client = TransportClient.builder().settings(settings).build();
TransportAddress ta1 = new InetSocketTransportAddress(InetAddress.getByName("uplooking01"), 9300);
TransportAddress ta2 = new InetSocketTransportAddress(InetAddress.getByName("uplooking02"), 9300);
TransportAddress ta3 = new InetSocketTransportAddress(InetAddress.getByName("uplooking03"), 9300);
client.addTransportAddresses(ta1, ta2, ta3);
}
show Result:/**
*
* @param response
*/
private void showResult(SearchResponse response) {
SearchHits searchHits = response.getHits();
float maxScore = searchHits.getMaxScore(); //
System.out.println("maxScore: " + maxScore);
long totalHits = searchHits.getTotalHits(); //
System.out.println("totalHits: " + totalHits);
SearchHit[] hits = searchHits.getHits(); //
System.out.println(" :" + hits.length);
for (SearchHit hit : hits) {
long version = hit.version();
String id = hit.getId();
String index = hit.getIndex();
String type = hit.getType();
float score = hit.getScore();
System.out.println("===================================================");
String source = hit.getSourceAsString();
System.out.println("version: " + version);
System.out.println("id: " + id);
System.out.println("index: " + index);
System.out.println("type: " + type);
System.out.println("score: " + score);
System.out.println("source: " + source);
}
}
ESクエリの種類説明クエリーの種類は以下の4つです.
query and fetch( )( N )
query then fetch( )
DFS query and fetch
DFS query then fetch( 。)
APIのコメントは以下の通りです./**
* Same as {@link #QUERY_THEN_FETCH}, except for an initial scatter phase which goes and computes the distributed
* term frequencies for more accurate scoring.
*/
DFS_QUERY_THEN_FETCH((byte) 0),
/**
* The query is executed against all shards, but only enough information is returned (not the document content).
* The results are then sorted and ranked, and based on it, only the relevant shards are asked for the actual
* document content. The return number of hits is exactly as specified in size, since they are the only ones that
* are fetched. This is very handy when the index has a lot of shards (not replicas, shard id groups).
*/
QUERY_THEN_FETCH((byte) 1),
/**
* Same as {@link #QUERY_AND_FETCH}, except for an initial scatter phase which goes and computes the distributed
* term frequencies for more accurate scoring.
*/
DFS_QUERY_AND_FETCH((byte) 2),
/**
* The most naive (and possibly fastest) implementation is to simply execute the query on all relevant shards
* and return the results. Each shard returns size results. Since each shard already returns size hits, this
* type actually returns size times number of shards results back to the caller.
*/
QUERY_AND_FETCH((byte) 3),
DFSについての説明:DFS ?
D Distributed,F frequency , S Scatter ,
。
?
es , ,
, , 。
DFS_QUERY_THEN_FETCH , , , 3 。 , DFS ,
。
まとめ: , QUERY_AND_FETCH ,DFS_QUERY_THEN_FETCH 。 ,DFS
DFS 。
正確なクエリー/**
* 1.
* termQuery
* term
*/
@Test
public void testSearch1() {
SearchRequestBuilder searchQuery = client.prepareSearch(indics) // prepareSearch() ,
.setSearchType(SearchType.DEFAULT) // , QUERY_AND_FETCH QUERY_THEN_FETCH DFS_QUERY_AND_FETCH DFS_QUERY_THEN_FETCH
.setQuery(QueryBuilders.termQuery("author", "apache"))// query, ,termQuery :name doc field,value
;
// ,
SearchResponse response = searchQuery.get();
showResult(response);
}
あいまい検索/**
* 2.
* prefixQuery
*/
@Test
public void testSearch2() {
SearchResponse response = client.prepareSearch(indics).setSearchType(SearchType.QUERY_THEN_FETCH)
.setQuery(QueryBuilders.prefixQuery("name", "h"))
.get();
showResult(response);
}
ページ別に照会する/**
* 3.
* bank
* (25, 35]
*
* :
* ,
* 10
*
* 4
* setFrom(30=(4-1)*size)
* setSize(10)
* N :(N - 1) * pageSize
*/
@Test
public void testSearch3() {
// QUERY_THEN_FETCH QUERY_AND_FETCH , 10 , 50 (5 )
SearchResponse response = client.prepareSearch(indics).setSearchType(SearchType.DFS_QUERY_THEN_FETCH)
.setQuery(QueryBuilders.rangeQuery("age").gt(25).lte(35))
// setFrom setSize
.setFrom(0)
.setSize(5)
.get();
showResult(response);
}
クエリーをハイライト表示/**
* 4.
* ,
* apache, author , url, name
* author or url --->booleanQuery should
* and --->booleanQuery must
* not --->booleanQuery mustNot
* match , keyword , ,
*/
@Test
public void testSearch4() {
SearchResponse response = client.prepareSearch(indics).setSearchType(SearchType.DEFAULT)
// .setQuery(QueryBuilders.multiMatchQuery("apache", "author", "url"))
// .setQuery(QueryBuilders.regexpQuery("url", ".*apache.*"))
// .setQuery(QueryBuilders.termQuery("author", "apache"))
.setQuery(QueryBuilders.boolQuery()
.should(QueryBuilders.regexpQuery("url", ".*apache.*"))
.should(QueryBuilders.termQuery("author", "apache")))
// --->
.setHighlighterPreTags("")
.setHighlighterPostTags("")
//
.addHighlightedField("author")
.addHighlightedField("url")
.get();
SearchHits searchHits = response.getHits();
float maxScore = searchHits.getMaxScore(); //
System.out.println("maxScore: " + maxScore);
long totalHits = searchHits.getTotalHits(); //
System.out.println("totalHits: " + totalHits);
SearchHit[] hits = searchHits.getHits(); //
System.out.println(" :" + hits.length);
for(SearchHit hit : hits) {
System.out.println("========================================================");
Map highlightFields = hit.getHighlightFields();
for(Map.Entry me : highlightFields.entrySet()) {
System.out.println("--------------------------------------");
String key = me.getKey();
HighlightField highlightField = me.getValue();
String name = highlightField.getName();
System.out.println("key: " + key + ", name: " + name);
Text[] texts = highlightField.fragments();
String value = "";
for(Text text : texts) {
// System.out.println("text: " + text.toString());
value += text.toString();
}
System.out.println("value: " + value);
}
}
}
クエリーの並べ替え/**
* 5.
*
* balance( )
*/
@Test
public void testSearch5() {
// QUERY_THEN_FETCH QUERY_AND_FETCH , 10 , 50 (5 )
SearchResponse response = client.prepareSearch(indics).setSearchType(SearchType.DFS_QUERY_THEN_FETCH)
.setQuery(QueryBuilders.rangeQuery("age").gt(25).lte(35))
.addSort("balance", SortOrder.DESC)
// setFrom setSize
.setFrom(0)
.setSize(5)
.get();
showResult(response);
}
集約クエリ:平均を計算する/**
* 6. :
*/
@Test
public void testSearch6() {
indics = new String[]{"bank"};
// QUERY_THEN_FETCH QUERY_AND_FETCH , 10 , 50 (5 )
SearchResponse response = client.prepareSearch(indics).setSearchType(SearchType.DFS_QUERY_THEN_FETCH)
.setQuery(QueryBuilders.rangeQuery("age").gt(25).lte(35))
/*
select avg(age) as avg_name from person;
avg("balance")---> avg_name
*/
.addAggregation(AggregationBuilders.avg("avg_balance").field("balance"))
.addAggregation(AggregationBuilders.max("max").field("balance"))
.get();
// System.out.println(response);
/*
response Aggregations
"aggregations" : {
"max" : {
"value" : 49741.0
},
"avg_balance" : {
"value" : 25142.137373737372
}
}
aggregation :
{
"value" : 49741.0
}
*/
Aggregations aggregations = response.getAggregations();
List aggregationList = aggregations.asList();
for(Aggregation aggregation : aggregationList) {
System.out.println("========================================");
String name = aggregation.getName();
// Map map = aggregation.getMetaData();
System.out.println("name: " + name);
// System.out.println(map);
Object obj = aggregation.getProperty("value");
System.out.println(obj);
}
/*Aggregation avgBalance = aggregations.get("avg_balance");
Object obj = avgBalance.getProperty("value");
System.out.println(obj);*/
}
ES中国語分詞の統合(IK分詞) データが中国語を含む場合、中国語の分詞検索をサポートしたいところですが、ESが依存するLucene標準の分詞は中国語の処理が十分ではありません.ここで説明するIK中国語分詞プラグインをESに統合する手順は以下の通りです.
1) :
https://github.com/medcl/elasticsearch-analysis-ik
2) maven (mvn clean install -DskipTests)(package)
3) target/releases zip ES_HOME/plugins/analysis-ik ,
4) ik conf/ik ES_HOME/config
5) ES_HOME/config/elasticsearch.yml , index.analysis.analyzer.default.type: ik
( IK , )
6) es
7)
なお、IK分詞を適用するには、中国語分詞IKを使用したいインデックスライブラリを再構築し、データを新たに挿入する必要がある.テストコードは以下の通りです.
package cn.xpleaf.bigdata.elasticsearch;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.common.transport.TransportAddress;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.aggregations.Aggregation;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.Aggregations;
import org.elasticsearch.search.highlight.HighlightField;
import org.elasticsearch.search.sort.SortOrder;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.List;
import java.util.Map;
/**
* Java API es
* Transport
*
* TransportClient
*
* prepareSearch
*
*/
public class ElasticSearchTest3 {
private TransportClient client;
private String index = "bigdata";
private String type = "product";
private String[] indics = {"chinese"};
@Before
public void setUp() throws UnknownHostException {
Settings settings = Settings.builder().put("cluster.name", "bigdata-08-28").build();
client = TransportClient.builder().settings(settings).build();
TransportAddress ta1 = new InetSocketTransportAddress(InetAddress.getByName("uplooking01"), 9300);
TransportAddress ta2 = new InetSocketTransportAddress(InetAddress.getByName("uplooking02"), 9300);
TransportAddress ta3 = new InetSocketTransportAddress(InetAddress.getByName("uplooking03"), 9300);
client.addTransportAddresses(ta1, ta2, ta3);
}
/**
*
* 1. " " ,
* 2. “ ” , 0
* 3. “ ” , 1
* 4. “ ” , 0
* :
* China is the greatest country~
* :
*
* ×××
*
*
*
*
*
*
*
* :
*
* IK
*
*/
@Test
public void testSearch1() {
SearchResponse response = client.prepareSearch(indics) // prepareSearch() ,
.setSearchType(SearchType.DEFAULT) // , QUERY_AND_FETCH QUERY_THEN_FETCH DFS_QUERY_AND_FETCH DFS_QUERY_THEN_FETCH
//.setQuery(QueryBuilders.prefixQuery("content", " "))// query, ,termQuery :name doc field,value
// .setQuery(QueryBuilders.regexpQuery("content", ".* .*"))
.setQuery(QueryBuilders.prefixQuery("content", " "))
.get();
showResult(response);
}
/**
*
* @param response
*/
private void showResult(SearchResponse response) {
SearchHits searchHits = response.getHits();
float maxScore = searchHits.getMaxScore(); //
System.out.println("maxScore: " + maxScore);
long totalHits = searchHits.getTotalHits(); //
System.out.println("totalHits: " + totalHits);
SearchHit[] hits = searchHits.getHits(); //
System.out.println(" :" + hits.length);
for (SearchHit hit : hits) {
long version = hit.version();
String id = hit.getId();
String index = hit.getIndex();
String type = hit.getType();
float score = hit.getScore();
System.out.println("===================================================");
String source = hit.getSourceAsString();
System.out.println("version: " + version);
System.out.println("id: " + id);
System.out.println("index: " + index);
System.out.println("type: " + type);
System.out.println("score: " + score);
System.out.println("source: " + source);
}
}
@After
public void cleanUp() {
client.close();
}
}
関連テストコードはGitHubにアップロードされました.https://github.com/xpleaf/elasticsearch-study