ElasticSearch Notes (3): The Java API and Chinese Analysis with ES


[TOC]
pom.xml
The test project for the ES Java API is built with Maven; the dependencies it uses are listed below.

<dependency>
    <groupId>org.elasticsearch</groupId>
    <artifactId>elasticsearch</artifactId>
    <version>2.3.0</version>
</dependency>
<dependency>
    <groupId>com.fasterxml.jackson.core</groupId>
    <artifactId>jackson-databind</artifactId>
    <version>2.7.0</version>
</dependency>
<dependency>
    <groupId>org.dom4j</groupId>
    <artifactId>dom4j</artifactId>
    <version>2.0.0</version>
</dependency>
<dependency>
    <groupId>org.projectlombok</groupId>
    <artifactId>lombok</artifactId>
    <version>1.16.10</version>
</dependency>
Basic CRUD with the ES API
The tests use JUnit; the shared fields and the setUp method are shown below.
private TransportClient client;
private String index = "bigdata";   // name of the index to operate on: "bigdata"
private String type = "product";    // name of the type inside that index: "product"

@Before
public void setup() throws UnknownHostException {
    // specify the name of the ES cluster to join; without it the client cannot find the cluster
    Settings settings = Settings.builder().put("cluster.name", "bigdata-08-28").build();
    client = TransportClient.builder().settings(settings).build();
    TransportAddress ta1 = new InetSocketTransportAddress(InetAddress.getByName("uplooking01"), 9300);
    TransportAddress ta2 = new InetSocketTransportAddress(InetAddress.getByName("uplooking02"), 9300);
    TransportAddress ta3 = new InetSocketTransportAddress(InetAddress.getByName("uplooking03"), 9300);
    client.addTransportAddresses(ta1, ta2, ta3);
    /*settings = client.settings();
        Map asMap = settings.getAsMap();
        for(Map.Entry setting : asMap.entrySet()) {
            System.out.println(setting.getKey() + "::" + setting.getValue());
        }*/
}
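The client should also be closed once the tests finish. A minimal teardown mirroring the setup above (this @After method is not part of the original notes and needs the org.junit.After import):

@After
public void cleanUp() {
    // release the TransportClient's sockets and thread pools
    if (client != null) {
        client.close();
    }
}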
Adding a document: JSON
/**
     * Note: es supports four ways of building the document source:
     * 1. JSON
     * 2. Map
     * 3. Java Bean
     * 4. XContentBuilder
     *
     * 1. The JSON way
     */
@Test
public void testAddJSON() {
    String source = "{\"name\":\"sqoop\", \"author\": \"apache\", \"version\": \"1.4.6\"}";
    IndexResponse response = client.prepareIndex(index, type, "4").setSource(source).get();
    System.out.println(response.isCreated());
}
Adding a document: Map
/**
     * Adding a document:
     * 2. The Map way
     */
@Test
public void testAddMap() {
    Map<String, Object> source = new HashMap<String, Object>();
    source.put("name", "flume");
    source.put("author", "Cloudera");
    source.put("version", "1.8.0");
    IndexResponse response = client.prepareIndex(index, type, "5").setSource(source).get();
    System.out.println(response.isCreated());
}
Adding a document: Java Bean
/**
     * Adding a document:
     * 3. The Java Bean way
     *
     * The bean must be serialized to JSON first; passing the bean object itself fails with:
     * The number of object passed must be even but was [1]
     */
@Test
public void testAddObj() throws JsonProcessingException {
    Product product = new Product("kafka", "linkedIn", "0.10.0.1", "kafka.apache.org");
    ObjectMapper objectMapper = new ObjectMapper();
    String json = objectMapper.writeValueAsString(product);
    System.out.println(json);
    IndexResponse response = client.prepareIndex(index, type, "6").setSource(json).get();
    System.out.println(response.isCreated());
}
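The Product bean used above is not listed in these notes. A minimal sketch that matches the constructor call, using Lombok (already declared in the pom), might look like this; the field names become the JSON keys produced by Jackson:

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

@Data
@AllArgsConstructor
@NoArgsConstructor
public class Product {
    private String name;
    private String author;
    private String version;
    private String url;
}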
Adding a document: XContentBuilder
/**
     * Adding a document:
     * 4. The XContentBuilder way
     */
@Test
public void testAddXContentBuilder() throws IOException {
    XContentBuilder source = XContentFactory.jsonBuilder();
    source.startObject()
        .field("name", "redis")
        .field("author", "redis")
        .field("version", "3.2.0")
        .field("url", "redis.cn")
        .endObject();
    IndexResponse response = client.prepareIndex(index, type, "7").setSource(source).get();
    System.out.println(response.isCreated());
}
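To inspect the JSON the builder produces, it can be serialized before indexing. A one-line sketch that could be added inside the test above, right before prepareIndex (XContentBuilder#string throws IOException, which the test already declares):

    System.out.println(source.string());   // prints {"name":"redis","author":"redis","version":"3.2.0","url":"redis.cn"}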
Getting a document
/**
     * Get a single document by index, type and id
     */
@Test
public void testGet() {
    GetResponse response = client.prepareGet(index, type, "6").get();
    Map<String, Object> map = response.getSource();
    /*for(Map.Entry<String, Object> me : map.entrySet()) {
            System.out.println(me.getKey() + "=" + me.getValue());
        }*/
    // lambda expression, requires JDK 1.8+
    map.forEach((k, v) -> System.out.println(k + "=" + v));
    // map.keySet().forEach(key -> System.out.println(key + "xxx"));
}
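If the requested id does not exist, getSource() returns null. A small guard that would sit right after the prepareGet call above avoids a NullPointerException (a sketch; the message text is illustrative):

    if (!response.isExists()) {        // true only when the document was found
        System.out.println("document not found");
        return;
    }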
Updating a document
/**
     * The update is equivalent to the following curl command:
     * curl -XPOST http://uplooking01:9200/bigdata/product/AWA184kojrSrzszxL-Zs/_update -d' {"doc":{"name":"sqoop", "author":"apache"}}'
     *
     * Note: updates go through prepareUpdate, not prepareIndex
     */
@Test
public void testUpdate() throws Exception {
    /*String source = "{\"doc\":{\"url\": \"http://flume.apache.org\"}}";
        UpdateResponse response = client.prepareUpdate(index, type, "4").setSource(source.getBytes()).get();*/
    // another way: pass only the changed fields and hand them to setDoc
    String source = "{\"url\": \"http://flume.apache.org\"}";
    UpdateResponse response = client.prepareUpdate(index, type, "4").setDoc(source.getBytes()).get();
    System.out.println(response.getVersion());
}
Deleting a document
/**
     * Delete a document by id
     */
@Test
public void testDelete() {
    DeleteResponse response = client.prepareDelete(index, type, "5").get();
    System.out.println(response.getVersion());
}
Bulk operations
/**
     * Bulk operations
     */
@Test
public void testBulk() {
    IndexRequestBuilder indexRequestBuilder = client.prepareIndex(index, type, "8")
        .setSource("{\"name\":\"elasticsearch\", \"url\":\"http://www.elastic.co\"}");
    UpdateRequestBuilder updateRequestBuilder = client.prepareUpdate(index, type, "1").setDoc("{\"url\":\"http://hadoop.apache.org\"}");
    BulkRequestBuilder bulk = client.prepareBulk();
    BulkResponse bulkResponse = bulk.add(indexRequestBuilder).add(updateRequestBuilder).get();
    Iterator<BulkItemResponse> it = bulkResponse.iterator();
    while(it.hasNext()) {
        BulkItemResponse response = it.next();
        System.out.println(response.getId() + "" + response.getVersion());
    }
}
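A bulk request can partially fail while still returning normally. A short check that could follow the get() call above, using the response's failure helpers (a sketch):

    // report any items that failed instead of silently iterating over them
    if (bulkResponse.hasFailures()) {
        System.out.println(bulkResponse.buildFailureMessage());
    }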
Counting documents in an index
/**
     * Count the documents in an index
     */
@Test
public void testCount() {
    CountResponse response = client.prepareCount(index).get();
    System.out.println("     :" + response.getCount());
}
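prepareCount can also be restricted to a query. A sketch counting only the documents whose author is apache (the field and value come from the earlier inserts; setQuery on the count builder is assumed to be available in this client version):

@Test
public void testCountWithQuery() {
    CountResponse response = client.prepareCount(index)
        .setQuery(QueryBuilders.termQuery("author", "apache"))   // count only matching documents
        .get();
    System.out.println("matching documents: " + response.getCount());
}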
Advanced queries with the ES API
These tests are also based on JUnit; the setUp and showResult methods used throughout are listed first.
Shared fields and setUp:
private TransportClient client;
private String index = "bigdata";
private String type = "product";
private String[] indics = {"bigdata", "bank"};

@Before
public void setUp() throws UnknownHostException {
    Settings settings = Settings.builder().put("cluster.name", "bigdata-08-28").build();
    client = TransportClient.builder().settings(settings).build();
    TransportAddress ta1 = new InetSocketTransportAddress(InetAddress.getByName("uplooking01"), 9300);
    TransportAddress ta2 = new InetSocketTransportAddress(InetAddress.getByName("uplooking02"), 9300);
    TransportAddress ta3 = new InetSocketTransportAddress(InetAddress.getByName("uplooking03"), 9300);
    client.addTransportAddresses(ta1, ta2, ta3);
}
showResult:
/**
     * Print the hits contained in a search response
     * @param response
     */
private void showResult(SearchResponse response) {
    SearchHits searchHits = response.getHits();
    float maxScore = searchHits.getMaxScore();  // highest score among the matched documents
    System.out.println("maxScore: " + maxScore);
    long totalHits = searchHits.getTotalHits(); // total number of matching documents
    System.out.println("totalHits: " + totalHits);
    SearchHit[] hits = searchHits.getHits();    // the documents actually returned (current page)
    System.out.println("documents returned: " + hits.length);
    for (SearchHit hit : hits) {
        long version = hit.version();
        String id = hit.getId();
        String index = hit.getIndex();
        String type = hit.getType();
        float score = hit.getScore();
        System.out.println("===================================================");
        String source = hit.getSourceAsString();
        System.out.println("version: " + version);
        System.out.println("id: " + id);
        System.out.println("index: " + index);
        System.out.println("type: " + type);
        System.out.println("score: " + score);
        System.out.println("source: " + source);
    }
}
ES search types
There are four search types:
query and fetch (fastest; every shard returns size hits, so up to size × number-of-shards documents come back)
query then fetch (the default search type)
DFS query and fetch
DFS query then fetch (the most accurate scoring, but also the slowest)
The javadoc of the API describes them as follows:
/**
     * Same as {@link #QUERY_THEN_FETCH}, except for an initial scatter phase which goes and computes the distributed
     * term frequencies for more accurate scoring.
     */
DFS_QUERY_THEN_FETCH((byte) 0),
/**
     * The query is executed against all shards, but only enough information is returned (not the document content).
     * The results are then sorted and ranked, and based on it, only the relevant shards are asked for the actual
     * document content. The return number of hits is exactly as specified in size, since they are the only ones that
     * are fetched. This is very handy when the index has a lot of shards (not replicas, shard id groups).
     */
QUERY_THEN_FETCH((byte) 1),
/**
     * Same as {@link #QUERY_AND_FETCH}, except for an initial scatter phase which goes and computes the distributed
     * term frequencies for more accurate scoring.
     */
DFS_QUERY_AND_FETCH((byte) 2),
/**
     * The most naive (and possibly fastest) implementation is to simply execute the query on all relevant shards
     * and return the results. Each shard returns size results. Since each shard already returns size hits, this
     * type actually returns size times number of shards results back to the caller.
     */
QUERY_AND_FETCH((byte) 3),
About DFS:
What does DFS stand for?
D is Distributed, F is the frequency of terms and documents, and S is Scatter: before scoring, the term and
document frequencies are gathered from every shard and scattered back, so scoring uses cluster-wide statistics.

Why is this initial scatter phase needed?
When es scores a hit, each shard by default only knows its own local term frequencies, so the same document can be
scored differently depending on which shard holds it, and the overall ranking can be slightly off. With the DFS
variants the distributed frequencies are collected first and the scores are computed against the whole index, which
gives the most accurate ranking; DFS_QUERY_THEN_FETCH therefore adds one more phase (three instead of two) and is
the slowest option. In short, DFS trades performance for scoring accuracy.
Summary:
For speed, QUERY_AND_FETCH is the fastest and DFS_QUERY_THEN_FETCH the slowest; for scoring accuracy, the DFS
variants are more precise than their non-DFS counterparts.
Exact query
/**
     * 1. Exact query
     * termQuery
     * a term query matches the exact term; the query string is not analyzed
     */
@Test
public void testSearch1() {
    SearchRequestBuilder searchQuery = client.prepareSearch(indics)    // prepareSearch() accepts one or more index names; the query runs against all of them
        .setSearchType(SearchType.DEFAULT)  // search type: QUERY_AND_FETCH / QUERY_THEN_FETCH / DFS_QUERY_AND_FETCH / DFS_QUERY_THEN_FETCH
        .setQuery(QueryBuilders.termQuery("author", "apache"))// the query itself; termQuery takes a field name and the exact value it must equal
        ;
    // the request is only executed when get() is called on the builder
    SearchResponse response = searchQuery.get();

    showResult(response);
}
Fuzzy query
/**
     * 2. Fuzzy query
     * prefixQuery
     */
@Test
public void testSearch2() {
    SearchResponse response = client.prepareSearch(indics).setSearchType(SearchType.QUERY_THEN_FETCH)
        .setQuery(QueryBuilders.prefixQuery("name", "h"))
        .get();
    showResult(response);
}
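prefixQuery only matches the beginning of a term. When the pattern can appear anywhere in the term, a wildcardQuery is an alternative; a sketch (the pattern is illustrative, and wildcard queries are more expensive than prefix queries):

@Test
public void testSearchWildcard() {
    SearchResponse response = client.prepareSearch(indics).setSearchType(SearchType.QUERY_THEN_FETCH)
        // * matches any character sequence, ? a single character
        .setQuery(QueryBuilders.wildcardQuery("name", "*doop"))
        .get();
    showResult(response);
}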
Paged query
/**
     * 3. Paged query
     *    query the bank index
     *    for accounts whose age is in (25, 35]
     *
     *    about paging:
     *    when no paging parameters are given,
     *    es returns the first 10 documents by default
     *
     *    e.g. to show page 4 with 10 documents per page:
     *        setFrom(30 = (4 - 1) * size)
     *        setSize(10)
     *    start offset of page N: (N - 1) * pageSize
     */
@Test
public void testSearch3() {
    // note the difference between QUERY_THEN_FETCH and QUERY_AND_FETCH: with size 10 and 5 shards, QUERY_AND_FETCH can return up to 50 documents
    SearchResponse response = client.prepareSearch(indics).setSearchType(SearchType.DFS_QUERY_THEN_FETCH)
        .setQuery(QueryBuilders.rangeQuery("age").gt(25).lte(35))
        // setFrom and setSize implement the paging
        .setFrom(0)
        .setSize(5)
        .get();
    showResult(response);
}
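Applying the formula from the comment above, fetching an arbitrary page only changes the from value. A sketch for page 4 with 10 documents per page (both numbers are illustrative):

@Test
public void testSearchPage4() {
    int page = 4;        // 1-based page number
    int pageSize = 10;   // documents per page
    SearchResponse response = client.prepareSearch(indics).setSearchType(SearchType.DFS_QUERY_THEN_FETCH)
        .setQuery(QueryBuilders.rangeQuery("age").gt(25).lte(35))
        .setFrom((page - 1) * pageSize)   // start offset = (N - 1) * pageSize
        .setSize(pageSize)
        .get();
    showResult(response);
}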
Highlighted query
/**
     * 4. Highlighted query
     *    requirement:
     *    search for apache in the author field or in the url field (name could be added the same way)
     *    author OR url ---> boolQuery with should clauses
     *    AND           ---> boolQuery with must clauses
     *    NOT           ---> boolQuery with mustNot clauses
     *    a match query could be used as well; note that match analyzes the keyword while term does not, so the results can differ
     */
@Test
public void testSearch4() {
    SearchResponse response = client.prepareSearch(indics).setSearchType(SearchType.DEFAULT)
        //                .setQuery(QueryBuilders.multiMatchQuery("apache", "author", "url"))
        //                .setQuery(QueryBuilders.regexpQuery("url", ".*apache.*"))
        //                .setQuery(QueryBuilders.termQuery("author", "apache"))
        .setQuery(QueryBuilders.boolQuery()
                  .should(QueryBuilders.regexpQuery("url", ".*apache.*"))
                  .should(QueryBuilders.termQuery("author", "apache")))
        // highlighting ---> the tags wrapped around every matched fragment (the HTML tags here are illustrative)
        .setHighlighterPreTags("<span style=\"color:red\">")
        .setHighlighterPostTags("</span>")
        // fields to highlight
        .addHighlightedField("author")
        .addHighlightedField("url")
        .get();
    SearchHits searchHits = response.getHits();
    float maxScore = searchHits.getMaxScore();  // highest score among the matched documents
    System.out.println("maxScore: " + maxScore);
    long totalHits = searchHits.getTotalHits(); // total number of matching documents
    System.out.println("totalHits: " + totalHits);
    SearchHit[] hits = searchHits.getHits();    // the documents actually returned (current page)
    System.out.println("documents returned: " + hits.length);
    for(SearchHit hit : hits) {
        System.out.println("========================================================");
        Map<String, HighlightField> highlightFields = hit.getHighlightFields();
        for(Map.Entry<String, HighlightField> me : highlightFields.entrySet()) {
            System.out.println("--------------------------------------");
            String key = me.getKey();
            HighlightField highlightField = me.getValue();
            String name = highlightField.getName();
            System.out.println("key: " + key + ", name: " + name);
            Text[] texts = highlightField.fragments();
            String value = "";
            for(Text text : texts) {
                // System.out.println("text: " + text.toString());
                value += text.toString();
            }
            System.out.println("value: " + value);
        }
    }
}
Sorted query
/**
     * 5. Sorted query
     *    same range query as in the paging example,
     *    sorted by balance in descending order
     */
@Test
public void testSearch5() {
    // note the difference between QUERY_THEN_FETCH and QUERY_AND_FETCH: with size 10 and 5 shards, QUERY_AND_FETCH can return up to 50 documents
    SearchResponse response = client.prepareSearch(indics).setSearchType(SearchType.DFS_QUERY_THEN_FETCH)
        .setQuery(QueryBuilders.rangeQuery("age").gt(25).lte(35))
        .addSort("balance", SortOrder.DESC)
        // setFrom and setSize implement the paging
        .setFrom(0)
        .setSize(5)
        .get();
    showResult(response);
}
Aggregation query: computing an average
/**
     * 6. Aggregation query: compute the average (and maximum) of a field
     */
@Test
public void testSearch6() {
    indics = new String[]{"bank"};
    // note the difference between QUERY_THEN_FETCH and QUERY_AND_FETCH: with size 10 and 5 shards, QUERY_AND_FETCH can return up to 50 documents
    SearchResponse response = client.prepareSearch(indics).setSearchType(SearchType.DFS_QUERY_THEN_FETCH)
        .setQuery(QueryBuilders.rangeQuery("age").gt(25).lte(35))
        /*
            analogous to SQL: select avg(age) as avg_name from person;
            the string passed to avg(...) is the alias (like avg_name) under which the result appears in the response
         */
        .addAggregation(AggregationBuilders.avg("avg_balance").field("balance"))
        .addAggregation(AggregationBuilders.max("max").field("balance"))
        .get();
    //        System.out.println(response);
    /*
        the aggregations appear in the response as:
            "aggregations" : {
                "max" : {
                  "value" : 49741.0
                },
                "avg_balance" : {
                  "value" : 25142.137373737372
                }
              }
        and a single aggregation as:
              {
                  "value" : 49741.0
              }
     */
    Aggregations aggregations = response.getAggregations();
    List<Aggregation> aggregationList = aggregations.asList();
    for(Aggregation aggregation : aggregationList) {
        System.out.println("========================================");
        String name = aggregation.getName();
        // Map map = aggregation.getMetaData();
        System.out.println("name: " + name);
        // System.out.println(map);
        Object obj = aggregation.getProperty("value");
        System.out.println(obj);
    }
    /*Aggregation avgBalance = aggregations.get("avg_balance");
        Object obj = avgBalance.getProperty("value");
        System.out.println(obj);*/
}
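Instead of the generic getProperty("value") loop above, the two aggregations can also be read through their typed result classes; a sketch assuming the metrics classes of this client version (org.elasticsearch.search.aggregations.metrics.avg.Avg and org.elasticsearch.search.aggregations.metrics.max.Max):

import org.elasticsearch.search.aggregations.metrics.avg.Avg;
import org.elasticsearch.search.aggregations.metrics.max.Max;

    // typed access, using the names passed to addAggregation above
    Avg avgBalance = response.getAggregations().get("avg_balance");
    Max maxBalance = response.getAggregations().get("max");
    System.out.println("avg balance: " + avgBalance.getValue());
    System.out.println("max balance: " + maxBalance.getValue());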
Integrating the IK Chinese analyzer into ES
If our data contains Chinese text we usually want searches to use Chinese word segmentation, but the analyzers ES inherits from Lucene do not handle Chinese well. Here we use the IK Chinese analyzer; the steps to integrate it into ES are the following:
  1) Download the source:
    https://github.com/medcl/elasticsearch-analysis-ik
  2) Build it with Maven (mvn clean install -DskipTests) (or mvn package)
  3) Unpack the zip found under target/releases into ES_HOME/plugins/analysis-ik on every node
  4) Copy the ik configuration directory conf/ik into ES_HOME/config
  5) Edit ES_HOME/config/elasticsearch.yml and add: index.analysis.analyzer.default.type: ik
  (this makes IK the default analyzer; it can also be configured per index or per field instead)
  6) Restart es
  7) Verify the analyzer (see the sketch right after this list)
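A quick way to verify the analyzer from the Java API is the indices analyze request; a sketch (the sample Chinese text is illustrative):

// ask the cluster to analyze a sample Chinese sentence with the ik analyzer
AnalyzeResponse response = client.admin().indices()
        .prepareAnalyze("中华人民共和国")
        .setAnalyzer("ik")
        .get();
for (AnalyzeResponse.AnalyzeToken token : response.getTokens()) {
    System.out.println(token.getTerm());   // with IK this prints whole words, not single characters
}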
Note that existing documents are not re-analyzed: any index that should use the IK analyzer has to be rebuilt and its data inserted again.
The test code is as follows.
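A minimal sketch of (re)creating such an index from the Java API, mapping a content field to the ik analyzer (the type and field names are illustrative; the chinese index name matches the one queried in the test class below):

// create an index whose "content" field is analyzed with IK at index and search time
client.admin().indices().prepareCreate("chinese")
    .addMapping("product",
        "{\"product\": {\"properties\": {"
            + "\"content\": {\"type\": \"string\", \"analyzer\": \"ik\", \"search_analyzer\": \"ik\"}"
            + "}}}")
    .get();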
package cn.xpleaf.bigdata.elasticsearch;

import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.common.transport.TransportAddress;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.aggregations.Aggregation;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.Aggregations;
import org.elasticsearch.search.highlight.HighlightField;
import org.elasticsearch.search.sort.SortOrder;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.List;
import java.util.Map;

/**
 * Using the Java API against the es cluster
 * Transport client
 * Test of Chinese (IK) analysis: the queries run against the "chinese" index
 * prepareSearch
 */
public class ElasticSearchTest3 {

    private TransportClient client;
    private String index = "bigdata";
    private String type = "product";
    private String[] indics = {"chinese"};

    @Before
    public void setUp() throws UnknownHostException {
        // connect to the cluster, same as in the previous test classes
        Settings settings = Settings.builder().put("cluster.name", "bigdata-08-28").build();
        client = TransportClient.builder().settings(settings).build();
        TransportAddress ta1 = new InetSocketTransportAddress(InetAddress.getByName("uplooking01"), 9300);
        TransportAddress ta2 = new InetSocketTransportAddress(InetAddress.getByName("uplooking02"), 9300);
        TransportAddress ta3 = new InetSocketTransportAddress(InetAddress.getByName("uplooking03"), 9300);
        client.addTransportAddresses(ta1, ta2, ta3);
    }

    /**
     * Querying Chinese content:
     * with the standard analyzer every Chinese character is indexed on its own, so a multi-character
     * Chinese term or prefix usually returns 0 hits, while an English sentence such as
     * "China is the greatest country~" is still matched; once the IK analyzer is installed and the
     * index is rebuilt, Chinese words are indexed as whole terms and the same queries match.
     */
    @Test
    public void testSearch1() {
        SearchResponse response = client.prepareSearch(indics)     // prepareSearch() accepts one or more index names
            .setSearchType(SearchType.DEFAULT)  // search type: QUERY_AND_FETCH / QUERY_THEN_FETCH / DFS_QUERY_AND_FETCH / DFS_QUERY_THEN_FETCH
            // .setQuery(QueryBuilders.regexpQuery("content", ".* .*"))
            .setQuery(QueryBuilders.prefixQuery("content", " "))    // prefix of the Chinese term to search for
            .get();
        showResult(response);
    }

    /**
     * Print the hits contained in a search response
     * @param response
     */
    private void showResult(SearchResponse response) {
        SearchHits searchHits = response.getHits();
        float maxScore = searchHits.getMaxScore();  // highest score among the matched documents
        System.out.println("maxScore: " + maxScore);
        long totalHits = searchHits.getTotalHits(); // total number of matching documents
        System.out.println("totalHits: " + totalHits);
        SearchHit[] hits = searchHits.getHits();    // the documents actually returned (current page)
        System.out.println("documents returned: " + hits.length);
        for (SearchHit hit : hits) {
            long version = hit.version();
            String id = hit.getId();
            String index = hit.getIndex();
            String type = hit.getType();
            float score = hit.getScore();
            System.out.println("===================================================");
            String source = hit.getSourceAsString();
            System.out.println("version: " + version);
            System.out.println("id: " + id);
            System.out.println("index: " + index);
            System.out.println("type: " + type);
            System.out.println("score: " + score);
            System.out.println("source: " + source);
        }
    }

    @After
    public void cleanUp() {
        client.close();
    }
}

The related test code has been uploaded to GitHub: https://github.com/xpleaf/elasticsearch-study