springboot整合elasticsearch实现类似于mysql的like查询

目录

一、ES分页查询常用方式

二、引入es的依赖

三、es配置文件

四、es工具类

五、分页查询示例


一、ES分页查询常用方式

1.from + size

from表示从第几行开始,size表示查询多少条文档。from默认为0,size默认为10,最灵活的分页方式。

2.scroll

不适合用来做实时搜索,而更适用于后台批处理任务,如日志导出。scroll会暂存一份搜索结果的快照,之后每次请求传入scroll_id即可获取下一批数据。保持scroll上下文会占用大量资源,因此只适合非实时地处理大量数据的情况。使用时先发起带scroll参数的初始化查询(通过size指定每批返回的文档数),再携带返回的scroll_id循环拉取后续批次;注意scroll上下文中不能使用from参数,因此只能顺序向后翻页,不能跳页。

3. search_after

根据上一页的最后一条数据来确定下一页的位置。需要使用一个唯一值的字段作为排序字段。不能自由跳到一个随机页面。要想实现翻页,需要每次记录最后查询的sort。实现方式比较麻烦,需要去记录上一次查询的排序字段的值用于下一页分页查询。

此处采用第一种from + size方式来实现类似于mysql的like模糊查询。

二、引入es的依赖

  <!-- Spring Data Elasticsearch starter. No <version> is declared here, so it is presumably
       managed by the Spring Boot dependency management of the parent POM (confirm). -->
  <!-- https://mvnrepository.com/artifact/org.springframework.boot/spring-boot-starter-data-elasticsearch -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
        </dependency>

        <!-- elasticsearch-rest-high-level-client: provides the RestHighLevelClient used by the
             configuration class and the EsUtil helper in this article -->
        <dependency>
            <groupId>org.elasticsearch.client</groupId>
            <artifactId>elasticsearch-rest-high-level-client</artifactId>
        </dependency>

三、es配置文件

package com.crcm.elasticsearch.config;

import org.apache.http.HttpHost;
import org.apache.http.client.config.RequestConfig;
import org.elasticsearch.client.Node;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestClientBuilder;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

import java.util.Arrays;
import java.util.Objects;

/**
 * Elasticsearch configuration.
 *
 * <p>Builds the low-level {@link RestClientBuilder} and the {@link RestHighLevelClient} from the
 * comma-separated node list configured under {@code spring.elasticsearch.rest.uris}. Each entry
 * may be written as {@code host:port} or {@code scheme://host:port}.
 */
@Configuration
public class ElasticsearchConfig {

    /** Number of parts expected after splitting a {@code host:port} address on ':'. */
    private static final int ADDRESS_LENGTH = 2;

    /** Separator between an optional scheme and the host part, as in {@code http://host:9200}. */
    private static final String SCHEME_SEPARATOR = "://";

    /** Scheme used when an address does not carry one. */
    private static final String DEFAULT_SCHEME = "http";

    @Value("${spring.elasticsearch.rest.uris}")
    private String ipAddress;

    /**
     * Creates the builder for the low-level REST client.
     *
     * @return a builder configured with every parsable node address and 1000 ms
     *         connection-request, socket and connect timeouts
     * @throws IllegalArgumentException if no configured address could be parsed
     * @throws NumberFormatException if a port is not a valid integer
     */
    @Bean
    public RestClientBuilder restClientBuilder() {

        HttpHost[] hosts = Arrays.stream(ipAddress.split(","))
                .map(this::makeHttpHost)
                .filter(Objects::nonNull)
                .toArray(HttpHost[]::new);
        if (hosts.length == 0) {
            // Fail with the offending property value instead of the client's generic message.
            throw new IllegalArgumentException(
                    "No valid Elasticsearch address in spring.elasticsearch.rest.uris: " + ipAddress);
        }
        RestClientBuilder restClientBuilder = RestClient.builder(hosts);
        // Register a listener that is notified every time a node fails, in case action is needed.
        // It is used internally when sniffing on failure is enabled.
        restClientBuilder.setFailureListener(new RestClient.FailureListener() {
            @Override
            public void onFailure(Node node) {
                // Intentionally empty: no action is taken on node failure.
            }
        });
        // Callback that customizes the default request configuration
        // (timeouts, authentication, or anything else RequestConfig.Builder exposes).
        restClientBuilder.setRequestConfigCallback(new RestClientBuilder.RequestConfigCallback() {
            @Override
            public RequestConfig.Builder customizeRequestConfig(RequestConfig.Builder requestConfigBuilder) {
                return requestConfigBuilder
                        .setConnectionRequestTimeout(1000)
                        .setSocketTimeout(1000)
                        .setConnectTimeout(1000);
            }
        });
        return restClientBuilder;
    }

    /**
     * Creates the high-level client on top of the configured builder.
     *
     * @param restClientBuilder the builder produced by {@link #restClientBuilder()}
     * @return the high-level REST client bean named {@code highLevelClient}
     */
    @Bean(name = "highLevelClient")
    public RestHighLevelClient highLevelClient(@Autowired RestClientBuilder restClientBuilder) {
        return new RestHighLevelClient(restClientBuilder);
    }

    /**
     * Creates an {@link HttpHost} from one configured address.
     *
     * @param s a single address, either {@code host:port} or {@code scheme://host:port};
     *          surrounding whitespace is ignored
     * @return the parsed host, or {@code null} when the address is blank or is not of the form
     *         {@code host:port} after the optional scheme has been removed
     * @throws NumberFormatException if the port is not a valid integer
     */
    private HttpHost makeHttpHost(String s) {
        if (s == null) {
            return null;
        }
        String hostAndPort = s.trim();
        String scheme = DEFAULT_SCHEME;
        // Splitting "http://host:9200" directly on ':' yields three parts, so the scheme
        // has to be removed before the host:port split.
        int schemeEnd = hostAndPort.indexOf(SCHEME_SEPARATOR);
        if (schemeEnd >= 0) {
            scheme = hostAndPort.substring(0, schemeEnd);
            hostAndPort = hostAndPort.substring(schemeEnd + SCHEME_SEPARATOR.length());
        }
        String[] address = hostAndPort.split(":");
        if (address.length != ADDRESS_LENGTH) {
            return null;
        }
        String ip = address[0].trim();
        int port = Integer.parseInt(address[1].trim());
        return new HttpHost(ip, port, scheme);
    }

}

四、es工具类

package com.crcm.elasticsearch.util;

import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.support.WriteRequest;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.core.CountRequest;
import org.elasticsearch.client.core.CountResponse;
import org.elasticsearch.common.util.CollectionUtils;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.sort.FieldSortBuilder;
import org.elasticsearch.search.sort.ScoreSortBuilder;
import org.elasticsearch.search.sort.SortOrder;
import org.springframework.stereotype.Component;

import javax.annotation.Resource;
import java.io.IOException;
import java.util.*;
import java.util.concurrent.TimeUnit;

/**
 * es RestHighLevelClient 工具类
 */
@Slf4j
@Component
public class EsUtil {

    private static final long TIME_OUT = 60;

    @Resource
    RestHighLevelClient highLevelClient;

    /**
     * 根据条件获取索引总数
     * @param index
     * @param query
     * @return
     */
    public long getTotalHits(String index,QueryBuilder query){
        CountRequest countRequest = new CountRequest();
        CountResponse response;
        // 绑定索引名
        countRequest.indices(index);
        countRequest.query(query);
        try {
            response = highLevelClient.count(countRequest, RequestOptions.DEFAULT);
            return response.getCount();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return 0;
    }

    /**
     * 根据条件分页查询指定索引数据
     * @param index ES索引
     * @param from 开始数
     * @param size 大小
     * @param orderName 排序字段名
     * @param sortOrder 排序方式
     * @param includes 返回字段
     * @param query 查询条件
     * @return
     */
    public Map<String, Object> searchPage(String index, int from, int size, String orderName, String sortOrder,
                                                 String[] includes, BoolQueryBuilder query) {
        Map<String, Object> resultMap = new HashMap<>();
        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        sourceBuilder.query(query).from((from - 1) * size).size(size)
                .timeout(new TimeValue(TIME_OUT, TimeUnit.SECONDS));
        if (StringUtils.isNotBlank(orderName) && StringUtils.isNotBlank(sortOrder)) {
            sourceBuilder.sort(new FieldSortBuilder(orderName).order(SortOrder.fromString(sortOrder)));
        } else {
            sourceBuilder.sort(new ScoreSortBuilder().order(SortOrder.DESC));
        }
        // 返回列和排除列,排除列可设置为null
        if (!CollectionUtils.isEmpty(includes)) {
            sourceBuilder.fetchSource(includes,null);
        }
        SearchRequest searchRequest = new SearchRequest(index).source(sourceBuilder);
        try{
            log.info("dsl:" + searchRequest.toString());
            SearchResponse response = highLevelClient.search(searchRequest, RequestOptions.DEFAULT);
            List<String> data = new ArrayList<>();
            for(SearchHit hit : response.getHits().getHits()){
                String hitSourceAsString = hit.getSourceAsString();
                data.add(hitSourceAsString);
            }
            resultMap.put("records",data);
            //总数
            resultMap.put("total",getTotalHits(index,query));
            return resultMap;
        } catch (IOException e) {
            e.printStackTrace();
        }
        return null;
    }
}

五、分页查询示例

1、在service中注入es工具类

// Injects the EsUtil component so the service can run its paged Elasticsearch searches through it.
@Resource
private EsUtil esUtil;



2、gatewayLog实体类

package com.crcm.admin.gateway.model.entity;

import com.baomidou.mybatisplus.annotation.*;
import com.crcm.cloud.start.data.mybatis.bean.BaseEntity;
import com.fasterxml.jackson.annotation.JsonFormat;
import lombok.Getter;
import lombok.Setter;
import lombok.ToString;

import java.util.Date;
/**
 * Gateway access-log record, mapped to table {@code t_gateway_log}.
 */
@Setter
@Getter
@ToString
@TableName("t_gateway_log")
public class GatewayLog extends BaseEntity {
    private static final long serialVersionUID = 1L;

    /** Primary key, assigned as a UUID. */
    @TableId(value = "id", type = IdType.ASSIGN_UUID)
    private String id;

    /** Target server instance that was accessed. */
    private String targetServer;

    /** Request method. */
    private String requestMethod;

    /** Request path. */
    private String requestPath;

    /** Protocol. */
    private String protocol;

    /** Request body. */
    private String requestBody;

    /** Response body. */
    private String responseData;

    /** IP the request came from. */
    private String ip;

    /** Time of the request, formatted as {@code yyyy-MM-dd HH:mm:ss} by Jackson. */
    @JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss")
    private Date requestTime;

    /*
     * NOTE(review): the original snippet carried @TableLogic and @TableField(fill = FieldFill.INSERT)
     * together with an "is deleted" comment directly above startTime. Those annotations belong to a
     * logical-delete flag whose field declaration is missing from this class; left in place they
     * turned startTime into the logical-delete column. Re-add them on the real delete-flag field
     * if this entity needs one.
     */

    /**
     * Lower bound of the request-time range used when querying.
     * Marked as not persisted; this assumes t_gateway_log has no matching column (confirm).
     */
    @TableField(exist = false)
    private String startTime;

    /**
     * Upper bound of the request-time range used when querying.
     * Marked as not persisted; this assumes t_gateway_log has no matching column (confirm).
     */
    @TableField(exist = false)
    private String endTime;

}

3、构建查询,使用EsUtil工具类进行分页查询

/**
 * Pages through the gateway logs stored in the {@code gateway_log} index, newest request first.
 *
 * <p>Filters are combined with AND: a "contains" match on the target server, a "contains" match
 * on the request path, and a request-time range that is applied only when both bounds are given.
 *
 * @param page       paging input; its current page and page size are passed to the search
 * @param gatewayLog filter values (targetServer, requestPath, startTime, endTime); blank values
 *                   are ignored
 * @return a page holding the matching records and the total hit count; an empty page with
 *         total 0 when the search returns no result map
 */
public PageT<GatewayLog> findTGatewayLogPage(PageT page, GatewayLog gatewayLog) {
    // Build the query: multi-field fuzzy ("like") matching via wildcard queries.
    BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
    if (StringUtils.isNotBlank(gatewayLog.getTargetServer())) {
        boolQuery.must(QueryBuilders.wildcardQuery("targetServer.keyword", "*" + gatewayLog.getTargetServer() + "*"));
    }
    if (StringUtils.isNotBlank(gatewayLog.getRequestPath())) {
        boolQuery.must(QueryBuilders.wildcardQuery("requestPath.keyword", "*" + gatewayLog.getRequestPath() + "*"));
    }
    // Time range filter, applied only when both bounds are present.
    if (StringUtils.isNotBlank(gatewayLog.getStartTime()) && StringUtils.isNotBlank(gatewayLog.getEndTime())) {
        // The time format sent by the page must match the format stored in Elasticsearch.
        boolQuery.must(QueryBuilders.rangeQuery("requestTime").from(gatewayLog.getStartTime()).to(gatewayLog.getEndTime()));
    }
    // Run the search and convert the result.
    Map<String, Object> map = esUtil.searchPage("gateway_log", (int) page.getCurrent(), (int) page.getSize(), "requestTime",
            "DESC", null, boolQuery);
    PageT<GatewayLog> outPage = new PageT<>();
    List<GatewayLog> records = new ArrayList<>();
    long total = 0L;
    // A missing result map is treated as an empty page instead of failing with a NullPointerException.
    if (map != null) {
        // "records" is populated by EsUtil.searchPage with the JSON source of each hit.
        @SuppressWarnings("unchecked")
        List<String> sources = (List<String>) map.get("records");
        if (sources != null) {
            for (String source : sources) {
                records.add(JSONObject.parseObject(source, GatewayLog.class));
            }
        }
        Object totalValue = map.get("total");
        if (totalValue instanceof Number) {
            total = ((Number) totalValue).longValue();
        }
    }
    outPage.setRecords(records);
    outPage.setTotal(total);
    return outPage;
}

总结:

from+size的大小不能超过index.max_result_window这个参数的设置,默认为10,000。如果搜索时from+size大于10000,需要调大index.max_result_window参数(最大为10亿)。数据量越大、越往后翻页,性能越低,这就是搜索引擎的深度分页问题:任何查询都不要返回特别大的结果,例如google、百度的搜索分页都不会超过100页。如果from+size方式满足不了需求,可以先通过from+size加上sort字段获取到最后一条记录的sort值,再结合search_after实现达到max_result_window之后的继续分页。


版权声明:本文为ctyyy12原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接和本声明。