java使⽤ElasticSearch完成百万级数据查询附近的⼈功能
上⼀篇⽂章介绍了ElasticSearch使⽤Repository和ElasticSearchTemplate完成构建复杂查询条件,简单介绍了ElasticSearch 使⽤地理位置的功能。
这一篇我们来看一下使用ElasticSearch完成大数据量查询附近的人功能,搜索N米范围内的数据。
准备环境
本机测试使⽤了ElasticSearch最新版5.5.1,SpringBoot1.5.4,spring-data-ElasticSearch2.1.4.
新建Springboot项⽬,勾选ElasticSearch和web。
pom⽂件如下
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.tianyalei</groupId>
<artifactId>elasticsearch</artifactId>
<version>0.0.1-SNAPSHOT</version>
<packaging>jar</packaging>
<name>elasticsearch</name>
<description>Demo project for Spring Boot</description>
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>1.5.4.RELEASE</version>
<relativePath/> <!-- lookup parent from repository -->
</parent>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<java.version>1.8</java.version>
</properties>
<dependencies>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-elasticsearch</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.sun.jna</groupId>
<artifactId>jna</artifactId>
<version>3.0.9</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
</plugin>
</plugins>
</build>
</project>
新建model类Person
package com.tianyalei.elasticsearch.model;
import org.springframework.data.annotation.Id;
import org.springframework.data.elasticsearch.annotations.Document;
import org.springframework.data.elasticsearch.annotations.GeoPointField;
import java.io.Serializable;
/**
 * Elasticsearch document model for a person that carries a geo location.
 *
 * <p>Accessors follow the JavaBean {@code getX}/{@code setX} naming convention so that
 * property-based frameworks can read and write the fields.
 */
@Document(indexName="elastic_arch_project",type="person",indexStoreType="fs",shards=5,replicas=1,refreshInterval="-1")
public class Person implements Serializable {
    @Id
    private int id;
    private String name;
    private String phone;
    /**
     * Geo location of the person.
     *
     * <p>As a string the order is latitude first, then longitude: {@code "40.715,-74.011"}.
     * If an array is used instead, the order is reversed (longitude, latitude):
     * {@code [-73.983, 40.719]}.
     */
    @GeoPointField
    private String address;

    /** Returns the document id. */
    public int getId() {
        return id;
    }

    /** Sets the document id. */
    public void setId(int id) {
        this.id = id;
    }

    /** Returns the person's name. */
    public String getName() {
        return name;
    }

    /** Sets the person's name. */
    public void setName(String name) {
        this.name = name;
    }

    /** Returns the person's phone number. */
    public String getPhone() {
        return phone;
    }

    /** Sets the person's phone number. */
    public void setPhone(String phone) {
        this.phone = phone;
    }

    /** Returns the location as a {@code "lat,lon"} string. */
    public String getAddress() {
        return address;
    }

    /** Sets the location; expected form is {@code "lat,lon"}. */
    public void setAddress(String address) {
        this.address = address;
    }
}
我⽤address字段表⽰经纬度位置。注意,使⽤String[]和String分别来表⽰经纬度时是不同的,见注释。
package com.tianyalei.elasticsearch.repository;
import com.tianyalei.elasticsearch.model.Person;
import org.springframework.data.elasticsearch.repository.ElasticsearchRepository;
/**
 * Spring Data repository for {@link Person} documents, keyed by their integer id.
 * Declares no methods of its own; all operations come from the extended interface.
 */
public interface PersonRepository extends ElasticsearchRepository<Person, Integer> {
}
看一下Service类,完成插入测试数据的功能。查询的功能我放在Controller里了,为了方便查看,正常是应该放在Service里。
package com.tianyalei.elasticsearch.service;
import com.tianyalei.elasticsearch.model.Person;
import com.tianyalei.elasticsearch.repository.PersonRepository;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.elasticsearch.core.ElasticsearchTemplate;
import org.springframework.data.elasticsearch.core.query.IndexQuery;
import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.List;
@Service
public class PersonService {
@Autowired
PersonRepository personRepository;
@Autowired
ElasticarchTemplate elasticarchTemplate;
private static final String PERSON_INDEX_NAME = "elastic_arch_project";
private static final String PERSON_INDEX_TYPE = "person";
public Person add(Person person) {航空炸弹
return personRepository.save(person);
}
public void bulkIndex(List<Person> personList) {
int counter = 0;
try {
if (!elasticarchTemplate.indexExists(PERSON_INDEX_NAME)) {
}
List<IndexQuery> queries = new ArrayList<>();
for (Person person : personList) {小猫小狗
IndexQuery indexQuery = new IndexQuery();
indexQuery.Id() + "");
indexQuery.tObject(person);
indexQuery.tIndexName(PERSON_INDEX_NAME);
indexQuery.tType(PERSON_INDEX_TYPE);
/
/上⾯的那⼏步也可以使⽤IndexQueryBuilder来构建
//IndexQuery index = new IndexQueryBuilder().Id() + "").withObject(person).build();
queries.add(indexQuery);
if (counter % 500 == 0) {
elasticarchTemplate.bulkIndex(queries);
queries.clear();
System.out.println("bulkIndex counter : " + counter);
}
counter++;
}
if (queries.size() > 0) {
elasticarchTemplate.bulkIndex(queries);
}
System.out.println("bulkIndex completed.");
} catch (Exception e) {
System.out.println("IndexerService.bulkIndex e;" + e.getMessage());
throw e;
}
}
}
注意看bulkIndex⽅法,这个是批量插⼊数据⽤的,bulk也是ES官⽅推荐使⽤的批量插⼊数据的⽅法。这⾥是每逢500的整数倍就bulk插⼊⼀次。
package com.tianyalei.elasticsearch.controller;
import com.tianyalei.elasticsearch.model.Person;
import com.tianyalei.elasticsearch.service.PersonService;
import org.elasticsearch.common.unit.DistanceUnit;
import org.elasticsearch.index.query.GeoDistanceQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.sort.GeoDistanceSortBuilder;
import org.elasticsearch.search.sort.SortBuilders;
import org.elasticsearch.search.sort.SortOrder;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.domain.Pageable;
import org.springframework.data.elasticsearch.core.ElasticsearchTemplate;
import org.springframework.data.elasticsearch.core.query.NativeSearchQueryBuilder;
import org.springframework.data.elasticsearch.core.query.SearchQuery;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RestController;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
@RestController
public class PersonController {
@Autowired
PersonService personService;
@Autowired
时间造句ElasticarchTemplate elasticarchTemplate;
@GetMapping("/add")
public Object add() {
double lat = 39.929986;
double lon = 116.395645;
风尘仆仆什么意思List<Person> personList = new ArrayList<>(900000);
for (int i = 100000; i < 1000000; i++) {
double max = 0.00001;
小书法家double min = 0.000001;
Random random = new Random();
double s = Double() % (max - min + 1) + max;
DecimalFormat df = new DecimalFormat(">#0.000000");// System.out.println(s);
String lons = df.format(s + lon);
String lats = df.format(s + lat);
Double dlon = Double.valueOf(lons);
Double dlat = Double.valueOf(lats);
Person person = new Person();
person.tId(i);
person.tName("名字" + i);
person.tPhone("电话" + i);
person.tAddress(dlat + "," + dlon);
personList.add(person);
}
personService.bulkIndex(personList);爱情箴言
// SearchQuery archQuery = new NativeSearchQueryBuilder().withQuery(QueryBuilders.queryStringQuery("spring boot OR 书籍")).build(); // List<Article> articles = elas、ticarchTemplate.queryForList(、archQuery, Article.class);
// for (Article article : articles) {
// System.out.String());
// }
return "添加数据";
}
/**
*
geo_distance: 查找距离某个中⼼点距离在⼀定范围内的位置
geo_bounding_box: 查找某个长⽅形区域内的位置
geo_distance_range: 查找距离某个中⼼的距离在min和max之间的位置
geo_polygon: 查找位于多边形内的地点。
sort可以⽤来排序
*/
@GetMapping("/query")
public Object query() {
double lat = 39.929986;
double lon = 116.395645;
Long nowTime = System.currentTimeMillis();
//查询某经纬度100⽶范围内
GeoDistanceQueryBuilder builder = DistanceQuery("address").point(lat, lon)
.distance(100, DistanceUnit.METERS);
GeoDistanceSortBuilder sortBuilder = DistanceSort("address")
.point(lat, lon)
.unit(DistanceUnit.METERS)
.order(SortOrder.ASC);
Pageable pageable = new PageRequest(0, 50);
NativeSearchQueryBuilder builder1 = new NativeSearchQueryBuilder().withFilter(builder).withSort(sortBuilder).withPageable(pageable);
SearchQuery archQuery = builder1.build();
//queryForList默认是分页,⾛的是queryForPage,默认10个
List<Person> personList = elasticarchTemplate.queryForList(archQuery, Person.class);
System.out.println("耗时:" + (System.currentTimeMillis() - nowTime));
return personList;
}
看Controller类,在add⽅法中,我们插⼊90万条测试数据,随机产⽣不同的经纬度地址。
在查询⽅法中,我们构建了⼀个查询100⽶范围内、按照距离远近排序,分页每页50条的查询条件。如果不指明Pageable的话,ESTemplate的queryForList默认是10条,通过源码可以看到。
启动项⽬,先执⾏add,等待百万数据插⼊,⼤概⼏⼗秒。
然后执⾏查询,看⼀下结果。
第⼀次查询花费300多ms,再次查询后时间就⼤幅下降,到30ms左右,因为ES已经⾃动缓存到内存了。
可见,ES完成地理位置的查询还是⾮常快的。适⽤于查询附近的⼈、范围查询之类的功能。
后记:在后来的使用中,使用Elasticsearch 2.3版本时,按上面的写法出现了geo类型无法索引的情况,进入ES的字段类型为String,而不是标注的geo_point。在此记录一下解决方法:将address字段由String类型修改为GeoPoint,且必须是org.springframework.data.elasticsearch.core.geo.GeoPoint。然后需要在创建index时,显式调用一下putMapping方法,才能正确地映射为geo_point字段。
如下
// Create the index first and then put the mapping derived from Person's annotations,
// before any document is indexed; putting a mapping requires the index to exist.
if (!elasticsearchTemplate.indexExists(Person.class)) {
    elasticsearchTemplate.createIndex(Person.class);
    elasticsearchTemplate.putMapping(Person.class);
}
以上就是本⽂的全部内容,希望对⼤家的学习有所帮助,也希望⼤家多多⽀持。