Elasticsearch搜索引擎深度实战
Elasticsearch概述
Elasticsearch是一个基于Lucene的开源分布式搜索引擎,提供全文检索、结构化搜索、分析等功能。它是ELK(Elasticsearch、Logstash、Kibana)栈的核心组件。
核心概念
- Index:文档集合,类似数据库
- Type:文档类型(ES 7.x起已废弃,8.x中已移除)
- Document:JSON文档,类似记录
- Field:文档字段
- Mapping:字段类型定义
- Shard:索引分片,支持分布式
- Replica:分片副本,提高可用性
索引管理
创建索引
PUT /products
{
"settings": {
"number_of_shards": 3,
"number_of_replicas": 1,
"analysis": {
"analyzer": {
"my_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": ["lowercase", "stop"]
}
}
}
},
"mappings": {
"properties": {
"title": {
"type": "text",
"analyzer": "my_analyzer",
"fields": {
"keyword": { "type": "keyword" }
}
},
"description": { "type": "text" },
"price": { "type": "float" },
"category": { "type": "keyword" },
"tags": { "type": "keyword" },
"created_at": { "type": "date" }
}
}
}
文档操作
# 创建文档(自动生成ID)
POST /products/_doc
{
"title": "iPhone 15 Pro",
"description": "Latest Apple smartphone",
"price": 999.99,
"category": "electronics",
"tags": ["phone", "apple", "5G"]
}
# 创建文档(指定ID)
PUT /products/_doc/1
{
"title": "MacBook Pro",
"description": "Professional laptop",
"price": 2499.00,
"category": "electronics"
}
# 批量操作
POST /_bulk
{"index": {"_index": "products"}}
{"title": "Product 1", "price": 100}
{"index": {"_index": "products"}}
{"title": "Product 2", "price": 200}
{"delete": {"_index": "products", "_id": "123"}}
查询DSL
全文检索
# match查询
GET /products/_search
{
"query": {
"match": {
"title": "iPhone smartphone"
}
}
}
# match_phrase精确短语
GET /products/_search
{
"query": {
"match_phrase": {
"title": "iPhone 15"
}
}
}
# multi_match多字段搜索
GET /products/_search
{
"query": {
"multi_match": {
"query": "Apple phone",
"fields": ["title^2", "description"],
"type": "best_fields"
}
}
}
精确查询
# term精确匹配
GET /products/_search
{
"query": {
"term": {
"category": "electronics"
}
}
}
# range范围查询
GET /products/_search
{
"query": {
"range": {
"price": {
"gte": 100,
"lte": 1000
}
}
}
}
# bool复合查询
GET /products/_search
{
"query": {
"bool": {
"must": [
{ "match": { "title": "phone" } }
],
"filter": [
{ "term": { "category": "electronics" } },
{ "range": { "price": { "lte": 1500 } } }
],
"should": [
{ "term": { "tags": "5G" } }
],
"must_not": [
{ "term": { "tags": "discontinued" } }
]
}
}
}
聚合分析
# 统计聚合
GET /products/_search
{
"size": 0,
"aggs": {
"avg_price": { "avg": { "field": "price" } },
"max_price": { "max": { "field": "price" } },
"price_stats": { "stats": { "field": "price" } }
}
}
# 分组聚合
GET /products/_search
{
"size": 0,
"aggs": {
"categories": {
"terms": { "field": "category", "size": 10 },
"aggs": {
"avg_price": { "avg": { "field": "price" } }
}
}
}
}
# 日期直方图
GET /orders/_search
{
"size": 0,
"aggs": {
"sales_over_time": {
"date_histogram": {
"field": "created_at",
"calendar_interval": "month"
},
"aggs": {
"revenue": { "sum": { "field": "amount" } }
}
}
}
}
Python客户端
from elasticsearch import Elasticsearch, helpers
# Connect to the Elasticsearch node at localhost:9200.
es = Elasticsearch(['http://localhost:9200'])

# Create the `products` index only when it does not exist yet.
# NOTE(review): recent client releases prefer explicit keyword arguments
# over `body=`; confirm against the installed elasticsearch-py version.
if not es.indices.exists(index='products'):
    es.indices.create(index='products', body={
        'mappings': {
            'properties': {
                'title': {'type': 'text'},
                'price': {'type': 'float'}
            }
        }
    })

# Index a single document; no ID is passed to the call.
es.index(index='products', body={
    'title': 'iPhone 15',
    'price': 999.99
})

# Bulk-index 1000 generated documents with a single helper call.
actions = [
    {'_index': 'products', '_source': {'title': f'Product {i}', 'price': i * 10}}
    for i in range(1000)
]
helpers.bulk(es, actions)

# Search: match on title, return at most 10 hits sorted by price descending.
# NOTE(review): documents indexed just above may not be searchable until the
# index has been refreshed; confirm whether an explicit refresh is needed.
result = es.search(index='products', body={
    'query': {
        'match': {'title': 'iPhone'}
    },
    'size': 10,
    'sort': [{'price': 'desc'}]
})

# Print the stored source of every returned hit.
for hit in result['hits']['hits']:
    print(hit['_source'])
性能优化
- 合理设置分片数:每个分片10-50GB为宜
- 使用批量API:减少网络开销
- 优化映射:避免不必要的text字段
- 使用filter上下文:filter子句不计算相关性分数,且结果可被缓存
- 控制结果集:避免深度分页(from + size 默认上限为10000,深度翻页可改用 search_after)
Elasticsearch是构建搜索应用的强大工具,掌握其核心概念和查询语法至关重要。
本文链接:https://www.kkkliao.cn/?id=752 转载需授权!
版权声明:本文由廖万里的博客发布,如需转载请注明出处。



手机流量卡
免费领卡
号卡合伙人
产品服务
关于本站
