业务场景:电商商品搜索与评论分析系统
假设我们有一个电商平台的商品表和评论表,需要实现复杂的全文检索功能。
第一步:创建支持全文检索的表
1. 创建商品表(支持全文检索)
CREATE TABLE tmp.products (
product_id BIGINT,--产品id
product_name STRING,--产品名称
product_description TEXT,--产品描述
brand STRING,--品牌
category STRING,--分类
tags ARRAY<STRING>,--标签
attributes MAP<STRING, STRING>,--属性
price DECIMAL(10, 2),--价格
stock INT,--库存
create_time DATETIME --时间
) ENGINE = OLAP
DUPLICATE KEY(product_id)
DISTRIBUTED BY HASH(product_id) BUCKETS 10
PROPERTIES (
"replication_num" = "1"
);
2. 为商品描述创建倒排索引(支持短语搜索)
CREATE INDEX idx_product_desc ON tmp.products(product_description) USING INVERTED
PROPERTIES(
"parser" = "english",
"support_phrase" = "true"
);
3. 为商品名称创建索引
CREATE INDEX idx_product_name ON tmp.products(product_name) USING INVERTED
PROPERTIES("parser" = "english");
4. 创建商品评论表
CREATE TABLE tmp.product_reviews_search (
review_id BIGINT,--评论id
product_id BIGINT,--产品id
user_id BIGINT,--用户id
review_title STRING,--评论标题
review_content TEXT,--评论内容
rating INT,--评分
review_date DATE --评论日期
) ENGINE = OLAP
DUPLICATE KEY(review_id)
DISTRIBUTED BY HASH(review_id) BUCKETS 10
PROPERTIES("replication_num" = "1");
5.为评论内容创建全文索引
CREATE INDEX idx_review_content ON tmp.product_reviews_search(review_content) USING INVERTED
PROPERTIES(
"parser" = "english",
"support_phrase" = "true"
);
6.插入商品和商品评论表数据
#插入商品表数据
INSERT INTO tmp.products VALUES
(1, 'iPhone 15 Pro Max', 'The new iPhone 15 Pro Max features an A17 Pro chip, titanium design, advanced camera system with 5x optical zoom, and USB-C connector. Perfect for photography and gaming enthusiasts.', 'Apple', 'Smartphones', ['apple', 'iphone', 'premium', '5g'], map('color', 'black', 'storage', '512GB'), 1299.99, 100, '2026-01-01 10:00:00'),
(2, 'Samsung Galaxy S24 Ultra', 'Samsung Galaxy S24 Ultra comes with AI features, 200MP camera, S Pen support, and long battery life. Great for business and creativity.', 'Samsung', 'Smartphones', ['samsung', 'android', 'premium', 'ai'], map('color', 'titanium', 'st