|
@@ -6,7 +6,6 @@ import com.storlead.es.pojo.vo.EsQuerySimilarityVO;
|
|
|
import com.storlead.es.pojo.vo.FieldConfig;
|
|
import com.storlead.es.pojo.vo.FieldConfig;
|
|
|
import com.storlead.es.pojo.vo.IndexFieldConfig;
|
|
import com.storlead.es.pojo.vo.IndexFieldConfig;
|
|
|
import com.storlead.es.server.EsSearchCustomerService;
|
|
import com.storlead.es.server.EsSearchCustomerService;
|
|
|
-import com.storlead.frame.core.assemble.Result;
|
|
|
|
|
import org.apache.commons.lang3.tuple.Pair;
|
|
import org.apache.commons.lang3.tuple.Pair;
|
|
|
import org.elasticsearch.index.query.BoolQueryBuilder;
|
|
import org.elasticsearch.index.query.BoolQueryBuilder;
|
|
|
import org.elasticsearch.index.query.QueryBuilders;
|
|
import org.elasticsearch.index.query.QueryBuilders;
|
|
@@ -28,12 +27,6 @@ import javax.annotation.Resource;
|
|
|
import java.util.*;
|
|
import java.util.*;
|
|
|
import java.util.stream.Collectors;
|
|
import java.util.stream.Collectors;
|
|
|
|
|
|
|
|
-/**
|
|
|
|
|
- * @program: sp-sales-platform
|
|
|
|
|
- * @description:
|
|
|
|
|
- * @author: chenkq
|
|
|
|
|
- * @create: 2025-04-16 09:57
|
|
|
|
|
- */
|
|
|
|
|
@Service
|
|
@Service
|
|
|
public class EsSearchCustomerServiceImpl implements EsSearchCustomerService {
|
|
public class EsSearchCustomerServiceImpl implements EsSearchCustomerService {
|
|
|
|
|
|
|
@@ -41,37 +34,25 @@ public class EsSearchCustomerServiceImpl implements EsSearchCustomerService {
|
|
|
private ElasticsearchRestTemplate elasticsearchTemplate;
|
|
private ElasticsearchRestTemplate elasticsearchTemplate;
|
|
|
|
|
|
|
|
private static String similarityScript = "\n" +
|
|
private static String similarityScript = "\n" +
|
|
|
- "def field = params.field;\n" + // 使用参数传入字段名
|
|
|
|
|
|
|
+ "def field = params.field;\n" +
|
|
|
"if (!doc.containsKey(field) || doc[field].size() == 0 || doc[field].value == null) {\n" +
|
|
"if (!doc.containsKey(field) || doc[field].size() == 0 || doc[field].value == null) {\n" +
|
|
|
" return 0;\n" +
|
|
" return 0;\n" +
|
|
|
"}\n" +
|
|
"}\n" +
|
|
|
"if (params.query == null) return 0;\n" +
|
|
"if (params.query == null) return 0;\n" +
|
|
|
"def docValue = doc[field].value;\n" +
|
|
"def docValue = doc[field].value;\n" +
|
|
|
- "\n" +
|
|
|
|
|
"String s1 = docValue.toString().toLowerCase();\n" +
|
|
"String s1 = docValue.toString().toLowerCase();\n" +
|
|
|
"String s2 = params.query?.toString().toLowerCase();\n" +
|
|
"String s2 = params.query?.toString().toLowerCase();\n" +
|
|
|
- "\n" +
|
|
|
|
|
- "// 空值检查\n" +
|
|
|
|
|
"if (s1 == null || s2 == null) return 0;\n" +
|
|
"if (s1 == null || s2 == null) return 0;\n" +
|
|
|
- "\n" +
|
|
|
|
|
- "// 完全匹配直接返回100\n" +
|
|
|
|
|
"if (s1.equals(s2)) return 100;\n" +
|
|
"if (s1.equals(s2)) return 100;\n" +
|
|
|
- "\n" +
|
|
|
|
|
- "// 计算编辑距离(内联实现)\n" +
|
|
|
|
|
"int s1_len = s1.length();\n" +
|
|
"int s1_len = s1.length();\n" +
|
|
|
"int s2_len = s2.length();\n" +
|
|
"int s2_len = s2.length();\n" +
|
|
|
- "\n" +
|
|
|
|
|
- "// 快速检查\n" +
|
|
|
|
|
"if (s1_len == 0 || s2_len == 0) return 0;\n" +
|
|
"if (s1_len == 0 || s2_len == 0) return 0;\n" +
|
|
|
"if (s1_len == 0) return (int)(100 * (1 - (s2_len / Math.max(1, s2_len))));\n" +
|
|
"if (s1_len == 0) return (int)(100 * (1 - (s2_len / Math.max(1, s2_len))));\n" +
|
|
|
"if (s2_len == 0) return (int)(100 * (1 - (s1_len / Math.max(1, s1_len))));\n" +
|
|
"if (s2_len == 0) return (int)(100 * (1 - (s1_len / Math.max(1, s1_len))));\n" +
|
|
|
- "\n" +
|
|
|
|
|
- "// 使用单数组优化空间复杂度\n" +
|
|
|
|
|
"int[] costs = new int[s2_len + 1];\n" +
|
|
"int[] costs = new int[s2_len + 1];\n" +
|
|
|
"for (int j = 0; j <= s2_len; j++) {\n" +
|
|
"for (int j = 0; j <= s2_len; j++) {\n" +
|
|
|
" costs[j] = j;\n" +
|
|
" costs[j] = j;\n" +
|
|
|
"}\n" +
|
|
"}\n" +
|
|
|
- "\n" +
|
|
|
|
|
"for (int i = 1; i <= s1_len; i++) {\n" +
|
|
"for (int i = 1; i <= s1_len; i++) {\n" +
|
|
|
" costs[0] = i;\n" +
|
|
" costs[0] = i;\n" +
|
|
|
" int prev = i - 1;\n" +
|
|
" int prev = i - 1;\n" +
|
|
@@ -91,36 +72,17 @@ public class EsSearchCustomerServiceImpl implements EsSearchCustomerService {
|
|
|
" costs[j] = current;\n" +
|
|
" costs[j] = current;\n" +
|
|
|
" }\n" +
|
|
" }\n" +
|
|
|
"}\n" +
|
|
"}\n" +
|
|
|
- "\n" +
|
|
|
|
|
"int distance = costs[s2_len];\n" +
|
|
"int distance = costs[s2_len];\n" +
|
|
|
"int maxLen = s1_len > s2_len ? s1_len : s2_len;\n" +
|
|
"int maxLen = s1_len > s2_len ? s1_len : s2_len;\n" +
|
|
|
"if (maxLen == 0) return 100;\n"+
|
|
"if (maxLen == 0) return 100;\n"+
|
|
|
- "\n" +
|
|
|
|
|
- "// 计算相似度百分比(0-100)\n" +
|
|
|
|
|
"double similarity = 100 * (1 - ((double)distance / maxLen));\n" +
|
|
"double similarity = 100 * (1 - ((double)distance / maxLen));\n" +
|
|
|
"return (int) Math.round(similarity);";
|
|
"return (int) Math.round(similarity);";
|
|
|
|
|
|
|
|
@Override
|
|
@Override
|
|
|
public List<EsQuerySimilarityVO> listComparisonSimilarity(EsGenericVO dto) {
|
|
public List<EsQuerySimilarityVO> listComparisonSimilarity(EsGenericVO dto) {
|
|
|
-
|
|
|
|
|
-// Map<String, List<String>> indexFieldsMap = Map.of(
|
|
|
|
|
-// "liaison_data_index", Arrays.asList("email.keyword","email1.keyword","email2.keyword","email3.keyword"),
|
|
|
|
|
-// "customer_company_data_index", Arrays.asList("email.keyword","email1.keyword","email2.keyword","email3.keyword")
|
|
|
|
|
-// );
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
- // 保存索引名称和查询的列表
|
|
|
|
|
List<IndexFieldConfig> indexFields= dto.getIndexFields();
|
|
List<IndexFieldConfig> indexFields= dto.getIndexFields();
|
|
|
-// Map<String, List<FieldConfig>> indexFieldsMap = indexFields.stream()
|
|
|
|
|
-// .collect(Collectors.toMap(
|
|
|
|
|
-// IndexFieldConfig::getIndexName,
|
|
|
|
|
-// IndexFieldConfig::getFields
|
|
|
|
|
-// ));
|
|
|
|
|
-
|
|
|
|
|
Map<String, IndexFieldConfig> indexFieldsMap = indexFields.stream().collect(Collectors.toMap(IndexFieldConfig::getIndexName, IndexFieldConfig -> IndexFieldConfig));
|
|
Map<String, IndexFieldConfig> indexFieldsMap = indexFields.stream().collect(Collectors.toMap(IndexFieldConfig::getIndexName, IndexFieldConfig -> IndexFieldConfig));
|
|
|
-
|
|
|
|
|
List<Map> combinedResults = new ArrayList<>();
|
|
List<Map> combinedResults = new ArrayList<>();
|
|
|
-
|
|
|
|
|
float minScore = Float.parseFloat(dto.getMinScore());
|
|
float minScore = Float.parseFloat(dto.getMinScore());
|
|
|
List<Pair<String, NativeSearchQuery>> indexQueries = new ArrayList<>();
|
|
List<Pair<String, NativeSearchQuery>> indexQueries = new ArrayList<>();
|
|
|
indexFieldsMap.forEach((indexName, indexField) -> {
|
|
indexFieldsMap.forEach((indexName, indexField) -> {
|
|
@@ -132,31 +94,19 @@ public class EsSearchCustomerServiceImpl implements EsSearchCustomerService {
|
|
|
scriptParams.put("field", field.getFieldName());
|
|
scriptParams.put("field", field.getFieldName());
|
|
|
scriptParams.put("query", dto.getQueryText());
|
|
scriptParams.put("query", dto.getQueryText());
|
|
|
scriptParams.put("targetIndex", indexName);
|
|
scriptParams.put("targetIndex", indexName);
|
|
|
- indexQuery.mustNot(QueryBuilders.termQuery("is_delete", 1)); // Exclude deleted records
|
|
|
|
|
|
|
+ indexQuery.mustNot(QueryBuilders.termQuery("is_delete", 1));
|
|
|
if(!Objects.isNull(dto.getCustomerForm())){
|
|
if(!Objects.isNull(dto.getCustomerForm())){
|
|
|
- indexQuery.must(QueryBuilders.termQuery("customer_form", dto.getCustomerForm())); // Exclude deleted records
|
|
|
|
|
|
|
+ indexQuery.must(QueryBuilders.termQuery("customer_form", dto.getCustomerForm()));
|
|
|
}
|
|
}
|
|
|
-
|
|
|
|
|
-// indexQuery.should(
|
|
|
|
|
-// QueryBuilders.scriptScoreQuery(
|
|
|
|
|
-// QueryBuilders.matchAllQuery(),
|
|
|
|
|
-// new Script(
|
|
|
|
|
-// ScriptType.INLINE,
|
|
|
|
|
-// "painless",
|
|
|
|
|
-// similarityScript,
|
|
|
|
|
-// scriptParams
|
|
|
|
|
-// )
|
|
|
|
|
-// ).setMinScore(minScore)
|
|
|
|
|
-// );
|
|
|
|
|
ScriptScoreQueryBuilder scriptScoreQuery = QueryBuilders.scriptScoreQuery(
|
|
ScriptScoreQueryBuilder scriptScoreQuery = QueryBuilders.scriptScoreQuery(
|
|
|
- indexQuery, // 先应用 mustNot 过滤
|
|
|
|
|
|
|
+ indexQuery,
|
|
|
new Script(
|
|
new Script(
|
|
|
ScriptType.INLINE,
|
|
ScriptType.INLINE,
|
|
|
"painless",
|
|
"painless",
|
|
|
similarityScript,
|
|
similarityScript,
|
|
|
scriptParams
|
|
scriptParams
|
|
|
)
|
|
)
|
|
|
- ).setMinScore(minScore); // 只返回 _score >= minScore 的数据
|
|
|
|
|
|
|
+ ).setMinScore(minScore);
|
|
|
NativeSearchQuery indexSearchQuery = new NativeSearchQueryBuilder()
|
|
NativeSearchQuery indexSearchQuery = new NativeSearchQueryBuilder()
|
|
|
.withQuery(scriptScoreQuery)
|
|
.withQuery(scriptScoreQuery)
|
|
|
.withSort(SortBuilders.scoreSort().order(SortOrder.DESC))
|
|
.withSort(SortBuilders.scoreSort().order(SortOrder.DESC))
|
|
@@ -181,28 +131,16 @@ public class EsSearchCustomerServiceImpl implements EsSearchCustomerService {
|
|
|
}
|
|
}
|
|
|
});
|
|
});
|
|
|
|
|
|
|
|
-// 执行查询并合并结果
|
|
|
|
|
-
|
|
|
|
|
-// for (Pair<String, NativeSearchQuery> pair : indexQueries) {
|
|
|
|
|
-// String indexName = pair.getLeft();
|
|
|
|
|
-// NativeSearchQuery query = pair.getRight();
|
|
|
|
|
-//
|
|
|
|
|
-// }
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
- // 按相似度分数排序
|
|
|
|
|
combinedResults.sort((a, b) -> {
|
|
combinedResults.sort((a, b) -> {
|
|
|
float scoreA = (float) a.getOrDefault("similarityScore", 0f);
|
|
float scoreA = (float) a.getOrDefault("similarityScore", 0f);
|
|
|
float scoreB = (float) b.getOrDefault("similarityScore", 0f);
|
|
float scoreB = (float) b.getOrDefault("similarityScore", 0f);
|
|
|
- return Float.compare(scoreB, scoreA); // 降序排序
|
|
|
|
|
|
|
+ return Float.compare(scoreB, scoreA);
|
|
|
});
|
|
});
|
|
|
|
|
|
|
|
List<EsQuerySimilarityVO> vos = convertToVoList(combinedResults,indexFieldsMap);
|
|
List<EsQuerySimilarityVO> vos = convertToVoList(combinedResults,indexFieldsMap);
|
|
|
return vos;
|
|
return vos;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-
|
|
|
|
|
public List<EsQuerySimilarityVO> convertToVoList(List<Map> combinedResults,Map<String, IndexFieldConfig> indexFieldsMap) {
|
|
public List<EsQuerySimilarityVO> convertToVoList(List<Map> combinedResults,Map<String, IndexFieldConfig> indexFieldsMap) {
|
|
|
if (CollectionUtils.isEmpty(combinedResults)) {
|
|
if (CollectionUtils.isEmpty(combinedResults)) {
|
|
|
return new ArrayList<>();
|
|
return new ArrayList<>();
|
|
@@ -210,30 +148,20 @@ public class EsSearchCustomerServiceImpl implements EsSearchCustomerService {
|
|
|
return combinedResults.stream()
|
|
return combinedResults.stream()
|
|
|
.map(result -> {
|
|
.map(result -> {
|
|
|
EsQuerySimilarityVO vo = new EsQuerySimilarityVO();
|
|
EsQuerySimilarityVO vo = new EsQuerySimilarityVO();
|
|
|
-
|
|
|
|
|
- // 1. 设置基础字段
|
|
|
|
|
- vo.setId(Long.valueOf(result.get("id").toString())); // 假设id字段存在
|
|
|
|
|
|
|
+ vo.setId(Long.valueOf(result.get("id").toString()));
|
|
|
if (result.containsKey("customer_id") && Objects.nonNull(result.get("customer_id"))) {
|
|
if (result.containsKey("customer_id") && Objects.nonNull(result.get("customer_id"))) {
|
|
|
- vo.setCustomerId(Long.valueOf(result.get("customer_id").toString())); // 假设customer_id字段存在
|
|
|
|
|
|
|
+ vo.setCustomerId(Long.valueOf(result.get("customer_id").toString()));
|
|
|
}
|
|
}
|
|
|
if (result.containsKey("owner_by") && Objects.nonNull(result.get("owner_by"))) {
|
|
if (result.containsKey("owner_by") && Objects.nonNull(result.get("owner_by"))) {
|
|
|
- vo.setOwnerBy(Long.valueOf(result.get("owner_by").toString())); // 假设customer_id字段存在
|
|
|
|
|
|
|
+ vo.setOwnerBy(Long.valueOf(result.get("owner_by").toString()));
|
|
|
}
|
|
}
|
|
|
-
|
|
|
|
|
- vo.setObject(result); // 保留原始数据
|
|
|
|
|
-
|
|
|
|
|
- // 2. 设置相似度分数(转换为百分比字符串)
|
|
|
|
|
|
|
+ vo.setObject(result);
|
|
|
Float score = (Float) result.get("similarityScore");
|
|
Float score = (Float) result.get("similarityScore");
|
|
|
vo.setSimilarityScore(score.toString());
|
|
vo.setSimilarityScore(score.toString());
|
|
|
- // 3. 设置索引名称
|
|
|
|
|
vo.setIndexName((String) result.get("matchedIndex"));
|
|
vo.setIndexName((String) result.get("matchedIndex"));
|
|
|
IndexFieldConfig indexField = indexFieldsMap.get(vo.getIndexName());
|
|
IndexFieldConfig indexField = indexFieldsMap.get(vo.getIndexName());
|
|
|
vo.setResourceType(indexField.getResourceType().toString());
|
|
vo.setResourceType(indexField.getResourceType().toString());
|
|
|
-//
|
|
|
|
|
-// vo.setMatchedField(matchedField);
|
|
|
|
|
- // 4. 找出匹配的字段名
|
|
|
|
|
findMatchedField(vo,result, vo.getIndexName(),indexFieldsMap);
|
|
findMatchedField(vo,result, vo.getIndexName(),indexFieldsMap);
|
|
|
-// vo.setMatchedField(matchedField);
|
|
|
|
|
if (StrUtil.isNotBlank(vo.getMatchedField())) {
|
|
if (StrUtil.isNotBlank(vo.getMatchedField())) {
|
|
|
vo.setDuplicateValue(result.get(vo.getMatchedField()).toString());
|
|
vo.setDuplicateValue(result.get(vo.getMatchedField()).toString());
|
|
|
}
|
|
}
|
|
@@ -242,28 +170,19 @@ public class EsSearchCustomerServiceImpl implements EsSearchCustomerService {
|
|
|
.collect(Collectors.toList());
|
|
.collect(Collectors.toList());
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- // 辅助方法:找出实际匹配的字段
|
|
|
|
|
private void findMatchedField(EsQuerySimilarityVO vo,Map<String, Object> result, String indexName,Map<String, IndexFieldConfig> indexConfigMap) {
|
|
private void findMatchedField(EsQuerySimilarityVO vo,Map<String, Object> result, String indexName,Map<String, IndexFieldConfig> indexConfigMap) {
|
|
|
- // 获取该索引对应的字段列表
|
|
|
|
|
IndexFieldConfig indexConfig = indexConfigMap.get(indexName);
|
|
IndexFieldConfig indexConfig = indexConfigMap.get(indexName);
|
|
|
List<FieldConfig> fields = indexConfig.getFields();
|
|
List<FieldConfig> fields = indexConfig.getFields();
|
|
|
- // 遍历字段,找出有值的字段
|
|
|
|
|
for (FieldConfig field : fields) {
|
|
for (FieldConfig field : fields) {
|
|
|
- // 移除.keyword后缀(如果需要)
|
|
|
|
|
String cpField = result.get("matchedField").toString();
|
|
String cpField = result.get("matchedField").toString();
|
|
|
if (cpField.equals(field.getFieldName())) {
|
|
if (cpField.equals(field.getFieldName())) {
|
|
|
String baseField = cpField.replace(".keyword", "");
|
|
String baseField = cpField.replace(".keyword", "");
|
|
|
if (result.containsKey(baseField) && result.get(baseField) != null) {
|
|
if (result.containsKey(baseField) && result.get(baseField) != null) {
|
|
|
vo.setMatchedField(baseField);
|
|
vo.setMatchedField(baseField);
|
|
|
vo.setDuplicateField(baseField);
|
|
vo.setDuplicateField(baseField);
|
|
|
-// vo.setErrorMsg(field.getFieldDescribe()+"重复");
|
|
|
|
|
vo.setErrorMsg(field.getFieldDescribe());
|
|
vo.setErrorMsg(field.getFieldDescribe());
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
-
|
|
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|