package com.tangguo.common.utils;

import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.date.DateUtil;
import cn.hutool.core.date.TimeInterval;
import cn.hutool.core.util.StrUtil;
import cn.hutool.dfa.FoundWord;
import cn.hutool.dfa.WordTree;
import lombok.extern.slf4j.Slf4j;

import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

/**
 * Static helpers around a single shared Hutool {@link WordTree} used for
 * sensitive-word lookup and masking.
 *
 * <p>All methods operate on one process-wide tree. This class performs no
 * synchronization of its own around that tree, so callers that reload or add
 * words while other threads are matching must coordinate externally.
 */
@Slf4j
public class SensitiveWordUtils {

    /** The shared dictionary every method in this class reads from or writes to. */
    private static final WordTree SENSITIVE_TREE = new WordTree();


    /**
     * Adds a single sensitive word to the dictionary.
     *
     * <p>Blank input ({@code null}, empty or whitespace-only) is ignored silently.
     *
     * @param word the sensitive word to add
     */
    public static void addWords(String word) {
        if (StrUtil.isBlank(word)) {
            return;
        }
        SENSITIVE_TREE.addWords(word);
        log.info("=> 敏感词工具添加敏感：{}", word);
    }


    /**
     * Adds a batch of sensitive words to the dictionary.
     *
     * <p>Blank entries are dropped before insertion. A {@code null} or empty
     * collection is a no-op and nothing is logged.
     *
     * @param words the sensitive words to add
     */
    public static void addWords(Collection<String> words) {
        if (CollUtil.isEmpty(words)) {
            return;
        }
        List<String> usableWords = filterBlankWords(words);
        SENSITIVE_TREE.addWords(usableWords);
        // Report what was actually inserted, not the raw input size (which may include blanks).
        log.info("=> 敏感词工具添加敏感数量：{}", usableWords.size());
    }


    /**
     * Removes every word from the dictionary.
     */
    public static void clearWords() {
        SENSITIVE_TREE.clear();
        log.info("=> 敏感词工具清空敏感词");
    }


    /**
     * Replaces the dictionary contents with the given words.
     *
     * <p>The tree is always cleared first; if {@code words} is {@code null} or
     * empty the dictionary is simply left empty. Blank entries are dropped
     * before insertion. The clear and the re-population are two separate steps,
     * so a concurrent reader may observe an empty or partially filled tree.
     *
     * @param words the new set of sensitive words
     */
    public static void reloadWords(Collection<String> words) {
        SENSITIVE_TREE.clear();
        if (CollUtil.isEmpty(words)) {
            return;
        }
        List<String> usableWords = filterBlankWords(words);
        SENSITIVE_TREE.addWords(usableWords);
        // Report what was actually inserted, not the raw input size (which may include blanks).
        log.info("=> 敏感词工具重新加载敏感数量：{}", usableWords.size());
    }


    /**
     * Looks up the first sensitive word occurring in the text.
     *
     * <p>Delegates directly to {@link WordTree#matchWord(String)}; the result
     * for a text without matches (or a {@code null} text) is whatever that
     * method returns.
     *
     * @param text the text to scan
     * @return the first match found by the tree
     * @since 5.5.3 NOTE(review): this tag looks like it refers to a Hutool release rather than
     *     this project's version - confirm
     */
    public static FoundWord getFoundFirstSensitive(String text) {
        return SENSITIVE_TREE.matchWord(text);
    }


    /**
     * Looks up all sensitive words occurring in the text using the tree's
     * default matching options.
     *
     * <p>Delegates directly to {@link WordTree#matchAllWords(String)}.
     *
     * @param text the text to scan
     * @return the matches reported by the tree
     */
    public static List<FoundWord> getFoundAllSensitive(String text) {
        return SENSITIVE_TREE.matchAllWords(text);
    }


    /**
     * Looks up all sensitive words occurring in the text with explicit matching options.
     *
     * <p>Passes {@code -1} as the tree's match-count limit (presumably meaning
     * "no limit" - see the Hutool {@code WordTree} documentation).
     *
     * @param text           the text to scan
     * @param isDensityMatch whether to use dense matching
     * @param isGreedMatch   whether to use greedy (longest) matching
     * @return the matches reported by the tree
     */
    public static List<FoundWord> getFoundAllSensitive(String text, boolean isDensityMatch, boolean isGreedMatch) {
        return SENSITIVE_TREE.matchAllWords(text, -1, isDensityMatch, isGreedMatch);
    }


    /**
     * Masks the sensitive words in the text, replacing each character of a match with {@code *}.
     *
     * <p>{@code null} or empty text, and text without any match, is returned
     * unchanged. The elapsed time is logged only when at least one match was
     * found.
     *
     * @param text         the text to filter
     * @param isGreedMatch greedy (longest) matching: given the keywords {@code a} and {@code ab},
     *                     longest matching will match {@code [a, ab]}
     * @return the text with sensitive words masked
     */
    public static String sensitiveFilter(String text, boolean isGreedMatch) {
        if (StrUtil.isEmpty(text)) {
            return text;
        }

        TimeInterval timer = DateUtil.timer();
        // Dense matching is not needed when the goal is only to mask the text.
        List<FoundWord> matches = getFoundAllSensitive(text, false, isGreedMatch);
        if (CollUtil.isEmpty(matches)) {
            return text;
        }

        // Index matches by start offset; a later match with the same start replaces an earlier one.
        Map<Integer, FoundWord> matchByStart = new HashMap<>(matches.size());
        for (FoundWord match : matches) {
            matchByStart.put(match.getStartIndex(), match);
        }

        int length = text.length();
        StringBuilder masked = new StringBuilder();
        for (int i = 0; i < length; i++) {
            FoundWord match = matchByStart.get(i);
            if (match == null) {
                masked.append(text.charAt(i));
                continue;
            }
            // One star per character of the matched text.
            int starCount = match.getFoundWord().length();
            for (int s = 0; s < starCount; s++) {
                masked.append("*");
            }
            // Jump to the last character of the match; the loop increment moves past it.
            i = match.getEndIndex();
        }
        log.info("过滤敏感词, 耗时: {} ms", timer.intervalMs());
        return masked.toString();
    }


    /**
     * Returns the non-blank entries of {@code words}, preserving encounter order.
     */
    private static List<String> filterBlankWords(Collection<String> words) {
        return words.stream().filter(StrUtil::isNotBlank).collect(Collectors.toList());
    }

}
