package cc.lechun.framework.common.utils.string;

import com.hankcs.hanlp.seg.common.Term;
import com.hankcs.hanlp.tokenizer.StandardTokenizer;
import java.math.BigInteger;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import org.apache.logging.log4j.message.StructuredDataId;
import org.jsoup.Jsoup;
import org.jsoup.safety.Whitelist;

/* loaded from: input_file:BOOT-INF/lib/common-3.4.0-SNAPSHOT.jar:cc/lechun/framework/common/utils/string/TextNewUtils.class */
/**
 * SimHash-based text similarity helper.
 *
 * <p>An instance cleans its input text (HTML tags removed via jsoup, lower-cased,
 * whitespace stripped), segments it with HanLP's {@code StandardTokenizer} and folds
 * the per-word hashes into a fixed-width fingerprint. Two instances are compared by
 * the Hamming distance between their fingerprints.
 *
 * <p>Typical use is the static {@link #compareText(String, String)} shortcut.
 */
public class TextNewUtils {
    /** Fingerprint width used by the public constructor and by {@link #compareText}. */
    private static final int DEFAULT_HASH_BITS = 64;

    /**
     * Sequences removed verbatim from the text before tokenizing: real whitespace
     * characters, their backslash-escaped textual forms, and the HTML entity.
     */
    private static final String[] STRIPPED_SEQUENCES = {" ", "\n", "\r", "\t", "\\r", "\\n", "\\t", "&nbsp;"};

    // NOTE(review): in HanLP's tag set "w" appears to be punctuation and "n" noun — confirm.
    /** Terms whose nature equals this tag do not contribute to the fingerprint. */
    private static final String SKIPPED_NATURE = "w";
    /** Terms whose nature equals this tag vote with {@link #HEAVY_WEIGHT}. */
    private static final String HEAVY_NATURE = "n";
    private static final int HEAVY_WEIGHT = 2;
    private static final int DEFAULT_WEIGHT = 1;

    /** Multiplier of the per-word rolling hash. */
    private static final BigInteger HASH_MULTIPLIER = BigInteger.valueOf(1000003L);

    /** The cleaned text the fingerprint was computed from. */
    private final String tokens;
    /** SimHash fingerprint of {@link #tokens}; only the low {@link #hashbits} bits are set. */
    private final BigInteger strSimHash;
    /** Fingerprint width in bits. */
    private final int hashbits;

    /**
     * Builds a 64-bit fingerprint for the given text.
     *
     * @param str raw text (may contain HTML); {@code null} is treated as empty text
     */
    public TextNewUtils(String str) {
        this(str, DEFAULT_HASH_BITS);
    }

    /**
     * Builds a fingerprint of the requested width.
     *
     * @param str raw text (may contain HTML); {@code null} is treated as empty text
     * @param i   fingerprint width in bits, must be positive
     * @throws IllegalArgumentException if {@code i} is not positive
     */
    private TextNewUtils(String str, int i) {
        if (i <= 0) {
            throw new IllegalArgumentException("hashbits must be positive: " + i);
        }
        this.hashbits = i;
        this.tokens = cleanResume(str == null ? "" : str);
        this.strSimHash = simHash();
    }

    /**
     * Normalizes text for hashing: strips all HTML tags, lower-cases the result and
     * removes every occurrence of {@link #STRIPPED_SEQUENCES}.
     *
     * @param str non-null raw text
     * @return the normalized text
     */
    private String cleanResume(String str) {
        // Locale.ROOT keeps the fingerprint independent of the JVM's default locale.
        String cleaned = org.apache.commons.lang3.StringUtils.lowerCase(
                Jsoup.clean(str, Whitelist.none()), Locale.ROOT);
        for (String sequence : STRIPPED_SEQUENCES) {
            // Literal replacement: with regex matching, the escaped entries ("\\r" etc.)
            // were just duplicates of the control characters and never removed the
            // two-character backslash sequences they spell out.
            cleaned = cleaned.replace(sequence, "");
        }
        return cleaned;
    }

    /**
     * Computes the SimHash fingerprint of {@link #tokens}.
     *
     * <p>Every segmented term (except those tagged {@link #SKIPPED_NATURE}) adds its
     * weight to each bit position set in the term's hash and subtracts it from every
     * other position; the fingerprint has a 1 wherever the final tally is non-negative.
     *
     * @return the fingerprint, a non-negative value below {@code 2^hashbits}
     */
    private BigInteger simHash() {
        int[] votes = new int[this.hashbits];
        List<Term> terms = StandardTokenizer.segment(this.tokens);
        for (Term term : terms) {
            String nature = term.nature.toString();
            if (SKIPPED_NATURE.equals(nature)) {
                continue;
            }
            int weight = HEAVY_NATURE.equals(nature) ? HEAVY_WEIGHT : DEFAULT_WEIGHT;
            BigInteger wordHash = hash(term.word);
            for (int bit = 0; bit < this.hashbits; bit++) {
                if (wordHash.testBit(bit)) {
                    votes[bit] += weight;
                } else {
                    votes[bit] -= weight;
                }
            }
        }

        BigInteger fingerprint = BigInteger.ZERO;
        for (int bit = 0; bit < this.hashbits; bit++) {
            // A tie (including "no terms at all") counts as a set bit.
            if (votes[bit] >= 0) {
                fingerprint = fingerprint.setBit(bit);
            }
        }
        return fingerprint;
    }

    /**
     * Hashes a single word with a multiply-xor rolling hash truncated to
     * {@link #hashbits} bits, then xors in the (padded) length.
     *
     * <p>Words shorter than three characters are padded by repeating their first
     * character. The result is always non-negative, so the former special case that
     * remapped a result of -1 could never trigger and has been dropped.
     *
     * @param str the word; {@code null} or empty yields zero
     * @return the non-negative word hash
     */
    private BigInteger hash(String str) {
        if (str == null || str.isEmpty()) {
            return BigInteger.ZERO;
        }
        StringBuilder padded = new StringBuilder(str);
        while (padded.length() < 3) {
            padded.append(str.charAt(0));
        }
        char[] chars = padded.toString().toCharArray();

        BigInteger mask = BigInteger.ONE.shiftLeft(this.hashbits).subtract(BigInteger.ONE);
        BigInteger value = BigInteger.valueOf(chars[0] << 7);
        for (char c : chars) {
            value = value.multiply(HASH_MULTIPLIER).xor(BigInteger.valueOf(c)).and(mask);
        }
        return value.xor(BigInteger.valueOf(chars.length));
    }

    /**
     * Counts the bit positions (within this instance's {@link #hashbits}) at which the
     * two fingerprints differ.
     *
     * @param textNewUtils the other fingerprint holder, must not be {@code null}
     * @return number of differing bits, between 0 and {@code hashbits}
     */
    private int hammingDistance(TextNewUtils textNewUtils) {
        BigInteger mask = BigInteger.ONE.shiftLeft(this.hashbits).subtract(BigInteger.ONE);
        // The masked value is non-negative, so bitCount() is exactly its number of 1 bits.
        return this.strSimHash.xor(textNewUtils.strSimHash).and(mask).bitCount();
    }

    /**
     * Returns the similarity between this text and another one.
     *
     * @param textNewUtils the other fingerprint holder, must not be {@code null}
     * @return {@code 1 - hammingDistance / hashbits}: 1.0 for identical fingerprints,
     *         0.0 when every bit differs
     */
    public double getSemblance(TextNewUtils textNewUtils) {
        // Divide in floating point: int / int truncated every partial distance to 0,
        // which made the result 1.0 for nearly all inputs.
        return 1.0d - ((double) hammingDistance(textNewUtils) / this.hashbits);
    }

    /**
     * Convenience shortcut comparing two raw texts with 64-bit fingerprints.
     *
     * @param str  first text; {@code null} is treated as empty
     * @param str2 second text; {@code null} is treated as empty
     * @return similarity in the range 0.0 to 1.0
     */
    public static double compareText(String str, String str2) {
        return new TextNewUtils(str, DEFAULT_HASH_BITS).getSemblance(new TextNewUtils(str2, DEFAULT_HASH_BITS));
    }

    /** Manual smoke test: prints the similarity of two near-identical messages. */
    public static void main(String[] strArr) {
        System.out.println(compareText("先来快快自我介绍下，我叫艾琳。在乐纯小岛的主要工作是「用户体验调研官」，同时负责跟进我们重要伙伴的每一个关键体验。希望在你的帮助下，我能努力打造好小岛生态。 在后续的相处中还要向你多多请教��", "HI 李群 先来快快自我介绍下，我叫艾琳。在乐纯小岛的主要工作是「用户体验调研官」，同时负责跟进我们重要伙伴的每一个关键体验。希望在你的帮助下，我能努力打造好小岛生态。 在后续的相处中还要向你多多请教��"));
    }
}
