/*
 * Decompiled with CFR 0.152.
 */
package cc.lechun.framework.common.utils.string;

import com.hankcs.hanlp.seg.common.Term;
import com.hankcs.hanlp.tokenizer.StandardTokenizer;
import java.math.BigInteger;
import java.util.HashMap;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.safety.Safelist;

/**
 * Estimates how similar two pieces of text are by comparing SimHash fingerprints.
 *
 * <p>Each instance cleans its input (HTML stripped, lower-cased, whitespace removed),
 * segments it with HanLP's {@link StandardTokenizer}, and folds the per-word hashes
 * into a fixed-width fingerprint. Two instances are compared through the Hamming
 * distance of their fingerprints, see {@link #getSemblance(TextNewUtils)}.
 *
 * <p>Instances are immutable once constructed.
 */
public class TextNewUtils {
    /** Fingerprint width used by the public constructor and by {@link #compareText}. */
    private static final int DEFAULT_HASH_BITS = 64;

    /** A word stops contributing once its recorded occurrence count exceeds this value. */
    private static final int OVER_COUNT = 5;

    /** Nature tag whose words get extra weight (the noun tag in HanLP's tag set). */
    private static final String WEIGHTED_NATURE = "n";

    /** Weight applied to words tagged {@link #WEIGHTED_NATURE}; every other word weighs 1. */
    private static final int WEIGHTED_NATURE_WEIGHT = 2;

    /** Nature tag whose words are ignored (the punctuation tag in HanLP's tag set). */
    private static final String STOP_NATURE = "w";

    /**
     * Literal fragments removed from the text before segmentation: real whitespace
     * characters, their escaped two-character spellings, and the HTML entity for a
     * non-breaking space.
     */
    private static final String[] NOISE_FRAGMENTS = {" ", "\n", "\r", "\t", "\\r", "\\n", "\\t", "&nbsp;"};

    /** Multiplier of the per-word rolling hash. */
    private static final BigInteger HASH_MULTIPLIER = BigInteger.valueOf(1000003L);

    /** The cleaned text the fingerprint was computed from. */
    private final String tokens;
    /** SimHash fingerprint of {@link #tokens}; non-negative and below {@code 2^hashbits}. */
    private final BigInteger strSimHash;
    /** Width of the fingerprint in bits. */
    private final int hashbits;
    /** {@code 2^hashbits - 1}, computed once because every word hash needs it. */
    private final BigInteger bitMask;

    /**
     * Builds a 64-bit fingerprint for the given text.
     *
     * @param tokens raw text, may contain HTML; {@code null} is treated as empty text
     */
    public TextNewUtils(String tokens) {
        this(tokens, DEFAULT_HASH_BITS);
    }

    /**
     * Builds a fingerprint of the requested width for the given text.
     *
     * @param tokens   raw text, may contain HTML; {@code null} is treated as empty text
     * @param hashbits fingerprint width in bits, must be positive
     * @throws IllegalArgumentException if {@code hashbits} is not positive
     */
    private TextNewUtils(String tokens, int hashbits) {
        if (hashbits <= 0) {
            throw new IllegalArgumentException("hashbits must be positive: " + hashbits);
        }
        this.hashbits = hashbits;
        this.bitMask = BigInteger.ONE.shiftLeft(hashbits).subtract(BigInteger.ONE);
        this.tokens = cleanResume(tokens);
        this.strSimHash = simHash(this.tokens, hashbits, this.bitMask);
    }

    /**
     * Normalises text before segmentation: strips all HTML tags, lower-cases the
     * result and removes every fragment listed in {@link #NOISE_FRAGMENTS}.
     *
     * @param content raw text; {@code null} yields the empty string
     * @return the cleaned text, never {@code null}
     */
    private static String cleanResume(String content) {
        if (content == null) {
            return "";
        }
        String cleaned = StringUtils.lowerCase(Jsoup.clean(content, Safelist.none()));
        for (String fragment : NOISE_FRAGMENTS) {
            // String.replace matches literally. replaceAll would read "\\r" as the regex
            // \r (a carriage return) and never remove the backslash-letter sequence.
            cleaned = cleaned.replace(fragment, "");
        }
        return cleaned;
    }

    /**
     * Computes the SimHash fingerprint of already-cleaned text.
     *
     * <p>Every segmented word votes on each bit position with its weight: plus the
     * weight when the bit is set in the word's hash, minus the weight otherwise.
     * A fingerprint bit is set when its tally is not negative, so text without any
     * contributing word produces a fingerprint with all bits set.
     *
     * @param text     cleaned text to fingerprint
     * @param hashbits fingerprint width in bits
     * @param mask     {@code 2^hashbits - 1}
     * @return the fingerprint, non-negative and below {@code 2^hashbits}
     */
    private static BigInteger simHash(String text, int hashbits, BigInteger mask) {
        int[] tally = new int[hashbits];
        List<Term> termList = StandardTokenizer.segment(text);
        HashMap<String, Integer> wordCount = new HashMap<String, Integer>();
        for (Term term : termList) {
            String word = term.word;
            String nature = term.nature == null ? "" : term.nature.toString();

            // Cap the influence of very frequent words.
            int seen = wordCount.getOrDefault(word, 0);
            if (seen > OVER_COUNT) {
                continue;
            }
            wordCount.put(word, seen + 1);

            if (STOP_NATURE.equals(nature)) {
                continue;
            }
            int weight = WEIGHTED_NATURE.equals(nature) ? WEIGHTED_NATURE_WEIGHT : 1;
            BigInteger wordHash = hash(word, mask);
            for (int i = 0; i < hashbits; ++i) {
                tally[i] += wordHash.testBit(i) ? weight : -weight;
            }
        }

        BigInteger fingerprint = BigInteger.ZERO;
        for (int i = 0; i < hashbits; ++i) {
            if (tally[i] >= 0) {
                fingerprint = fingerprint.setBit(i);
            }
        }
        return fingerprint;
    }

    /**
     * Hashes a single word with a multiplicative rolling hash.
     *
     * <p>Words shorter than three characters are padded by repeating their first
     * character. The running value is masked on every step and finally XOR-ed with
     * the padded length, so the result is never negative.
     *
     * @param source the word to hash; {@code null} or empty yields zero
     * @param mask   {@code 2^hashbits - 1}
     * @return the non-negative hash of the word
     */
    private static BigInteger hash(String source, BigInteger mask) {
        if (source == null || source.isEmpty()) {
            return BigInteger.ZERO;
        }
        StringBuilder padded = new StringBuilder(source);
        while (padded.length() < 3) {
            padded.append(source.charAt(0));
        }
        char[] chars = padded.toString().toCharArray();
        BigInteger x = BigInteger.valueOf((long) chars[0] << 7);
        for (char c : chars) {
            x = x.multiply(HASH_MULTIPLIER).xor(BigInteger.valueOf(c)).and(mask);
        }
        return x.xor(BigInteger.valueOf(chars.length));
    }

    /**
     * Counts the bit positions in which this fingerprint differs from another one.
     *
     * @param other the instance to compare with
     * @return number of differing bits within this instance's fingerprint width
     */
    private int hammingDistance(TextNewUtils other) {
        // The masked XOR is non-negative, so bitCount() is exactly its number of set bits.
        return this.strSimHash.xor(other.strSimHash).and(this.bitMask).bitCount();
    }

    /**
     * Returns the similarity between this text and another one.
     *
     * @param s2 the instance to compare with, must not be {@code null}
     * @return {@code 1 - hammingDistance / hashbits}; 1.0 means identical fingerprints
     * @throws NullPointerException if {@code s2} is {@code null}
     */
    public double getSemblance(TextNewUtils s2) {
        if (s2 == null) {
            throw new NullPointerException("s2 must not be null");
        }
        double distance = this.hammingDistance(s2);
        return 1.0 - distance / (double) this.hashbits;
    }

    /**
     * Compares two raw texts using 64-bit fingerprints.
     *
     * @param old the first text; {@code null} is treated as empty text
     * @param nw  the second text; {@code null} is treated as empty text
     * @return similarity as defined by {@link #getSemblance(TextNewUtils)}
     */
    public static double compareText(String old, String nw) {
        TextNewUtils hash1 = new TextNewUtils(old, DEFAULT_HASH_BITS);
        TextNewUtils hash2 = new TextNewUtils(nw, DEFAULT_HASH_BITS);
        return hash1.getSemblance(hash2);
    }

    /**
     * Manual smoke test: prints the similarity of two sample messages, the second
     * being the first with a short greeting prepended.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String s1 = "\u5148\u6765\u5feb\u5feb\u81ea\u6211\u4ecb\u7ecd\u4e0b\uff0c\u6211\u53eb\u827e\u7433\u3002\u5728\u4e50\u7eaf\u5c0f\u5c9b\u7684\u4e3b\u8981\u5de5\u4f5c\u662f\u300c\u7528\u6237\u4f53\u9a8c\u8c03\u7814\u5b98\u300d\uff0c\u540c\u65f6\u8d1f\u8d23\u8ddf\u8fdb\u6211\u4eec\u91cd\u8981\u4f19\u4f34\u7684\u6bcf\u4e00\u4e2a\u5173\u952e\u4f53\u9a8c\u3002\u5e0c\u671b\u5728\u4f60\u7684\u5e2e\u52a9\u4e0b\uff0c\u6211\u80fd\u52aa\u529b\u6253\u9020\u597d\u5c0f\u5c9b\u751f\u6001\u3002 \u5728\u540e\u7eed\u7684\u76f8\u5904\u4e2d\u8fd8\u8981\u5411\u4f60\u591a\u591a\u8bf7\u6559\ud83d\udc7c";
        String s2 = "HI \u674e\u7fa4 \u5148\u6765\u5feb\u5feb\u81ea\u6211\u4ecb\u7ecd\u4e0b\uff0c\u6211\u53eb\u827e\u7433\u3002\u5728\u4e50\u7eaf\u5c0f\u5c9b\u7684\u4e3b\u8981\u5de5\u4f5c\u662f\u300c\u7528\u6237\u4f53\u9a8c\u8c03\u7814\u5b98\u300d\uff0c\u540c\u65f6\u8d1f\u8d23\u8ddf\u8fdb\u6211\u4eec\u91cd\u8981\u4f19\u4f34\u7684\u6bcf\u4e00\u4e2a\u5173\u952e\u4f53\u9a8c\u3002\u5e0c\u671b\u5728\u4f60\u7684\u5e2e\u52a9\u4e0b\uff0c\u6211\u80fd\u52aa\u529b\u6253\u9020\u597d\u5c0f\u5c9b\u751f\u6001\u3002 \u5728\u540e\u7eed\u7684\u76f8\u5904\u4e2d\u8fd8\u8981\u5411\u4f60\u591a\u591a\u8bf7\u6559\ud83d\udc7c";
        System.out.println(TextNewUtils.compareText(s1, s2));
    }
}

