java配置多個過濾器優先級以及幾個常用過濾器操作

Posted on 2021-07-04 by WalkonNet

目錄

過濾器配置：
常用過濾器之一：
常用過濾器之二：

敏感詞過濾工具類

一個項目中通常會有兩個以上的過濾器。如果不顯式配置執行順序，過濾器會按照名稱排序後依次執行，這可能導致性能或邏輯上的問題，因此有必要明確控制它們的執行順序。

Spring Boot 提供了 FilterRegistrationBean 類，可以把它看作一個容器：將過濾器放入這個對象後，就可以為它設置優先級、過濾規則（URL 匹配）等屬性。下面是過濾器的配置方式以及幾個常用的過濾器。

過濾器配置：

package cn.ask.filter;
import javax.servlet.DispatcherType; 
import org.springframework.boot.web.servlet.FilterRegistrationBean;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.web.filter.DelegatingFilterProxy;
 
/**
 * Spring configuration that registers the servlet filters of this package
 * and fixes the order in which they run.
 *
 * <p>Every filter is wrapped in a {@code FilterRegistrationBean}; the value
 * handed to {@code setOrder} decides precedence (a smaller number runs earlier).
 */
@SuppressWarnings("all")
@Configuration
public class FilterConfig {

    /**
     * Registers {@link XssFilter} for all URLs on REQUEST dispatches with order 1.
     *
     * @return the registration bean named {@code xssFilter}
     */
    @Bean
    public FilterRegistrationBean xssFilterRegistration() {
        FilterRegistrationBean xssBean = new FilterRegistrationBean();
        xssBean.setName("xssFilter");
        xssBean.setFilter(new XssFilter());
        xssBean.addUrlPatterns("/*");
        xssBean.setDispatcherTypes(DispatcherType.REQUEST);
        // the smaller the order value, the earlier the filter runs
        xssBean.setOrder(1);
        return xssBean;
    }

    /**
     * Registers {@link CookieFilter} (reads the login information) for all URLs
     * on REQUEST dispatches with order 2, i.e. after the XSS filter.
     *
     * @return the registration bean named {@code firstFilter}
     */
    @Bean
    public FilterRegistrationBean cookieRegistration() {
        FilterRegistrationBean cookieBean = new FilterRegistrationBean();
        cookieBean.setName("firstFilter");
        cookieBean.setFilter(new CookieFilter());
        cookieBean.addUrlPatterns("/*");
        cookieBean.setDispatcherTypes(DispatcherType.REQUEST);
        cookieBean.setOrder(2);
        return cookieBean;
    }
}

常用過濾器之一：

sso單點登錄（使用cookie記錄跨站數據）

package cn.ask.filter; 
import java.io.IOException;
import java.net.URLDecoder; 
import javax.servlet.Filter;
import javax.servlet.FilterChain;
import javax.servlet.FilterConfig;
import javax.servlet.ServletException;
import javax.servlet.ServletRequest;
import javax.servlet.ServletResponse;
import javax.servlet.http.Cookie;
import javax.servlet.http.HttpServletRequest; 
import org.apache.commons.lang3.StringUtils;
 
/**
 * Servlet Filter implementation class FirstFilter
 */
public class CookieFilter implements Filter {
 
    /**
     * Default constructor. 
     */
    public CookieFilter() {
        // TODO Auto-generated constructor stub
    }
 
	/**
	 * @see Filter#destroy()
	 */
	public void destroy() {
		// TODO Auto-generated method stub
	}
 
	/**
	 * @see Filter#doFilter(ServletRequest, ServletResponse, FilterChain)
	 */
	public void doFilter(ServletRequest request, ServletResponse response, FilterChain chain) throws IOException, ServletException {
		HttpServletRequest req = (HttpServletRequest) request;
		String tokenHeader = req.getHeader("token");
		if(StringUtils.isBlank(tokenHeader)) {
			Cookie[] cookies = req.getCookies();
			if (cookies != null && cookies.length > 0) {
				for (int i = 0; i < cookies.length; i++) {
					String name = cookies[i].getName();
					String value = cookies[i].getValue();
					if ("user_accesstoken".equals(name)) { // 用戶token
						req.setAttribute("token", URLDecoder.decode(value, "utf-8"));
					}
					if ("user_avatar".equals(name)) { // 頭像
						req.setAttribute("avatar", URLDecoder.decode(value, "utf-8"));
					}
					if ("user_nickname".equals(name)) { // 昵稱
						req.setAttribute("nickname", URLDecoder.decode(value, "utf-8"));
					}
					String token = (String) req.getAttribute("token");
					if (StringUtils.isBlank(token)) {
						req.setAttribute("isLogin", "no");
					} else {
						req.setAttribute("isLogin", "yes");
					}
				}
			}else {
				req.removeAttribute("token");
				req.removeAttribute("avatar");
				req.removeAttribute("nickname");
				req.setAttribute("isLogin", "no");
			}
		} else {
			req.setAttribute("token", tokenHeader);
		}
		
		chain.doFilter(request, response);
	}
 
	/**
	 * @see Filter#init(FilterConfig)
	 */
	public void init(FilterConfig fConfig) throws ServletException {
		// TODO Auto-generated method stub
	} 
}

常用過濾器之二：

XSS 過濾以及防 HTML 注入過濾（包括過濾敏感詞）

package cn.ask.filter; 
import java.io.IOException; 
import javax.servlet.Filter;
import javax.servlet.FilterChain;
import javax.servlet.FilterConfig;
import javax.servlet.ServletException;
import javax.servlet.ServletRequest;
import javax.servlet.ServletResponse;
import javax.servlet.http.HttpServletRequest;  
/**
 * Servlet filter that guards against XSS by handing the rest of the chain a
 * {@link XssHttpServletRequestWrapper} instead of the raw request.
 */
public class XssFilter implements Filter {

    /** No initialisation is required. */
    @Override
    public void init(FilterConfig config) throws ServletException {
        // intentionally empty
    }

    /**
     * Wraps the request and continues the chain with the wrapper.
     *
     * @param request  the incoming request; cast to {@link HttpServletRequest}
     * @param response the response, passed through untouched
     * @param chain    the remaining filter chain
     */
    @Override
    public void doFilter(ServletRequest request, ServletResponse response, FilterChain chain)
            throws IOException, ServletException {
        HttpServletRequest httpRequest = (HttpServletRequest) request;
        chain.doFilter(new XssHttpServletRequestWrapper(httpRequest), response);
    }

    /** Nothing to release. */
    @Override
    public void destroy() {
        // intentionally empty
    }
}

package cn.ask.filter;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.LinkedHashMap;
import java.util.Map;
import javax.servlet.ReadListener;
import javax.servlet.ServletInputStream;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletRequestWrapper; 
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.springframework.http.HttpHeaders;
import org.springframework.http.MediaType; 
 
/**
 * Title:      XssHttpServletRequestWrapper
 * @date       2018年8月16日
 * @version    V1.0
 * Description:  XSS過濾處理
 */
public class XssHttpServletRequestWrapper extends HttpServletRequestWrapper {
    /**沒被包裝過的HttpServletRequest（特殊場景，需要自己過濾*/
    HttpServletRequest orgRequest;
    /**html過濾*/
    private final static HtmlFilter HTMLFILTER = new HtmlFilter();
 
    public XssHttpServletRequestWrapper(HttpServletRequest request) {
        super(request);
        orgRequest = request;
    }
 
    @Override
    public ServletInputStream getInputStream() throws IOException {
        //非json類型，直接返回
        if(!super.getHeader(HttpHeaders.CONTENT_TYPE).equalsIgnoreCase(MediaType.APPLICATION_JSON_VALUE)){
            return super.getInputStream();
        }
 
        //為空，直接返回
        String json = IOUtils.toString(super.getInputStream(), "utf-8");
        if (StringUtils.isBlank(json)) {
            return super.getInputStream();
        }
 
        //xss過濾
        json = xssEncode(json);
        final ByteArrayInputStream bis = new ByteArrayInputStream(json.getBytes());
        return new ServletInputStream() {
            @Override
            public boolean isFinished() {
                return true;
            }
 
            @Override
            public boolean isReady() {
                return true;
            }
 
            @Override
            public void setReadListener(ReadListener readListener) {
 
            }
 
            @Override
            public int read() throws IOException {
                return bis.read();
            }
        };
    } 
    @Override
    public String getParameter(String name) {
        String value = super.getParameter(xssEncode(name));
        if (StringUtils.isNotBlank(value)) {
            value = xssEncode(value);
        }
        return value;
    } 
    @Override
    public String[] getParameterValues(String name) {
        String[] parameters = super.getParameterValues(name);
        if (parameters == null || parameters.length == 0) {
            return null;
        }
 
        for (int i = 0; i < parameters.length; i++) {
            parameters[i] = xssEncode(parameters[i]);
        }
        return parameters;
    } 
    @Override
    public Map<String,String[]> getParameterMap() {
        Map<String,String[]> map = new LinkedHashMap<>();
        Map<String,String[]> parameters = super.getParameterMap();
        for (String key : parameters.keySet()) {
            String[] values = parameters.get(key);
            for (int i = 0; i < values.length; i++) {
                values[i] = xssEncode(values[i]);
            }
            map.put(key, values);
        }
        return map;
    } 
    @Override
    public String getHeader(String name) {
        String value = super.getHeader(xssEncode(name));
        if (StringUtils.isNotBlank(value)) {
            value = xssEncode(value);
        }
        return value;
    }
    //富文本內容放行
    private String xssEncode(String input) {
    	if(!input.startsWith("<p")&&!input.startsWith("<ol")&&!input.startsWith("<ul")&&!input.startsWith("<hr/>")) {
    		input=HTMLFILTER.filter(input);
    	}
        //敏感詞過濾
        input=SensitiveWordUtils.getSensitiveWordUtils().replaceSensitiveWord(input, '*', 2);
        return input;        
    } 
    /**
     * 獲取最原始的request
     */
    public HttpServletRequest getOrgRequest() {
        return orgRequest;
    } 
    /**
     * 獲取最原始的request
     */
    public static HttpServletRequest getOrgRequest(HttpServletRequest request) {
        if (request instanceof XssHttpServletRequestWrapper) {
            return ((XssHttpServletRequestWrapper) request).getOrgRequest();
        } 
        return request;
    } 
}

package cn.ask.filter; 
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern; 
/**
 * Whitelist-based HTML filter used to defend against HTML injection.
 *
 * <p>{@link #filter(String)} escapes comments, balances stray angle brackets, drops
 * every tag and attribute that is not whitelisted, neutralises disallowed URL
 * protocols, removes empty tags and validates character entities.
 *
 * <p>All per-call state lives in local variables, so after construction one instance
 * can serve several threads calling {@link #filter(String)} at the same time.
 */
public final class HtmlFilter {
    /** regex flag union representing /si modifiers in php **/
    private static final int REGEX_FLAGS_SI = Pattern.CASE_INSENSITIVE | Pattern.DOTALL;
    private static final Pattern P_COMMENTS = Pattern.compile("<!--(.*?)-->", Pattern.DOTALL);
    private static final Pattern P_COMMENT = Pattern.compile("^!--(.*)--$", REGEX_FLAGS_SI);
    private static final Pattern P_TAGS = Pattern.compile("<(.*?)>", Pattern.DOTALL);
    private static final Pattern P_END_TAG = Pattern.compile("^/([a-z0-9]+)", REGEX_FLAGS_SI);
    private static final Pattern P_START_TAG = Pattern.compile("^([a-z0-9]+)(.*?)(/?)$", REGEX_FLAGS_SI);
    private static final Pattern P_QUOTED_ATTRIBUTES = Pattern.compile("([a-z0-9]+)=([\"'])(.*?)\\2", REGEX_FLAGS_SI);
    private static final Pattern P_UNQUOTED_ATTRIBUTES = Pattern.compile("([a-z0-9]+)(=)([^\"\\s']+)", REGEX_FLAGS_SI);
    private static final Pattern P_PROTOCOL = Pattern.compile("^([^:]+):", REGEX_FLAGS_SI);
    private static final Pattern P_ENTITY = Pattern.compile("&#(\\d+);?");
    /** Hexadecimal character references; case-insensitive so that "&#x3A;" and "&#X3a;" are recognised too. */
    private static final Pattern P_ENTITY_UNICODE = Pattern.compile("&#x([0-9a-f]+);?", Pattern.CASE_INSENSITIVE);
    private static final Pattern P_ENCODE = Pattern.compile("%([0-9a-f]{2});?");
    private static final Pattern P_VALID_ENTITIES = Pattern.compile("&([^&;]*)(?=(;|&|$))");
    private static final Pattern P_VALID_QUOTES = Pattern.compile("(>|^)([^<]+?)(<|$)", Pattern.DOTALL);
    private static final Pattern P_END_ARROW = Pattern.compile("^>");
    private static final Pattern P_BODY_TO_END = Pattern.compile("<([^>]*?)(?=<|$)");
    private static final Pattern P_XML_CONTENT = Pattern.compile("(^|>)([^<]*?)(?=>)");
    private static final Pattern P_STRAY_LEFT_ARROW = Pattern.compile("<([^>]*?)(?=<|$)");
    private static final Pattern P_STRAY_RIGHT_ARROW = Pattern.compile("(^|>)([^<]*?)(?=>)");
    private static final Pattern P_AMP = Pattern.compile("&");
    /**
     * Pattern applied by {@link #encodeQuotes(String)} to the text between tags.
     * NOTE(review): it matches "<", which can never occur in that text, so text-level
     * quote encoding is effectively a no-op. It is deliberately left that way here:
     * making it match '"' would turn every quote in filtered text into "&quot;", which
     * would corrupt any JSON document passed through {@link #filter(String)}.
     */
    private static final Pattern P_QUOTE = Pattern.compile("<");
    /** A literal double quote; used where quotes really have to be escaped. */
    private static final Pattern P_DOUBLE_QUOTE = Pattern.compile("\"");
    private static final Pattern P_LEFT_ARROW = Pattern.compile("<");
    private static final Pattern P_RIGHT_ARROW = Pattern.compile(">");
    private static final Pattern P_BOTH_ARROWS = Pattern.compile("<>");
    private static final String SS="#//";

    /** @xxx could grow large... maybe use sesat's ReferenceMap */
    private static final ConcurrentMap<String,Pattern> P_REMOVE_PAIR_BLANKS = new ConcurrentHashMap<String, Pattern>();
    private static final ConcurrentMap<String,Pattern> P_REMOVE_SELF_BLANKS = new ConcurrentHashMap<String, Pattern>();

    /** set of allowed html elements, along with allowed attributes for each element **/
    private final Map<String, List<String>> vAllowed;

    /** html elements which must always be self-closing (e.g. "<img />") **/
    private final String[] vSelfClosingTags;
    /** html elements which must always have separate opening and closing tags (e.g. "<b></b>") **/
    private final String[] vNeedClosingTags;
    /** set of disallowed html elements **/
    private final String[] vDisallowed;
    /** attributes which should be checked for valid protocols **/
    private final String[] vProtocolAtts;
    /** allowed protocols **/
    private final String[] vAllowedProtocols;
    /** tags which should be removed if they contain no content (e.g. "<b></b>" or "<b />") **/
    private final String[] vRemoveBlanks;
    /** entities allowed within html markup **/
    private final String[] vAllowedEntities;
    /** flag determining whether comments are stripped from the input String. */
    private final boolean stripComment;
    private final boolean encodeQuotes;
    private boolean vDebug = false;
    /**
     * flag determining whether to try to make tags when presented with "unbalanced"
     * angle brackets (e.g. "<b text </b>" becomes "<b> text </b>").  If set to false,
     * unbalanced angle brackets will be html escaped.
     */
    private final boolean alwaysMakeTags;

    /**
     * Default constructor: allows {@code a} (href, target), {@code img} (src, width,
     * height, alt), {@code b}, {@code strong}, {@code i} and {@code em}; protocols
     * http, https and mailto; strips comments.
     */
    public HtmlFilter() {
        vAllowed = new HashMap<>();

        final ArrayList<String> aAtts = new ArrayList<String>();
        aAtts.add("href");
        aAtts.add("target");
        vAllowed.put("a", aAtts);

        final ArrayList<String> imgAtts = new ArrayList<String>();
        imgAtts.add("src");
        imgAtts.add("width");
        imgAtts.add("height");
        imgAtts.add("alt");
        vAllowed.put("img", imgAtts);

        final ArrayList<String> noAtts = new ArrayList<String>();
        vAllowed.put("b", noAtts);
        vAllowed.put("strong", noAtts);
        vAllowed.put("i", noAtts);
        vAllowed.put("em", noAtts);

        vSelfClosingTags = new String[]{"img"};
        vNeedClosingTags = new String[]{"a", "b", "strong", "i", "em"};
        vDisallowed = new String[]{};
        /**no ftp*/
        vAllowedProtocols = new String[]{"http", "mailto", "https"};
        vProtocolAtts = new String[]{"src", "href"};
        vRemoveBlanks = new String[]{"a", "b", "strong", "i", "em"};
        vAllowedEntities = new String[]{"amp", "gt", "lt", "quot"};
        stripComment = true;
        encodeQuotes = true;
        alwaysMakeTags = true;
    }

    /** Set debug flag to true. Otherwise use default settings. See the default constructor.
     *
     * @param debug turn debug on with a true argument
     */
    public HtmlFilter(final boolean debug) {
        this();
        vDebug = debug;
    }

    /** Map-parameter configurable constructor.
     *
     * @param conf map containing configuration. keys match field names.
     */
    @SuppressWarnings("unchecked")
    public HtmlFilter(final Map<String,Object> conf) {

        assert conf.containsKey("vAllowed") : "configuration requires vAllowed";
        assert conf.containsKey("vSelfClosingTags") : "configuration requires vSelfClosingTags";
        assert conf.containsKey("vNeedClosingTags") : "configuration requires vNeedClosingTags";
        assert conf.containsKey("vDisallowed") : "configuration requires vDisallowed";
        assert conf.containsKey("vAllowedProtocols") : "configuration requires vAllowedProtocols";
        assert conf.containsKey("vProtocolAtts") : "configuration requires vProtocolAtts";
        assert conf.containsKey("vRemoveBlanks") : "configuration requires vRemoveBlanks";
        assert conf.containsKey("vAllowedEntities") : "configuration requires vAllowedEntities";

        // any Map implementation is accepted, not only HashMap
        vAllowed = Collections.unmodifiableMap((Map<String, List<String>>) conf.get("vAllowed"));
        vSelfClosingTags = (String[]) conf.get("vSelfClosingTags");
        vNeedClosingTags = (String[]) conf.get("vNeedClosingTags");
        vDisallowed = (String[]) conf.get("vDisallowed");
        vAllowedProtocols = (String[]) conf.get("vAllowedProtocols");
        vProtocolAtts = (String[]) conf.get("vProtocolAtts");
        vRemoveBlanks = (String[]) conf.get("vRemoveBlanks");
        vAllowedEntities = (String[]) conf.get("vAllowedEntities");
        stripComment =  conf.containsKey("stripComment") ? (Boolean) conf.get("stripComment") : true;
        encodeQuotes = conf.containsKey("encodeQuotes") ? (Boolean) conf.get("encodeQuotes") : true;
        alwaysMakeTags = conf.containsKey("alwaysMakeTags") ? (Boolean) conf.get("alwaysMakeTags") : true;
    }

    private void debug(final String msg) {
        if (vDebug) {
            Logger.getAnonymousLogger().info(msg);
        }
    }

    //---------------------------------------------------------------
    /** my versions of some PHP library functions */

    /**
     * Converts a code unit value to a one-character string (the value is cast to {@code char}).
     */
    public static String chr(final int decimal) {
        return String.valueOf((char) decimal);
    }

    /**
     * Escapes the four HTML special characters.
     *
     * @param s text to escape
     * @return {@code s} with {@code &}, {@code "}, {@code <} and {@code >} replaced by
     *         {@code &amp;}, {@code &quot;}, {@code &lt;} and {@code &gt;}
     */
    public static String htmlSpecialChars(final String s) {
        String result = s;
        // ampersand first, so the entities produced below are not escaped again
        result = regexReplace(P_AMP, "&amp;", result);
        result = regexReplace(P_DOUBLE_QUOTE, "&quot;", result);
        result = regexReplace(P_LEFT_ARROW, "&lt;", result);
        result = regexReplace(P_RIGHT_ARROW, "&gt;", result);
        return result;
    }

    //---------------------------------------------------------------
    /**
     * given a user submitted input String, filter out any invalid or restricted
     * html.
     *
     * @param input text (i.e. submitted by a user) than may contain html; may be {@code null}
     * @return "clean" version of input, with only valid, whitelisted html elements allowed;
     *         {@code null} for {@code null} input
     */
    public String filter(final String input) {
        if (input == null) {
            return null;
        }
        // open-tag counters of this call only; kept local so concurrent calls cannot interfere
        final Map<String, Integer> tagCounts = new HashMap<String, Integer>();
        String s = input;

        debug("************************************************");
        debug("              INPUT: " + input);

        s = escapeComments(s);
        debug("     escapeComments: " + s);

        s = balanceHTML(s);
        debug("        balanceHTML: " + s);

        s = checkTags(s, tagCounts);
        debug("          checkTags: " + s);

        s = processRemoveBlanks(s);
        debug("processRemoveBlanks: " + s);

        s = validateEntities(s);
        debug("    validateEntites: " + s);

        debug("************************************************\n\n");
        return s;
    }

    public boolean isAlwaysMakeTags(){
        return alwaysMakeTags;
    }

    public boolean isStripComments(){
        return stripComment;
    }

    /** Escapes the content of every {@code <!-- ... -->} comment so it cannot smuggle markup. */
    private String escapeComments(final String s) {
        final Matcher m = P_COMMENTS.matcher(s);
        final StringBuffer buf = new StringBuffer();
        while (m.find()) {
            /**(.*?)*/
            final String match = m.group(1);
            m.appendReplacement(buf, Matcher.quoteReplacement("<!--" + htmlSpecialChars(match) + "-->"));
        }
        m.appendTail(buf);
        return buf.toString();
    }

    /** Either completes unbalanced angle brackets into tags or escapes them, per {@code alwaysMakeTags}. */
    private String balanceHTML(String s) {
        if (alwaysMakeTags) {
            //
            // try and form html
            //
            s = regexReplace(P_END_ARROW, "", s);
            s = regexReplace(P_BODY_TO_END, "<$1>", s);
            s = regexReplace(P_XML_CONTENT, "$1<$2", s);

        } else {
            //
            // escape stray brackets
            //
            s = regexReplace(P_STRAY_LEFT_ARROW, "&lt;$1", s);
            s = regexReplace(P_STRAY_RIGHT_ARROW, "$1$2&gt;<", s);

            //
            // the last regexp causes '<>' entities to appear
            // (we need to do a lookahead assertion so that the last bracket can
            // be used in the next pass of the regexp)
            //
            s = regexReplace(P_BOTH_ARROWS, "", s);
        }
        return s;
    }

    /** Rewrites every tag through {@link #processTag} and appends closing tags for those left open. */
    private String checkTags(final String s, final Map<String, Integer> tagCounts) {
        final Matcher m = P_TAGS.matcher(s);
        final StringBuffer buf = new StringBuffer();
        while (m.find()) {
            m.appendReplacement(buf, Matcher.quoteReplacement(processTag(m.group(1), tagCounts)));
        }
        m.appendTail(buf);
        // the counters were tallied in processTag; close whatever is still open
        for (Map.Entry<String, Integer> open : tagCounts.entrySet()) {
            for (int ii = 0; ii < open.getValue(); ii++) {
                buf.append("</").append(open.getKey()).append('>');
            }
        }
        return buf.toString();
    }

    /** Removes empty occurrences ({@code <b></b>} or {@code <b />}) of the tags in {@code vRemoveBlanks}. */
    private String processRemoveBlanks(final String s) {
        String result = s;
        for (String tag : vRemoveBlanks) {
            if(!P_REMOVE_PAIR_BLANKS.containsKey(tag)){
                P_REMOVE_PAIR_BLANKS.putIfAbsent(tag, Pattern.compile("<" + tag + "(\\s[^>]*)?></" + tag + ">"));
            }
            result = regexReplace(P_REMOVE_PAIR_BLANKS.get(tag), "", result);
            if(!P_REMOVE_SELF_BLANKS.containsKey(tag)){
                P_REMOVE_SELF_BLANKS.putIfAbsent(tag, Pattern.compile("<" + tag + "(\\s[^>]*)?/>"));
            }
            result = regexReplace(P_REMOVE_SELF_BLANKS.get(tag), "", result);
        }
        return result;
    }

    private static String regexReplace(final Pattern regexPattern, final String replacement, final String s) {
        Matcher m = regexPattern.matcher(s);
        return m.replaceAll(replacement);
    }

    /**
     * Sanitises one tag.
     *
     * @param s         the text between {@code <} and {@code >}
     * @param tagCounts open-tag counters of the current filter call; updated here
     * @return the cleaned tag including angle brackets, or {@code ""} if the tag is dropped
     */
    private String processTag(final String s, final Map<String, Integer> tagCounts) {
        // ending tags
        Matcher m = P_END_TAG.matcher(s);
        if (m.find()) {
            final String name = m.group(1).toLowerCase();
            if (allowed(name) && !inArray(name, vSelfClosingTags) && tagCounts.containsKey(name)) {
                tagCounts.put(name, tagCounts.get(name) - 1);
                return "</" + name + ">";
            }
        }
        // starting tags
        m = P_START_TAG.matcher(s);
        if (m.find()) {
            final String name = m.group(1).toLowerCase();
            final String body = m.group(2);
            String ending = m.group(3);
            if (!allowed(name)) {
                return "";
            }
            final String params = buildAttributes(name, body);
            if (inArray(name, vSelfClosingTags)) {
                ending = " /";
            }
            if (inArray(name, vNeedClosingTags)) {
                ending = "";
            }
            if (ending == null || ending.length() < 1) {
                // opening tag: remember it so that checkTags can close it if needed
                final Integer count = tagCounts.get(name);
                tagCounts.put(name, count == null ? 1 : count + 1);
            } else {
                ending = " /";
            }
            return "<" + name + params + ending + ">";
        }
        // comments
        m = P_COMMENT.matcher(s);
        if (!stripComment && m.find()) {
            return  "<" + m.group() + ">";
        }
        return "";
    }

    /**
     * Rebuilds the attribute list of a start tag from the whitelisted attributes only.
     *
     * @param name lower-cased tag name
     * @param body the text following the tag name
     * @return the attributes as {@code  name="value"} pairs, each preceded by a space
     */
    private String buildAttributes(final String name, final String body) {
        final List<String> paramNames = new ArrayList<String>();
        final List<String> paramValues = new ArrayList<String>();
        final Matcher quoted = P_QUOTED_ATTRIBUTES.matcher(body);
        while (quoted.find()) {
            paramNames.add(quoted.group(1));
            paramValues.add(quoted.group(3));
        }
        final Matcher unquoted = P_UNQUOTED_ATTRIBUTES.matcher(body);
        while (unquoted.find()) {
            paramNames.add(unquoted.group(1));
            paramValues.add(unquoted.group(3));
        }

        final StringBuilder params = new StringBuilder();
        for (int ii = 0; ii < paramNames.size(); ii++) {
            final String paramName = paramNames.get(ii).toLowerCase();
            String paramValue = paramValues.get(ii);
            if (allowedAttribute(name, paramName)) {
                if (inArray(paramName, vProtocolAtts)) {
                    paramValue = processParamProtocol(paramValue);
                }
                // The value is re-emitted inside double quotes; a raw '"' (possible when
                // the input used single quotes) would close the attribute early and let
                // the rest of the value be parsed as new attributes.
                paramValue = regexReplace(P_DOUBLE_QUOTE, "&quot;", paramValue);
                params.append(' ').append(paramName).append("=\"").append(paramValue).append('"');
            }
        }
        return params.toString();
    }

    /** Turns a URL whose protocol is not whitelisted into a local anchor ({@code #...}). */
    private String processParamProtocol(String s) {
        s = decodeEntities(s);
        final Matcher m = P_PROTOCOL.matcher(s);
        if (m.find()) {
            final String protocol = m.group(1);
            if (!inArray(protocol, vAllowedProtocols)) {
                // bad protocol, turn into local anchor link instead
                s = "#" + s.substring(protocol.length() + 1, s.length());
                if (s.startsWith(SS)) {
                    s = "#" + s.substring(3, s.length());
                }
            }
        }
        return s;
    }

    /** Decodes decimal and hexadecimal character references and {@code %xx} escapes, then validates entities. */
    private String decodeEntities(String s) {
        s = decodeNumeric(P_ENTITY, 10, s);
        s = decodeNumeric(P_ENTITY_UNICODE, 16, s);
        s = decodeNumeric(P_ENCODE, 16, s);
        return validateEntities(s);
    }

    /**
     * Replaces every match of {@code pattern} by the character whose code point is
     * written in group 1. Matches that do not denote a valid code point are kept as is.
     */
    private static String decodeNumeric(final Pattern pattern, final int radix, final String s) {
        final Matcher m = pattern.matcher(s);
        final StringBuffer buf = new StringBuffer();
        while (m.find()) {
            final String decoded = codePointToString(m.group(1), radix);
            m.appendReplacement(buf, Matcher.quoteReplacement(decoded != null ? decoded : m.group()));
        }
        m.appendTail(buf);
        return buf.toString();
    }

    /**
     * Parses {@code digits} in the given radix (a leading zero does not mean octal).
     *
     * @return the character(s) for that code point, or {@code null} if the number
     *         overflows an int or is not a valid Unicode code point
     */
    private static String codePointToString(final String digits, final int radix) {
        try {
            final int codePoint = Integer.parseInt(digits, radix);
            return Character.isValidCodePoint(codePoint) ? new String(Character.toChars(codePoint)) : null;
        } catch (NumberFormatException ignored) {
            // user-supplied digits may be arbitrarily long; treat them as "not an entity"
            return null;
        }
    }

    /** Keeps whitelisted named entities and escapes every other ampersand. */
    private String validateEntities(final String s) {
        StringBuffer buf = new StringBuffer();

        // validate entities throughout the string
        Matcher m = P_VALID_ENTITIES.matcher(s);
        while (m.find()) {
            /**([^&;]*)*/
            final String one = m.group(1);
            /**(?=(;|&|$))*/
            final String two = m.group(2);
            m.appendReplacement(buf, Matcher.quoteReplacement(checkEntity(one, two)));
        }
        m.appendTail(buf);
        return encodeQuotes(buf.toString());
    }

    /** Applies {@code P_QUOTE} to the text between tags when {@code encodeQuotes} is set (see the note on {@code P_QUOTE}). */
    private String encodeQuotes(final String s){
        if (!encodeQuotes) {
            return s;
        }
        StringBuffer buf = new StringBuffer();
        Matcher m = P_VALID_QUOTES.matcher(s);
        while (m.find()) {
            /**(>|^)*/
            final String one = m.group(1);
            /**([^<]+?)*/
            final String two = m.group(2);
            /**(<|$)*/
            final String three = m.group(3);
            m.appendReplacement(buf, Matcher.quoteReplacement(one + regexReplace(P_QUOTE, "&quot;", two) + three));
        }
        m.appendTail(buf);
        return buf.toString();
    }

    private String checkEntity(final String preamble, final String term) {
        return ";".equals(term) && isValidEntity(preamble)
                ? "&" + preamble
                : "&amp;" + preamble;
    }

    private boolean isValidEntity(final String entity) {
        return inArray(entity, vAllowedEntities);
    }

    private static boolean inArray(final String s, final String[] array) {
        for (String item : array) {
            if (item != null && item.equals(s)) {
                return true;
            }
        }
        return false;
    }

    private boolean allowed(final String name) {
        return (vAllowed.isEmpty() || vAllowed.containsKey(name)) && !inArray(name, vDisallowed);
    }

    private boolean allowedAttribute(final String name, final String paramName) {
        return allowed(name) && (vAllowed.isEmpty() || vAllowed.get(name).contains(paramName));
    }
}

敏感詞過濾工具類

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Serializable;
import java.util.*; 
import org.springframework.core.io.ClassPathResource;
 
/**
 * Sensitive-word filter backed by a character trie (DFA style matching).
 *
 * <p>The dictionary is read once, when the class is initialised, from the classpath
 * resource {@code keywords.txt} (one word per line, UTF-8). If the resource cannot
 * be read the dictionary is empty and no text is ever reported or masked.
 *
 * <p>Match type {@code 1} stops at the first complete word found at a position
 * (minimum match); any other value, conventionally {@code 2}, keeps going and takes
 * the longest complete word (maximum match). For a dictionary holding "ab" and "abc"
 * and the text "xabc", minimum match reports "ab" and maximum match reports "abc".
 *
 * <p>NOTE(review): as in the original implementation, matches shorter than two
 * characters are never reported, so single-character dictionary entries have no effect.
 */
@SuppressWarnings("all")
public class SensitiveWordUtils implements Serializable {
    private static final long serialVersionUID = 1L;

    /** Minimum match rule: stop at the first complete word. */
    private static final int MIN_MATCH_TYPE = 1;
    /** Maximum match rule: take the longest complete word. */
    private static final int MAX_MATCH_TYPE = 2;

    /** One trie node: the outgoing edges plus a marker telling whether a word ends here. */
    private static final class TrieNode {
        private final Map<Character, TrieNode> children = new HashMap<Character, TrieNode>();
        private boolean endOfWord;
    }

    /** Words loaded from the dictionary resource; never {@code null}. */
    private static Set<String> sensitiveWordSet = new HashSet<String>();
    /** Root of the trie built from {@link #sensitiveWordSet}. */
    private static TrieNode sensitiveWordRoot = new TrieNode();

    private static SensitiveWordUtils sensitiveWordUtils = null;

    // Static initialisers run in textual order, so this block has to stay BELOW the
    // field declarations above; otherwise their initialisers would run afterwards and
    // replace the freshly built dictionary with an empty one.
    static {
        try {
            readResource("keywords.txt");
        } catch (Exception e) {
            // keep the (empty) default dictionary instead of failing class initialisation
            java.util.logging.Logger.getLogger(SensitiveWordUtils.class.getName())
                    .log(java.util.logging.Level.WARNING, "could not load sensitive word list keywords.txt", e);
        }
        // build the trie from whatever was loaded
        initSensitiveWordMap();
    }

    private SensitiveWordUtils() {
    }

    /**
     * Returns the shared instance, creating it on first use.
     */
    public synchronized static SensitiveWordUtils getSensitiveWordUtils() {
        if (sensitiveWordUtils == null) {
            sensitiveWordUtils = new SensitiveWordUtils();
        }
        return sensitiveWordUtils;
    }

    /**
     * Reads the dictionary resource into {@link #sensitiveWordSet}. Lines are trimmed
     * and empty lines are skipped; the set is only replaced after a complete read.
     *
     * @param name classpath location of the word list
     * @throws Exception if the resource is missing or cannot be read
     */
    private static void readResource(String name) throws Exception {
        ClassPathResource resource = new ClassPathResource(name);
        Set<String> words = new HashSet<String>();
        // try-with-resources closes the reader (and the underlying stream) on every path
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(resource.getInputStream(), "utf-8"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String word = line.trim();
                if (!word.isEmpty()) {
                    words.add(word);
                }
            }
        }
        sensitiveWordSet = words;
    }

    /**
     * Builds the trie from {@link #sensitiveWordSet}: one node per character, with the
     * node of a word's last character marked as end of word.
     */
    private static void initSensitiveWordMap() {
        TrieNode root = new TrieNode();
        for (String word : sensitiveWordSet) {
            TrieNode node = root;
            for (int i = 0; i < word.length(); i++) {
                Character key = word.charAt(i);
                TrieNode next = node.children.get(key);
                if (next == null) {
                    next = new TrieNode();
                    node.children.put(key, next);
                }
                node = next;
            }
            if (node != root) {
                node.endOfWord = true;
            }
        }
        sensitiveWordRoot = root;
    }

    /**
     * Checks whether a sensitive word starts at {@code beginIndex}.
     *
     * @param txt        text to inspect
     * @param beginIndex index of the first character to look at
     * @param matchType  1 for minimum match, anything else for maximum match
     * @return length of the word found at {@code beginIndex}, or 0 if there is none
     */
    private int checkSensitiveWord(String txt, int beginIndex, int matchType) {
        int walked = 0;      // characters followed in the trie so far
        int wordLength = 0;  // length of the last COMPLETE word seen on that path
        TrieNode node = sensitiveWordRoot;
        for (int i = beginIndex; i < txt.length(); i++) {
            node = node.children.get(txt.charAt(i));
            if (node == null) {
                break;
            }
            walked++;
            if (node.endOfWord) {
                // remember the complete word; characters walked beyond it only count
                // if they lead to another complete word
                wordLength = walked;
                if (MIN_MATCH_TYPE == matchType) {
                    break;
                }
            }
        }
        // matches shorter than two characters are not treated as words
        return wordLength < 2 ? 0 : wordLength;
    }

    /**
     * Tells whether the text contains at least one sensitive word.
     *
     * @param txt       text to inspect; {@code null} is treated as containing nothing
     * @param matchType 1 for minimum match, 2 for maximum match
     * @return {@code true} if a sensitive word was found
     */
    public boolean contains(String txt, int matchType) {
        if (txt == null) {
            return false;
        }
        for (int i = 0; i < txt.length(); i++) {
            if (checkSensitiveWord(txt, i, matchType) > 0) {
                return true;
            }
        }
        return false;
    }

    /**
     * Collects the sensitive words occurring in the text.
     *
     * @param txt       text to inspect; {@code null} yields an empty set
     * @param matchType 1 for minimum match, 2 for maximum match
     * @return the distinct words found, never {@code null}
     */
    public Set<String> getSensitiveWord(String txt, int matchType) {
        Set<String> found = new HashSet<>();
        if (txt == null) {
            return found;
        }
        for (int i = 0; i < txt.length(); i++) {
            int length = checkSensitiveWord(txt, i, matchType);
            if (length > 0) {
                found.add(txt.substring(i, i + length));
                i += length - 1; // minus one because the loop increments as well
            }
        }
        return found;
    }

    /**
     * Masks every sensitive word in the text, character by character.
     *
     * <p>The matched spans are overwritten in place, so dictionary words are always
     * treated literally (never as regular expressions) and the result does not depend
     * on the order in which words are processed.
     *
     * @param txt         text to mask; {@code null} or empty text is returned unchanged
     * @param replaceChar character written over each character of a sensitive word
     * @param matchType   1 for minimum match, 2 for maximum match
     * @return the masked text
     */
    public String replaceSensitiveWord(String txt, char replaceChar, int matchType) {
        if (txt == null || txt.isEmpty()) {
            return txt;
        }
        StringBuilder masked = null; // created lazily: most texts contain no match
        for (int i = 0; i < txt.length(); i++) {
            int length = checkSensitiveWord(txt, i, matchType);
            if (length > 0) {
                if (masked == null) {
                    masked = new StringBuilder(txt);
                }
                for (int j = i; j < i + length; j++) {
                    masked.setCharAt(j, replaceChar);
                }
                i += length - 1; // minus one because the loop increments as well
            }
        }
        return masked == null ? txt : masked.toString();
    }
}

以上為個人經驗，希望能給大家一個參考，也希望大家多多支持WalkonNet。

推薦閱讀：

發佈留言取消回覆