// OWASPSanitizer.java

/*
 * Copyright 2019 Global Crop Diversity Trust
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.genesys.server.service.impl;

import java.util.regex.Pattern;

import org.genesys.server.service.HtmlSanitizer;
import org.owasp.html.HtmlPolicyBuilder;
import org.owasp.html.PolicyFactory;
import org.springframework.stereotype.Service;

/**
 * HTML sanitizer using owasp-java-html-sanitizer.
 *
 * <p>
 * Input is filtered through a single allow-list policy: only the elements and
 * attributes explicitly listed in the policy below are kept.
 * </p>
 *
 * @author mobreza
 */
@Service
public class OWASPSanitizer implements HtmlSanitizer {

	/**
	 * Allow-list policy applied by {@link #sanitize(String)}.
	 *
	 * <p>
	 * Declared {@code static final} so the policy (and the regular expressions it
	 * uses) is built once per class load. {@code PolicyFactory} is documented by
	 * the OWASP library as immutable and thread-safe, so sharing it is safe.
	 * </p>
	 */
	private static final PolicyFactory POLICY = new HtmlPolicyBuilder()
			// URLs are restricted to the library's standard protocol set
			.allowStandardUrlProtocols()
			// Allow title= on any element
			.allowAttributes("title")
			.globally()
			// Allow href= and target= on links
			.allowAttributes("href", "target")
			.onElements("a")
			// Defeat link spammers.
			.requireRelNofollowOnLinks()
			// Allow lang= with an alphabetic value (2-20 letters) on any element.
			.allowAttributes("lang")
			.matching(Pattern.compile("[a-zA-Z]{2,20}"))
			.globally()
			// Allow class= made of letters, hyphens and spaces (2-50 chars) on div and span.
			// NOTE(review): "span" is not listed in allowElements(...) below, so this
			// rule presumably has no effect for span -- confirm whether span should be allowed.
			.allowAttributes("class")
			.matching(Pattern.compile("[a-zA-Z\\- ]{2,50}"))
			.onElements("div", "span")
			// Allow align= with one of the listed values (case-insensitive) on p and table
			.allowAttributes("align")
			.matching(true, "center", "left", "right", "justify", "char")
			.onElements("p", "table")
			// Iframe presentation attributes
			.allowAttributes("width", "height", "frameborder", "webkitallowfullscreen", "mozallowfullscreen", "allowfullscreen")
			.onElements("iframe")
			// Iframe sources: only the Vimeo player and YouTube (https or protocol-relative)
			.allowAttributes("src")
			.matching(Pattern.compile("^((https:)?//player\\.vimeo\\.com/|(https:)?//www\\.youtube\\.com/).+"))
			.onElements("iframe")
			// Image attributes
			// NOTE(review): "style" is allowed here as a plain attribute and allowStyling()
			// is not used, so its value is presumably not CSS-sanitized -- confirm this is intended.
			.allowAttributes("src", "alt", "style", "width", "height", "srcset", "sizes")
			.onElements("img")

			// Elements that may appear in the sanitized output
			.allowElements("table", "thead", "tbody", "tr", "td", "th", "tfoot", "a", "p", "div", "i", "b", "em", "blockquote", "tt", "strong", "br", "ul",
					"ol", "li", "h1", "h2", "h3", "h4", "small", "pre", "code", "iframe", "img")

			// Get factory
			.toFactory();

	/**
	 * Sanitizes the given HTML using the allow-list policy of this class.
	 *
	 * @param html the HTML to sanitize, may be {@code null}
	 * @return the sanitized HTML, or {@code null} if {@code html} is {@code null}
	 */
	@Override
	public String sanitize(String html) {
		return html == null ? null : POLICY.sanitize(html);
	}
}