SitemapXMLController.java

/*
 * Copyright 2019 Global Crop Diversity Trust
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.genesys.server.mvc;

import java.io.IOException;
import java.io.Writer;
import java.time.Instant;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.TimeZone;

import javax.servlet.http.HttpServletResponse;

import org.genesys.blocks.model.filters.TemporalFilter;
import org.genesys.server.exception.NotFoundElement;
import org.genesys.server.exception.SearchException;
import org.genesys.server.model.Partner;
import org.genesys.server.model.dataset.Dataset;
import org.genesys.server.model.filters.DatasetFilter;
import org.genesys.server.model.filters.DescriptorListFilter;
import org.genesys.server.model.filters.PartnerFilter;
import org.genesys.server.model.genesys.Accession;
import org.genesys.server.model.impl.Crop;
import org.genesys.server.model.impl.FaoInstitute;
import org.genesys.server.model.impl.Subset;
import org.genesys.server.model.traits.DescriptorList;
import org.genesys.server.model.vocab.VocabularyTerm;
import org.genesys.server.service.ActivityPostService;
import org.genesys.server.service.CropService;
import org.genesys.server.service.DatasetService;
import org.genesys.server.service.DescriptorListService;
import org.genesys.server.service.GeoService;
import org.genesys.server.service.InstituteService;
import org.genesys.server.service.PartnerService;
import org.genesys.server.service.SubsetService;
import org.genesys.server.service.TaxonomyService;
import org.genesys.server.service.filter.AccessionFilter;
import org.genesys.server.service.filter.ActivityPostFilter;
import org.genesys.server.service.filter.SubsetFilter;
import org.genesys.server.service.worker.AccessionProcessor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.domain.Sort;
import org.springframework.http.MediaType;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;

import com.google.common.collect.Sets;

import static org.genesys.server.service.ActivityPostTranslationService.TranslatedActivityPost;

/**
 * http://www.sitemaps.org/protocol.html
 * 
 * @author Matija Obreza, matija.obreza@croptrust.org
 */
@Controller
public class SitemapXMLController {

	private static final Logger LOG = LoggerFactory.getLogger(SitemapXMLController.class);

	/**
	 * Formatter for {@code <lastmod>} values in the W3C datetime profile
	 * (https://www.w3.org/TR/NOTE-datetime: YYYY-MM-DDThh:mm:ssTZD), rendered in UTC.
	 *
	 * The holder is declared {@code final} so the shared static reference can never
	 * be reassigned. {@link DateTimeFormatter} is immutable and thread-safe, so the
	 * {@link ThreadLocal} wrapper is not strictly required; it is retained because
	 * the formatter is obtained through {@code w3cDatetime.get()} elsewhere in this class.
	 */
	private static final ThreadLocal<DateTimeFormatter> w3cDatetime = ThreadLocal.withInitial(
		() -> DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ssXXX").withZone(TimeZone.getTimeZone("UTC").toZoneId()));

	/**
	 * Change frequency hint of a sitemap entry. The lower-cased constant name is
	 * what gets written into the {@code <changefreq>} element.
	 */
	public enum Frequency {
		YEARLY, MONTHLY, WEEKLY, DAILY, HOURLY, ALWAYS
	}
	
	// Base URL of the public frontend (property "frontend.url"); prepended to every
	// path written into a <loc> element.
	@Value("${frontend.url}")
	private String frontendUrl;

	// Source of genus names for /sitemap-genus.xml
	@Autowired
	private TaxonomyService taxonomyService;

	// Source of ISO 3166 alpha-3 terms for /sitemap-iso3166.xml
	@Autowired
	private GeoService geoService;

	// Source of institutes for /sitemap-wiews.xml and the per-institute sitemaps
	@Autowired
	private InstituteService instituteService;

	// Source of crops for /sitemap-crop.xml
	@Autowired
	private CropService cropService;

	// Processes the accessions matching a filter in batches for the per-institute sitemaps
	@Autowired
	private AccessionProcessor accessionProcessor;

	// Source of news posts for /sitemap-content.xml
	@Autowired
	private ActivityPostService activityPostService;

	// Source of partners for /sitemap-partner.xml
	@Autowired
	private PartnerService partnerService;

	// Source of subsets for /sitemap-subset.xml
	@Autowired
	private SubsetService subsetService;

	// Source of descriptor lists for /sitemap-descriptorlist.xml
	@Autowired
	private DescriptorListService descriptorListService;

	// Source of datasets for /sitemap-dataset.xml
	@Autowired
	private DatasetService datasetService;

	// Used to derive URL slugs from news post titles
	@Autowired
	private JspHelper jspHelper;

	/**
	 * A single sitemap entry: the site-relative URL plus the optional change
	 * frequency, priority and last modification time. Optional values left
	 * {@code null} are omitted from the generated XML.
	 */
	private static class SitemapPage {
		String url;
		Frequency freq;
		Double priority;
		Instant lastModified;

		/** Entry with a URL only. */
		public SitemapPage(String url) {
			this(url, null, null, null);
		}

		/** Entry with a URL and a priority. */
		public SitemapPage(String url, double priority) {
			this(url, null, priority, null);
		}

		/** Entry with a URL, change frequency and priority. */
		public SitemapPage(String url, Frequency changeFrequency, double priority) {
			this(url, changeFrequency, priority, null);
		}

		/** Entry with a URL and last modification time. */
		public SitemapPage(String url, Instant lastModified) {
			this(url, null, null, lastModified);
		}

		/** Canonical constructor that the public constructors delegate to. */
		private SitemapPage(String url, Frequency freq, Double priority, Instant lastModified) {
			this.url = url;
			this.freq = freq;
			this.priority = priority;
			this.lastModified = lastModified;
		}

	}

	/** Child sitemaps listed in the /sitemap.xml index. */
	private static final SitemapPage[] sitemaps = {
		new SitemapPage("/sitemap-content.xml"),
		new SitemapPage("/sitemap-iso3166.xml"),
		new SitemapPage("/sitemap-wiews.xml"),
		new SitemapPage("/sitemap-crop.xml"),
		new SitemapPage("/sitemap-partner.xml"),
		new SitemapPage("/sitemap-subset.xml"),
		new SitemapPage("/sitemap-dataset.xml"),
		new SitemapPage("/sitemap-descriptorlist.xml"),
		new SitemapPage("/sitemap-genus.xml")
	};

	/** Fixed content pages that always appear in /sitemap-content.xml. */
	private final SitemapPage[] sitemapContentPages = {
		new SitemapPage("/", Frequency.WEEKLY, 1.0),
		new SitemapPage("/content/news", Frequency.WEEKLY, 0.8),
		new SitemapPage("/content/about/about", Frequency.YEARLY, 0.7),
		new SitemapPage("/content/legal/terms", Frequency.YEARLY, 0.2),
		new SitemapPage("/content/legal/disclaimer", Frequency.YEARLY, 0.2),
		new SitemapPage("/documentation/apis", Frequency.YEARLY, 0.1),
		new SitemapPage("/documentation/brapi", Frequency.YEARLY, 0.1)
	};

	/**
	 * Writes the sitemap index that points to every child sitemap in {@code sitemaps}.
	 *
	 * @param writer response writer receiving the XML
	 * @param response servlet response, used for the content type and URL encoding
	 * @throws IOException if writing to the response fails
	 */
	@RequestMapping(value = "/sitemap.xml", method = RequestMethod.GET)
	public void sitemapsXml(Writer writer, HttpServletResponse response) throws IOException {
		final String xmlDeclaration = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
		final String indexOpen = "<sitemapindex xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">";

		response.setContentType(MediaType.TEXT_XML_VALUE);
		writer.append(xmlDeclaration);
		writer.flush();
		writer.append(indexOpen);

		for (final SitemapPage childSitemap : sitemaps) {
			writer.append(" <sitemap>")
				.append("   <loc>").append(frontendUrl).append(response.encodeURL(childSitemap.url)).append("</loc>")
				.append(" </sitemap>");
		}

		writer.append("</sitemapindex>");
	}

	/**
	 * Writes the sitemap of taxonomy pages, one {@code /t/<genus>} entry per genus
	 * whose name consists of ASCII letters only.
	 *
	 * An {@link IOException} raised while writing an entry (for example because the
	 * client disconnected) now aborts the request. Previously it was caught inside
	 * the stream lambda, so the loop kept iterating over all remaining genera and
	 * logged a warning for each one.
	 *
	 * @param writer response writer receiving the XML
	 * @param response servlet response, used for the content type and URL encoding
	 * @throws IOException if writing to the response fails
	 */
	@RequestMapping(value = "/sitemap-genus.xml", method = RequestMethod.GET)
	public void sitemapGenusXml(Writer writer, HttpServletResponse response) throws IOException {
		response.setContentType(MediaType.TEXT_XML_VALUE);
		writer.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
		writer.flush();
		writer.append("<urlset xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd\" xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">");

		// A plain loop (instead of Stream.forEach) lets the checked IOException propagate
		for (final String genus : taxonomyService.getAllGenera()) {
			if (!genus.matches("^[a-zA-Z]+$")) {
				// Skip names with characters other than ASCII letters
				continue;
			}
			writePage(writer, response, "/t/" + genus, Frequency.YEARLY, 0.1, null);
			writer.flush();
		}

		writer.append("</urlset>");
		writer.flush();
	}

	/**
	 * Writes the sitemap of content pages: the fixed pages from
	 * {@code sitemapContentPages} followed by up to 20 news posts, newest first,
	 * that are already published and have no expiration date or one in the future.
	 *
	 * @param writer response writer receiving the XML
	 * @param response servlet response, used for the content type and URL encoding
	 * @throws Exception if loading the posts or writing the response fails
	 */
	@RequestMapping(value = "/sitemap-content.xml", method = RequestMethod.GET)
	public void sitemapContentXml(Writer writer, HttpServletResponse response) throws Exception {
		final List<SitemapPage> pages = new ArrayList<>(Arrays.asList(sitemapContentPages));

		final Instant now = Instant.now();

		// Published before "now" AND (never expires OR expires after "now")
		var publishedAndCurrent = (ActivityPostFilter) new ActivityPostFilter()
			.publishDate(new TemporalFilter<Instant>().lt(now))
			.AND(
				(ActivityPostFilter) new ActivityPostFilter().NULL(Set.of("expirationDate"))
					.OR(new ActivityPostFilter().expirationDate(new TemporalFilter<Instant>().gt(now)))
			);
		final PageRequest latestTwenty = PageRequest.of(0, 20, Sort.Direction.DESC, "publishDate");
		final List<TranslatedActivityPost> posts = activityPostService.listFiltered(publishedAndCurrent, latestTwenty).getContent();

		for (final TranslatedActivityPost post : posts) {
			// Prefer the translated title when a translation is available
			final var title = post.getTranslation() != null ? post.getTranslation().getTitle() : post.getEntity().getTitle();
			final String slug = jspHelper.suggestUrlForText(title);
			final SitemapPage newsPage = new SitemapPage("/content/news/" + post.getEntity().getId() + "/" + slug, post.getEntity().getLastModifiedDate());
			newsPage.priority = 0.7;
			newsPage.freq = Frequency.YEARLY;
			pages.add(newsPage);
		}

		writeSitemap(writer, response, pages.toArray(new SitemapPage[0]));
	}

	/**
	 * Writes the sitemap of country pages, one {@code /iso3166/<CODE>} entry per
	 * ISO 3166 alpha-3 term, ordered by {@code code3}.
	 *
	 * Codes are upper-cased with {@link Locale#ROOT} so the result does not depend
	 * on the JVM default locale (a Turkish locale would map "i" to dotted "İ").
	 *
	 * @param writer response writer receiving the XML
	 * @param response servlet response, used for the content type and URL encoding
	 * @throws IOException if writing to the response fails
	 */
	@RequestMapping(value = "/sitemap-iso3166.xml", method = RequestMethod.GET)
	public void sitemapGeoXml(Writer writer, HttpServletResponse response) throws IOException {
		response.setContentType(MediaType.TEXT_XML_VALUE);
		writer.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
		writer.flush();
		writer.append("<urlset xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd\" xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">");

		List<VocabularyTerm> vt = geoService.list3166Alpha3Terms(PageRequest.of(0, Integer.MAX_VALUE, Sort.Direction.ASC, "code3")).getContent();
		for (VocabularyTerm term : vt) {
			// Locale-independent upper-casing keeps the code pure ASCII
			writePage(writer, response, "/iso3166/" + term.getCode().toUpperCase(Locale.ROOT), Frequency.YEARLY, 0.4, null);
		}

		writer.append("</urlset>");
		writer.flush();
	}

	/**
	 * Writes the sitemap of partner pages: the {@code /partners} overview followed
	 * by one {@code /partners/<uuid>} entry per partner, ordered by id.
	 *
	 * Partners are loaded before anything is written. Previously the XML
	 * declaration was flushed first, so a {@link SearchException} from the lookup
	 * left an already committed, truncated XML document.
	 *
	 * @param writer response writer receiving the XML
	 * @param response servlet response, used for the content type and URL encoding
	 * @throws IOException if writing to the response fails
	 * @throws SearchException if the partner lookup fails
	 */
	@RequestMapping(value = "/sitemap-partner.xml", method = RequestMethod.GET)
	public void sitemapPartnerXml(Writer writer, HttpServletResponse response) throws IOException, SearchException {
		// Run the fallible lookup first, while the response is still uncommitted
		final List<Partner> partners = partnerService.list(new PartnerFilter(), PageRequest.of(0, Integer.MAX_VALUE, Sort.Direction.ASC, "id")).getContent();

		response.setContentType(MediaType.TEXT_XML_VALUE);
		writer.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
		writer.flush();
		writer.append("<urlset xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd\" xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">");

		writePage(writer, response, "/partners", Frequency.MONTHLY, 0.7, null);

		for (final Partner partner : partners) {
			writePage(writer, response, "/partners/" + partner.getUuid(), Frequency.YEARLY, 0.6, partner.getLastModifiedDate());
		}

		writer.append("</urlset>");
		writer.flush();
	}

	/**
	 * Writes the sitemap of subset pages, one {@code /subsets/<uuid>} entry per
	 * subset, ordered by id. If the lookup fails with a {@link SearchException} the
	 * error is logged and an empty urlset is written.
	 *
	 * @param writer response writer receiving the XML
	 * @param response servlet response, used for the content type and URL encoding
	 * @throws IOException if writing to the response fails
	 */
	@RequestMapping(value = "/sitemap-subset.xml", method = RequestMethod.GET)
	public void sitemapSubsetXml(Writer writer, HttpServletResponse response) throws IOException {
		response.setContentType(MediaType.TEXT_XML_VALUE);
		writer.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
		writer.flush();
		writer.append("<urlset xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd\" xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">");

		List<Subset> allSubsets;
		try {
			final PageRequest everythingById = PageRequest.of(0, Integer.MAX_VALUE, Sort.Direction.ASC, "id");
			allSubsets = subsetService.list(new SubsetFilter(), everythingById).getContent();
		} catch (SearchException e) {
			LOG.error("Error occurred during search", e);
			// Fall back to an empty sitemap
			allSubsets = Collections.emptyList();
		}

		for (final Subset entry : allSubsets) {
			writePage(writer, response, "/subsets/" + entry.getUuid(), Frequency.YEARLY, 0.5, entry.getLastModifiedDate());
		}

		writer.append("</urlset>");
		writer.flush();
	}

	/**
	 * Writes the sitemap of dataset pages, one {@code /datasets/<uuid>} entry per
	 * dataset, ordered by id. If the lookup fails with a {@link SearchException}
	 * the error is logged and an empty urlset is written.
	 *
	 * @param writer response writer receiving the XML
	 * @param response servlet response, used for the content type and URL encoding
	 * @throws IOException if writing to the response fails
	 */
	@RequestMapping(value = "/sitemap-dataset.xml", method = RequestMethod.GET)
	public void sitemapDatasetXml(Writer writer, HttpServletResponse response) throws IOException {
		response.setContentType(MediaType.TEXT_XML_VALUE);
		writer.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
		writer.flush();
		writer.append("<urlset xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd\" xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">");

		// Starts empty and stays empty when the search fails
		List<Dataset> allDatasets = Collections.emptyList();
		try {
			allDatasets = datasetService.list(new DatasetFilter(), PageRequest.of(0, Integer.MAX_VALUE, Sort.Direction.ASC, "id")).getContent();
		} catch (SearchException e) {
			LOG.error("Error occurred during search", e);
		}

		for (final Dataset entry : allDatasets) {
			writePage(writer, response, "/datasets/" + entry.getUuid(), Frequency.YEARLY, 0.5, entry.getLastModifiedDate());
		}

		writer.append("</urlset>");
		writer.flush();
	}

	/**
	 * Writes the sitemap of descriptor list pages, one
	 * {@code /descriptorlists/<uuid>} entry per descriptor list, ordered by id. If
	 * the lookup fails with a {@link SearchException} the error is logged and an
	 * empty urlset is written.
	 *
	 * @param writer response writer receiving the XML
	 * @param response servlet response, used for the content type and URL encoding
	 * @throws IOException if writing to the response fails
	 */
	@RequestMapping(value = "/sitemap-descriptorlist.xml", method = RequestMethod.GET)
	public void sitemapDescriptorListXml(Writer writer, HttpServletResponse response) throws IOException {
		response.setContentType(MediaType.TEXT_XML_VALUE);
		writer.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
		writer.flush();
		writer.append("<urlset xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd\" xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">");

		final DescriptorListFilter noRestrictions = new DescriptorListFilter();
		List<DescriptorList> found;
		try {
			found = descriptorListService.list(noRestrictions, PageRequest.of(0, Integer.MAX_VALUE, Sort.Direction.ASC, "id")).getContent();
		} catch (SearchException e) {
			LOG.error("Error occurred during search", e);
			found = Collections.emptyList();
		}

		for (final DescriptorList entry : found) {
			final String path = "/descriptorlists/" + entry.getUuid();
			writePage(writer, response, path, Frequency.YEARLY, 0.5, entry.getLastModifiedDate());
		}

		writer.append("</urlset>");
		writer.flush();
	}

	/**
	 * Writes a sitemap index with one {@code /sitemap-<CODE>.xml} child sitemap per
	 * active institute.
	 *
	 * Institute codes are upper-cased with {@link Locale#ROOT} so the generated URLs
	 * do not depend on the JVM default locale (a Turkish locale would map "i" to
	 * dotted "İ").
	 *
	 * @param writer response writer receiving the XML
	 * @param response servlet response, used for the content type
	 * @throws IOException if writing to the response fails
	 */
	@RequestMapping(value = "/sitemap-wiews.xml", method = RequestMethod.GET)
	public void sitemapWiewsXml(Writer writer, HttpServletResponse response) throws IOException {
		response.setContentType(MediaType.TEXT_XML_VALUE);
		writer.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
		writer.flush();
		writer.append("<sitemapindex xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">");
		for (FaoInstitute institute : instituteService.listActive(PageRequest.of(0, Integer.MAX_VALUE))) {
			// Locale-independent upper-casing keeps the code pure ASCII
			final String code = institute.getCode().toUpperCase(Locale.ROOT);
			writer.append(" <sitemap>");
			writer.append("   <loc>").append(frontendUrl).append("/sitemap-").append(code).append(".xml").append("</loc>");
			writer.append(" </sitemap>");
		}
		writer.append("</sitemapindex>");
		writer.flush();
	}

	/**
	 * Writes the sitemap of one institute: its {@code /wiews/<code>} page followed
	 * by one entry per accession held by the institute. Accessions with a DOI are
	 * linked as {@code /<doi>}, all others as {@code /a/<uuid>}.
	 *
	 * Any exception raised while processing accessions is logged as a warning and
	 * ends the accession listing; the urlset is still closed afterwards.
	 *
	 * @param instCode institute code taken from the request path
	 * @param writer response writer receiving the XML
	 * @param response servlet response, used for the content type and URL encoding
	 * @throws IOException if writing to the response fails
	 * @throws NotFoundElement if no institute exists for {@code instCode}
	 */
	@RequestMapping(value = "/sitemap-{instCode}.xml", method = RequestMethod.GET)
	public void sitemapWiewAndAccessionsXml(@PathVariable(value = "instCode") String instCode, Writer writer, HttpServletResponse response) throws IOException {
		final FaoInstitute holder = instituteService.getInstitute(instCode);
		if (holder == null) {
			throw new NotFoundElement("No Institute with specified code.");
		}

		response.setContentType(MediaType.TEXT_XML_VALUE);
		writer.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
		writer.flush();
		writer.append("<urlset xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd\" xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">");

		writePage(writer, response, "/wiews/" + holder.getCode(), Frequency.YEARLY, 0.6, null);

		// Restrict the accessions to those held by this institute
		final AccessionFilter heldByInstitute = new AccessionFilter();
		heldByInstitute.holder().code(Sets.newHashSet(holder.getCode()));

		try {
			accessionProcessor.process(heldByInstitute, (batch) -> {
				for (Accession accession : batch) {
					// Accessions without a DOI are addressed by UUID
					final String path = accession.getDoi() == null ? "/a/" + accession.getUuid() : "/" + accession.getDoi();
					writePage(writer, response, path, Frequency.YEARLY, 0.2, accession.getLastModifiedDate());
				}
				writer.flush();
			});
		} catch (Exception e) {
			LOG.warn("Stopped writing accessions sitemap: {}", e.getMessage());
		}

		writer.append("</urlset>");
		writer.flush();
	}

	/**
	 * Writes the sitemap of crop pages: the {@code /c} overview followed by one
	 * {@code /c/<shortName>} entry per crop.
	 *
	 * @param writer response writer receiving the XML
	 * @param response servlet response, used for the content type and URL encoding
	 * @throws IOException if writing to the response fails
	 */
	@RequestMapping(value = "/sitemap-crop.xml", method = RequestMethod.GET)
	public void sitemapCropXml(Writer writer, HttpServletResponse response) throws IOException {
		final String xmlDeclaration = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
		final String urlsetOpen = "<urlset xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd\" xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">";

		response.setContentType(MediaType.TEXT_XML_VALUE);
		writer.append(xmlDeclaration);
		writer.flush();
		writer.append(urlsetOpen);

		// Overview page first, then each crop
		writePage(writer, response, "/c", Frequency.MONTHLY, 0.6, null);
		for (final Crop crop : cropService.listCrops()) {
			final String cropPath = "/c/" + crop.getShortName();
			writePage(writer, response, cropPath, Frequency.MONTHLY, 0.5, null);
		}

		writer.append("</urlset>");
		writer.flush();
		response.flushBuffer();
	}

	/**
	 * Writes a complete urlset document containing the given pages, in array order.
	 *
	 * @param writer response writer receiving the XML
	 * @param response servlet response, used for the content type and URL encoding
	 * @param pages entries to write
	 * @throws IOException if writing to the response fails
	 */
	private void writeSitemap(Writer writer, HttpServletResponse response, SitemapPage[] pages) throws IOException {
		final String xmlDeclaration = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
		final String urlsetOpen = "<urlset xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd\" xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">";

		response.setContentType(MediaType.TEXT_XML_VALUE);
		writer.append(xmlDeclaration);
		writer.flush();
		writer.append(urlsetOpen);

		for (int i = 0; i < pages.length; i++) {
			final SitemapPage entry = pages[i];
			writePage(writer, response, entry.url, entry.freq, entry.priority, entry.lastModified);
		}

		writer.append("</urlset>");
		writer.flush();
	}

	/**
	 * Writes one {@code <url>} element. The location is {@code frontendUrl}
	 * followed by the container-encoded {@code url}; optional child elements are
	 * written only for non-null arguments.
	 *
	 * Changes over the previous version: the location is XML entity-escaped, as
	 * the sitemap protocol requires for all data values, and the change frequency
	 * is lower-cased with {@link Locale#ROOT} so that e.g. {@code DAILY} is not
	 * rendered with a dotless "ı" under a Turkish default locale.
	 *
	 * @param writer response writer receiving the XML
	 * @param response servlet response used to encode the URL
	 * @param url site-relative URL of the page
	 * @param frequency change frequency, or {@code null} to omit {@code <changefreq>}
	 * @param priority priority, or {@code null} to omit {@code <priority>}
	 * @param lastModified last modification time, or {@code null} to omit {@code <lastmod>}
	 * @throws IOException if writing to the response fails
	 */
	private void writePage(final Writer writer, HttpServletResponse response, final String url, final Frequency frequency, final Double priority, final Instant lastModified) throws IOException {
		writer.append(" <url>");
		writer.append(" <loc>").append(escapeXml(frontendUrl + response.encodeURL(url))).append("</loc>");
		if (frequency != null) {
			writer.append(" <changefreq>").append(frequency.name().toLowerCase(Locale.ROOT)).append("</changefreq>");
		}
		if (priority != null) {
			writer.append(" <priority>").append(priority.toString()).append("</priority>");
		}
		if (lastModified != null) {
			writer.append(" <lastmod>").append(w3cDatetime.get().format(lastModified)).append("</lastmod>");
		}
		writer.append(" </url>");
	}

	/**
	 * Replaces the five XML special characters with their predefined entities.
	 *
	 * @param text text to escape, must not be {@code null}
	 * @return text that is safe to use as XML character data
	 */
	private static String escapeXml(final String text) {
		final StringBuilder escaped = new StringBuilder(text.length() + 16);
		for (int i = 0; i < text.length(); i++) {
			final char c = text.charAt(i);
			switch (c) {
			case '&':
				escaped.append("&amp;");
				break;
			case '<':
				escaped.append("&lt;");
				break;
			case '>':
				escaped.append("&gt;");
				break;
			case '"':
				escaped.append("&quot;");
				break;
			case '\'':
				escaped.append("&apos;");
				break;
			default:
				escaped.append(c);
				break;
			}
		}
		return escaped.toString();
	}
}