SitemapXMLController.java
/*
* Copyright 2019 Global Crop Diversity Trust
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.genesys.server.mvc;
import java.io.IOException;
import java.io.Writer;
import java.time.Instant;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.TimeZone;
import java.util.regex.Pattern;
import javax.servlet.http.HttpServletResponse;
import org.genesys.blocks.model.filters.TemporalFilter;
import org.genesys.server.exception.NotFoundElement;
import org.genesys.server.exception.SearchException;
import org.genesys.server.model.Partner;
import org.genesys.server.model.dataset.Dataset;
import org.genesys.server.model.filters.DatasetFilter;
import org.genesys.server.model.filters.DescriptorListFilter;
import org.genesys.server.model.filters.PartnerFilter;
import org.genesys.server.model.genesys.Accession;
import org.genesys.server.model.impl.Crop;
import org.genesys.server.model.impl.FaoInstitute;
import org.genesys.server.model.impl.Subset;
import org.genesys.server.model.traits.DescriptorList;
import org.genesys.server.model.vocab.VocabularyTerm;
import org.genesys.server.service.ActivityPostService;
import org.genesys.server.service.CropService;
import org.genesys.server.service.DatasetService;
import org.genesys.server.service.DescriptorListService;
import org.genesys.server.service.GeoService;
import org.genesys.server.service.InstituteService;
import org.genesys.server.service.PartnerService;
import org.genesys.server.service.SubsetService;
import org.genesys.server.service.TaxonomyService;
import org.genesys.server.service.filter.AccessionFilter;
import org.genesys.server.service.filter.ActivityPostFilter;
import org.genesys.server.service.filter.SubsetFilter;
import org.genesys.server.service.worker.AccessionProcessor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.domain.Sort;
import org.springframework.http.MediaType;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
import com.google.common.collect.Sets;
import static org.genesys.server.service.ActivityPostTranslationService.TranslatedActivityPost;
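/**
* Serves the site's XML sitemap index and the individual sitemap documents,
* following the Sitemaps protocol: http://www.sitemaps.org/protocol.html
*
* @author Matija Obreza, matija.obreza@croptrust.org
*/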
@Controller
public class SitemapXMLController {
/** Logger for this controller. */
private static final Logger LOG = LoggerFactory.getLogger(SitemapXMLController.class);

/**
 * Per-thread formatter for {@code <lastmod>} values in the W3C datetime form
 * YYYY-MM-DDThh:mm:ssTZD (https://www.w3.org/TR/NOTE-datetime), using the UTC zone.
 */
private static ThreadLocal<DateTimeFormatter> w3cDatetime = ThreadLocal.withInitial(
    () -> DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ssXXX").withZone(TimeZone.getTimeZone("UTC").toZoneId()));
/**
 * Change-frequency hints for a sitemap entry. The constant name is written in lower case
 * as the {@code <changefreq>} value of the entry.
 *
 * <p>A nested enum is implicitly static, so no {@code static} modifier is declared.
 */
public enum Frequency {
    YEARLY,
    MONTHLY,
    WEEKLY,
    DAILY,
    HOURLY,
    ALWAYS
}
// Base URL prepended to every <loc> value; injected from the "frontend.url" property.
@Value("${frontend.url}")
private String frontendUrl;
// Supplies the genus names listed in /sitemap-genus.xml.
@Autowired
private TaxonomyService taxonomyService;
// Supplies the ISO 3166 alpha-3 vocabulary terms listed in /sitemap-iso3166.xml.
@Autowired
private GeoService geoService;
// Supplies the active institutes for /sitemap-wiews.xml and resolves a single institute by code.
@Autowired
private InstituteService instituteService;
// Supplies the crops listed in /sitemap-crop.xml.
@Autowired
private CropService cropService;
// Hands the accessions matching a filter to a callback; used for the per-institute sitemaps.
@Autowired
private AccessionProcessor accessionProcessor;
// Supplies the activity posts (news) added to /sitemap-content.xml.
@Autowired
private ActivityPostService activityPostService;
// Supplies the partners listed in /sitemap-partner.xml.
@Autowired
private PartnerService partnerService;
// Supplies the subsets listed in /sitemap-subset.xml.
@Autowired
private SubsetService subsetService;
// Supplies the descriptor lists listed in /sitemap-descriptorlist.xml.
@Autowired
private DescriptorListService descriptorListService;
// Supplies the datasets listed in /sitemap-dataset.xml.
@Autowired
private DatasetService datasetService;
// Used to derive the URL slug of a news post from its title.
@Autowired
private JspHelper jspHelper;
/**
 * A single sitemap entry: a site-relative URL plus optional change frequency, priority and
 * last-modification time. Values that were not supplied remain {@code null}.
 */
private static class SitemapPage {
    String url;
    Frequency freq;
    Double priority;
    Instant lastModified;

    /** Creates an entry that carries only a URL. */
    public SitemapPage(String url) {
        this.url = url;
    }

    /** Creates an entry with a URL and a priority. */
    public SitemapPage(String url, double priority) {
        this(url);
        this.priority = priority;
    }

    /** Creates an entry with a URL, a change frequency and a priority. */
    public SitemapPage(String url, Frequency changeFrequency, double priority) {
        this(url, priority);
        this.freq = changeFrequency;
    }

    /** Creates an entry with a URL and a last-modification time. */
    public SitemapPage(String url, Instant lastModified) {
        this(url);
        this.lastModified = lastModified;
    }
}
/** Child sitemap documents listed by the /sitemap.xml index, in output order. */
private static final SitemapPage[] sitemaps = {
    new SitemapPage("/sitemap-content.xml"),
    new SitemapPage("/sitemap-iso3166.xml"),
    new SitemapPage("/sitemap-wiews.xml"),
    new SitemapPage("/sitemap-crop.xml"),
    new SitemapPage("/sitemap-partner.xml"),
    new SitemapPage("/sitemap-subset.xml"),
    new SitemapPage("/sitemap-dataset.xml"),
    new SitemapPage("/sitemap-descriptorlist.xml"),
    new SitemapPage("/sitemap-genus.xml")
};
/** Fixed content pages that are always included in /sitemap-content.xml. */
private final SitemapPage[] sitemapContentPages = {
    new SitemapPage("/", Frequency.WEEKLY, 1.0),
    new SitemapPage("/content/news", Frequency.WEEKLY, 0.8),
    new SitemapPage("/content/about/about", Frequency.YEARLY, 0.7),
    new SitemapPage("/content/legal/terms", Frequency.YEARLY, 0.2),
    new SitemapPage("/content/legal/disclaimer", Frequency.YEARLY, 0.2),
    new SitemapPage("/documentation/apis", Frequency.YEARLY, 0.1),
    new SitemapPage("/documentation/brapi", Frequency.YEARLY, 0.1)
};
/**
 * Writes the sitemap index: one {@code <sitemap>} element per child sitemap document.
 *
 * @param writer the response writer receiving the XML
 * @param response used to set the content type and to encode each child URL
 * @throws IOException if writing to the response fails
 */
@RequestMapping(value = "/sitemap.xml", method = RequestMethod.GET)
public void sitemapsXml(Writer writer, HttpServletResponse response) throws IOException {
    response.setContentType(MediaType.TEXT_XML_VALUE);
    writer.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
    writer.flush();
    writer.append("<sitemapindex xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">");
    for (int i = 0; i < sitemaps.length; i++) {
        final SitemapPage child = sitemaps[i];
        writer.append(" <sitemap>");
        writer.append(" <loc>");
        writer.append(frontendUrl);
        writer.append(response.encodeURL(child.url));
        writer.append("</loc>");
        writer.append(" </sitemap>");
    }
    writer.append("</sitemapindex>");
}
/**
 * Writes the genus sitemap: one entry under {@code /t/} for every genus name that consists
 * of ASCII letters only. Other genus names are skipped.
 *
 * <p>A write failure now aborts the response with the {@link IOException} instead of being
 * logged once per genus while the loop keeps writing to a broken writer.
 *
 * @param writer the response writer receiving the XML
 * @param response used to set the content type and to encode each URL
 * @throws IOException if writing to the response fails
 */
@RequestMapping(value = "/sitemap-genus.xml", method = RequestMethod.GET)
public void sitemapGenusXml(Writer writer, HttpServletResponse response) throws IOException {
    response.setContentType(MediaType.TEXT_XML_VALUE);
    writer.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
    writer.flush();
    writer.append("<urlset xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd\" xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">");
    // Compiled once; String.matches would recompile the expression for every genus.
    final Pattern lettersOnly = Pattern.compile("^[a-zA-Z]+$");
    for (final String genus : taxonomyService.getAllGenera()) {
        if (!lettersOnly.matcher(genus).matches()) {
            continue;
        }
        writePage(writer, response, "/t/" + genus, Frequency.YEARLY, 0.1, null);
        // Flush per entry so the output is streamed to the client as it is produced.
        writer.flush();
    }
    writer.append("</urlset>");
    writer.flush();
}
/**
 * Writes the content sitemap: the fixed content pages followed by up to 20 news posts,
 * newest publish date first. Only posts whose publish date is before now and whose
 * expiration date is unset or after now are requested.
 *
 * @param writer the response writer receiving the XML
 * @param response used to set the content type and to encode each URL
 * @throws Exception if listing the posts or writing to the response fails
 */
@RequestMapping(value = "/sitemap-content.xml", method = RequestMethod.GET)
public void sitemapContentXml(Writer writer, HttpServletResponse response) throws Exception {
    final List<SitemapPage> pages = new ArrayList<>(sitemapContentPages.length);
    Collections.addAll(pages, sitemapContentPages);

    final Instant now = Instant.now();
    var filter = (ActivityPostFilter) new ActivityPostFilter()
        .publishDate(new TemporalFilter<Instant>().lt(now))
        .AND(
            (ActivityPostFilter) new ActivityPostFilter().NULL(Set.of("expirationDate"))
                .OR(new ActivityPostFilter().expirationDate(new TemporalFilter<Instant>().gt(now)))
        );

    final PageRequest newestTwenty = PageRequest.of(0, 20, Sort.Direction.DESC, "publishDate");
    for (TranslatedActivityPost post : activityPostService.listFiltered(filter, newestTwenty).getContent()) {
        // Use the translated title when a translation exists, the entity's own title otherwise.
        var title = post.getTranslation() == null ? post.getEntity().getTitle() : post.getTranslation().getTitle();
        String slug = jspHelper.suggestUrlForText(title);
        final SitemapPage entry = new SitemapPage("/content/news/" + post.getEntity().getId() + "/" + slug, post.getEntity().getLastModifiedDate());
        entry.priority = 0.7;
        entry.freq = Frequency.YEARLY;
        pages.add(entry);
    }

    writeSitemap(writer, response, pages.toArray(new SitemapPage[0]));
}
/**
 * Writes the country sitemap: one entry under {@code /iso3166/} per ISO 3166 alpha-3 term,
 * with the term code in upper case.
 *
 * <p>The code is upper-cased with {@link Locale#ROOT} so the result does not depend on the
 * server's default locale (a Turkish locale would otherwise map {@code i} to {@code İ}).
 *
 * @param writer the response writer receiving the XML
 * @param response used to set the content type and to encode each URL
 * @throws IOException if writing to the response fails
 */
@RequestMapping(value = "/sitemap-iso3166.xml", method = RequestMethod.GET)
public void sitemapGeoXml(Writer writer, HttpServletResponse response) throws IOException {
    response.setContentType(MediaType.TEXT_XML_VALUE);
    writer.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
    writer.flush();
    writer.append("<urlset xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd\" xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">");
    List<VocabularyTerm> terms = geoService.list3166Alpha3Terms(PageRequest.of(0, Integer.MAX_VALUE, Sort.Direction.ASC, "code3")).getContent();
    for (VocabularyTerm term : terms) {
        writePage(writer, response, "/iso3166/" + term.getCode().toUpperCase(Locale.ROOT), Frequency.YEARLY, 0.4, null);
    }
    writer.append("</urlset>");
    writer.flush();
}
/**
 * Writes the partner sitemap: the {@code /partners} overview page followed by one entry per
 * partner, addressed by UUID and carrying the partner's last-modified date.
 *
 * @param writer the response writer receiving the XML
 * @param response used to set the content type and to encode each URL
 * @throws IOException if writing to the response fails
 * @throws SearchException if listing the partners fails
 */
@RequestMapping(value = "/sitemap-partner.xml", method = RequestMethod.GET)
public void sitemapPartnerXml(Writer writer, HttpServletResponse response) throws IOException, SearchException {
    response.setContentType(MediaType.TEXT_XML_VALUE);
    writer.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
    writer.flush();
    writer.append("<urlset xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd\" xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">");

    writePage(writer, response, "/partners", Frequency.MONTHLY, 0.7, null);

    for (Partner entry : partnerService.list(new PartnerFilter(), PageRequest.of(0, Integer.MAX_VALUE, Sort.Direction.ASC, "id")).getContent()) {
        final String path = "/partners/" + entry.getUuid();
        writePage(writer, response, path, Frequency.YEARLY, 0.6, entry.getLastModifiedDate());
    }

    writer.append("</urlset>");
    writer.flush();
}
/**
 * Writes the subset sitemap: one entry per subset, addressed by UUID and carrying the
 * subset's last-modified date. If the lookup fails with a {@link SearchException} the error
 * is logged and an empty sitemap is written.
 *
 * @param writer the response writer receiving the XML
 * @param response used to set the content type and to encode each URL
 * @throws IOException if writing to the response fails
 */
@RequestMapping(value = "/sitemap-subset.xml", method = RequestMethod.GET)
public void sitemapSubsetXml(Writer writer, HttpServletResponse response) throws IOException {
    response.setContentType(MediaType.TEXT_XML_VALUE);
    writer.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
    writer.flush();
    writer.append("<urlset xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd\" xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">");

    List<Subset> found;
    try {
        found = subsetService.list(new SubsetFilter(), PageRequest.of(0, Integer.MAX_VALUE, Sort.Direction.ASC, "id")).getContent();
    } catch (SearchException e) {
        LOG.error("Error occurred during search", e);
        found = Collections.emptyList();
    }

    for (Subset item : found) {
        final String path = "/subsets/" + item.getUuid();
        writePage(writer, response, path, Frequency.YEARLY, 0.5, item.getLastModifiedDate());
    }

    writer.append("</urlset>");
    writer.flush();
}
/**
 * Writes the dataset sitemap: one entry per dataset, addressed by UUID and carrying the
 * dataset's last-modified date.
 *
 * @param writer the response writer receiving the XML
 * @param response used to set the content type and to encode each URL
 * @throws IOException if writing to the response fails
 */
@RequestMapping(value = "/sitemap-dataset.xml", method = RequestMethod.GET)
public void sitemapDatasetXml(Writer writer, HttpServletResponse response) throws IOException {
    response.setContentType(MediaType.TEXT_XML_VALUE);
    writer.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
    writer.flush();
    writer.append("<urlset xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd\" xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">");
    for (Dataset item : listAllDatasetsOrEmpty()) {
        writePage(writer, response, "/datasets/" + item.getUuid(), Frequency.YEARLY, 0.5, item.getLastModifiedDate());
    }
    writer.append("</urlset>");
    writer.flush();
}

/** Lists every dataset ordered by id; logs and returns an empty list when the search fails. */
private List<Dataset> listAllDatasetsOrEmpty() {
    try {
        return datasetService.list(new DatasetFilter(), PageRequest.of(0, Integer.MAX_VALUE, Sort.Direction.ASC, "id")).getContent();
    } catch (SearchException e) {
        LOG.error("Error occurred during search", e);
        return Collections.emptyList();
    }
}
/**
 * Writes the descriptor-list sitemap: one entry per descriptor list, addressed by UUID and
 * carrying its last-modified date. If the lookup fails with a {@link SearchException} the
 * error is logged and an empty sitemap is written.
 *
 * @param writer the response writer receiving the XML
 * @param response used to set the content type and to encode each URL
 * @throws IOException if writing to the response fails
 */
@RequestMapping(value = "/sitemap-descriptorlist.xml", method = RequestMethod.GET)
public void sitemapDescriptorListXml(Writer writer, HttpServletResponse response) throws IOException {
    response.setContentType(MediaType.TEXT_XML_VALUE);
    writer.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
    writer.flush();
    writer.append("<urlset xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd\" xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">");

    List<DescriptorList> found;
    try {
        final DescriptorListFilter everything = new DescriptorListFilter();
        final PageRequest allById = PageRequest.of(0, Integer.MAX_VALUE, Sort.Direction.ASC, "id");
        found = descriptorListService.list(everything, allById).getContent();
    } catch (SearchException e) {
        LOG.error("Error occurred during search", e);
        found = Collections.emptyList();
    }

    for (DescriptorList item : found) {
        final String path = "/descriptorlists/" + item.getUuid();
        writePage(writer, response, path, Frequency.YEARLY, 0.5, item.getLastModifiedDate());
    }

    writer.append("</urlset>");
    writer.flush();
}
/**
 * Writes a sitemap index with one {@code <sitemap>} element per active institute, each
 * pointing at {@code /sitemap-<CODE>.xml} with the institute code in upper case.
 *
 * <p>The code is upper-cased with {@link Locale#ROOT} so the generated file names do not
 * depend on the server's default locale.
 *
 * @param writer the response writer receiving the XML
 * @param response used to set the content type
 * @throws IOException if writing to the response fails
 */
@RequestMapping(value = "/sitemap-wiews.xml", method = RequestMethod.GET)
public void sitemapWiewsXml(Writer writer, HttpServletResponse response) throws IOException {
    response.setContentType(MediaType.TEXT_XML_VALUE);
    writer.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
    writer.flush();
    writer.append("<sitemapindex xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">");
    for (FaoInstitute institute : instituteService.listActive(PageRequest.of(0, Integer.MAX_VALUE))) {
        writer.append(" <sitemap>");
        writer.append(" <loc>").append(frontendUrl).append("/sitemap-").append(institute.getCode().toUpperCase(Locale.ROOT)).append(".xml").append("</loc>");
        writer.append(" </sitemap>");
    }
    writer.append("</sitemapindex>");
    writer.flush();
}
/**
 * Writes the sitemap of one institute: the institute page under {@code /wiews/} followed by
 * one entry per accession held by that institute. An accession with a DOI is addressed as
 * {@code /<doi>}, otherwise as {@code /a/<uuid>}.
 *
 * <p>If processing the accessions fails, the failure is logged with its stack trace and the
 * document is still closed, so the client receives the entries written so far.
 *
 * @param instCode the institute code taken from the request path
 * @param writer the response writer receiving the XML
 * @param response used to set the content type and to encode each URL
 * @throws IOException if writing to the response fails outside accession processing
 * @throws NotFoundElement if no institute exists for {@code instCode}
 */
@RequestMapping(value = "/sitemap-{instCode}.xml", method = RequestMethod.GET)
public void sitemapWiewAndAccessionsXml(@PathVariable(value = "instCode") String instCode, Writer writer, HttpServletResponse response) throws IOException {
    FaoInstitute faoInstitute = instituteService.getInstitute(instCode);
    if (faoInstitute == null) {
        throw new NotFoundElement("No Institute with specified code.");
    }
    response.setContentType(MediaType.TEXT_XML_VALUE);
    writer.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
    writer.flush();
    writer.append("<urlset xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd\" xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">");
    writePage(writer, response, "/wiews/" + faoInstitute.getCode(), Frequency.YEARLY, 0.6, null);

    // Restrict the accessions to those held by this institute.
    AccessionFilter instituteFilter = new AccessionFilter();
    instituteFilter.holder().code(Sets.newHashSet(faoInstitute.getCode()));
    try {
        accessionProcessor.process(instituteFilter, (accessions) -> {
            for (Accession accession : accessions) {
                final String path = accession.getDoi() != null ? "/" + accession.getDoi() : "/a/" + accession.getUuid();
                writePage(writer, response, path, Frequency.YEARLY, 0.2, accession.getLastModifiedDate());
            }
            // Flush after each batch so the output is streamed to the client.
            writer.flush();
        });
    } catch (Exception e) {
        // Best effort: keep the entries written so far, but log the full cause, not only its message.
        LOG.warn("Stopped writing accessions sitemap: {}", e.getMessage(), e);
    }
    writer.append("</urlset>");
    writer.flush();
}
/**
 * Writes the crop sitemap: the {@code /c} overview page followed by one entry per crop,
 * addressed by the crop's short name.
 *
 * @param writer the response writer receiving the XML
 * @param response used to set the content type, encode each URL and flush the buffer
 * @throws IOException if writing to the response fails
 */
@RequestMapping(value = "/sitemap-crop.xml", method = RequestMethod.GET)
public void sitemapCropXml(Writer writer, HttpServletResponse response) throws IOException {
    response.setContentType(MediaType.TEXT_XML_VALUE);
    writer.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
    writer.flush();
    writer.append("<urlset xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd\" xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">");

    writePage(writer, response, "/c", Frequency.MONTHLY, 0.6, null);
    for (Crop entry : cropService.listCrops()) {
        final String path = "/c/" + entry.getShortName();
        writePage(writer, response, path, Frequency.MONTHLY, 0.5, null);
    }

    writer.append("</urlset>");
    writer.flush();
    response.flushBuffer();
}
/**
 * Writes a complete {@code <urlset>} document containing the given pages in array order.
 *
 * @param writer the response writer receiving the XML
 * @param response used to set the content type and to encode each URL
 * @param pages the entries to write
 * @throws IOException if writing to the response fails
 */
private void writeSitemap(Writer writer, HttpServletResponse response, SitemapPage[] pages) throws IOException {
    response.setContentType(MediaType.TEXT_XML_VALUE);
    writer.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
    writer.flush();
    writer.append("<urlset xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd\" xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">");
    for (int i = 0; i < pages.length; i++) {
        final SitemapPage entry = pages[i];
        writePage(writer, response, entry.url, entry.freq, entry.priority, entry.lastModified);
    }
    writer.append("</urlset>");
    writer.flush();
}
/**
 * Writes one {@code <url>} element. The location is the frontend URL followed by the given
 * URL after it has been passed through {@code response.encodeURL}; the combined value is
 * XML entity-escaped so that characters such as {@code &} cannot break the document.
 * {@code <changefreq>}, {@code <priority>} and {@code <lastmod>} are each written only when
 * the corresponding argument is not {@code null}.
 *
 * @param writer the response writer receiving the XML
 * @param response used to encode the URL
 * @param url the site-relative URL of the page
 * @param frequency the change frequency, written in lower case; may be {@code null}
 * @param priority the priority; may be {@code null}
 * @param lastModified the last-modification time; may be {@code null}
 * @throws IOException if writing to the response fails
 */
private void writePage(final Writer writer, HttpServletResponse response, final String url, final Frequency frequency, final Double priority, final Instant lastModified) throws IOException {
    writer.append(" <url>");
    writer.append(" <loc>").append(escapeXml(frontendUrl + response.encodeURL(url))).append("</loc>");
    if (frequency != null) {
        // Locale.ROOT keeps the value stable, e.g. "daily" rather than "daıly" under a Turkish default locale.
        writer.append(" <changefreq>").append(frequency.name().toLowerCase(Locale.ROOT)).append("</changefreq>");
    }
    if (priority != null) {
        writer.append(" <priority>").append(priority.toString()).append("</priority>");
    }
    if (lastModified != null) {
        writer.append(" <lastmod>").append(w3cDatetime.get().format(lastModified)).append("</lastmod>");
    }
    writer.append(" </url>");
}

/** Replaces the five XML special characters in {@code text} with their predefined entities. */
private static String escapeXml(final String text) {
    final StringBuilder escaped = new StringBuilder(text.length() + 16);
    for (int i = 0; i < text.length(); i++) {
        final char c = text.charAt(i);
        switch (c) {
        case '&':
            escaped.append("&amp;");
            break;
        case '<':
            escaped.append("&lt;");
            break;
        case '>':
            escaped.append("&gt;");
            break;
        case '"':
            escaped.append("&quot;");
            break;
        case '\'':
            escaped.append("&apos;");
            break;
        default:
            escaped.append(c);
            break;
        }
    }
    return escaped.toString();
}
}