// ISO639VocabularyUpdater.java

/*
 * Copyright 2018 Global Crop Diversity Trust
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.genesys.server.service.worker;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.genesys.server.model.vocab.ControlledVocabulary;
import org.genesys.server.model.vocab.VocabularyTerm;
import org.genesys.server.service.PartnerService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

import java.io.IOException;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
import java.util.function.Function;
import java.util.stream.Collectors;

/**
 * Builds an in-memory ISO 639-3 {@link ControlledVocabulary} from the language
 * records supplied by {@link GeonamesISOLanguageSource}. This class only
 * assembles the vocabulary object; it does not store it anywhere.
 *
 * @author Maxym Borodenko
 */
@Component
public class ISO639VocabularyUpdater {

	/**
	 * ISO 639 representation of names for languages and language groups.
	 * https://en.wikipedia.org/wiki/ISO_639
	 */
	public static final UUID ISO639_3 = UUID.fromString("21b10067-ba15-44dd-867f-6a18a117fee8");

	/** The Constant LOG. */
	public static final Log LOG = LogFactory.getLog(ISO639VocabularyUpdater.class);

	/**
	 * Format of the generated version tag. {@link DateTimeFormatter} is immutable,
	 * so a single shared instance replaces the per-call construction.
	 */
	private static final DateTimeFormatter VERSION_TAG_FORMAT = DateTimeFormatter.ofPattern("yyyy.MM.dd");

	@Autowired
	private GeonamesISOLanguageSource isoLanguageSource;

	@Autowired
	private PartnerService partnerService;

	/**
	 * Generates a current ISO639-3 {@link ControlledVocabulary} but doesn't persist
	 * it to storage. Terms are keyed by
	 * {@link GeonamesISOLanguageSource.LanguageInfo#getCode()}.
	 *
	 * @return vocabulary of ISO639-3 3-letter language codes
	 * @throws IOException if the language data cannot be fetched
	 */
	public ControlledVocabulary getISO639Vocabulary() throws IOException {
		return createVocabulary("ISO639-3", GeonamesISOLanguageSource.LanguageInfo::getCode);
	}

	/**
	 * Creates a vocabulary with one term per distinct, non-empty code.
	 *
	 * <p>
	 * The vocabulary is owned by the primary partner, points at
	 * {@link GeonamesISOLanguageSource#GEONAMES_ISO639_URL} and is tagged with
	 * today's date formatted as {@code yyyy.MM.dd}. Languages for which
	 * {@code toTerm} yields {@code null} or an empty string are skipped. When
	 * several languages map to the same code, the first one in source order wins.
	 * The resulting terms are sorted by code.
	 * </p>
	 *
	 * @param title the vocabulary title
	 * @param toTerm extracts the term code from a language record
	 * @return the controlled vocabulary (not persisted)
	 * @throws IOException if the language data cannot be fetched
	 */
	protected ControlledVocabulary createVocabulary(final String title, final Function<GeonamesISOLanguageSource.LanguageInfo, String> toTerm) throws IOException {
		final ControlledVocabulary vocabulary = new ControlledVocabulary();
		vocabulary.setTitle(title);
		vocabulary.setUrl(GeonamesISOLanguageSource.GEONAMES_ISO639_URL);
		vocabulary.setOwner(partnerService.getPrimaryPartner());
		vocabulary.setVersionTag(VERSION_TAG_FORMAT.format(LocalDate.now()));

		final Map<String, VocabularyTerm> termsByCode = isoLanguageSource.fetchLanguageData().stream()
			.map(language -> {
				final VocabularyTerm term = new VocabularyTerm();
				term.setCode(toTerm.apply(language));
				term.setTitle(language.getLanguage());
				return term;
			})
			// drop terms without codes
			.filter(term -> term.getCode() != null && !term.getCode().isEmpty())
			// de-duplicate by code: the merge function keeps the term seen first
			.collect(Collectors.toMap(VocabularyTerm::getCode, Function.identity(), (first, duplicate) -> first, HashMap::new));

		// Collect straight into a mutable ArrayList instead of copying an intermediate list
		vocabulary.setTerms(termsByCode.values().stream()
			.sorted((left, right) -> left.getCode().compareTo(right.getCode()))
			.collect(Collectors.toCollection(ArrayList::new)));
		return vocabulary;
	}
}