// GeonamesISOLanguageSource.java

/*
 * Copyright 2018 Global Crop Diversity Trust
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.genesys.server.service.worker;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;

/**
 * Downloads the ISO 639 language code list published by geonames.org at
 * {@link #GEONAMES_ISO639_URL} and parses it into {@link LanguageInfo} entries.
 *
 * <p>
 * NOTE(review): the Spring bean name is spelled "genonamesVocabularySource". It
 * is deliberately left untouched because other code may look the bean up by
 * that name.
 * </p>
 *
 * @author Maxym Borodenko
 */
@Component("genonamesVocabularySource")
public class GeonamesISOLanguageSource {
	/** Location of the tab-separated language code list on geonames.org. */
	public static final String GEONAMES_ISO639_URL = "http://download.geonames.org/export/dump/iso-languagecodes.txt";

	/** The Constant LOG. */
	public static final Logger LOG = LoggerFactory.getLogger(GeonamesISOLanguageSource.class);

	/** Prefix of the column-header line, which carries no language data. */
	private static final String HEADER_PREFIX = "ISO 639-3";

	/** Zero-based index of the column holding the ISO 639-3 code. */
	private static final int CODE_COLUMN = 0;

	/** Zero-based index of the column holding the language name. */
	private static final int LANGUAGE_COLUMN = 3;

	/**
	 * Retrieve data from geonames.org.
	 *
	 * <p>
	 * The header line and blank lines are skipped, duplicate entries (same code
	 * and same language name) are dropped and the result is sorted by code.
	 * </p>
	 *
	 * @return mutable list of distinct LanguageInfo entries ordered by code
	 * @throws IOException if the request fails, the server does not answer with
	 *         a 2xx status, the response has no body, or a data line cannot be
	 *         parsed
	 */
	public List<LanguageInfo> fetchLanguageData() throws IOException {
		final HttpGet httpget = new HttpGet(GEONAMES_ISO639_URL);

		// try-with-resources guarantees the client is closed on every path, even
		// when closing the response stream itself fails.
		try (CloseableHttpClient httpclient = HttpClientBuilder.create().build()) {
			final HttpResponse response = httpclient.execute(httpget);
			LOG.debug("HTTP Response status: {}", response.getStatusLine());

			// Without this check an error page would be parsed as language data.
			final int statusCode = response.getStatusLine().getStatusCode();
			if (statusCode < 200 || statusCode >= 300) {
				httpget.abort();
				throw new IOException("Unexpected HTTP response from " + GEONAMES_ISO639_URL + ": " + response.getStatusLine());
			}

			// Get hold of the response entity
			final HttpEntity entity = response.getEntity();
			if (entity == null) {
				throw new IOException("HTTP response from " + GEONAMES_ISO639_URL + " has no content");
			}
			LOG.debug("{} {}", entity.getContentType(), entity.getContentLength());

			// Decode explicitly as UTF-8 instead of the platform default charset,
			// which would corrupt non-ASCII language names on some hosts.
			try (BufferedReader inreader = new BufferedReader(new InputStreamReader(entity.getContent(), StandardCharsets.UTF_8))) {
				final List<LanguageInfo> languages = readLanguages(inreader);
				LOG.info("Returning {} languages data from geonames.org", languages.size());
				return languages;
			}

		} catch (final ClientProtocolException e) {
			LOG.error(e.getMessage(), e);
			throw new IOException("Could not execute HTTP request: " + e.getMessage(), e);
		} catch (final RuntimeException ex) {
			// Unchecked failures (e.g. a malformed data line) are reported to the
			// caller as IOException, as declared by this method.
			LOG.error(ex.getMessage(), ex);
			httpget.abort();
			throw new IOException(ex);
		} finally {
			LOG.info("Done fetching languages info from geonames.org");
		}
	}

	/**
	 * Reads all language lines from the reader, dropping the header, blank lines
	 * and duplicates.
	 *
	 * @param reader source of tab-separated language lines
	 * @return distinct entries sorted by code
	 * @throws IOException if reading from the reader fails
	 */
	private static List<LanguageInfo> readLanguages(final BufferedReader reader) throws IOException {
		// A hash-based set de-duplicates in O(1) per line and keeps file order, so
		// the stable sort below yields a deterministic result for equal codes.
		final Set<LanguageInfo> unique = new LinkedHashSet<>();

		String line;
		while ((line = reader.readLine()) != null) {
			LOG.trace("{}", line);
			if (line.trim().isEmpty() || line.startsWith(HEADER_PREFIX)) {
				continue;
			}
			unique.add(parseLine(line));
		}

		final List<LanguageInfo> sorted = new ArrayList<>(unique);
		sorted.sort(Comparator.comparing(LanguageInfo::getCode));
		return sorted;
	}

	/**
	 * Parses one tab-separated data line into a LanguageInfo. The first column is
	 * used as the code and the fourth column as the language name.
	 *
	 * @param line line
	 * @return parsed LanguageInfo
	 * @throws IllegalArgumentException if the line is {@code null} or has fewer
	 *         than four tab-separated columns
	 */
	public static LanguageInfo parseLine(final String line) {
		if (line == null) {
			throw new IllegalArgumentException("Language line must not be null");
		}
		final String[] values = line.split("\t");
		if (values.length <= LANGUAGE_COLUMN) {
			// Fail with a message that names the offending line instead of a bare
			// ArrayIndexOutOfBoundsException.
			throw new IllegalArgumentException(
				"Expected at least " + (LANGUAGE_COLUMN + 1) + " tab-separated columns but found " + values.length + " in line: " + line);
		}
		final String code = values[CODE_COLUMN];
		final String lang = values[LANGUAGE_COLUMN];
		LOG.trace("Language code={} name={}", code, lang);
		return new LanguageInfo(code, lang);
	}

	/**
	 * Immutable pair of an ISO639-3 code and the language name. Two instances are
	 * equal when both the code and the language name are equal.
	 */
	public static class LanguageInfo {
		private final String code;
		private final String language;

		/**
		 * Instantiates a new language info.
		 *
		 * @param code the ISO639-3 code
		 * @param lang the language
		 */
		public LanguageInfo(final String code, final String lang) {
			this.code = code;
			this.language = lang;
		}

		/**
		 * Gets the ISO639-3 code.
		 *
		 * @return the ISO639-3 code
		 */
		public String getCode() {
			return code;
		}

		/**
		 * Gets the language.
		 *
		 * @return the language
		 */
		public String getLanguage() {
			return language;
		}

		/**
		 * Value equality on code and language; needed so that duplicate entries
		 * can be detected by collections.
		 */
		@Override
		public boolean equals(final Object obj) {
			if (this == obj) {
				return true;
			}
			if (obj == null || getClass() != obj.getClass()) {
				return false;
			}
			final LanguageInfo other = (LanguageInfo) obj;
			return Objects.equals(code, other.code) && Objects.equals(language, other.language);
		}

		@Override
		public int hashCode() {
			return Objects.hash(code, language);
		}

		@Override
		public String toString() {
			return "code=" + code + " language=" + language;
		}
	}
}