// GeonamesISOLanguageSource.java
/*
* Copyright 2018 Global Crop Diversity Trust
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.genesys.server.service.worker;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
/**
 * Downloads the language code table published at {@link #GEONAMES_ISO639_URL}
 * and converts it into a list of {@link LanguageInfo} entries.
 *
 * @author Maxym Borodenko
 */
// NOTE(review): the bean name is spelled "genonames"; it is left untouched
// because other code may look the bean up by this exact name.
@Component("genonamesVocabularySource")
public class GeonamesISOLanguageSource {

    /** Location of the tab-separated language code table. */
    public static final String GEONAMES_ISO639_URL = "http://download.geonames.org/export/dump/iso-languagecodes.txt";

    /** The Constant LOG. */
    public static final Logger LOG = LoggerFactory.getLogger(GeonamesISOLanguageSource.class);

    /** Lines starting with this text are the column header and carry no data. */
    private static final String HEADER_PREFIX = "ISO 639-3";

    /** Column separator used by the source file. */
    private static final String COLUMN_SEPARATOR = "\t";

    /** Index of the column holding the ISO639-3 code. */
    private static final int CODE_COLUMN = 0;

    /** Index of the column holding the language name. */
    private static final int NAME_COLUMN = 3;

    /**
     * Retrieve data from geonames.org
     *
     * <p>
     * The header line and blank lines are skipped, duplicate entries (same code
     * and same language name) are dropped, and the result is ordered by code.
     * </p>
     *
     * @return mutable List with LanguageInfo, sorted by code
     * @throws IOException if the request fails, the server answers with a
     *         non-2xx status or without a body, or a line cannot be parsed
     */
    public List<LanguageInfo> fetchLanguageData() throws IOException {
        final HttpGet httpget = new HttpGet(GEONAMES_ISO639_URL);

        // try-with-resources guarantees the client is closed even when closing
        // the response stream itself fails.
        try (CloseableHttpClient httpclient = HttpClientBuilder.create().build()) {
            final HttpResponse response = httpclient.execute(httpget);
            LOG.debug("HTTP Response status: {}", response.getStatusLine());

            // Do not try to parse an error page as language data.
            final int status = response.getStatusLine().getStatusCode();
            if (status < 200 || status >= 300) {
                throw new IOException("Unexpected HTTP response from " + GEONAMES_ISO639_URL + ": " + response.getStatusLine());
            }

            // Get hold of the response entity
            final HttpEntity entity = response.getEntity();
            if (entity == null) {
                throw new IOException("HTTP response from " + GEONAMES_ISO639_URL + " has no content");
            }
            LOG.debug("{} {}", entity.getContentType(), entity.getContentLength());

            // LinkedHashSet drops duplicates in O(1) per line while keeping the
            // order in which entries were first seen.
            final Set<LanguageInfo> unique = new LinkedHashSet<>();

            // Decode explicitly as UTF-8 so the result does not depend on the
            // platform default charset.
            // NOTE(review): geonames documents its dump files as UTF-8 -- confirm for this file.
            try (InputStream instream = entity.getContent();
                    BufferedReader inreader = new BufferedReader(new InputStreamReader(instream, StandardCharsets.UTF_8))) {
                String line;
                while ((line = inreader.readLine()) != null) {
                    LOG.trace(line);
                    if (line.trim().isEmpty() || line.startsWith(HEADER_PREFIX)) {
                        continue;
                    }
                    unique.add(parseLine(line));
                }
            }

            final List<LanguageInfo> languages = new ArrayList<>(unique);
            languages.sort(Comparator.comparing(LanguageInfo::getCode));
            LOG.info("Returning {} languages data from geonames.org", languages.size());
            return languages;
        } catch (final ClientProtocolException e) {
            LOG.error(e.getMessage(), e);
            throw new IOException("Could not execute HTTP request: " + e.getMessage(), e);
        } catch (final RuntimeException ex) {
            // Includes IllegalArgumentException raised by parseLine for malformed lines.
            LOG.error(ex.getMessage(), ex);
            httpget.abort();
            throw new IOException(ex);
        } finally {
            LOG.info("Done fetching languages info from geonames.org");
        }
    }

    /**
     * Parses one tab-separated data line into a {@link LanguageInfo}, taking the
     * code from the first column and the language name from the fourth.
     *
     * @param line a data line (not the header line); must not be {@code null}
     * @return parsed LanguageInfo
     * @throws IllegalArgumentException if the line has fewer than four columns
     */
    public static LanguageInfo parseLine(final String line) {
        final String[] values = line.split(COLUMN_SEPARATOR);
        if (values.length <= NAME_COLUMN) {
            throw new IllegalArgumentException("Expected at least " + (NAME_COLUMN + 1)
                + " tab-separated columns but found " + values.length + " in line: " + line);
        }
        final String code = values[CODE_COLUMN];
        final String lang = values[NAME_COLUMN];
        LOG.trace("Language code={} name={}", code, lang);
        return new LanguageInfo(code, lang);
    }

    /**
     * Immutable pair of an ISO639-3 code and the matching language name. Two
     * instances are equal when both the code and the language are equal.
     */
    public static class LanguageInfo {
        private final String code;
        private final String language;

        /**
         * Instantiates a new language info.
         *
         * @param code the ISO639-3 code
         * @param lang the language
         */
        public LanguageInfo(final String code, final String lang) {
            this.code = code;
            this.language = lang;
        }

        /**
         * Gets the ISO639-3 code.
         *
         * @return the ISO639-3 code
         */
        public String getCode() {
            return code;
        }

        /**
         * Gets the language.
         *
         * @return the language
         */
        public String getLanguage() {
            return language;
        }

        /**
         * Value equality on code and language, required for duplicate detection.
         *
         * @param other the object to compare with
         * @return {@code true} if other is a LanguageInfo of the same class with
         *         equal code and language
         */
        @Override
        public boolean equals(final Object other) {
            if (this == other) {
                return true;
            }
            if (other == null || getClass() != other.getClass()) {
                return false;
            }
            final LanguageInfo that = (LanguageInfo) other;
            return Objects.equals(code, that.code) && Objects.equals(language, that.language);
        }

        @Override
        public int hashCode() {
            return Objects.hash(code, language);
        }

        @Override
        public String toString() {
            return "code=" + code + " language=" + language;
        }
    }
}