UsdaTaxonomyUpdater.java
- /*
- * Copyright 2020 Global Crop Diversity Trust
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- package org.genesys.server.service.worker;
- import java.io.File;
- import java.io.FileInputStream;
- import java.io.IOException;
- import java.nio.file.Files;
- import java.nio.file.attribute.BasicFileAttributes;
- import java.nio.file.attribute.FileTime;
- import java.time.Instant;
- import java.time.ZoneOffset;
- import java.time.temporal.ChronoUnit;
- import java.util.ArrayList;
- import java.util.HashMap;
- import java.util.HashSet;
- import java.util.Iterator;
- import java.util.LinkedList;
- import java.util.List;
- import java.util.Map;
- import java.util.concurrent.Future;
- import java.util.stream.Collectors;
- import org.apache.commons.io.FileUtils;
- import org.genesys.server.model.genesys.QTaxonomy2;
- import org.genesys.server.model.grin.QTaxonomyCommonName;
- import org.genesys.server.model.grin.QTaxonomyFamily;
- import org.genesys.server.model.grin.QTaxonomyGenus;
- import org.genesys.server.model.grin.QTaxonomySpecies;
- import org.genesys.server.model.grin.TaxonomyCommonName;
- import org.genesys.server.model.grin.TaxonomyFamily;
- import org.genesys.server.model.grin.TaxonomyGenus;
- import org.genesys.server.model.grin.TaxonomySpecies;
- import org.genesys.server.persistence.Taxonomy2Repository;
- import org.genesys.server.persistence.grin.TaxonomyCommonNameRepository;
- import org.genesys.server.persistence.grin.TaxonomyFamilyRepository;
- import org.genesys.server.persistence.grin.TaxonomyGenusRepository;
- import org.genesys.server.persistence.grin.TaxonomySpeciesRepository;
- import org.genesys.spring.TransactionHelper;
- import org.genesys.taxonomy.download.TaxonomyDownloader;
- import org.genesys.taxonomy.gringlobal.component.CabReader;
- import org.genesys.taxonomy.gringlobal.model.CommonNameRow;
- import org.genesys.taxonomy.gringlobal.model.FamilyRow;
- import org.genesys.taxonomy.gringlobal.model.GenusRow;
- import org.genesys.taxonomy.gringlobal.model.SpeciesRow;
- import org.springframework.beans.factory.InitializingBean;
- import org.springframework.beans.factory.annotation.Autowired;
- import org.springframework.beans.factory.annotation.Value;
- import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
- import org.springframework.security.access.prepost.PreAuthorize;
- import org.springframework.stereotype.Component;
- import com.google.common.collect.Lists;
- import com.opencsv.CSVReader;
- import com.querydsl.jpa.impl.JPAQueryFactory;
- import lombok.extern.slf4j.Slf4j;
- import javax.persistence.EntityManager;
- import javax.persistence.PersistenceContext;
/**
 * The component downloads the current GRIN Taxonomy database when the local copy
 * is missing or older than five days, and refreshes the Family, Genus, Species
 * and Common Name tables in the local database. The matching is done on entity
 * IDs of USDA GRIN Taxonomy and it overrides local data; local records that no
 * longer exist in GRIN Taxonomy are deleted.
 *
 * @author Matija Obreza
 */
- @Component
- @Slf4j
- public class UsdaTaxonomyUpdater implements InitializingBean {
- @Autowired
- private TaxonomyGenusRepository taxonomyGenusRepository;
- @Autowired
- private TaxonomySpeciesRepository taxonomySpeciesRepository;
- @Autowired
- private TaxonomyCommonNameRepository taxonomyCommonNameRepository;
- @Autowired
- private TaxonomyFamilyRepository taxonomyFamilyRepository;
- @Autowired
- private Taxonomy2Repository taxonomy2Repository;
- @Autowired
- private ThreadPoolTaskExecutor taskExecutor;
- @Autowired
- private JPAQueryFactory jpaQueryFactory;
- @PersistenceContext
- private EntityManager entityManager;
- @Value("${data.dir}")
- private String rootDataDir;
- private File downloadFolder;
- @Override
- public void afterPropertiesSet() throws Exception {
- downloadFolder = new File(rootDataDir, "grin-taxonomy"); // + System.currentTimeMillis());
- log.warn("GRIN Taxonomy data folder: {}", downloadFolder.toPath().toAbsolutePath().toString());
- }
- /**
- * Update local taxonomy tables with data from GRIN Taxonomy.
- *
- * @throws Exception
- */
- @PreAuthorize("hasRole('ADMINISTRATOR')")
- // @Scheduled(initialDelayString = "P1D", fixedDelayString = "P7DT1H")
- // @SchedulerLock(name = "org.genesys.server.service.worker.UsdaTaxonomyUpdater")
- public void update() throws Exception {
- log.warn("Updating GRIN taxonomy database from folder {}", downloadFolder.getAbsolutePath());
- if (downloadDataIfNeeded(downloadFolder)) {
- updateLocalDatabase();
- log.warn("Taxonomy database updated successfully. Enjoy!");
- } else {
- log.warn("Taxonomy database is still recent. Enjoy!");
- }
- }
- /**
- * The update starts with {@link TaxonomyFamily}, {@link TaxonomyGenus} and then
- * {@link TaxonomySpecies}. The entries from source database are mapped to local
- * identifiers. No records are removed from the local database.
- * <p>
- * Note: The update may update capitalization of names.
- * </p>
- *
- * @throws Exception
- */
- private void updateLocalDatabase() throws Exception {
- // read taxonomy_family.txt
- log.info("Loading {}/taxonomy_family.txt", downloadFolder);
- {
- List<FamilyRow> ggFamilies = new ArrayList<>();
- try (CSVReader reader = CabReader.openCsvReader(new FileInputStream(new File(downloadFolder, "taxonomy_family.txt")), 0)) {
- Iterator<FamilyRow> beanReader = CabReader.beanReader(FamilyRow.class, reader).iterator();
- FamilyRow familyRow = null;
- while (beanReader.hasNext() && (familyRow = beanReader.next()) != null) {
- ggFamilies.add(familyRow);
- }
- }
- final var missingFamiliesId = new HashSet<>(jpaQueryFactory.from(QTaxonomyFamily.taxonomyFamily).select(QTaxonomyFamily.taxonomyFamily.id).fetch());
- List<Future<List<TaxonomyFamily>>> futures = Lists.partition(ggFamilies, 1000).stream().map(batch -> taskExecutor.submit(() -> {
- return updateFamily(batch);
- })).collect(Collectors.toList());
- Map<Long, Long> familyCurrentMap = new HashMap<>();
- // Wait for all tasks to complete
- futures.forEach(f -> {
- try {
- List<TaxonomyFamily> result = f.get();
- var familyWithCurrent = result.stream()
- .filter(family -> family.getCurrentTaxonomyFamily() != null && family.getCurrentTaxonomyFamily().getId() != null).collect(Collectors.toList());
- familyWithCurrent.forEach(family -> {
- familyCurrentMap.put(family.getId(), family.getCurrentTaxonomyFamily().getId());
- family.setCurrentTaxonomyFamily(null);
- });
- TransactionHelper.executeInTransaction(false, () -> taxonomyFamilyRepository.saveAll(result));
- result.stream().map(TaxonomyFamily::getId).forEach(missingFamiliesId::remove);
- log.info("Updated {} families", result.size());
- } catch (Exception e) {
- log.error(e.getMessage(), e);
- }
- });
- if (!missingFamiliesId.isEmpty()) {
- log.warn("After refreshing grin_family {} records remained untouched: grin_family id={}", missingFamiliesId.size(), missingFamiliesId);
- try {
- taxonomyFamilyRepository.deleteAllByIdInBatch(missingFamiliesId);
- } catch (Throwable e) {
- log.warn("Could not delete untouched TaxonomyFamilies: {}", e.getMessage());
- }
- }
- var familyList = new LinkedList<>(familyCurrentMap.keySet());
- for (int i = 0; i < familyList.size(); i += 200) {
- int endIndex = Math.min(i + 200, familyList.size());
- var families = taxonomyFamilyRepository.findAllById(familyList.subList(i, endIndex));
- families.forEach(family -> family.setCurrentTaxonomyFamily(TaxonomyFamily.withId(familyCurrentMap.get(family.getId()))));
- taxonomyFamilyRepository.saveAll(families);
- }
- }
- // read taxonomy_genus.txt
- log.info("Loading taxonomy_genus.txt");
- {
- List<GenusRow> ggGens = new ArrayList<>();
- try (CSVReader reader = CabReader.openCsvReader(new FileInputStream(new File(downloadFolder, "taxonomy_genus.txt")), 0)) {
- Iterator<GenusRow> beanReader = CabReader.beanReader(GenusRow.class, reader).iterator();
- GenusRow genusRow = null;
- while (beanReader.hasNext() && (genusRow = beanReader.next()) != null) {
- ggGens.add(genusRow);
- }
- }
- final var missingGenusId = new HashSet<>(jpaQueryFactory.from(QTaxonomyGenus.taxonomyGenus).select(QTaxonomyGenus.taxonomyGenus.id).fetch());
- List<Future<List<TaxonomyGenus>>> futures = Lists.partition(ggGens, 1000).stream().map(batch -> taskExecutor.submit(() -> {
- return updateGenera(batch);
- })).collect(Collectors.toList());
- // Wait for all tasks to complete
- futures.forEach(f -> {
- try {
- List<TaxonomyGenus> result = f.get();
- TransactionHelper.executeInTransaction(false, () -> taxonomyGenusRepository.saveAll(result));
- missingGenusId.removeAll(result.stream().map(TaxonomyGenus::getId).collect(Collectors.toList()));
- log.info("Updated {} genera", result.size());
- } catch (Exception e) {
- log.error(e.getMessage(), e);
- }
- });
- if (!missingGenusId.isEmpty()) {
- log.warn("After refreshing grin_genus {} records remained untouched: grin_genus id={}", missingGenusId.size(), missingGenusId);
- try {
- taxonomyGenusRepository.deleteAllByIdInBatch(missingGenusId);
- } catch (Throwable e) {
- log.warn("Could not delete untouched TaxonomyGenus: {}", e.getMessage());
- }
- }
- }
- // read taxonomy_species.txt
- log.info("Loading taxonomy_species.txt");
- {
- List<SpeciesRow> ggSpes = new ArrayList<>();
- try (CSVReader reader = CabReader.openCsvReader(new FileInputStream(new File(downloadFolder, "taxonomy_species.txt")), 0)) {
- Iterator<SpeciesRow> beanReader = CabReader.beanReader(SpeciesRow.class, reader).iterator();
- SpeciesRow speciesRow = null;
- while (beanReader.hasNext() && (speciesRow = beanReader.next()) != null) {
- ggSpes.add(speciesRow);
- }
- }
- final var missingSpeciesId = new HashSet<>(jpaQueryFactory.from(QTaxonomySpecies.taxonomySpecies).select(QTaxonomySpecies.taxonomySpecies.id).fetch());
- List<Future<List<TaxonomySpecies>>> futures = Lists.partition(ggSpes, 1000).stream().map(batch -> taskExecutor.submit(() -> {
- return updateSpecies(batch);
- })).collect(Collectors.toList());
- // Wait for scheduled tasks to complete
- futures.forEach(f -> {
- try {
- List<TaxonomySpecies> result = f.get();
- TransactionHelper.executeInTransaction(false, () -> taxonomySpeciesRepository.saveAll(result));
- missingSpeciesId.removeAll(result.stream().map(TaxonomySpecies::getId).collect(Collectors.toList()));
- log.debug("Updated {} species", result.size());
- } catch (Exception e) {
- log.error("Execution failed {}", e.getMessage(), e);
- }
- });
- if (!missingSpeciesId.isEmpty()) {
- log.warn("After refreshing grin_species {} records remained untouched: grin_species id={}", missingSpeciesId.size(), missingSpeciesId);
- try {
- // Clear references
- TransactionHelper.executeInTransaction(false, () -> {
- var clearedCount = jpaQueryFactory.update(QTaxonomy2.taxonomy2).setNull(QTaxonomy2.taxonomy2.grinTaxonomySpecies()).where(QTaxonomy2.taxonomy2.grinTaxonomySpecies().id.in(missingSpeciesId)).execute();
- log.warn("Cleared {} Taxonomy2.grinTaxonomySpecies references", clearedCount);
- clearedCount = jpaQueryFactory.update(QTaxonomy2.taxonomy2).setNull(QTaxonomy2.taxonomy2.currentTaxonomySpecies()).where(QTaxonomy2.taxonomy2.currentTaxonomySpecies().id.in(missingSpeciesId)).execute();
- log.warn("Cleared {} Taxonomy2.currentTaxonomySpecies references", clearedCount);
- clearedCount = jpaQueryFactory.update(QTaxonomy2.taxonomy2).setNull(QTaxonomy2.taxonomy2.overrideTaxonomySpecies()).where(QTaxonomy2.taxonomy2.overrideTaxonomySpecies().id.in(missingSpeciesId)).execute();
- log.warn("Cleared {} Taxonomy2.overrideTaxonomySpecies references", clearedCount);
- // Delete obsolete GRIN Taxonomy records
- taxonomySpeciesRepository.deleteAllByIdInBatch(missingSpeciesId);
- return true;
- });
- } catch (Throwable e) {
- log.warn("Could not delete untouched TaxonomySpecies: {}", e.getMessage(), e);
- }
- }
- }
- {
- log.info("Loading taxonomy_common_name.txt");
- List<CommonNameRow> ggCommonNames = new ArrayList<>();
- try (CSVReader reader = CabReader.openCsvReader(new FileInputStream(new File(downloadFolder, "taxonomy_common_name.txt")), 0)) {
- Iterator<CommonNameRow> beanReader = CabReader.beanReader(CommonNameRow.class, reader).iterator();
- CommonNameRow commonNameRow = null;
- while (beanReader.hasNext() && (commonNameRow = beanReader.next()) != null) {
- ggCommonNames.add(commonNameRow);
- }
- }
- final var missingCommonNameId = new HashSet<>(jpaQueryFactory.from(QTaxonomyCommonName.taxonomyCommonName).select(QTaxonomyCommonName.taxonomyCommonName.id).fetch());
- List<Future<List<TaxonomyCommonName>>> futures = Lists.partition(ggCommonNames, 1000).stream().map(batch -> taskExecutor.submit(() -> {
- return updateCommonNames(batch);
- })).collect(Collectors.toList());
- // Wait for all tasks to complete
- futures.forEach(f -> {
- try {
- List<TaxonomyCommonName> result = f.get();
- TransactionHelper.executeInTransaction(false, () -> taxonomyCommonNameRepository.saveAll(result));
- missingCommonNameId.removeAll(result.stream().map(TaxonomyCommonName::getId).collect(Collectors.toList()));
- log.info("Updated {} taxonomy common names", result.size());
- } catch (Exception e) {
- log.error(e.getMessage(), e);
- }
- });
- if (!missingCommonNameId.isEmpty()) {
- log.warn("After refreshing grin_common_name {} records remained untouched: grin_common_name id={}", missingCommonNameId.size(), missingCommonNameId);
- try {
- taxonomyCommonNameRepository.deleteAllByIdInBatch(missingCommonNameId);
- } catch (Throwable e) {
- log.warn("Could not delete untouched TaxonomyCommonName: {}", e.getMessage());
- }
- }
- }
- }
- private List<TaxonomyFamily> updateFamily(List<FamilyRow> batch) {
- log.info("Processing {} families", batch.size());
- List<TaxonomyFamily> toSave = new ArrayList<>(batch.size());
- for (FamilyRow familyRow : batch) {
- TaxonomyFamily family = new TaxonomyFamily();
- family.setGrinId(familyRow.getTaxonomyFamilyId());
- family.setId(familyRow.getTaxonomyFamilyId());
- family.setFamilyName(familyRow.getFamilyName());
- family.setFamilyAuthority(familyRow.getFamilyAuthority());
- family.setSubfamilyName(familyRow.getSubfamilyName());
- family.setTribeName(familyRow.getTribeName());
- family.setSubtribeName(familyRow.getSubtribeName());
- family.setSuprafamilyRankCode(familyRow.getSuprafamilyRankCode());
- family.setSuprafamilyRankName(familyRow.getSuprafamilyRankName());
- family.setAlternateName(familyRow.getAlternateName());
- family.setFamilyTypeCode(familyRow.getFamilyTypeCode());
- family.setCurrentTaxonomyFamily(TaxonomyFamily.withId(familyRow.getCurrentTaxonomyFamilyId()));
- family.setNote(familyRow.getNote());
- // GG Audit
- family.setOwnedDate(familyRow.getOwnedDate().toInstant(ZoneOffset.UTC));
- family.setOwnedById(familyRow.getOwnedBy());
- family.setCreatedDate(familyRow.getCreatedDate().toInstant(ZoneOffset.UTC));
- family.setCreatedById(familyRow.getCreatedBy());
- if (familyRow.getModifiedDate() != null) {
- family.setModifiedDate(familyRow.getModifiedDate().toInstant(ZoneOffset.UTC));
- }
- family.setModifiedById(familyRow.getModifiedBy());
- toSave.add(family);
- }
- return toSave;
- }
- private List<TaxonomyCommonName> updateCommonNames(List<CommonNameRow> batch) {
- log.info("Processing {} common names", batch.size());
- List<TaxonomyCommonName> toSave = new ArrayList<>(batch.size());
- for (CommonNameRow commonNameRow : batch) {
- TaxonomyCommonName commonName = new TaxonomyCommonName(commonNameRow.getId());
- commonName.setTaxonomyGenus(TaxonomyGenus.withId(commonNameRow.getTaxonomyGenusId()));
- commonName.setTaxonomySpecies(TaxonomySpecies.withId(commonNameRow.getTaxonomySpeciesId()));
- commonName.setLanguageDescription(commonNameRow.getLanguageDescription());
- commonName.setAlternateTranscription(commonNameRow.getAlternateTranscription());
- commonName.setName(commonNameRow.getName());
- commonName.setSimplifiedName(commonNameRow.getSimplifiedName());
- commonName.setNote(commonNameRow.getNote());
- // commonNameRow.getCitationId();
- // GG Audit
- commonName.setOwnedDate(commonNameRow.getOwnedDate().toInstant(ZoneOffset.UTC));
- commonName.setOwnedById(commonNameRow.getOwnedBy());
- commonName.setCreatedDate(commonNameRow.getCreatedDate().toInstant(ZoneOffset.UTC));
- commonName.setCreatedById(commonNameRow.getCreatedBy());
- if (commonNameRow.getModifiedDate() != null) {
- commonName.setModifiedDate(commonNameRow.getModifiedDate().toInstant(ZoneOffset.UTC));
- }
- commonName.setModifiedById(commonNameRow.getModifiedBy());
- toSave.add(commonName);
- }
- return toSave;
- }
- private List<TaxonomySpecies> updateSpecies(List<SpeciesRow> batch) {
- log.info("Processing {} species", batch.size());
- List<TaxonomySpecies> toSave = new ArrayList<>(batch.size());
- for (SpeciesRow speciesRow : batch) {
- TaxonomySpecies species = new TaxonomySpecies(speciesRow.getSpeciesId());
- species.setCurrentTaxonomySpecies(TaxonomySpecies.withId(speciesRow.getCurrentTaxonomySpeciesId()));
- species.setTaxonomyGenus(new TaxonomyGenus(speciesRow.getGenusId()));
- species.setName(speciesRow.getName());
- species.setNameAuthority(speciesRow.getNameAuthority());
- species.setNomenNumber(speciesRow.getNomenNumber().intValue());
- species.setIsSpecificHybrid(speciesRow.getIsSpecificHybrid());
- species.setSpeciesName(speciesRow.getSpeciesName());
- species.setSpeciesAuthority(speciesRow.getSpeciesAuthority());
- species.setIsSubspecificHybrid(speciesRow.getIsSubspecificHybrid());
- species.setSubspeciesName(speciesRow.getSubspeciesName());
- species.setSubspeciesAuthority(speciesRow.getSubspeciesAuthority());
- species.setIsVarietalHybrid(speciesRow.getIsVarietalHybrid());
- species.setVarietyName(speciesRow.getVarietyName());
- species.setVarietyAuthority(speciesRow.getVarietyAuthority());
- species.setIsSubvarietalHybrid(speciesRow.getIsSubvarietalHybrid());
- species.setSubvarietyName(speciesRow.getSubvarietyName());
- species.setSubvarietyAuthority(speciesRow.getSubvarietyAuthority());
- species.setIsFormaHybrid(speciesRow.getIsFormaHybrid());
- species.setFormaRankType(speciesRow.getFormaRankType());
- species.setFormaName(speciesRow.getFormaName());
- species.setFormaAuthority(speciesRow.getFormaAuthority());
- // species.setPrioritySite1(speciesRow.getPrioritySite1());
- // species.setPrioritySite2(speciesRow.getPrioritySite2());
- // species.setCurator1Id(speciesRow.getCurator1Id());
- // species.setCurator2Id(speciesRow.getCurator2Id());
- species.setRestrictionCode(speciesRow.getRestrictionCode());
- species.setLifeFormCode(speciesRow.getLifeFormCode());
- species.setCommonFertilizationCode(speciesRow.getCommonFertilizationCode());
- species.setIsNamePending(speciesRow.getIsNamePending());
- species.setSynonymCode(speciesRow.getSynonymCode());
- // species.setVerifierCooperator(speciesRow.getVerifierId());
- if (speciesRow.getNameVerifiedDate() != null) {
- species.setNameVerifiedDate(speciesRow.getNameVerifiedDate().toInstant(ZoneOffset.UTC));
- }
- species.setProtologue(speciesRow.getProtologue());
- species.setProtologueVirtualPath(speciesRow.getProtologueVirtualPath());
- species.setNote(speciesRow.getNote());
- species.setSiteNote(speciesRow.getSiteNote());
- species.setAlternateName(speciesRow.getAlternateName());
- // GG Audit
- species.setOwnedDate(speciesRow.getOwnedDate().toInstant(ZoneOffset.UTC));
- species.setOwnedById(speciesRow.getOwnedBy());
- species.setCreatedDate(speciesRow.getCreatedDate().toInstant(ZoneOffset.UTC));
- species.setCreatedById(speciesRow.getCreatedBy());
- if (speciesRow.getModifiedDate() != null) {
- species.setModifiedDate(speciesRow.getModifiedDate().toInstant(ZoneOffset.UTC));
- }
- species.setModifiedById(speciesRow.getModifiedBy());
- toSave.add(species);
- }
- return toSave;
- }
- private List<TaxonomyGenus> updateGenera(List<GenusRow> batch) {
- log.info("Processing {} genera", batch.size());
- List<TaxonomyGenus> toSave = new ArrayList<>(batch.size());
- for (GenusRow genusRow : batch) {
- TaxonomyGenus genus = new TaxonomyGenus(genusRow.getGenusId());
- genus.setCurrentTaxonomyGenus(TaxonomyGenus.withId(genusRow.getCurrentTaxonomyGenusId()));
- genus.setGenusName(genusRow.getGenusName());
- genus.setGenusAuthority(genusRow.getGenusAuthority());
- genus.setSubgenusName(genusRow.getSubgenusName());
- genus.setSectionName(genusRow.getSectionName());
- genus.setSubsectionName(genusRow.getSubsectionName());
- genus.setSeriesName(genusRow.getSeriesName());
- genus.setSubseriesName(genusRow.getSubseriesName());
- genus.setTaxonomyFamily(new TaxonomyFamily(genusRow.getTaxonomyFamilyId()));
- genus.setGenusName(genusRow.getGenusName());
- genus.setGenusAuthority(genusRow.getGenusAuthority());
- genus.setSubgenusName(genusRow.getSubgenusName());
- genus.setSectionName(genusRow.getSectionName());
- genus.setSubsectionName(genusRow.getSubsectionName());
- genus.setSeriesName(genusRow.getSeriesName());
- genus.setSubseriesName(genusRow.getSubseriesName());
- genus.setQualifyingCode(genusRow.getQualifyingCode());
- genus.setHybridCode(genusRow.getHybridCode());
- genus.setNote(genusRow.getNote());
- // GG Audit
- genus.setOwnedDate(genusRow.getOwnedDate().toInstant(ZoneOffset.UTC));
- genus.setOwnedById(genusRow.getOwnedBy());
- genus.setCreatedDate(genusRow.getCreatedDate().toInstant(ZoneOffset.UTC));
- genus.setCreatedById(genusRow.getCreatedBy());
- if (genusRow.getModifiedDate() != null) {
- genus.setModifiedDate(genusRow.getModifiedDate().toInstant(ZoneOffset.UTC));
- }
- genus.setModifiedById(genusRow.getModifiedBy());
- toSave.add(genus);
- }
- return toSave;
- }
- static boolean downloadDataIfNeeded(File folder) throws IOException {
- final File dataFolder = folder;
- if (!dataFolder.exists()) {
- log.warn("Making directory " + dataFolder.getAbsolutePath());
- if (!dataFolder.mkdirs()) {
- throw new IOException("Failed to create data folder at " + dataFolder.getAbsolutePath());
- }
- }
- // The two required files
- final File genusFile = new File(dataFolder, "taxonomy_genus.txt");
- final File speciesFile = new File(dataFolder, "taxonomy_species.txt");
- boolean needsDownload = (!genusFile.exists() || isTooOld(genusFile)) || (!speciesFile.exists() || isTooOld(speciesFile));
- if (needsDownload) {
- log.warn("Taxonomy data not provided or too old in {}, starting download", dataFolder.getAbsolutePath());
- final TaxonomyDownloader dl = new TaxonomyDownloader();
- log.warn("Downloading GRIN-Taxonomy database to {}", dataFolder.getAbsolutePath());
- final File downloadedCabFile = File.createTempFile("grin-", ".cab");
- dl.downloadCurrent(downloadedCabFile);
- TaxonomyDownloader.unpackCabinetFile(downloadedCabFile, dataFolder, false);
- if (downloadedCabFile.exists() && downloadedCabFile.canWrite()) {
- log.warn("Deleting downloaded file {}", downloadedCabFile.getAbsolutePath());
- FileUtils.forceDelete(downloadedCabFile);
- }
- return true;
- }
- return false;
- }
- private static boolean isTooOld(File theFile) {
- try {
- BasicFileAttributes attr = Files.readAttributes(theFile.toPath(), BasicFileAttributes.class);
- FileTime fileTime = attr.creationTime();
- boolean isOld = fileTime.toInstant().isBefore(Instant.now().minus(5, ChronoUnit.DAYS));
- log.warn("{} created {} is old={}", theFile.getName(), fileTime.toInstant(), isOld);
- return isOld;
- } catch (IOException e) {
- log.warn("Could not determine age: {}", e.getMessage());
- return false;
- }
- }
- }