UsdaTaxonomyUpdater.java

  1. /*
  2.  * Copyright 2020 Global Crop Diversity Trust
  3.  *
  4.  * Licensed under the Apache License, Version 2.0 (the "License");
  5.  * you may not use this file except in compliance with the License.
  6.  * You may obtain a copy of the License at
  7.  *
  8.  *   http://www.apache.org/licenses/LICENSE-2.0
  9.  *
  10.  * Unless required by applicable law or agreed to in writing, software
  11.  * distributed under the License is distributed on an "AS IS" BASIS,
  12.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13.  * See the License for the specific language governing permissions and
  14.  * limitations under the License.
  15.  */
  16. package org.genesys.server.service.worker;

  17. import java.io.File;
  18. import java.io.FileInputStream;
  19. import java.io.IOException;
  20. import java.nio.file.Files;
  21. import java.nio.file.attribute.BasicFileAttributes;
  22. import java.nio.file.attribute.FileTime;
  23. import java.time.Instant;
  24. import java.time.ZoneOffset;
  25. import java.time.temporal.ChronoUnit;
  26. import java.util.ArrayList;
  27. import java.util.HashMap;
  28. import java.util.HashSet;
  29. import java.util.Iterator;
  30. import java.util.LinkedList;
  31. import java.util.List;
  32. import java.util.Map;
  33. import java.util.concurrent.Future;
  34. import java.util.stream.Collectors;

  35. import org.apache.commons.io.FileUtils;
  36. import org.genesys.server.model.genesys.QTaxonomy2;
  37. import org.genesys.server.model.grin.QTaxonomyCommonName;
  38. import org.genesys.server.model.grin.QTaxonomyFamily;
  39. import org.genesys.server.model.grin.QTaxonomyGenus;
  40. import org.genesys.server.model.grin.QTaxonomySpecies;
  41. import org.genesys.server.model.grin.TaxonomyCommonName;
  42. import org.genesys.server.model.grin.TaxonomyFamily;
  43. import org.genesys.server.model.grin.TaxonomyGenus;
  44. import org.genesys.server.model.grin.TaxonomySpecies;
  45. import org.genesys.server.persistence.Taxonomy2Repository;
  46. import org.genesys.server.persistence.grin.TaxonomyCommonNameRepository;
  47. import org.genesys.server.persistence.grin.TaxonomyFamilyRepository;
  48. import org.genesys.server.persistence.grin.TaxonomyGenusRepository;
  49. import org.genesys.server.persistence.grin.TaxonomySpeciesRepository;
  50. import org.genesys.spring.TransactionHelper;
  51. import org.genesys.taxonomy.download.TaxonomyDownloader;
  52. import org.genesys.taxonomy.gringlobal.component.CabReader;
  53. import org.genesys.taxonomy.gringlobal.model.CommonNameRow;
  54. import org.genesys.taxonomy.gringlobal.model.FamilyRow;
  55. import org.genesys.taxonomy.gringlobal.model.GenusRow;
  56. import org.genesys.taxonomy.gringlobal.model.SpeciesRow;
  57. import org.springframework.beans.factory.InitializingBean;
  58. import org.springframework.beans.factory.annotation.Autowired;
  59. import org.springframework.beans.factory.annotation.Value;
  60. import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
  61. import org.springframework.security.access.prepost.PreAuthorize;
  62. import org.springframework.stereotype.Component;

  63. import com.google.common.collect.Lists;
  64. import com.opencsv.CSVReader;
  65. import com.querydsl.jpa.impl.JPAQueryFactory;

  66. import lombok.extern.slf4j.Slf4j;

  67. import javax.persistence.EntityManager;
  68. import javax.persistence.PersistenceContext;

/**
 * The component downloads the current GRIN Taxonomy database when no sufficiently
 * recent local copy exists and updates the Family, Genus, Species and Common Name
 * tables in the local database. The matching is done on entity IDs of USDA GRIN
 * Taxonomy and it overrides local data; local records that are no longer present
 * in the downloaded data are deleted.
 *
 * @author Matija Obreza
 */
  77. @Component
  78. @Slf4j
  79. public class UsdaTaxonomyUpdater implements InitializingBean {

  80.     @Autowired
  81.     private TaxonomyGenusRepository taxonomyGenusRepository;
  82.     @Autowired
  83.     private TaxonomySpeciesRepository taxonomySpeciesRepository;
  84.     @Autowired
  85.     private TaxonomyCommonNameRepository taxonomyCommonNameRepository;
  86.     @Autowired
  87.     private TaxonomyFamilyRepository taxonomyFamilyRepository;
  88.     @Autowired
  89.     private Taxonomy2Repository taxonomy2Repository;

  90.     @Autowired
  91.     private ThreadPoolTaskExecutor taskExecutor;

  92.     @Autowired
  93.     private JPAQueryFactory jpaQueryFactory;

  94.     @PersistenceContext
  95.     private EntityManager entityManager;

  96.     @Value("${data.dir}")
  97.     private String rootDataDir;

  98.     private File downloadFolder;

  99.     @Override
  100.     public void afterPropertiesSet() throws Exception {
  101.         downloadFolder = new File(rootDataDir, "grin-taxonomy"); // + System.currentTimeMillis());
  102.         log.warn("GRIN Taxonomy data folder: {}", downloadFolder.toPath().toAbsolutePath().toString());
  103.     }

  104.     /**
  105.      * Update local taxonomy tables with data from GRIN Taxonomy.
  106.      *
  107.      * @throws Exception
  108.      */
  109.     @PreAuthorize("hasRole('ADMINISTRATOR')")
  110.     // @Scheduled(initialDelayString = "P1D", fixedDelayString = "P7DT1H")
  111.     // @SchedulerLock(name = "org.genesys.server.service.worker.UsdaTaxonomyUpdater")
  112.     public void update() throws Exception {
  113.         log.warn("Updating GRIN taxonomy database from folder {}", downloadFolder.getAbsolutePath());
  114.         if (downloadDataIfNeeded(downloadFolder)) {
  115.             updateLocalDatabase();
  116.             log.warn("Taxonomy database updated successfully. Enjoy!");
  117.         } else {
  118.             log.warn("Taxonomy database is still recent. Enjoy!");
  119.         }
  120.     }

  121.     /**
  122.      * The update starts with {@link TaxonomyFamily}, {@link TaxonomyGenus} and then
  123.      * {@link TaxonomySpecies}. The entries from source database are mapped to local
  124.      * identifiers. No records are removed from the local database.
  125.      * <p>
  126.      * Note: The update may update capitalization of names.
  127.      * </p>
  128.      *
  129.      * @throws Exception
  130.      */
  131.     private void updateLocalDatabase() throws Exception {
  132.         // read taxonomy_family.txt
  133.         log.info("Loading {}/taxonomy_family.txt", downloadFolder);
  134.         {
  135.             List<FamilyRow> ggFamilies = new ArrayList<>();
  136.             try (CSVReader reader = CabReader.openCsvReader(new FileInputStream(new File(downloadFolder, "taxonomy_family.txt")), 0)) {
  137.                 Iterator<FamilyRow> beanReader = CabReader.beanReader(FamilyRow.class, reader).iterator();

  138.                 FamilyRow familyRow = null;
  139.                 while (beanReader.hasNext() && (familyRow = beanReader.next()) != null) {
  140.                     ggFamilies.add(familyRow);
  141.                 }
  142.             }

  143.             final var missingFamiliesId = new HashSet<>(jpaQueryFactory.from(QTaxonomyFamily.taxonomyFamily).select(QTaxonomyFamily.taxonomyFamily.id).fetch());

  144.             List<Future<List<TaxonomyFamily>>> futures = Lists.partition(ggFamilies, 1000).stream().map(batch -> taskExecutor.submit(() -> {
  145.                 return updateFamily(batch);
  146.             })).collect(Collectors.toList());

  147.             Map<Long, Long> familyCurrentMap = new HashMap<>();

  148.             // Wait for all tasks to complete
  149.             futures.forEach(f -> {
  150.                 try {
  151.                     List<TaxonomyFamily> result = f.get();

  152.                     var familyWithCurrent = result.stream()
  153.                         .filter(family -> family.getCurrentTaxonomyFamily() != null && family.getCurrentTaxonomyFamily().getId() != null).collect(Collectors.toList());

  154.                     familyWithCurrent.forEach(family -> {
  155.                         familyCurrentMap.put(family.getId(), family.getCurrentTaxonomyFamily().getId());
  156.                         family.setCurrentTaxonomyFamily(null);
  157.                     });

  158.                     TransactionHelper.executeInTransaction(false, () -> taxonomyFamilyRepository.saveAll(result));

  159.                     result.stream().map(TaxonomyFamily::getId).forEach(missingFamiliesId::remove);
  160.                     log.info("Updated {} families", result.size());
  161.                 } catch (Exception e) {
  162.                     log.error(e.getMessage(), e);
  163.                 }
  164.             });

  165.             if (!missingFamiliesId.isEmpty()) {
  166.                 log.warn("After refreshing grin_family {} records remained untouched: grin_family id={}", missingFamiliesId.size(), missingFamiliesId);
  167.                 try {
  168.                     taxonomyFamilyRepository.deleteAllByIdInBatch(missingFamiliesId);
  169.                 } catch (Throwable e) {
  170.                     log.warn("Could not delete untouched TaxonomyFamilies: {}", e.getMessage());
  171.                 }
  172.             }

  173.             var familyList = new LinkedList<>(familyCurrentMap.keySet());
  174.             for (int i = 0; i < familyList.size(); i += 200) {
  175.                 int endIndex = Math.min(i + 200, familyList.size());
  176.                 var families = taxonomyFamilyRepository.findAllById(familyList.subList(i, endIndex));
  177.                 families.forEach(family -> family.setCurrentTaxonomyFamily(TaxonomyFamily.withId(familyCurrentMap.get(family.getId()))));
  178.                 taxonomyFamilyRepository.saveAll(families);
  179.             }
  180.         }

  181.         // read taxonomy_genus.txt
  182.         log.info("Loading taxonomy_genus.txt");
  183.         {
  184.             List<GenusRow> ggGens = new ArrayList<>();
  185.             try (CSVReader reader = CabReader.openCsvReader(new FileInputStream(new File(downloadFolder, "taxonomy_genus.txt")), 0)) {
  186.                 Iterator<GenusRow> beanReader = CabReader.beanReader(GenusRow.class, reader).iterator();

  187.                 GenusRow genusRow = null;
  188.                 while (beanReader.hasNext() && (genusRow = beanReader.next()) != null) {
  189.                     ggGens.add(genusRow);
  190.                 }
  191.             }

  192.             final var missingGenusId = new HashSet<>(jpaQueryFactory.from(QTaxonomyGenus.taxonomyGenus).select(QTaxonomyGenus.taxonomyGenus.id).fetch());

  193.             List<Future<List<TaxonomyGenus>>> futures = Lists.partition(ggGens, 1000).stream().map(batch -> taskExecutor.submit(() -> {
  194.                 return updateGenera(batch);
  195.             })).collect(Collectors.toList());

  196.             // Wait for all tasks to complete
  197.             futures.forEach(f -> {
  198.                 try {
  199.                     List<TaxonomyGenus> result = f.get();
  200.                     TransactionHelper.executeInTransaction(false, () -> taxonomyGenusRepository.saveAll(result));

  201.                     missingGenusId.removeAll(result.stream().map(TaxonomyGenus::getId).collect(Collectors.toList()));
  202.                     log.info("Updated {} genera", result.size());
  203.                 } catch (Exception e) {
  204.                     log.error(e.getMessage(), e);
  205.                 }
  206.             });

  207.             if (!missingGenusId.isEmpty()) {
  208.                 log.warn("After refreshing grin_genus {} records remained untouched: grin_genus id={}", missingGenusId.size(), missingGenusId);
  209.                 try {
  210.                     taxonomyGenusRepository.deleteAllByIdInBatch(missingGenusId);
  211.                 } catch (Throwable e) {
  212.                     log.warn("Could not delete untouched TaxonomyGenus: {}", e.getMessage());
  213.                 }
  214.             }
  215.         }

  216.         // read taxonomy_species.txt
  217.         log.info("Loading taxonomy_species.txt");
  218.         {
  219.             List<SpeciesRow> ggSpes = new ArrayList<>();
  220.             try (CSVReader reader = CabReader.openCsvReader(new FileInputStream(new File(downloadFolder, "taxonomy_species.txt")), 0)) {
  221.                 Iterator<SpeciesRow> beanReader = CabReader.beanReader(SpeciesRow.class, reader).iterator();

  222.                 SpeciesRow speciesRow = null;
  223.                 while (beanReader.hasNext() && (speciesRow = beanReader.next()) != null) {
  224.                     ggSpes.add(speciesRow);
  225.                 }
  226.             }

  227.             final var missingSpeciesId = new HashSet<>(jpaQueryFactory.from(QTaxonomySpecies.taxonomySpecies).select(QTaxonomySpecies.taxonomySpecies.id).fetch());

  228.             List<Future<List<TaxonomySpecies>>> futures = Lists.partition(ggSpes, 1000).stream().map(batch -> taskExecutor.submit(() -> {
  229.                 return updateSpecies(batch);
  230.             })).collect(Collectors.toList());

  231.             // Wait for scheduled tasks to complete
  232.             futures.forEach(f -> {
  233.                 try {
  234.                     List<TaxonomySpecies> result = f.get();
  235.                     TransactionHelper.executeInTransaction(false, () -> taxonomySpeciesRepository.saveAll(result));
  236.                     missingSpeciesId.removeAll(result.stream().map(TaxonomySpecies::getId).collect(Collectors.toList()));
  237.                     log.debug("Updated {} species", result.size());
  238.                 } catch (Exception e) {
  239.                     log.error("Execution failed {}", e.getMessage(), e);
  240.                 }
  241.             });

  242.             if (!missingSpeciesId.isEmpty()) {
  243.                 log.warn("After refreshing grin_species {} records remained untouched: grin_species id={}", missingSpeciesId.size(), missingSpeciesId);
  244.                 try {
  245.                     // Clear references
  246.                     TransactionHelper.executeInTransaction(false, () -> {
  247.                         var clearedCount = jpaQueryFactory.update(QTaxonomy2.taxonomy2).setNull(QTaxonomy2.taxonomy2.grinTaxonomySpecies()).where(QTaxonomy2.taxonomy2.grinTaxonomySpecies().id.in(missingSpeciesId)).execute();
  248.                         log.warn("Cleared {} Taxonomy2.grinTaxonomySpecies references", clearedCount);
  249.                         clearedCount = jpaQueryFactory.update(QTaxonomy2.taxonomy2).setNull(QTaxonomy2.taxonomy2.currentTaxonomySpecies()).where(QTaxonomy2.taxonomy2.currentTaxonomySpecies().id.in(missingSpeciesId)).execute();
  250.                         log.warn("Cleared {} Taxonomy2.currentTaxonomySpecies references", clearedCount);
  251.                         clearedCount = jpaQueryFactory.update(QTaxonomy2.taxonomy2).setNull(QTaxonomy2.taxonomy2.overrideTaxonomySpecies()).where(QTaxonomy2.taxonomy2.overrideTaxonomySpecies().id.in(missingSpeciesId)).execute();
  252.                         log.warn("Cleared {} Taxonomy2.overrideTaxonomySpecies references", clearedCount);

  253.                         // Delete obsolete GRIN Taxonomy records
  254.                         taxonomySpeciesRepository.deleteAllByIdInBatch(missingSpeciesId);
  255.                         return true;
  256.                     });
  257.                 } catch (Throwable e) {
  258.                     log.warn("Could not delete untouched TaxonomySpecies: {}", e.getMessage(), e);
  259.                 }
  260.             }
  261.         }

  262.         {
  263.             log.info("Loading taxonomy_common_name.txt");

  264.             List<CommonNameRow> ggCommonNames = new ArrayList<>();
  265.             try (CSVReader reader = CabReader.openCsvReader(new FileInputStream(new File(downloadFolder, "taxonomy_common_name.txt")), 0)) {
  266.                 Iterator<CommonNameRow> beanReader = CabReader.beanReader(CommonNameRow.class, reader).iterator();

  267.                 CommonNameRow commonNameRow = null;
  268.                 while (beanReader.hasNext() && (commonNameRow = beanReader.next()) != null) {
  269.                     ggCommonNames.add(commonNameRow);
  270.                 }
  271.             }

  272.             final var missingCommonNameId = new HashSet<>(jpaQueryFactory.from(QTaxonomyCommonName.taxonomyCommonName).select(QTaxonomyCommonName.taxonomyCommonName.id).fetch());

  273.             List<Future<List<TaxonomyCommonName>>> futures = Lists.partition(ggCommonNames, 1000).stream().map(batch -> taskExecutor.submit(() -> {
  274.                 return updateCommonNames(batch);
  275.             })).collect(Collectors.toList());

  276.             // Wait for all tasks to complete
  277.             futures.forEach(f -> {
  278.                 try {
  279.                     List<TaxonomyCommonName> result = f.get();
  280.                     TransactionHelper.executeInTransaction(false, () -> taxonomyCommonNameRepository.saveAll(result));
  281.                     missingCommonNameId.removeAll(result.stream().map(TaxonomyCommonName::getId).collect(Collectors.toList()));
  282.                     log.info("Updated {} taxonomy common names", result.size());
  283.                 } catch (Exception e) {
  284.                     log.error(e.getMessage(), e);
  285.                 }
  286.             });

  287.             if (!missingCommonNameId.isEmpty()) {
  288.                 log.warn("After refreshing grin_common_name {} records remained untouched: grin_common_name id={}", missingCommonNameId.size(), missingCommonNameId);
  289.                 try {
  290.                     taxonomyCommonNameRepository.deleteAllByIdInBatch(missingCommonNameId);
  291.                 } catch (Throwable e) {
  292.                     log.warn("Could not delete untouched TaxonomyCommonName: {}", e.getMessage());
  293.                 }
  294.             }
  295.         }
  296.     }

  297.     private List<TaxonomyFamily> updateFamily(List<FamilyRow> batch) {
  298.         log.info("Processing {} families", batch.size());
  299.         List<TaxonomyFamily> toSave = new ArrayList<>(batch.size());

  300.         for (FamilyRow familyRow : batch) {
  301.             TaxonomyFamily family = new TaxonomyFamily();
  302.             family.setGrinId(familyRow.getTaxonomyFamilyId());
  303.             family.setId(familyRow.getTaxonomyFamilyId());
  304.             family.setFamilyName(familyRow.getFamilyName());
  305.             family.setFamilyAuthority(familyRow.getFamilyAuthority());
  306.             family.setSubfamilyName(familyRow.getSubfamilyName());
  307.             family.setTribeName(familyRow.getTribeName());
  308.             family.setSubtribeName(familyRow.getSubtribeName());
  309.             family.setSuprafamilyRankCode(familyRow.getSuprafamilyRankCode());
  310.             family.setSuprafamilyRankName(familyRow.getSuprafamilyRankName());
  311.             family.setAlternateName(familyRow.getAlternateName());
  312.             family.setFamilyTypeCode(familyRow.getFamilyTypeCode());
  313.             family.setCurrentTaxonomyFamily(TaxonomyFamily.withId(familyRow.getCurrentTaxonomyFamilyId()));
  314.             family.setNote(familyRow.getNote());

  315.             // GG Audit
  316.             family.setOwnedDate(familyRow.getOwnedDate().toInstant(ZoneOffset.UTC));
  317.             family.setOwnedById(familyRow.getOwnedBy());
  318.             family.setCreatedDate(familyRow.getCreatedDate().toInstant(ZoneOffset.UTC));
  319.             family.setCreatedById(familyRow.getCreatedBy());
  320.             if (familyRow.getModifiedDate() != null) {
  321.                 family.setModifiedDate(familyRow.getModifiedDate().toInstant(ZoneOffset.UTC));
  322.             }
  323.             family.setModifiedById(familyRow.getModifiedBy());

  324.             toSave.add(family);
  325.         }

  326.         return toSave;
  327.     }

  328.     private List<TaxonomyCommonName> updateCommonNames(List<CommonNameRow> batch) {

  329.         log.info("Processing {} common names", batch.size());
  330.         List<TaxonomyCommonName> toSave = new ArrayList<>(batch.size());

  331.         for (CommonNameRow commonNameRow : batch) {
  332.             TaxonomyCommonName commonName = new TaxonomyCommonName(commonNameRow.getId());
  333.             commonName.setTaxonomyGenus(TaxonomyGenus.withId(commonNameRow.getTaxonomyGenusId()));
  334.             commonName.setTaxonomySpecies(TaxonomySpecies.withId(commonNameRow.getTaxonomySpeciesId()));
  335.             commonName.setLanguageDescription(commonNameRow.getLanguageDescription());
  336.             commonName.setAlternateTranscription(commonNameRow.getAlternateTranscription());
  337.             commonName.setName(commonNameRow.getName());
  338.             commonName.setSimplifiedName(commonNameRow.getSimplifiedName());
  339.             commonName.setNote(commonNameRow.getNote());
  340.             // commonNameRow.getCitationId();

  341.             // GG Audit
  342.             commonName.setOwnedDate(commonNameRow.getOwnedDate().toInstant(ZoneOffset.UTC));
  343.             commonName.setOwnedById(commonNameRow.getOwnedBy());
  344.             commonName.setCreatedDate(commonNameRow.getCreatedDate().toInstant(ZoneOffset.UTC));
  345.             commonName.setCreatedById(commonNameRow.getCreatedBy());
  346.             if (commonNameRow.getModifiedDate() != null) {
  347.                 commonName.setModifiedDate(commonNameRow.getModifiedDate().toInstant(ZoneOffset.UTC));
  348.             }
  349.             commonName.setModifiedById(commonNameRow.getModifiedBy());

  350.             toSave.add(commonName);
  351.         }

  352.         return toSave;
  353.     }

  354.     private List<TaxonomySpecies> updateSpecies(List<SpeciesRow> batch) {
  355.         log.info("Processing {} species", batch.size());
  356.         List<TaxonomySpecies> toSave = new ArrayList<>(batch.size());

  357.         for (SpeciesRow speciesRow : batch) {
  358.             TaxonomySpecies species = new TaxonomySpecies(speciesRow.getSpeciesId());
  359.             species.setCurrentTaxonomySpecies(TaxonomySpecies.withId(speciesRow.getCurrentTaxonomySpeciesId()));
  360.             species.setTaxonomyGenus(new TaxonomyGenus(speciesRow.getGenusId()));
  361.             species.setName(speciesRow.getName());
  362.             species.setNameAuthority(speciesRow.getNameAuthority());

  363.             species.setNomenNumber(speciesRow.getNomenNumber().intValue());
  364.             species.setIsSpecificHybrid(speciesRow.getIsSpecificHybrid());
  365.             species.setSpeciesName(speciesRow.getSpeciesName());
  366.             species.setSpeciesAuthority(speciesRow.getSpeciesAuthority());
  367.             species.setIsSubspecificHybrid(speciesRow.getIsSubspecificHybrid());
  368.             species.setSubspeciesName(speciesRow.getSubspeciesName());
  369.             species.setSubspeciesAuthority(speciesRow.getSubspeciesAuthority());
  370.             species.setIsVarietalHybrid(speciesRow.getIsVarietalHybrid());
  371.             species.setVarietyName(speciesRow.getVarietyName());
  372.             species.setVarietyAuthority(speciesRow.getVarietyAuthority());
  373.             species.setIsSubvarietalHybrid(speciesRow.getIsSubvarietalHybrid());
  374.             species.setSubvarietyName(speciesRow.getSubvarietyName());
  375.             species.setSubvarietyAuthority(speciesRow.getSubvarietyAuthority());
  376.             species.setIsFormaHybrid(speciesRow.getIsFormaHybrid());
  377.             species.setFormaRankType(speciesRow.getFormaRankType());
  378.             species.setFormaName(speciesRow.getFormaName());
  379.             species.setFormaAuthority(speciesRow.getFormaAuthority());
  380.             // species.setPrioritySite1(speciesRow.getPrioritySite1());
  381.             // species.setPrioritySite2(speciesRow.getPrioritySite2());
  382.             // species.setCurator1Id(speciesRow.getCurator1Id());
  383.             // species.setCurator2Id(speciesRow.getCurator2Id());
  384.             species.setRestrictionCode(speciesRow.getRestrictionCode());
  385.             species.setLifeFormCode(speciesRow.getLifeFormCode());
  386.             species.setCommonFertilizationCode(speciesRow.getCommonFertilizationCode());
  387.             species.setIsNamePending(speciesRow.getIsNamePending());
  388.             species.setSynonymCode(speciesRow.getSynonymCode());
  389.             // species.setVerifierCooperator(speciesRow.getVerifierId());
  390.             if (speciesRow.getNameVerifiedDate() != null) {
  391.                 species.setNameVerifiedDate(speciesRow.getNameVerifiedDate().toInstant(ZoneOffset.UTC));
  392.             }

  393.             species.setProtologue(speciesRow.getProtologue());
  394.             species.setProtologueVirtualPath(speciesRow.getProtologueVirtualPath());
  395.             species.setNote(speciesRow.getNote());
  396.             species.setSiteNote(speciesRow.getSiteNote());
  397.             species.setAlternateName(speciesRow.getAlternateName());

  398.             // GG Audit
  399.             species.setOwnedDate(speciesRow.getOwnedDate().toInstant(ZoneOffset.UTC));
  400.             species.setOwnedById(speciesRow.getOwnedBy());
  401.             species.setCreatedDate(speciesRow.getCreatedDate().toInstant(ZoneOffset.UTC));
  402.             species.setCreatedById(speciesRow.getCreatedBy());
  403.             if (speciesRow.getModifiedDate() != null) {
  404.                 species.setModifiedDate(speciesRow.getModifiedDate().toInstant(ZoneOffset.UTC));
  405.             }
  406.             species.setModifiedById(speciesRow.getModifiedBy());

  407.             toSave.add(species);
  408.         }

  409.         return toSave;
  410.     }

  411.     private List<TaxonomyGenus> updateGenera(List<GenusRow> batch) {
  412.         log.info("Processing {} genera", batch.size());
  413.         List<TaxonomyGenus> toSave = new ArrayList<>(batch.size());

  414.         for (GenusRow genusRow : batch) {
  415.             TaxonomyGenus genus = new TaxonomyGenus(genusRow.getGenusId());
  416.             genus.setCurrentTaxonomyGenus(TaxonomyGenus.withId(genusRow.getCurrentTaxonomyGenusId()));

  417.             genus.setGenusName(genusRow.getGenusName());
  418.             genus.setGenusAuthority(genusRow.getGenusAuthority());
  419.             genus.setSubgenusName(genusRow.getSubgenusName());
  420.             genus.setSectionName(genusRow.getSectionName());
  421.             genus.setSubsectionName(genusRow.getSubsectionName());
  422.             genus.setSeriesName(genusRow.getSeriesName());
  423.             genus.setSubseriesName(genusRow.getSubseriesName());
  424.             genus.setTaxonomyFamily(new TaxonomyFamily(genusRow.getTaxonomyFamilyId()));

  425.             genus.setGenusName(genusRow.getGenusName());
  426.             genus.setGenusAuthority(genusRow.getGenusAuthority());
  427.             genus.setSubgenusName(genusRow.getSubgenusName());
  428.             genus.setSectionName(genusRow.getSectionName());
  429.             genus.setSubsectionName(genusRow.getSubsectionName());
  430.             genus.setSeriesName(genusRow.getSeriesName());
  431.             genus.setSubseriesName(genusRow.getSubseriesName());

  432.             genus.setQualifyingCode(genusRow.getQualifyingCode());
  433.             genus.setHybridCode(genusRow.getHybridCode());
  434.             genus.setNote(genusRow.getNote());

  435.             // GG Audit
  436.             genus.setOwnedDate(genusRow.getOwnedDate().toInstant(ZoneOffset.UTC));
  437.             genus.setOwnedById(genusRow.getOwnedBy());
  438.             genus.setCreatedDate(genusRow.getCreatedDate().toInstant(ZoneOffset.UTC));
  439.             genus.setCreatedById(genusRow.getCreatedBy());
  440.             if (genusRow.getModifiedDate() != null) {
  441.                 genus.setModifiedDate(genusRow.getModifiedDate().toInstant(ZoneOffset.UTC));
  442.             }
  443.             genus.setModifiedById(genusRow.getModifiedBy());

  444.             toSave.add(genus);
  445.         }
  446.         return toSave;
  447.     }

  448.     static boolean downloadDataIfNeeded(File folder) throws IOException {
  449.         final File dataFolder = folder;
  450.         if (!dataFolder.exists()) {
  451.             log.warn("Making directory " + dataFolder.getAbsolutePath());
  452.             if (!dataFolder.mkdirs()) {
  453.                 throw new IOException("Failed to create data folder at " + dataFolder.getAbsolutePath());
  454.             }
  455.         }

  456.         // The two required files
  457.         final File genusFile = new File(dataFolder, "taxonomy_genus.txt");
  458.         final File speciesFile = new File(dataFolder, "taxonomy_species.txt");

  459.         boolean needsDownload = (!genusFile.exists() || isTooOld(genusFile)) || (!speciesFile.exists() || isTooOld(speciesFile));

  460.         if (needsDownload) {
  461.             log.warn("Taxonomy data not provided or too old in {}, starting download", dataFolder.getAbsolutePath());
  462.             final TaxonomyDownloader dl = new TaxonomyDownloader();

  463.             log.warn("Downloading GRIN-Taxonomy database to {}", dataFolder.getAbsolutePath());
  464.             final File downloadedCabFile = File.createTempFile("grin-", ".cab");
  465.             dl.downloadCurrent(downloadedCabFile);

  466.             TaxonomyDownloader.unpackCabinetFile(downloadedCabFile, dataFolder, false);
  467.             if (downloadedCabFile.exists() && downloadedCabFile.canWrite()) {
  468.                 log.warn("Deleting downloaded file {}", downloadedCabFile.getAbsolutePath());
  469.                 FileUtils.forceDelete(downloadedCabFile);
  470.             }
  471.             return true;
  472.         }
  473.         return false;
  474.     }

  475.     private static boolean isTooOld(File theFile) {
  476.         try {
  477.             BasicFileAttributes attr = Files.readAttributes(theFile.toPath(), BasicFileAttributes.class);
  478.             FileTime fileTime = attr.creationTime();
  479.             boolean isOld = fileTime.toInstant().isBefore(Instant.now().minus(5, ChronoUnit.DAYS));
  480.             log.warn("{} created {} is old={}", theFile.getName(), fileTime.toInstant(), isOld);
  481.             return isOld;
  482.         } catch (IOException e) {
  483.             log.warn("Could not determine age: {}", e.getMessage());
  484.             return false;
  485.         }
  486.     }
  487. }