ElasticsearchService.java

/*
 * Copyright 2022 Global Crop Diversity Trust
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.genesys.server.service;

import java.beans.Transient;
import java.io.Serializable;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutionException;

import org.elasticsearch.index.query.QueryBuilder;
import org.genesys.blocks.model.EmptyModel;
import org.genesys.blocks.model.filters.EmptyModelFilter;
import org.genesys.server.service.AccessionService.IBatchAction;
import org.genesys.server.exception.SearchException;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.Pageable;

import com.fasterxml.jackson.annotation.JsonInclude;
import com.querydsl.core.types.Predicate;
import com.querydsl.core.types.dsl.SetPath;

/**
 * Service interface for indexing entities in Elasticsearch and for searching,
 * counting and aggregating the indexed documents.
 */
public interface ElasticsearchService {
	
	/**
	 * Makes sure indices are ready and up-to-date on startup.
	 *
	 * @param <R> the entity type, a subtype of {@link EmptyModel}
	 * @param clazz the model class
	 */
	<R extends EmptyModel> void indexEntity(Class<R> clazz);

	/**
	 * Index entity using a custom batch size.
	 *
	 * @param <R> the entity type, a subtype of {@link EmptyModel}
	 * @param clazz the model class
	 * @param reindexBatchSize custom batch size
	 */
	<R extends EmptyModel> void indexEntity(Class<R> clazz, int reindexBatchSize);

	/**
	 * Reindex the specified class.
	 * NOTE(review): presumably reindexes all records of the class; confirm against the implementation.
	 *
	 * @param <R> the type of the class to reindex
	 * @param clazz the class to reindex
	 */
	<R> void reindex(Class<R> clazz);

	/**
	 * Update the records of the specified class that have the given IDs.
	 * NOTE(review): how this differs from {@link #asyncUpdate(Class, Collection)} is not
	 * visible in this interface; confirm against the implementation.
	 *
	 * @param <R> the type of the class
	 * @param clazz the class of the records to update
	 * @param ids the IDs of the records to update
	 */
	<R> void update(Class<R> clazz, Collection<Long> ids);

	/**
	 * Async update.
	 * NOTE(review): presumably schedules the update of the given IDs instead of performing it
	 * in the calling thread; confirm against the implementation.
	 *
	 * @param <R> the type of the class
	 * @param clazz the class of the records to update
	 * @param bucket the collection of record IDs
	 */
	<R> void asyncUpdate(Class<R> clazz, Collection<Long> bucket);

	/**
	 * Search single entity.
	 *
	 * @param <T> the entity type of the results
	 * @param shouldMatch any additional filters that should match
	 * @param searchQuery the search query
	 * @param clazz the entity class to search
	 * @return the list of results of type {@code T}
	 */
	<T extends EmptyModel> List<T> search(QueryBuilder shouldMatch, String searchQuery, Class<T> clazz);

	/**
	 * Search using query for selected entity types.
	 *
	 * @param <T> type parameter that is not referenced by the parameters or the return type
	 * @param shouldMatch Any additional filters that should match
	 * @param searchQuery the search query
	 * @param clazzes the entity classes to search
	 * @return the map from entity class to the list of results
	 */
	<T extends EmptyModel> Map<Class<? extends EmptyModel>, List<? extends EmptyModel>> search(QueryBuilder shouldMatch, String searchQuery, Set<Class<? extends EmptyModel>> clazzes);

	/**
	 * Term statistics auto.
	 *
	 * @param clazz the entity class
	 * @param filters the filters
	 * @param size NOTE(review): presumably the maximum number of terms to return; confirm
	 * @param term the term
	 * @return the term result
	 * @throws SearchException the search exception
	 */
	TermResult termStatisticsAuto(Class<? extends EmptyModel> clazz, EmptyModelFilter<?, ?> filters, int size, String term) throws SearchException;

	/**
	 * Term statistics auto for several terms.
	 *
	 * @param clazz the entity class
	 * @param filters the filters
	 * @param size NOTE(review): presumably the maximum number of terms to return; confirm
	 * @param terms the terms
	 * @return the term results in a map with {@code String} keys (presumably the term; confirm)
	 * @throws SearchException the search exception
	 */
	Map<String, TermResult> termStatisticsAuto(Class<? extends EmptyModel> clazz, EmptyModelFilter<?, ?> filters, int size, String... terms) throws SearchException;

	/**
	 * Tree node statistics for the specified terms.
	 *
	 * @param clazz the entity class
	 * @param filters the filters
	 * @param terms the terms
	 * @return the resulting {@link TreeNode}
	 * @throws SearchException the search exception
	 */
	TreeNode treeNodeStatistics(Class<? extends EmptyModel> clazz, EmptyModelFilter<?, ?> filters, String[] terms) throws SearchException;

	/**
	 * Term statistics.
	 * NOTE(review): how this differs from {@code termStatisticsAuto} is not visible in this
	 * interface; confirm against the implementation.
	 *
	 * @param clazz the entity class
	 * @param filters the filters
	 * @param size NOTE(review): presumably the maximum number of terms to return; confirm
	 * @param term the term
	 * @return the term result
	 * @throws SearchException the search exception
	 */
	TermResult termStatistics(Class<? extends EmptyModel> clazz, EmptyModelFilter<?, ?> filters, int size, String term) throws SearchException;

	/**
	 * Term statistics for several terms.
	 *
	 * @param clazz the entity class
	 * @param filters the filters
	 * @param size NOTE(review): presumably the maximum number of terms to return; confirm
	 * @param terms the terms
	 * @return the term results in a map with {@code String} keys (presumably the term; confirm)
	 * @throws SearchException the search exception
	 */
	Map<String, TermResult> termStatistics(Class<? extends EmptyModel> clazz, EmptyModelFilter<?, ?> filters, int size, String... terms) throws SearchException;

	/**
	 * Realias.
	 * NOTE(review): presumably re-points {@code aliasName} to {@code indexName}; confirm
	 * against the implementation.
	 *
	 * @param aliasName the alias name
	 * @param indexName the index name
	 */
	void realias(String aliasName, String indexName);

	/**
	 * Add alias.
	 *
	 * @param aliasName the alias name
	 * @param indexName the index name
	 */
	void addAlias(String aliasName, String indexName);

	/**
	 * Delete alias.
	 *
	 * @param aliasName the alias name
	 */
	void deleteAlias(String aliasName);

	/**
	 * Delete index.
	 *
	 * @param indexName the index name
	 */
	void deleteIndex(String indexName);

	/**
	 * Reindex all.
	 * NOTE(review): presumably covers every class from {@link #getIndexedEntities()}; confirm.
	 */
	void reindexAll();

	/**
	 * Stop reindex.
	 */
	void stopReindex();

	/**
	 * Stop reindex all.
	 * NOTE(review): presumably reverted by {@link #allowReindexAll()}; confirm.
	 */
	void stopReindexAll();
	/**
	 * Allow reindex all.
	 */
	void allowReindexAll();


	/**
	 * Recount a term result.
	 * NOTE(review): the recounting strategy is not visible in this interface; confirm against
	 * the implementation.
	 *
	 * @param clazz the entity class
	 * @param setPath the Querydsl set path
	 * @param filter the filter
	 * @param toRecount the term result to recount
	 * @param termName the term name
	 * @return the term result
	 * @throws ExecutionException the execution exception
	 * @throws InterruptedException the interrupted exception
	 * @throws SearchException the search exception
	 */
	TermResult recountResult(Class<? extends EmptyModel> clazz, SetPath<?, ?> setPath, EmptyModelFilter<?, ?> filter, TermResult toRecount, String termName) throws ExecutionException, InterruptedException, SearchException;

	/**
	 * Reindex the specified class using a filter.
	 * NOTE(review): presumably only records matching the filter are reindexed; confirm.
	 *
	 * @param <T> the entity type
	 * @param clazz the entity class
	 * @param filter the filter
	 */
	<T extends EmptyModel> void reindex(Class<T> clazz, EmptyModelFilter<?, ?> filter);

	/**
	 * Gets the indexed entities.
	 *
	 * @return the list of indexed entity classes
	 */
	List<Class<?>> getIndexedEntities();

	/**
	 * Count using the specified class and filter.
	 * NOTE(review): presumably the number of indexed records matching the filter; confirm.
	 *
	 * @param clazz the entity class
	 * @param filter the filter
	 * @return the count
	 * @throws SearchException the search exception
	 */
	long count(Class<? extends EmptyModel> clazz, EmptyModelFilter<?, ?> filter) throws SearchException;

	/**
	 * Count missing values for all fields of specified class.
	 *
	 * @param clazz the index class
	 * @param filter the filter
	 * @return the map of all JSON paths with their missing value
	 * @throws SearchException the search exception
	 */
	Map<String, Long> countMissingValues(Class<? extends EmptyModel> clazz, EmptyModelFilter<?, ?> filter) throws SearchException;

	/**
	 * Aggregate by date.
	 *
	 * @param size max size of results to be returned
	 * @param targetClass the target entity class
	 * @param indexClass the index class
	 * @param aggregatedDateField the name of the date field to aggregate
	 * @param groupingByField the name of the field to group by
	 * @param filter the filter
	 * @return the result list; the layout of each {@code Object[]} is not visible in this
	 *         interface (NOTE(review): confirm against the implementation)
	 * @throws SearchException the search exception
	 */
	List<Object[]> aggregateDate(int size, Class<? extends EmptyModel> targetClass, Class<? extends EmptyModel> indexClass, String aggregatedDateField, String groupingByField, EmptyModelFilter<?, ?> filter) throws SearchException;

	/**
	 * Result of a term aggregation: the listed terms with their counts, the
	 * total, the count attributed to "other" terms and the derived count of
	 * records with a missing value.
	 */
	public static class TermResult implements Serializable {
		private static final long serialVersionUID = -6646063484562660447L;
		private final List<Term> terms;
		private final Long total;
		private final long other;
		/** {@code total - sum(term counts) - other} when positive, otherwise {@code null}. */
		private final Long missing;

		/**
		 * Creates the result and derives the missing count.
		 *
		 * @param name not used by this class; kept so existing callers keep compiling
		 * @param total the total count, may be {@code null} (no missing count is derived then)
		 * @param terms the terms with their counts, may be {@code null} (treated as no terms
		 *        when deriving the missing count)
		 * @param other the count not attributed to any of the listed terms
		 */
		public TermResult(String name, Long total, List<Term> terms, long other) {
			this.terms = terms;
			this.total = total;
			this.other = other;
			this.missing = computeMissing(total, terms, other);
		}

		/**
		 * Derives the missing count as {@code total - sum(term counts) - other}.
		 *
		 * @return the positive remainder, or {@code null} when it cannot be derived or is not positive
		 */
		private static Long computeMissing(Long total, List<Term> terms, long other) {
			if (total == null) {
				// Unboxing a null total would throw a NullPointerException
				return null;
			}
			// mapToLong on the primitive getter avoids boxing every count
			final long counted = terms == null ? 0L : terms.stream().mapToLong(Term::getCount).sum();
			final long remainder = total - counted - other;
			if (remainder <= 0) {
				// We have some terms (storage) where the total is not the total number of all
				// storage options, but count of accessions
				return null;
			}
			return remainder;
		}

		/**
		 * @return the terms as passed to the constructor
		 */
		public List<Term> getTerms() {
			return terms;
		}

		/**
		 * @return the total as passed to the constructor
		 */
		public Long getTotal() {
			return total;
		}

		/**
		 * Same value as {@link #getTotal()}; marked {@link Transient}.
		 *
		 * @return the total as passed to the constructor
		 */
		@Transient
		public Long getTotalCount() {
			return total;
		}

		/**
		 * @return the count not attributed to any of the listed terms
		 */
		public long getOther() {
			return other;
		}

		/**
		 * @return the derived missing count, or {@code null} when it is not positive or could not be derived
		 */
		public Long getMissing() {
			return missing;
		}
	}

	/**
	 * Immutable pair of a term and its count.
	 */
	public static class Term implements Serializable {
		private static final long serialVersionUID = -4161698220975370044L;

		/** The term. */
		private final String term;
		/** The count reported for the term. */
		private final long count;

		/**
		 * Creates the pair.
		 *
		 * @param term the term
		 * @param count the count for the term
		 */
		public Term(String term, long count) {
			this.count = count;
			this.term = term;
		}

		/**
		 * @return the term
		 */
		public String getTerm() {
			return this.term;
		}

		/**
		 * @return the count for the term
		 */
		public long getCount() {
			return this.count;
		}
	}

	/**
	 * Node of a statistics tree: one term value with its count and the nested
	 * child nodes. All fields are public and mutable.
	 */
	public static class TreeNode implements Serializable {
		private static final long serialVersionUID = 4878674372987777983L;

		/** ES aggregation group. */
		public String groupBy;
		/** Term value. */
		public String name;
		/** Term count. */
		public long value;
		/** Child nodes. */
		public List<TreeNode> children;
		/** Filters. */
		public Object filter;
		/** Filter code; not set by the constructor. */
		@JsonInclude(JsonInclude.Include.NON_EMPTY)
		public String filterCode;

		/**
		 * Creates a node; {@code filterCode} is left unset.
		 *
		 * @param groupBy the ES aggregation group
		 * @param name the term value
		 * @param value the term count
		 * @param children the child nodes
		 * @param filter the filters
		 */
		public TreeNode(String groupBy, String name, long value, List<TreeNode> children, Object filter) {
			this.filter = filter;
			this.children = children;
			this.value = value;
			this.name = name;
			this.groupBy = groupBy;
		}

		/**
		 * @return the term value
		 */
		public String getName() {
			return this.name;
		}

		/**
		 * @return the term count
		 */
		public long getValue() {
			return this.value;
		}

		/**
		 * @return the child nodes
		 */
		public List<TreeNode> getChildren() {
			return this.children;
		}

		/**
		 * @return the filters
		 */
		public Object getFilter() {
			return this.filter;
		}
	}

	/**
	 * Make a full-text search for top 5 hits for each of the indexed entities.
	 *
	 * @param text the search text
	 * @return the map of result lists with {@code String} keys (NOTE(review): presumably one
	 *         entry per indexed entity; confirm what the key identifies)
	 * @throws SearchException the search exception
	 */
	Map<String, List<? extends EmptyModel>> fullTextSearch(String text) throws SearchException;

	/**
	 * Find entities of the specified class using a filter.
	 *
	 * @param <T> the entity type
	 * @param clazz the entity class
	 * @param filter the filter
	 * @return the list of entities of type {@code T}
	 * @throws SearchException the search exception
	 */
	<T extends EmptyModel> List<T> find(Class<T> clazz, EmptyModelFilter<?, ?> filter) throws SearchException;

	/**
	 * Find a page of entities of the specified class using a filter.
	 *
	 * @param <T> the entity type
	 * @param clazz the entity class
	 * @param filter the filter
	 * @param page the page request
	 * @return the page of entities of type {@code T}
	 * @throws SearchException the search exception
	 */
	<T extends EmptyModel> Page<T> findAll(Class<T> clazz, EmptyModelFilter<?, ?> filter, Pageable page) throws SearchException;

	/**
	 * Find a page of entities of the specified class using a filter and a Querydsl predicate.
	 * NOTE(review): how the predicate is combined with the filter is not visible in this
	 * interface; confirm against the implementation.
	 *
	 * @param <T> the entity type
	 * @param clazz the entity class
	 * @param filter the filter
	 * @param predicate the Querydsl predicate
	 * @param page the page request
	 * @return the page of entities of type {@code T}
	 * @throws SearchException the search exception
	 */
	<T extends EmptyModel> Page<T> findAll(Class<T> clazz, EmptyModelFilter<?, ?> filter, Predicate predicate, Pageable page) throws SearchException;

	/**
	 * The usual search, but with a custom entity loader.
	 *
	 * @param <T> the entity type
	 * @param clazz the entity class
	 * @param filter the filter
	 * @param predicate the Querydsl predicate
	 * @param page the page request
	 * @param entityLoader the custom loader that loads entities by their IDs
	 * @param boostFields the boost fields (NOTE(review): presumably names of fields to boost
	 *        in the query; confirm)
	 * @return the page of entities of type {@code T}
	 * @throws SearchException the search exception
	 */
	<T extends EmptyModel> Page<T> findAll(Class<T> clazz, EmptyModelFilter<?, ?> filter, Predicate predicate, Pageable page, IEntityLoader<T> entityLoader, String... boostFields) throws SearchException;

	/**
	 * Gets the accession geo bounds for the specified filter.
	 *
	 * @param filter the filter
	 * @return the bounds as a two-dimensional array (NOTE(review): the layout of the array is
	 *         not visible in this interface; confirm against the implementation)
	 * @throws SearchException the search exception
	 */
	Number[][] getAccessionGeoBounds(EmptyModelFilter<?, ?> filter) throws SearchException;

	/**
	 * Callback that loads entities for a list of entity IDs.
	 *
	 * @param <T> the type of the loaded entities
	 */
	@FunctionalInterface
	public static interface IEntityLoader<T> {
		/**
		 * Load the entities with the specified IDs.
		 *
		 * @param entityIds the entity IDs
		 * @return the loaded entities
		 */
		List<T> loadEntities(List<Long> entityIds);
	}

	/**
	 * Wrapper for search results.
	 *
	 * @param <T> the entity type of the hits
	 */
	public static class SearchResults<T extends EmptyModel> {
		/** The filters. */
		public List<String> filters;
		/** The key; defaults to {@code "uuid"}. */
		public String key = "uuid";
		/** The hits. */
		public List<T> hits;

		/**
		 * Wraps a list of hits.
		 *
		 * @param <T> the entity type of the hits
		 * @param key the key
		 * @param filters the filters
		 * @param list the hits
		 * @return the wrapper, or {@code null} when {@code list} is {@code null} or empty
		 */
		public static <T extends EmptyModel> SearchResults<T> from(String key, List<String> filters, List<T> list) {
			final boolean hasHits = list != null && !list.isEmpty();
			if (!hasHits) {
				return null;
			}

			final SearchResults<T> results = new SearchResults<>();
			results.hits = list;
			results.key = key;
			results.filters = filters;
			return results;
		}
	}

	/**
	 * Distinct coordinates.
	 *
	 * @param filt the Querydsl predicate
	 * @param _text the text (NOTE(review): presumably a full-text query; confirm)
	 * @return the list of coordinates (NOTE(review): the layout of each {@code Double[]} is
	 *         not visible in this interface; confirm against the implementation)
	 * @throws SearchException the search exception
	 */
	List<Double[]> distinctCoordinates(Predicate filt, String _text) throws SearchException;

	/**
	 * Scroll search and process entity records in a batch action.
	 *
	 * @param <T> the entity type
	 * @param clazz the entity class
	 * @param filter the filter
	 * @param action the batch action
	 * @param maxSize the max size (NOTE(review): presumably the maximum number of records to
	 *        process; confirm)
	 * @throws Exception the exception
	 */
	<T extends EmptyModel> void process(Class<T> clazz, EmptyModelFilter<?, ?> filter, IBatchAction<T> action, Long maxSize) throws Exception;

	/**
	 * Process entity IDs in a batch action.
	 *
	 * @param <T> the entity type
	 * @param clazz the entity class
	 * @param filter the filter
	 * @param action the batch action that receives entity IDs
	 * @param page the page request
	 * @throws Exception the exception
	 * @throws SearchException the search exception
	 */
	<T extends EmptyModel> void processById(Class<T> clazz, EmptyModelFilter<?, ?> filter, IBatchAction<Long> action, Pageable page) throws Exception, SearchException;

	/**
	 * Wait until X records match specified filter in ES.
	 *
	 * @param clazz the entity class
	 * @param filter the filter
	 * @param mustHaveCount the number of records that must match
	 * @return the count (NOTE(review): presumably the number of matching records when the
	 *         wait ends; confirm against the implementation)
	 * @throws SearchException the search exception
	 */
	long waitForCount(Class<? extends EmptyModel> clazz, EmptyModelFilter<?, ?> filter, int mustHaveCount) throws SearchException;

	/**
	 * Sets the batch size for reindexing. Some entities generate super large JSON and we want to
	 * reindex those in much smaller batches.
	 *
	 * @param <R> the EmptyModel type
	 * @param model the document model
	 * @param batchSize the batch size
	 * @return the integer (NOTE(review): its meaning is not visible in this interface,
	 *         presumably the previously configured batch size; confirm)
	 */
	<R extends EmptyModel> Integer setReindexBatchSize(Class<R> model, Integer batchSize);

	/**
	 * Remove matching documents from index.
	 *
	 * @param <T> the entity type
	 * @param clazz the entity class
	 * @param filter the filter that documents must match
	 * @throws SearchException the search exception
	 */
	<T extends EmptyModel> void remove(Class<T> clazz, EmptyModelFilter<?, T> filter) throws SearchException;

	/**
	 * Delete all documents from the specified index.
	 *
	 * @param <R> the type
	 * @param clazz the class that identifies the index
	 * @throws SearchException the search exception
	 */
	<R> void removeAll(Class<R> clazz) throws SearchException;

}