Loading src/main/java/io/confluent/connect/elasticsearch/internals/HttpClient.java→src/main/java/io/confluent/connect/elasticsearch/BulkIndexingClient.java +96 −0 Original line number Diff line number Diff line Loading @@ -13,83 +13,84 @@ * License for the specific language governing permissions and limitations under * the License. **/ package io.confluent.connect.elasticsearch; package io.confluent.connect.elasticsearch.internals; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.node.ObjectNode; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Set; import io.confluent.connect.elasticsearch.bulk.BulkClient; import io.confluent.connect.elasticsearch.bulk.BulkResponse; import io.searchbox.client.JestClient; import io.searchbox.client.JestResultHandler; import io.searchbox.core.Bulk; import io.searchbox.core.BulkResult; import io.searchbox.core.Index; public class HttpClient implements Client<Response> { public class BulkIndexingClient implements BulkClient<IndexingRequest, Bulk> { private static final Logger log = LoggerFactory.getLogger(HttpClient.class); private ObjectMapper objectMapper = new ObjectMapper(); private final JestClient jestClient; private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private static final Set<String> NON_RETRIABLE_ERROR_TYPES = Collections.singleton("mapper_parse_exception"); public HttpClient(JestClient jestClient) { this.jestClient = jestClient; } private final JestClient client; @Override public void execute(RecordBatch batch, Callback<Response> callback) { Bulk bulk = constructBulk(batch, callback); jestClient.executeAsync(bulk, new CallbackHandler(callback)); public BulkIndexingClient(JestClient client) { this.client = client; } @Override public void close() { // We shutdown the JEST client when sink tasks are stopped. } private Bulk constructBulk(RecordBatch batch, Callback<Response> callback) { Bulk.Builder builder = new Bulk.Builder(); List<ESRequest> requests = batch.requests(); for (ESRequest request: requests) { JsonNode data = null; try { data = objectMapper.readTree(request.getPayload()); } catch (IOException e) { callback.onFailure(e); } Index index = new Index.Builder(data.toString()) public Bulk bulkRequest(List<IndexingRequest> batch) { final Bulk.Builder builder = new Bulk.Builder(); for (IndexingRequest request : batch) { builder.addAction( new Index.Builder(request.getPayload()) .index(request.getIndex()) .type(request.getType()) .id(request.getId()) .build(); builder.addAction(index); .build() ); } return builder.build(); } private static class CallbackHandler implements JestResultHandler<BulkResult> { @Override public BulkResponse execute(Bulk bulk) throws IOException { return toBulkResponse(client.execute(bulk)); } private Callback<Response> callback; private static BulkResponse toBulkResponse(BulkResult result) { if (result.isSucceeded()) { return BulkResponse.success(); } public CallbackHandler(Callback<Response> callback) { this.callback = callback; final List<BulkResult.BulkResultItem> failedItems = result.getFailedItems(); if (failedItems.isEmpty()) { return BulkResponse.failure(true, result.getErrorMessage()); } @Override public void completed(BulkResult result) { log.debug("Request completed with result: {}", result); callback.onResponse(new Response(result)); boolean retriable = true; final List<String> errors = new ArrayList<>(failedItems.size()); for (BulkResult.BulkResultItem failedItem : failedItems) { errors.add(failedItem.error); retriable &= isRetriableError(failedItem.error); } return BulkResponse.failure(retriable, errors.toString()); } @Override public void failed(Exception e) { log.debug("Request failed with exception: {}", e.getMessage()); callback.onFailure(e); private static boolean isRetriableError(String error) { if (error != null && !error.trim().isEmpty()) { try { final ObjectNode parsedError = (ObjectNode) OBJECT_MAPPER.readTree(error); return !NON_RETRIABLE_ERROR_TYPES.contains(parsedError.get("type").asText()); } catch (IOException e) { return true; } } return true; } } src/main/java/io/confluent/connect/elasticsearch/DataConverter.java +8 −27 Original line number Diff line number Diff line Loading @@ -33,13 +33,13 @@ import org.apache.kafka.connect.storage.Converter; import java.io.IOException; import java.math.BigDecimal; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.Map; import java.util.Set; import io.confluent.connect.elasticsearch.internals.ESRequest; import io.searchbox.client.JestClient; import static io.confluent.connect.elasticsearch.ElasticsearchSinkConnectorConstants.MAP_KEY; Loading @@ -53,14 +53,6 @@ public class DataConverter { JSON_CONVERTER.configure(Collections.singletonMap("schemas.enable", "false"), false); } /** * Convert the key to the string representation. * * @param key The key of a SinkRecord. * @param keySchema The key schema. * @return The string representation of the key. * @throws ConnectException if the key is null. */ public static String convertKey(Object key, Schema keySchema) { if (key == null) { throw new ConnectException("Key is used as document id and can not be null."); Loading @@ -86,26 +78,14 @@ public class DataConverter { } } /** * Convert a SinkRecord to an IndexRequest. * * @param record The SinkRecord to be converted. * @param client The client to connect to Elasticsearch. * @param ignoreKey Whether to ignore the key during indexing. * @param ignoreSchema Whether to ignore the schema during indexing. * @param topicConfigs The map of per topic configs. * @param mappings The mapping cache. * @return The converted IndexRequest. */ public static ESRequest convertRecord( public static IndexingRequest convertRecord( SinkRecord record, String type, JestClient client, boolean ignoreKey, boolean ignoreSchema, Map<String, TopicConfig> topicConfigs, Set<String> mappings) { Set<String> mappingCache) { String topic = record.topic(); int partition = record.kafkaPartition(); Loading Loading @@ -139,9 +119,9 @@ public class DataConverter { } try { if (!topicIgnoreSchema && !mappings.contains(index) && !Mapping.doesMappingExist(client, index, type, mappings)) { if (!topicIgnoreSchema && !mappingCache.contains(index) && !Mapping.doesMappingExist(client, index, type, mappingCache)) { Mapping.createMapping(client, index, type, valueSchema); mappings.add(index); mappingCache.add(index); } } catch (IOException e) { // TODO: It is possible that two clients are creating the mapping at the same time and Loading @@ -160,8 +140,9 @@ public class DataConverter { newValue = value; } byte[] json = JSON_CONVERTER.fromConnectData(topic, newSchema, newValue); return new ESRequest(index, type, id, json); String payload = new String(JSON_CONVERTER.fromConnectData(topic, newSchema, newValue), StandardCharsets.UTF_8); return new IndexingRequest(index, type, id, payload); } // We need to pre process the Kafka Connect schema before converting to JSON as Elasticsearch Loading src/main/java/io/confluent/connect/elasticsearch/ElasticsearchSinkConnectorConfig.java +3 −4 Original line number Diff line number Diff line Loading @@ -65,9 +65,8 @@ public class ElasticsearchSinkConnectorConfig extends AbstractConfig { public static final String MAX_BUFFERED_RECORDS_CONFIG = "max.buffered.records"; private static final String MAX_BUFFERED_RECORDS_DOC = "Approximately the max number of records each task will buffer. This config controls the memory usage for each task. When the number of " + "buffered records is larger than this value, the partitions assigned to this task will be paused."; private static final long MAX_BUFFERED_RECORDS_DEFAULT = 20000; "Approximately the max number of records each task will buffer. This config controls the memory usage for each task."; private static final int MAX_BUFFERED_RECORDS_DEFAULT = 20000; private static final String MAX_BUFFERED_RECORDS_DISPLAY = "Max Number of Records to Buffer"; public static final String BATCH_SIZE_CONFIG = "batch.size"; Loading Loading @@ -135,7 +134,7 @@ public class ElasticsearchSinkConnectorConfig extends AbstractConfig { .define(RETRY_BACKOFF_MS_CONFIG, Type.LONG, RETRY_BACKOFF_MS_DEFAULT, Importance.LOW, RETRY_BACKOFF_MS_DOC, CONNECTOR_GROUP, 11, Width.SHORT, RETRY_BACKOFF_MS_DISPLAY) .define(MAX_RETRIES_CONFIG, Type.INT, MAX_RETRIES_DEFAULT, Importance.LOW, MAX_RETRIES_DOC, CONNECTOR_GROUP, 12, Width.SHORT, MAX_RETRIES_DISPLAY) .define(FLUSH_TIMEOUT_MS_CONFIG, Type.LONG, FLUSH_TIMEOUT_MS_DEFAULT, Importance.LOW, FLUSH_TIMEOUT_MS_DOC, CONNECTOR_GROUP, 13, Width.SHORT, FLUSH_TIMEOUT_MS_DISPLAY) .define(MAX_BUFFERED_RECORDS_CONFIG, Type.LONG, MAX_BUFFERED_RECORDS_DEFAULT, Importance.LOW, MAX_BUFFERED_RECORDS_DOC, CONNECTOR_GROUP, 14, Width.SHORT, MAX_BUFFERED_RECORDS_DISPLAY); .define(MAX_BUFFERED_RECORDS_CONFIG, Type.INT, MAX_BUFFERED_RECORDS_DEFAULT, Importance.LOW, MAX_BUFFERED_RECORDS_DOC, CONNECTOR_GROUP, 14, Width.SHORT, MAX_BUFFERED_RECORDS_DISPLAY); } static ConfigDef config = baseConfigDef(); Loading src/main/java/io/confluent/connect/elasticsearch/ElasticsearchSinkTask.java +2 −2 Original line number Diff line number Diff line Loading @@ -68,7 +68,7 @@ public class ElasticsearchSinkTask extends SinkTask { Map<String, TopicConfig> topicConfigs = constructTopicConfig(topicIndex, topicIgnoreKey, topicIgnoreSchema); long flushTimeoutMs = config.getLong(ElasticsearchSinkConnectorConfig.FLUSH_TIMEOUT_MS_CONFIG); long maxBufferedRecords = config.getLong(ElasticsearchSinkConnectorConfig.MAX_BUFFERED_RECORDS_CONFIG); int maxBufferedRecords = config.getInt(ElasticsearchSinkConnectorConfig.MAX_BUFFERED_RECORDS_CONFIG); int batchSize = config.getInt(ElasticsearchSinkConnectorConfig.BATCH_SIZE_CONFIG); long lingerMs = config.getLong(ElasticsearchSinkConnectorConfig.LINGER_MS_CONFIG); int maxInFlightRequests = config.getInt(ElasticsearchSinkConnectorConfig.MAX_IN_FLIGHT_REQUESTS_CONFIG); Loading src/main/java/io/confluent/connect/elasticsearch/ElasticsearchWriter.java +34 −155 Original line number Diff line number Diff line Loading @@ -16,8 +16,8 @@ package io.confluent.connect.elasticsearch; import org.apache.kafka.common.utils.SystemTime; import org.apache.kafka.connect.errors.ConnectException; import org.apache.kafka.connect.errors.RetriableException; import org.apache.kafka.connect.sink.SinkRecord; import org.slf4j.Logger; import org.slf4j.LoggerFactory; Loading @@ -30,55 +30,27 @@ import java.util.HashSet; import java.util.Map; import java.util.Set; import io.confluent.connect.elasticsearch.internals.BulkProcessor; import io.confluent.connect.elasticsearch.internals.ESRequest; import io.confluent.connect.elasticsearch.internals.HttpClient; import io.confluent.connect.elasticsearch.internals.Listener; import io.confluent.connect.elasticsearch.internals.RecordBatch; import io.confluent.connect.elasticsearch.internals.Response; import io.confluent.connect.elasticsearch.bulk.BulkProcessor; import io.searchbox.action.Action; import io.searchbox.client.JestClient; import io.searchbox.client.JestResult; import io.searchbox.indices.CreateIndex; import io.searchbox.indices.IndicesExists; /** * The ElasticsearchWriter handles connections to Elasticsearch, sending data and flush. * Transport client is used to send requests to Elasticsearch cluster. Requests are batched * when sending to Elasticsearch. To ensure delivery guarantee and order, we retry in case of * failures for a batch. * * Currently, we only send out requests to Elasticsearch when flush is called, which is not * desirable from the latency point of view. * * TODO: Use offset as external version to fence requests with lower version. */ // TODO: Use offset as external version to fence requests with lower version. public class ElasticsearchWriter { private static final Logger log = LoggerFactory.getLogger(ElasticsearchWriter.class); private final JestClient client; private final BulkProcessor bulkProcessor; private final String type; private final boolean ignoreKey; private final boolean ignoreSchema; private final Map<String, TopicConfig> topicConfigs; private final long flushTimeoutMs; private final long maxBufferedRecords; private final Set<String> mappings; private final BulkProcessor<IndexingRequest, ?> bulkProcessor; /** * ElasticsearchWriter constructor * @param client The client to connect to Elasticsearch. * @param type The type to use when writing to Elasticsearch. * @param ignoreKey Whether to ignore key during indexing. * @param ignoreSchema Whether to ignore schema during indexing. * @param topicConfigs The map of per topic configs. * @param flushTimeoutMs The flush timeout. * @param maxBufferedRecords The max number of buffered records. * @param maxInFlightRequests The max number of inflight requests allowed. * @param batchSize Approximately the max number of records each writer will buffer. * @param lingerMs The time to wait before sending a batch. */ ElasticsearchWriter( JestClient client, String type, Loading @@ -86,28 +58,32 @@ public class ElasticsearchWriter { boolean ignoreSchema, Map<String, TopicConfig> topicConfigs, long flushTimeoutMs, long maxBufferedRecords, int maxBufferedRecords, int maxInFlightRequests, int batchSize, long lingerMs, int maxRetry, long retryBackoffMs) { int maxRetries, long retryBackoffMs ) { this.client = client; this.type = type; this.ignoreKey = ignoreKey; this.ignoreSchema = ignoreSchema; this.topicConfigs = topicConfigs == null ? Collections.<String, TopicConfig>emptyMap() : topicConfigs; this.flushTimeoutMs = flushTimeoutMs; this.maxBufferedRecords = maxBufferedRecords; // Start the BulkProcessor bulkProcessor = new BulkProcessor(new HttpClient(client), maxInFlightRequests, batchSize, lingerMs, maxRetry, retryBackoffMs, createDefaultListener()); //Create mapping cache mappings = new HashSet<>(); bulkProcessor = new BulkProcessor<>( new SystemTime(), new BulkIndexingClient(client), maxBufferedRecords, maxInFlightRequests, batchSize, lingerMs, maxRetries, retryBackoffMs ); } public static class Builder { Loading @@ -117,166 +93,88 @@ public class ElasticsearchWriter { private boolean ignoreSchema = false; private Map<String, TopicConfig> topicConfigs = new HashMap<>(); private long flushTimeoutMs; private long maxBufferedRecords; private int maxBufferedRecords; private int maxInFlightRequests; private int batchSize; private long lingerMs; private int maxRetry; private long retryBackoffMs; /** * Constructor of ElasticsearchWriter Builder. * @param client The client to connect to Elasticsearch. */ public Builder(JestClient client) { this.client = client; } /** * Set the index. * @param type The type to use for each index. * @return an instance of ElasticsearchWriter Builder. */ public Builder setType(String type) { this.type = type; return this; } /** * Set whether to ignore key during indexing. * @param ignoreKey Whether to ignore key. * @return an instance of ElasticsearchWriter Builder. */ public Builder setIgnoreKey(boolean ignoreKey) { this.ignoreKey = ignoreKey; return this; } /** * Set whether to ignore schema during indexing. * @param ignoreSchema Whether to ignore key. * @return an instance of ElasticsearchWriter Builder. */ public Builder setIgnoreSchema(boolean ignoreSchema) { this.ignoreSchema = ignoreSchema; return this; } /** * Set per topic configurations. * @param topicConfigs The map of per topic configuration. * @return an instance of ElasticsearchWriter Builder. */ public Builder setTopicConfigs(Map<String, TopicConfig> topicConfigs) { this.topicConfigs = topicConfigs; return this; } /** * Set the flush timeout. * @param flushTimeoutMs The flush timeout in milliseconds. * @return an instance of ElasticsearchWriter Builder. */ public Builder setFlushTimoutMs(long flushTimeoutMs) { this.flushTimeoutMs = flushTimeoutMs; return this; } /** * Set the max number of records to buffer for each writer. * @param maxBufferedRecords The max number of buffered records. * @return an instance of ElasticsearchWriter Builder. */ public Builder setMaxBufferedRecords(long maxBufferedRecords) { public Builder setMaxBufferedRecords(int maxBufferedRecords) { this.maxBufferedRecords = maxBufferedRecords; return this; } /** * Set the max number of inflight requests. * @param maxInFlightRequests The max allowed number of inflight requests. * @return an instance of ElasticsearchWriter Builder. */ public Builder setMaxInFlightRequests(int maxInFlightRequests) { this.maxInFlightRequests = maxInFlightRequests; return this; } /** * Set the number of requests to process as a batch when writing. * to Elasticsearch. * @param batchSize the size of each batch. * @return an instance of ElasticsearchWriter Builder. */ public Builder setBatchSize(int batchSize) { this.batchSize = batchSize; return this; } /** * Set the linger time. * @param lingerMs The linger time to use in milliseconds. * @return an instance of ElasticsearchWriter Builder. */ public Builder setLingerMs(long lingerMs) { this.lingerMs = lingerMs; return this; } /** * Set the max retry for a batch * @param maxRetry The number of max retry. * @return an instance of ElasticsearchWriter Builder. */ public Builder setMaxRetry(int maxRetry) { this.maxRetry = maxRetry; return this; } /** * Set the retry backoff. * @param retryBackoffMs The retry backoff in milliseconds. * @return an instance of ElasticsearchWriter Builder. */ public Builder setRetryBackoffMs(long retryBackoffMs) { this.retryBackoffMs = retryBackoffMs; return this; } /** * Build the ElasticsearchWriter. * @return an instance of ElasticsearchWriter. */ public ElasticsearchWriter build() { return new ElasticsearchWriter( client, type, ignoreKey, ignoreSchema, topicConfigs, flushTimeoutMs, maxBufferedRecords, maxInFlightRequests, batchSize, lingerMs, maxRetry, retryBackoffMs); client, type, ignoreKey, ignoreSchema, topicConfigs, flushTimeoutMs, maxBufferedRecords, maxInFlightRequests, batchSize, lingerMs, maxRetry, retryBackoffMs ); } } public void write(Collection<SinkRecord> records) { if (bulkProcessor.getException() != null) { throw new ConnectException("BulkProcessor failed with non-retriable exception", bulkProcessor.getException()); } if (bulkProcessor.getTotalBufferedRecords() + records.size() > maxBufferedRecords) { throw new RetriableException("Exceeded max number of buffered records: " + maxBufferedRecords); } for (SinkRecord record : records) { ESRequest request = DataConverter.convertRecord(record, type, client, ignoreKey, ignoreSchema, topicConfigs, mappings); IndexingRequest request = DataConverter.convertRecord(record, type, client, ignoreKey, ignoreSchema, topicConfigs, mappings); bulkProcessor.add(request); } } public void flush() { try { if (!bulkProcessor.flush(flushTimeoutMs)) { throw new ConnectException("Cannot finish flush messages within " + flushTimeoutMs); } } catch (InterruptedException e) { Thread.currentThread().interrupt(); } catch (Throwable t) { throw new ConnectException("Flush failed with non retriable exception.", t); } bulkProcessor.flush(flushTimeoutMs); } public void start() { Loading @@ -284,14 +182,13 @@ public class ElasticsearchWriter { } public void stop() { bulkProcessor.stop(); try { bulkProcessor.awaitStop(flushTimeoutMs); } catch (InterruptedException e) { Thread.currentThread().interrupt(); } catch (Throwable t) { throw new ConnectException("Close failed with non retriable exception", t); bulkProcessor.flush(flushTimeoutMs); } catch (Exception e) { log.warn("Failed to flush during stop", e); } bulkProcessor.stop(); bulkProcessor.awaitStop(flushTimeoutMs); } private boolean indexExists(String index) { Loading Loading @@ -329,22 +226,4 @@ public class ElasticsearchWriter { } } private Listener createDefaultListener() { return new Listener() { @Override public void beforeBulk(long executionId, RecordBatch batch) { } @Override public void afterBulk(long executionId, RecordBatch batch, Response response) { } @Override public void afterBulk(long executionId, RecordBatch batch, Throwable failure) { } }; } } Loading
src/main/java/io/confluent/connect/elasticsearch/internals/HttpClient.java→src/main/java/io/confluent/connect/elasticsearch/BulkIndexingClient.java +96 −0 Original line number Diff line number Diff line Loading @@ -13,83 +13,84 @@ * License for the specific language governing permissions and limitations under * the License. **/ package io.confluent.connect.elasticsearch; package io.confluent.connect.elasticsearch.internals; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.node.ObjectNode; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Set; import io.confluent.connect.elasticsearch.bulk.BulkClient; import io.confluent.connect.elasticsearch.bulk.BulkResponse; import io.searchbox.client.JestClient; import io.searchbox.client.JestResultHandler; import io.searchbox.core.Bulk; import io.searchbox.core.BulkResult; import io.searchbox.core.Index; public class HttpClient implements Client<Response> { public class BulkIndexingClient implements BulkClient<IndexingRequest, Bulk> { private static final Logger log = LoggerFactory.getLogger(HttpClient.class); private ObjectMapper objectMapper = new ObjectMapper(); private final JestClient jestClient; private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private static final Set<String> NON_RETRIABLE_ERROR_TYPES = Collections.singleton("mapper_parse_exception"); public HttpClient(JestClient jestClient) { this.jestClient = jestClient; } private final JestClient client; @Override public void execute(RecordBatch batch, Callback<Response> callback) { Bulk bulk = constructBulk(batch, callback); jestClient.executeAsync(bulk, new CallbackHandler(callback)); public BulkIndexingClient(JestClient client) { this.client = client; } @Override public void close() { // We shutdown the JEST client when sink tasks are stopped. } private Bulk constructBulk(RecordBatch batch, Callback<Response> callback) { Bulk.Builder builder = new Bulk.Builder(); List<ESRequest> requests = batch.requests(); for (ESRequest request: requests) { JsonNode data = null; try { data = objectMapper.readTree(request.getPayload()); } catch (IOException e) { callback.onFailure(e); } Index index = new Index.Builder(data.toString()) public Bulk bulkRequest(List<IndexingRequest> batch) { final Bulk.Builder builder = new Bulk.Builder(); for (IndexingRequest request : batch) { builder.addAction( new Index.Builder(request.getPayload()) .index(request.getIndex()) .type(request.getType()) .id(request.getId()) .build(); builder.addAction(index); .build() ); } return builder.build(); } private static class CallbackHandler implements JestResultHandler<BulkResult> { @Override public BulkResponse execute(Bulk bulk) throws IOException { return toBulkResponse(client.execute(bulk)); } private Callback<Response> callback; private static BulkResponse toBulkResponse(BulkResult result) { if (result.isSucceeded()) { return BulkResponse.success(); } public CallbackHandler(Callback<Response> callback) { this.callback = callback; final List<BulkResult.BulkResultItem> failedItems = result.getFailedItems(); if (failedItems.isEmpty()) { return BulkResponse.failure(true, result.getErrorMessage()); } @Override public void completed(BulkResult result) { log.debug("Request completed with result: {}", result); callback.onResponse(new Response(result)); boolean retriable = true; final List<String> errors = new ArrayList<>(failedItems.size()); for (BulkResult.BulkResultItem failedItem : failedItems) { errors.add(failedItem.error); retriable &= isRetriableError(failedItem.error); } return BulkResponse.failure(retriable, errors.toString()); } @Override public void failed(Exception e) { log.debug("Request failed with exception: {}", e.getMessage()); callback.onFailure(e); private static boolean isRetriableError(String error) { if (error != null && !error.trim().isEmpty()) { try { final ObjectNode parsedError = (ObjectNode) OBJECT_MAPPER.readTree(error); return !NON_RETRIABLE_ERROR_TYPES.contains(parsedError.get("type").asText()); } catch (IOException e) { return true; } } return true; } }
src/main/java/io/confluent/connect/elasticsearch/DataConverter.java +8 −27 Original line number Diff line number Diff line Loading @@ -33,13 +33,13 @@ import org.apache.kafka.connect.storage.Converter; import java.io.IOException; import java.math.BigDecimal; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.Map; import java.util.Set; import io.confluent.connect.elasticsearch.internals.ESRequest; import io.searchbox.client.JestClient; import static io.confluent.connect.elasticsearch.ElasticsearchSinkConnectorConstants.MAP_KEY; Loading @@ -53,14 +53,6 @@ public class DataConverter { JSON_CONVERTER.configure(Collections.singletonMap("schemas.enable", "false"), false); } /** * Convert the key to the string representation. * * @param key The key of a SinkRecord. * @param keySchema The key schema. * @return The string representation of the key. * @throws ConnectException if the key is null. */ public static String convertKey(Object key, Schema keySchema) { if (key == null) { throw new ConnectException("Key is used as document id and can not be null."); Loading @@ -86,26 +78,14 @@ public class DataConverter { } } /** * Convert a SinkRecord to an IndexRequest. * * @param record The SinkRecord to be converted. * @param client The client to connect to Elasticsearch. * @param ignoreKey Whether to ignore the key during indexing. * @param ignoreSchema Whether to ignore the schema during indexing. * @param topicConfigs The map of per topic configs. * @param mappings The mapping cache. * @return The converted IndexRequest. */ public static ESRequest convertRecord( public static IndexingRequest convertRecord( SinkRecord record, String type, JestClient client, boolean ignoreKey, boolean ignoreSchema, Map<String, TopicConfig> topicConfigs, Set<String> mappings) { Set<String> mappingCache) { String topic = record.topic(); int partition = record.kafkaPartition(); Loading Loading @@ -139,9 +119,9 @@ public class DataConverter { } try { if (!topicIgnoreSchema && !mappings.contains(index) && !Mapping.doesMappingExist(client, index, type, mappings)) { if (!topicIgnoreSchema && !mappingCache.contains(index) && !Mapping.doesMappingExist(client, index, type, mappingCache)) { Mapping.createMapping(client, index, type, valueSchema); mappings.add(index); mappingCache.add(index); } } catch (IOException e) { // TODO: It is possible that two clients are creating the mapping at the same time and Loading @@ -160,8 +140,9 @@ public class DataConverter { newValue = value; } byte[] json = JSON_CONVERTER.fromConnectData(topic, newSchema, newValue); return new ESRequest(index, type, id, json); String payload = new String(JSON_CONVERTER.fromConnectData(topic, newSchema, newValue), StandardCharsets.UTF_8); return new IndexingRequest(index, type, id, payload); } // We need to pre process the Kafka Connect schema before converting to JSON as Elasticsearch Loading
src/main/java/io/confluent/connect/elasticsearch/ElasticsearchSinkConnectorConfig.java +3 −4 Original line number Diff line number Diff line Loading @@ -65,9 +65,8 @@ public class ElasticsearchSinkConnectorConfig extends AbstractConfig { public static final String MAX_BUFFERED_RECORDS_CONFIG = "max.buffered.records"; private static final String MAX_BUFFERED_RECORDS_DOC = "Approximately the max number of records each task will buffer. This config controls the memory usage for each task. When the number of " + "buffered records is larger than this value, the partitions assigned to this task will be paused."; private static final long MAX_BUFFERED_RECORDS_DEFAULT = 20000; "Approximately the max number of records each task will buffer. This config controls the memory usage for each task."; private static final int MAX_BUFFERED_RECORDS_DEFAULT = 20000; private static final String MAX_BUFFERED_RECORDS_DISPLAY = "Max Number of Records to Buffer"; public static final String BATCH_SIZE_CONFIG = "batch.size"; Loading Loading @@ -135,7 +134,7 @@ public class ElasticsearchSinkConnectorConfig extends AbstractConfig { .define(RETRY_BACKOFF_MS_CONFIG, Type.LONG, RETRY_BACKOFF_MS_DEFAULT, Importance.LOW, RETRY_BACKOFF_MS_DOC, CONNECTOR_GROUP, 11, Width.SHORT, RETRY_BACKOFF_MS_DISPLAY) .define(MAX_RETRIES_CONFIG, Type.INT, MAX_RETRIES_DEFAULT, Importance.LOW, MAX_RETRIES_DOC, CONNECTOR_GROUP, 12, Width.SHORT, MAX_RETRIES_DISPLAY) .define(FLUSH_TIMEOUT_MS_CONFIG, Type.LONG, FLUSH_TIMEOUT_MS_DEFAULT, Importance.LOW, FLUSH_TIMEOUT_MS_DOC, CONNECTOR_GROUP, 13, Width.SHORT, FLUSH_TIMEOUT_MS_DISPLAY) .define(MAX_BUFFERED_RECORDS_CONFIG, Type.LONG, MAX_BUFFERED_RECORDS_DEFAULT, Importance.LOW, MAX_BUFFERED_RECORDS_DOC, CONNECTOR_GROUP, 14, Width.SHORT, MAX_BUFFERED_RECORDS_DISPLAY); .define(MAX_BUFFERED_RECORDS_CONFIG, Type.INT, MAX_BUFFERED_RECORDS_DEFAULT, Importance.LOW, MAX_BUFFERED_RECORDS_DOC, CONNECTOR_GROUP, 14, Width.SHORT, MAX_BUFFERED_RECORDS_DISPLAY); } static ConfigDef config = baseConfigDef(); Loading
src/main/java/io/confluent/connect/elasticsearch/ElasticsearchSinkTask.java +2 −2 Original line number Diff line number Diff line Loading @@ -68,7 +68,7 @@ public class ElasticsearchSinkTask extends SinkTask { Map<String, TopicConfig> topicConfigs = constructTopicConfig(topicIndex, topicIgnoreKey, topicIgnoreSchema); long flushTimeoutMs = config.getLong(ElasticsearchSinkConnectorConfig.FLUSH_TIMEOUT_MS_CONFIG); long maxBufferedRecords = config.getLong(ElasticsearchSinkConnectorConfig.MAX_BUFFERED_RECORDS_CONFIG); int maxBufferedRecords = config.getInt(ElasticsearchSinkConnectorConfig.MAX_BUFFERED_RECORDS_CONFIG); int batchSize = config.getInt(ElasticsearchSinkConnectorConfig.BATCH_SIZE_CONFIG); long lingerMs = config.getLong(ElasticsearchSinkConnectorConfig.LINGER_MS_CONFIG); int maxInFlightRequests = config.getInt(ElasticsearchSinkConnectorConfig.MAX_IN_FLIGHT_REQUESTS_CONFIG); Loading
src/main/java/io/confluent/connect/elasticsearch/ElasticsearchWriter.java +34 −155 Original line number Diff line number Diff line Loading @@ -16,8 +16,8 @@ package io.confluent.connect.elasticsearch; import org.apache.kafka.common.utils.SystemTime; import org.apache.kafka.connect.errors.ConnectException; import org.apache.kafka.connect.errors.RetriableException; import org.apache.kafka.connect.sink.SinkRecord; import org.slf4j.Logger; import org.slf4j.LoggerFactory; Loading @@ -30,55 +30,27 @@ import java.util.HashSet; import java.util.Map; import java.util.Set; import io.confluent.connect.elasticsearch.internals.BulkProcessor; import io.confluent.connect.elasticsearch.internals.ESRequest; import io.confluent.connect.elasticsearch.internals.HttpClient; import io.confluent.connect.elasticsearch.internals.Listener; import io.confluent.connect.elasticsearch.internals.RecordBatch; import io.confluent.connect.elasticsearch.internals.Response; import io.confluent.connect.elasticsearch.bulk.BulkProcessor; import io.searchbox.action.Action; import io.searchbox.client.JestClient; import io.searchbox.client.JestResult; import io.searchbox.indices.CreateIndex; import io.searchbox.indices.IndicesExists; /** * The ElasticsearchWriter handles connections to Elasticsearch, sending data and flush. * Transport client is used to send requests to Elasticsearch cluster. Requests are batched * when sending to Elasticsearch. To ensure delivery guarantee and order, we retry in case of * failures for a batch. * * Currently, we only send out requests to Elasticsearch when flush is called, which is not * desirable from the latency point of view. * * TODO: Use offset as external version to fence requests with lower version. */ // TODO: Use offset as external version to fence requests with lower version. public class ElasticsearchWriter { private static final Logger log = LoggerFactory.getLogger(ElasticsearchWriter.class); private final JestClient client; private final BulkProcessor bulkProcessor; private final String type; private final boolean ignoreKey; private final boolean ignoreSchema; private final Map<String, TopicConfig> topicConfigs; private final long flushTimeoutMs; private final long maxBufferedRecords; private final Set<String> mappings; private final BulkProcessor<IndexingRequest, ?> bulkProcessor; /** * ElasticsearchWriter constructor * @param client The client to connect to Elasticsearch. * @param type The type to use when writing to Elasticsearch. * @param ignoreKey Whether to ignore key during indexing. * @param ignoreSchema Whether to ignore schema during indexing. * @param topicConfigs The map of per topic configs. * @param flushTimeoutMs The flush timeout. * @param maxBufferedRecords The max number of buffered records. * @param maxInFlightRequests The max number of inflight requests allowed. * @param batchSize Approximately the max number of records each writer will buffer. * @param lingerMs The time to wait before sending a batch. */ ElasticsearchWriter( JestClient client, String type, Loading @@ -86,28 +58,32 @@ public class ElasticsearchWriter { boolean ignoreSchema, Map<String, TopicConfig> topicConfigs, long flushTimeoutMs, long maxBufferedRecords, int maxBufferedRecords, int maxInFlightRequests, int batchSize, long lingerMs, int maxRetry, long retryBackoffMs) { int maxRetries, long retryBackoffMs ) { this.client = client; this.type = type; this.ignoreKey = ignoreKey; this.ignoreSchema = ignoreSchema; this.topicConfigs = topicConfigs == null ? Collections.<String, TopicConfig>emptyMap() : topicConfigs; this.flushTimeoutMs = flushTimeoutMs; this.maxBufferedRecords = maxBufferedRecords; // Start the BulkProcessor bulkProcessor = new BulkProcessor(new HttpClient(client), maxInFlightRequests, batchSize, lingerMs, maxRetry, retryBackoffMs, createDefaultListener()); //Create mapping cache mappings = new HashSet<>(); bulkProcessor = new BulkProcessor<>( new SystemTime(), new BulkIndexingClient(client), maxBufferedRecords, maxInFlightRequests, batchSize, lingerMs, maxRetries, retryBackoffMs ); } public static class Builder { Loading @@ -117,166 +93,88 @@ public class ElasticsearchWriter { private boolean ignoreSchema = false; private Map<String, TopicConfig> topicConfigs = new HashMap<>(); private long flushTimeoutMs; private long maxBufferedRecords; private int maxBufferedRecords; private int maxInFlightRequests; private int batchSize; private long lingerMs; private int maxRetry; private long retryBackoffMs; /** * Constructor of ElasticsearchWriter Builder. * @param client The client to connect to Elasticsearch. */ public Builder(JestClient client) { this.client = client; } /** * Set the index. * @param type The type to use for each index. * @return an instance of ElasticsearchWriter Builder. */ public Builder setType(String type) { this.type = type; return this; } /** * Set whether to ignore key during indexing. * @param ignoreKey Whether to ignore key. * @return an instance of ElasticsearchWriter Builder. */ public Builder setIgnoreKey(boolean ignoreKey) { this.ignoreKey = ignoreKey; return this; } /** * Set whether to ignore schema during indexing. * @param ignoreSchema Whether to ignore key. * @return an instance of ElasticsearchWriter Builder. */ public Builder setIgnoreSchema(boolean ignoreSchema) { this.ignoreSchema = ignoreSchema; return this; } /** * Set per topic configurations. * @param topicConfigs The map of per topic configuration. * @return an instance of ElasticsearchWriter Builder. */ public Builder setTopicConfigs(Map<String, TopicConfig> topicConfigs) { this.topicConfigs = topicConfigs; return this; } /** * Set the flush timeout. * @param flushTimeoutMs The flush timeout in milliseconds. * @return an instance of ElasticsearchWriter Builder. */ public Builder setFlushTimoutMs(long flushTimeoutMs) { this.flushTimeoutMs = flushTimeoutMs; return this; } /** * Set the max number of records to buffer for each writer. * @param maxBufferedRecords The max number of buffered records. * @return an instance of ElasticsearchWriter Builder. */ public Builder setMaxBufferedRecords(long maxBufferedRecords) { public Builder setMaxBufferedRecords(int maxBufferedRecords) { this.maxBufferedRecords = maxBufferedRecords; return this; } /** * Set the max number of inflight requests. * @param maxInFlightRequests The max allowed number of inflight requests. * @return an instance of ElasticsearchWriter Builder. */ public Builder setMaxInFlightRequests(int maxInFlightRequests) { this.maxInFlightRequests = maxInFlightRequests; return this; } /** * Set the number of requests to process as a batch when writing. * to Elasticsearch. * @param batchSize the size of each batch. * @return an instance of ElasticsearchWriter Builder. */ public Builder setBatchSize(int batchSize) { this.batchSize = batchSize; return this; } /** * Set the linger time. * @param lingerMs The linger time to use in milliseconds. * @return an instance of ElasticsearchWriter Builder. */ public Builder setLingerMs(long lingerMs) { this.lingerMs = lingerMs; return this; } /** * Set the max retry for a batch * @param maxRetry The number of max retry. * @return an instance of ElasticsearchWriter Builder. */ public Builder setMaxRetry(int maxRetry) { this.maxRetry = maxRetry; return this; } /** * Set the retry backoff. * @param retryBackoffMs The retry backoff in milliseconds. * @return an instance of ElasticsearchWriter Builder. */ public Builder setRetryBackoffMs(long retryBackoffMs) { this.retryBackoffMs = retryBackoffMs; return this; } /** * Build the ElasticsearchWriter. * @return an instance of ElasticsearchWriter. */ public ElasticsearchWriter build() { return new ElasticsearchWriter( client, type, ignoreKey, ignoreSchema, topicConfigs, flushTimeoutMs, maxBufferedRecords, maxInFlightRequests, batchSize, lingerMs, maxRetry, retryBackoffMs); client, type, ignoreKey, ignoreSchema, topicConfigs, flushTimeoutMs, maxBufferedRecords, maxInFlightRequests, batchSize, lingerMs, maxRetry, retryBackoffMs ); } } public void write(Collection<SinkRecord> records) { if (bulkProcessor.getException() != null) { throw new ConnectException("BulkProcessor failed with non-retriable exception", bulkProcessor.getException()); } if (bulkProcessor.getTotalBufferedRecords() + records.size() > maxBufferedRecords) { throw new RetriableException("Exceeded max number of buffered records: " + maxBufferedRecords); } for (SinkRecord record : records) { ESRequest request = DataConverter.convertRecord(record, type, client, ignoreKey, ignoreSchema, topicConfigs, mappings); IndexingRequest request = DataConverter.convertRecord(record, type, client, ignoreKey, ignoreSchema, topicConfigs, mappings); bulkProcessor.add(request); } } public void flush() { try { if (!bulkProcessor.flush(flushTimeoutMs)) { throw new ConnectException("Cannot finish flush messages within " + flushTimeoutMs); } } catch (InterruptedException e) { Thread.currentThread().interrupt(); } catch (Throwable t) { throw new ConnectException("Flush failed with non retriable exception.", t); } bulkProcessor.flush(flushTimeoutMs); } public void start() { Loading @@ -284,14 +182,13 @@ public class ElasticsearchWriter { } public void stop() { bulkProcessor.stop(); try { bulkProcessor.awaitStop(flushTimeoutMs); } catch (InterruptedException e) { Thread.currentThread().interrupt(); } catch (Throwable t) { throw new ConnectException("Close failed with non retriable exception", t); bulkProcessor.flush(flushTimeoutMs); } catch (Exception e) { log.warn("Failed to flush during stop", e); } bulkProcessor.stop(); bulkProcessor.awaitStop(flushTimeoutMs); } private boolean indexExists(String index) { Loading Loading @@ -329,22 +226,4 @@ public class ElasticsearchWriter { } } private Listener createDefaultListener() { return new Listener() { @Override public void beforeBulk(long executionId, RecordBatch batch) { } @Override public void afterBulk(long executionId, RecordBatch batch, Response response) { } @Override public void afterBulk(long executionId, RecordBatch batch, Throwable failure) { } }; } }