0.3.0.pre - Preparing for threadsafety

Karl Southern
2016-07-13 17:40:35 +01:00
parent da5a3d8be3
commit fa2d226fbf
14 changed files with 409 additions and 280 deletions


@@ -1,10 +1,10 @@
# encoding: utf-8
require "logstash/outputs/base"
require "logstash/namespace"
require "stud/buffer"
require "java"
require "logstash-output-jdbc_jars"
require "logstash-output-jdbc_ring-buffer"
require 'logstash/outputs/base'
require 'logstash/namespace'
require 'concurrent'
require 'stud/interval'
require 'java'
require 'logstash-output-jdbc_jars'
# Write events to a SQL engine, using JDBC.
#
@@ -12,8 +12,7 @@ require "logstash-output-jdbc_ring-buffer"
# includes correctly crafting the SQL statement, and matching the number of
# parameters correctly.
class LogStash::Outputs::Jdbc < LogStash::Outputs::Base
-# Adds buffer support
-include Stud::Buffer
+declare_threadsafe! if self.respond_to?(:declare_threadsafe!)
STRFTIME_FMT = '%Y-%m-%d %T.%L'.freeze
@@ -33,137 +32,106 @@ class LogStash::Outputs::Jdbc < LogStash::Outputs::Base
'58', # System Error
].freeze
config_name "jdbc"
config_name 'jdbc'
# Driver class - Reintroduced for https://github.com/theangryangel/logstash-output-jdbc/issues/26
-config :driver_class, :validate => :string
+config :driver_class, validate: :string
# Does the JDBC driver support autocommit?
-config :driver_auto_commit, :validate => :boolean, :default => true, :required => true
+config :driver_auto_commit, validate: :boolean, default: true, required: true
# Where to find the jar
# Defaults to not required, and to the original behaviour
-config :driver_jar_path, :validate => :string, :required => false
+config :driver_jar_path, validate: :string, required: false
# jdbc connection string
-config :connection_string, :validate => :string, :required => true
+config :connection_string, validate: :string, required: true
# jdbc username - optional, maybe in the connection string
-config :username, :validate => :string, :required => false
+config :username, validate: :string, required: false
# jdbc password - optional, maybe in the connection string
-config :password, :validate => :string, :required => false
+config :password, validate: :string, required: false
# [ "insert into table (message) values(?)", "%{message}" ]
-config :statement, :validate => :array, :required => true
+config :statement, validate: :array, required: true
# If this is an unsafe statement, use event.sprintf
# This also has potential performance penalties due to having to create a
# new statement for each event, rather than adding to the batch and issuing
# multiple inserts in 1 go
-config :unsafe_statement, :validate => :boolean, :default => false
+config :unsafe_statement, validate: :boolean, default: false
# Number of connections in the pool to maintain
-config :max_pool_size, :validate => :number, :default => 5
+config :max_pool_size, validate: :number, default: 24
# Connection timeout
-config :connection_timeout, :validate => :number, :default => 10000
+config :connection_timeout, validate: :number, default: 10000
# We buffer a certain number of events before flushing that out to SQL.
# This setting controls how many events will be buffered before sending a
# batch of events.
-config :flush_size, :validate => :number, :default => 1000
+config :flush_size, validate: :number, default: 1000
-# The amount of time since last flush before a flush is forced.
-#
-# This setting helps ensure slow event rates don't get stuck in Logstash.
-# For example, if your `flush_size` is 100, and you have received 10 events,
-# and it has been more than `idle_flush_time` seconds since the last flush,
-# Logstash will flush those 10 events automatically.
-#
-# This helps keep both fast and slow log streams moving along in
-# a timely manner.
-#
-# If you change this value please ensure that you change
-# max_flush_exceptions accordingly.
-config :idle_flush_time, :validate => :number, :default => 1
+# Set initial interval in seconds between retries. Doubled on each retry up to `retry_max_interval`
+config :retry_initial_interval, validate: :number, default: 2
-# Maximum number of sequential flushes which encounter exceptions, before we stop retrying.
+# Maximum time between retries, in seconds
+config :retry_max_interval, validate: :number, default: 128
+# Any additional custom, retryable SQL state codes.
+# Suitable for configuring retryable custom JDBC SQL state codes.
+config :retry_sql_states, validate: :array, default: []
+# Maximum number of sequential failed attempts, before we stop retrying.
# If set to < 1, then it will infinitely retry.
-#
-# You should carefully tune this in relation to idle_flush_time if your SQL server
-# is not highly available.
-# i.e. If your idle_flush_time is 1, and your max_flush_exceptions is 200, and your SQL server takes
-# longer than 200 seconds to reboot, then logstash will stop.
-config :max_flush_exceptions, :validate => :number, :default => 0
+# At the default values this is a little over 10 minutes
+config :max_flush_exceptions, validate: :number, default: 10
-config :max_repeat_exceptions, :obsolete => "This has been replaced by max_flush_exceptions - which behaves slightly differently. Please check the documentation."
-config :max_repeat_exceptions_time, :obsolete => "This is no longer required"
+config :max_repeat_exceptions, obsolete: 'This has been replaced by max_flush_exceptions - which behaves slightly differently. Please check the documentation.'
+config :max_repeat_exceptions_time, obsolete: 'This is no longer required'
+config :idle_flush_time, obsolete: 'No longer necessary under Logstash v5'
public
def register
@logger.info("JDBC - Starting up")
@logger.info('JDBC - Starting up')
LogStash::Logger.setup_log4j(@logger)
load_jar_files!
-@exceptions_tracker = RingBuffer.new(@max_flush_exceptions)
+@stopping = Concurrent::AtomicBoolean.new(false)
-if (@flush_size > 1000)
-@logger.warn("JDBC - Flush size is set to > 1000")
+@logger.warn('JDBC - Flush size is set to > 1000') if @flush_size > 1000
+if @statement.empty?
+@logger.error('JDBC - No statement provided. Configuration error.')
end
-if @statement.length < 1
-@logger.error("JDBC - No statement provided. Configuration error.")
-end
-if (!@unsafe_statement and @statement.length < 2)
+if !@unsafe_statement && @statement.length < 2
@logger.error("JDBC - Statement has no parameters. No events will be inserted into SQL as you're not passing any event data. Likely configuration error.")
end
setup_and_test_pool!
+end
-buffer_initialize(
-:max_items => @flush_size,
-:max_interval => @idle_flush_time,
-:logger => @logger
-)
+def multi_receive(events)
+events.each_slice(@flush_size) do |slice|
+retrying_submit(slice)
+end
end
def receive(event)
-return unless output?(event) or event.cancelled?
-return unless @statement.length > 0
-buffer_receive(event)
+retrying_submit([event])
end
-def flush(events, teardown=false)
-if @unsafe_statement == true
-unsafe_flush(events, teardown)
-else
-safe_flush(events, teardown)
-end
-end
-def on_flush_error(e)
-return if @max_flush_exceptions < 1
-@exceptions_tracker << e.class
-if @exceptions_tracker.reject { |i| i.nil? }.count >= @max_flush_exceptions
-@logger.error("JDBC - max_flush_exceptions has been reached")
-raise LogStash::ShutdownSignal.new
-end
-end
-def teardown
-buffer_flush(:final => true)
-@pool.close()
+def close
+@stopping.make_true
+@pool.close
super
end
private
def setup_and_test_pool!
# Setup pool
@pool = Java::ComZaxxerHikari::HikariDataSource.new
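Editor's note: as a back-of-the-envelope check on the retry defaults introduced above (retry_initial_interval 2, retry_max_interval 128, max_flush_exceptions 10), the "a little over 10 minutes" comment works out if each failed attempt sleeps once, with the interval doubling up to the cap. The helper below is only an illustrative sketch and is not part of the plugin:

def total_backoff(attempts: 10, initial: 2, max: 128)
  interval = initial
  attempts.times.sum do
    this_sleep = interval
    interval = [interval * 2, max].min # doubled each retry, capped at retry_max_interval
    this_sleep
  end
end

total_backoff # => 638 seconds (2 + 4 + 8 + 16 + 32 + 64, then 128 four times), a little over 10 minutes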
@@ -181,106 +149,104 @@ class LogStash::Outputs::Jdbc < LogStash::Outputs::Base
validate_connection_timeout = (@connection_timeout / 1000) / 2
# Test connection
-test_connection = @pool.getConnection()
+test_connection = @pool.getConnection
unless test_connection.isValid(validate_connection_timeout)
@logger.error("JDBC - Connection is not valid. Please check connection string or that your JDBC endpoint is available.")
@logger.error('JDBC - Connection is not valid. Please check connection string or that your JDBC endpoint is available.')
end
-test_connection.close()
+test_connection.close
end
def load_jar_files!
# Load jar from driver path
unless @driver_jar_path.nil?
raise Exception.new("JDBC - Could not find jar file at given path. Check config.") unless File.exists? @driver_jar_path
raise LogStash::ConfigurationError, 'JDBC - Could not find jar file at given path. Check config.' unless File.exist? @driver_jar_path
require @driver_jar_path
return
end
# Revert original behaviour of loading from vendor directory
# if no path given
-if ENV['LOGSTASH_HOME']
-jarpath = File.join(ENV['LOGSTASH_HOME'], "/vendor/jar/jdbc/*.jar")
-else
-jarpath = File.join(File.dirname(__FILE__), "../../../vendor/jar/jdbc/*.jar")
-end
+jarpath = if ENV['LOGSTASH_HOME']
+File.join(ENV['LOGSTASH_HOME'], '/vendor/jar/jdbc/*.jar')
+else
+File.join(File.dirname(__FILE__), '../../../vendor/jar/jdbc/*.jar')
+end
-@logger.debug("JDBC - jarpath", path: jarpath)
+@logger.debug('JDBC - jarpath', path: jarpath)
jars = Dir[jarpath]
raise Exception.new("JDBC - No jars found in jarpath. Have you read the README?") if jars.empty?
raise LogStash::ConfigurationError, 'JDBC - No jars found. Have you read the README?' if jars.empty?
jars.each do |jar|
@logger.debug("JDBC - Loaded jar", :jar => jar)
@logger.debug('JDBC - Loaded jar', jar: jar)
require jar
end
end
-def safe_flush(events, teardown=false)
+def submit(events)
connection = nil
statement = nil
+events_to_retry = []
begin
-connection = @pool.getConnection()
+connection = @pool.getConnection
rescue => e
log_jdbc_exception(e, true)
-raise
+# If a connection is not available, then the server has gone away
+# We're not counting that towards our retry count.
+return events, false
end
-begin
-statement = connection.prepareStatement(@statement[0])
-events.each do |event|
-next if event.cancelled?
-next if @statement.length < 2
-statement = add_statement_event_params(statement, event)
-statement.addBatch()
+events.each do |event|
+begin
+statement = connection.prepareStatement(
+(@unsafe_statement == true) ? event.sprintf(@statement[0]) : @statement[0]
+)
+statement = add_statement_event_params(statement, event) if @statement.length > 1
+statement.execute
+rescue => e
+if retry_exception?(e)
+events_to_retry.push(event)
+end
+ensure
+statement.close unless statement.nil?
+end
-statement.executeBatch()
-statement.close()
-@exceptions_tracker << nil
-rescue => e
-if retry_exception?(e)
-raise
-end
-ensure
-statement.close() unless statement.nil?
-connection.close() unless connection.nil?
-end
+connection.close unless connection.nil?
+return events_to_retry, true
end
-def unsafe_flush(events, teardown=false)
-connection = nil
-statement = nil
-begin
-connection = @pool.getConnection()
-rescue => e
-log_jdbc_exception(e, true)
-raise
-end
+def retrying_submit(actions)
+# Initially we submit the full list of actions
+submit_actions = actions
+count_as_attempt = true
-begin
-events.each do |event|
-next if event.cancelled?
+attempts = 1
-statement = connection.prepareStatement(event.sprintf(@statement[0]))
-statement = add_statement_event_params(statement, event) if @statement.length > 1
+sleep_interval = @retry_initial_interval
+while @stopping.false? and (submit_actions and !submit_actions.empty?)
+return if !submit_actions || submit_actions.empty? # If everything's a success we move along
+# We retry whatever didn't succeed
+submit_actions, count_as_attempt = submit(submit_actions)
-statement.execute()
+# Everything was a success!
+break if !submit_actions || submit_actions.empty?
-# cancel the event, since we may end up outputting the same event multiple times
-# if an exception happens later down the line
-event.cancel
-@exceptions_tracker << nil
+if @max_flush_exceptions > 0 and count_as_attempt == true
+attempts += 1
+if attempts > @max_flush_exceptions
+@logger.error("JDBC - max_flush_exceptions has been reached. #{submit_actions.length} events have been unable to be sent to SQL and are being dropped. See previously logged exceptions for details.")
+break
+end
+end
-rescue => e
-if retry_exception?(e)
-raise
-end
-ensure
-statement.close() unless statement.nil?
-connection.close() unless connection.nil?
+# If we're retrying the action sleep for the recommended interval
+# Double the interval for the next time through to achieve exponential backoff
+Stud.stoppable_sleep(sleep_interval) { @stopping.true? }
+sleep_interval = next_sleep_interval(sleep_interval)
end
end
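Editor's note on the submit logic above: in the default (safe) mode the first entry of the statement array is prepared as-is and the remaining entries are bound as parameters, while unsafe_statement set to true runs the whole SQL string through event.sprintf for every event, so a brand-new prepared statement is built each time. A rough, standalone illustration follows; the table, field and values are hypothetical, and the Event class is only a crude stand-in for LogStash::Event:

Event = Struct.new(:data) do
  # minimal stand-in for LogStash::Event#sprintf
  def sprintf(fmt)
    fmt.gsub(/%\{(\w+)\}/) { data[Regexp.last_match(1)] }
  end
end

event = Event.new('message' => 'hello world')

# safe (default): statement[0] is prepared once, statement[1..-1] are bound per event
statement = ['insert into logs (message) values(?)', '%{message}']
safe_sql = statement[0] # "insert into logs (message) values(?)"

# unsafe_statement => true: the event is interpolated into the SQL itself,
# which forces a new prepared statement per event (the performance penalty noted above)
unsafe_sql = event.sprintf("insert into logs (message) values('%{message}')")
# => "insert into logs (message) values('hello world')"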
@@ -324,6 +290,12 @@ class LogStash::Outputs::Jdbc < LogStash::Outputs::Base
statement
end
+def retry_exception?(exception)
+retrying = (exception.respond_to? 'getSQLState' and (RETRYABLE_SQLSTATE_CLASSES.include?(exception.getSQLState.to_s[0,2]) or @retry_sql_states.include?(exception.getSQLState)))
+log_jdbc_exception(exception, retrying)
+retrying
+end
def log_jdbc_exception(exception, retrying)
current_exception = exception
@@ -343,10 +315,8 @@ class LogStash::Outputs::Jdbc < LogStash::Outputs::Base
end
end
-def retry_exception?(exception)
-retrying = (exception.respond_to? 'getSQLState' and RETRYABLE_SQLSTATE_CLASSES.include?(exception.getSQLState.to_s[0,2]))
-log_jdbc_exception(exception, retrying)
-retrying
+def next_sleep_interval(current_interval)
+doubled = current_interval * 2
+doubled > @retry_max_interval ? @retry_max_interval : doubled
end
end # class LogStash::Outputs::jdbc
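Editor's note on the retry decision implemented by retry_exception? above: the first two characters of a JDBC SQLState name its class, and an exception is retried when that class appears in RETRYABLE_SQLSTATE_CLASSES or when the full code was supplied via retry_sql_states. Only the '58' class is visible in this diff, so the class list and the custom code below are illustrative assumptions rather than the plugin's full list:

retryable_classes = ['58']    # '58' = System Error (other classes elided here)
custom_codes      = ['S1000'] # hypothetical retry_sql_states entry

retryable = lambda do |sql_state|
  retryable_classes.include?(sql_state.to_s[0, 2]) || custom_codes.include?(sql_state)
end

retryable.call('58030') # => true  (SQLState class '58')
retryable.call('S1000') # => true  (explicitly configured)
retryable.call('42000') # => false (a syntax error, for example, is not retried)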