396 lines
13 KiB
C++
396 lines
13 KiB
C++
/* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; version 2 of the License.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software Foundation,
|
|
51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
|
|
|
|
#include "rpl_trx_tracking.h"
|
|
|
|
#include "mysqld.h"
|
|
#include "binlog.h"
|
|
|
|
|
|
/**
  Build a clock whose absolute value starts at SEQ_UNINIT and whose
  offset starts at zero.
*/
Logical_clock::Logical_clock()
  : state(SEQ_UNINIT),
    offset(0)
{
}
|
|
|
|
/**
|
|
Atomically fetch the current state.
|
|
@return not subtracted "absolute" value.
|
|
*/
|
|
inline int64 Logical_clock::get_timestamp()
|
|
{
|
|
int64 retval= 0;
|
|
DBUG_ENTER("Logical_clock::get_timestamp");
|
|
retval= my_atomic_load64(&state);
|
|
DBUG_RETURN(retval);
|
|
}
|
|
|
|
/**
|
|
Steps the absolute value of the clock (state) to return
|
|
an updated value.
|
|
The caller must be sure to call the method in no concurrent
|
|
execution context so either offset and state can't change.
|
|
|
|
@return incremented "absolute" value
|
|
*/
|
|
inline int64 Logical_clock::step()
|
|
{
|
|
DBUG_ASSERT(SEQ_UNINIT == 0);
|
|
DBUG_EXECUTE_IF("logical_clock_step_2", ++state;);
|
|
return ++state;
|
|
}
|
|
|
|
/**
|
|
To try setting the clock *forward*.
|
|
The clock does not change when the new value is in the past
|
|
which is reflected by the new value and by offset.
|
|
In other words the function main effects is described as
|
|
state= max(state, new_value).
|
|
Offset that exceeds the new value indicates the binary log rotation
|
|
to render such new value useless.
|
|
|
|
@param new_val a new value (offset included)
|
|
@return a (new) value of state member regardless whether it's changed or not.
|
|
*/
|
|
inline int64 Logical_clock::set_if_greater(int64 new_val)
|
|
{
|
|
longlong old_val= new_val - 1;
|
|
bool cas_rc;
|
|
|
|
DBUG_ENTER("Logical_clock::set_if_greater");
|
|
|
|
DBUG_ASSERT(new_val > 0);
|
|
|
|
if (new_val <= offset)
|
|
{
|
|
/*
|
|
This function's invocation can be separated from the
|
|
transaction's flushing by few rotations. A late to log
|
|
transaction does not change the clock, similarly to how
|
|
its timestamps are handled at flushing.
|
|
*/
|
|
DBUG_RETURN(SEQ_UNINIT);
|
|
}
|
|
|
|
DBUG_ASSERT(new_val > 0);
|
|
|
|
while (!(cas_rc= my_atomic_cas64(&state, &old_val, new_val)) &&
|
|
old_val < new_val)
|
|
{}
|
|
|
|
DBUG_ASSERT(state >= new_val); // setting can't be done to past
|
|
|
|
DBUG_ASSERT(cas_rc || old_val >= new_val);
|
|
|
|
DBUG_RETURN(cas_rc ? new_val : old_val);
|
|
}
|
|
|
|
|
|
/**
|
|
Get the sequence_number for a transaction, and get the last_commit based
|
|
on parallel committing transactions.
|
|
|
|
@param[in] thd Current THD from which to extract trx context.
|
|
@param[in,out] sequence_number Sequence number of current transaction.
|
|
@param[in,out] commit_parent Commit_parent of current transaction,
|
|
pre-filled with the commit_parent calculated
|
|
by the logical clock logic.
|
|
*/
|
|
void
|
|
Commit_order_trx_dependency_tracker::get_dependency(THD *thd,
|
|
int64 &sequence_number,
|
|
int64 &commit_parent)
|
|
{
|
|
Transaction_ctx *trn_ctx= thd->get_transaction();
|
|
|
|
DBUG_ASSERT(trn_ctx->sequence_number
|
|
> m_max_committed_transaction.get_offset());
|
|
/*
|
|
Prepare sequence_number and commit_parent relative to the current
|
|
binlog. This is done by subtracting the binlog's clock offset
|
|
from the values.
|
|
|
|
A transaction that commits after the binlog is rotated, can have a
|
|
commit parent in the previous binlog. In this case, subtracting
|
|
the offset from the sequence number results in a negative
|
|
number. The commit parent dependency gets lost in such
|
|
case. Therefore, we log the value SEQ_UNINIT in this case.
|
|
*/
|
|
sequence_number=
|
|
trn_ctx->sequence_number - m_max_committed_transaction.get_offset();
|
|
|
|
commit_parent=
|
|
trn_ctx->last_committed <= m_max_committed_transaction.get_offset()
|
|
? SEQ_UNINIT
|
|
: trn_ctx->last_committed - m_max_committed_transaction.get_offset();
|
|
}
|
|
|
|
/**
  Step the transaction counter.

  @return the value returned by stepping the transaction counter clock.
*/
int64
Commit_order_trx_dependency_tracker::step()
{
  const int64 stepped= m_transaction_counter.step();
  return stepped;
}
|
|
|
|
void
|
|
Commit_order_trx_dependency_tracker::rotate()
|
|
{
|
|
m_max_committed_transaction.
|
|
update_offset(m_transaction_counter.get_timestamp());
|
|
|
|
m_transaction_counter.
|
|
update_offset(m_transaction_counter.get_timestamp());
|
|
}
|
|
|
|
void
|
|
Commit_order_trx_dependency_tracker::update_max_committed(int64 sequence_number)
|
|
{
|
|
mysql_mutex_assert_owner(&LOCK_slave_trans_dep_tracker);
|
|
m_max_committed_transaction.set_if_greater(sequence_number);
|
|
}
|
|
|
|
/**
|
|
Get the writeset dependencies of a transaction.
|
|
This takes the commit_parent that must be previously set using
|
|
Commit_order_trx_dependency_tracker and tries to make the commit_parent as
|
|
low as possible, using the writesets of each transaction.
|
|
The commit_parent returned depends on how many row hashes are stored in the
|
|
writeset_history, which is cleared once it reaches the user-defined maximum.
|
|
|
|
@param[in] thd Current THD from which to extract trx context.
|
|
@param[in,out] sequence_number Sequence number of current transaction.
|
|
@param[in,out] commit_parent Commit_parent of current transaction,
|
|
pre-filled with the commit_parent calculated by
|
|
Commit_order_trx_dependency_tracker to use when
|
|
the writeset commit_parent is not valid.
|
|
*/
|
|
void
|
|
Writeset_trx_dependency_tracker::get_dependency(THD *thd,
|
|
int64 &sequence_number,
|
|
int64 &commit_parent)
|
|
{
|
|
Rpl_transaction_write_set_ctx *write_set_ctx=
|
|
thd->get_transaction()->get_transaction_write_set_ctx();
|
|
std::set<uint64> *writeset= write_set_ctx->get_write_set();
|
|
|
|
#ifndef DBUG_OFF
|
|
/* The writeset of an empty transaction must be empty. */
|
|
if (is_empty_transaction_in_binlog_cache(thd))
|
|
DBUG_ASSERT(writeset->size() == 0);
|
|
#endif
|
|
|
|
/*
|
|
Check if this transaction has a writeset, if the writeset will overflow the
|
|
history size, if the transaction_write_set_extraction is consistent
|
|
between session and global or if changes in the tables referenced in this
|
|
transaction cascade to other tables. If that happens revert to using the
|
|
COMMIT_ORDER and clear the history to keep data consistent.
|
|
*/
|
|
bool can_use_writesets=
|
|
// empty writeset implies DDL or similar, except if there are missing keys
|
|
(writeset->size() != 0 || write_set_ctx->get_has_missing_keys() ||
|
|
/*
|
|
The empty transactions do not need to clear the writeset history, since
|
|
they can be executed in parallel.
|
|
*/
|
|
is_empty_transaction_in_binlog_cache(thd)) &&
|
|
// hashing algorithm for the session must be the same as used by other rows in history
|
|
(global_system_variables.transaction_write_set_extraction ==
|
|
thd->variables.transaction_write_set_extraction) &&
|
|
// must not use foreign keys
|
|
!write_set_ctx->get_has_related_foreign_keys();
|
|
bool exceeds_capacity= false;
|
|
|
|
if (can_use_writesets)
|
|
{
|
|
/*
|
|
Check if adding this transaction exceeds the capacity of the writeset
|
|
history. If that happens, m_writeset_history will be cleared only after
|
|
using its information for current transaction.
|
|
*/
|
|
exceeds_capacity=
|
|
m_writeset_history.size() + writeset->size() > m_opt_max_history_size;
|
|
|
|
/*
|
|
Compute the greatest sequence_number among all conflicts and add the
|
|
transaction's row hashes to the history.
|
|
*/
|
|
int64 last_parent= m_writeset_history_start;
|
|
for (std::set<uint64>::iterator it= writeset->begin();
|
|
it != writeset->end(); ++it)
|
|
{
|
|
Writeset_history::iterator hst= m_writeset_history.find(*it);
|
|
if (hst != m_writeset_history.end())
|
|
{
|
|
if (hst->second > last_parent && hst->second < sequence_number)
|
|
last_parent= hst->second;
|
|
|
|
hst->second= sequence_number;
|
|
}
|
|
else
|
|
{
|
|
if (!exceeds_capacity)
|
|
m_writeset_history.insert(std::pair<uint64, int64>(*it, sequence_number));
|
|
}
|
|
}
|
|
|
|
/*
|
|
If the transaction references tables with missing primary keys revert to
|
|
COMMIT_ORDER, update and not reset history, as it is unnecessary because
|
|
any transaction that refers this table will also revert to COMMIT_ORDER.
|
|
*/
|
|
if (!write_set_ctx->get_has_missing_keys())
|
|
{
|
|
/*
|
|
The WRITESET commit_parent then becomes the minimum of largest parent
|
|
found using the hashes of the row touched by the transaction and the
|
|
commit parent calculated with COMMIT_ORDER.
|
|
*/
|
|
commit_parent= std::min(last_parent, commit_parent);
|
|
}
|
|
}
|
|
|
|
if (exceeds_capacity || !can_use_writesets)
|
|
{
|
|
m_writeset_history_start= sequence_number;
|
|
m_writeset_history.clear();
|
|
}
|
|
}
|
|
|
|
void
|
|
Writeset_trx_dependency_tracker::rotate(int64 start)
|
|
{
|
|
m_writeset_history_start= start;
|
|
m_writeset_history.clear();
|
|
}
|
|
|
|
/**
|
|
Get the writeset commit parent of transactions using the session dependencies.
|
|
|
|
@param[in] thd Current THD from which to extract trx context.
|
|
@param[in,out] sequence_number Sequence number of current transaction.
|
|
@param[in,out] commit_parent Commit_parent of current transaction,
|
|
pre-filled with the commit_parent calculated
|
|
by the Write_set_trx_dependency_tracker as a
|
|
fall-back.
|
|
*/
|
|
void
|
|
Writeset_session_trx_dependency_tracker::get_dependency(THD *thd,
|
|
int64 &sequence_number,
|
|
int64 &commit_parent)
|
|
{
|
|
int64 session_parent= thd->rpl_thd_ctx.dependency_tracker_ctx().
|
|
get_last_session_sequence_number();
|
|
|
|
if (session_parent != 0 && session_parent < sequence_number)
|
|
commit_parent= std::max(commit_parent, session_parent);
|
|
|
|
thd->rpl_thd_ctx.dependency_tracker_ctx().
|
|
set_last_session_sequence_number(sequence_number);
|
|
}
|
|
|
|
/**
|
|
Get the dependencies in a transaction, the main entry point for the
|
|
dependency tracking work.
|
|
*/
|
|
void
|
|
Transaction_dependency_tracker::get_dependency(THD *thd,
|
|
int64 &sequence_number,
|
|
int64 &commit_parent)
|
|
{
|
|
sequence_number= commit_parent= 0;
|
|
|
|
switch(m_opt_tracking_mode)
|
|
{
|
|
case DEPENDENCY_TRACKING_COMMIT_ORDER:
|
|
m_commit_order.get_dependency(thd, sequence_number, commit_parent);
|
|
break;
|
|
case DEPENDENCY_TRACKING_WRITESET:
|
|
m_commit_order.get_dependency(thd, sequence_number, commit_parent);
|
|
m_writeset.get_dependency(thd, sequence_number, commit_parent);
|
|
break;
|
|
case DEPENDENCY_TRACKING_WRITESET_SESSION:
|
|
m_commit_order.get_dependency(thd, sequence_number, commit_parent);
|
|
m_writeset.get_dependency(thd, sequence_number, commit_parent);
|
|
m_writeset_session.get_dependency(thd, sequence_number, commit_parent);
|
|
break;
|
|
default:
|
|
DBUG_ASSERT(0); // blow up on debug
|
|
/*
|
|
Fallback to commit order on production builds.
|
|
*/
|
|
m_commit_order.get_dependency(thd, sequence_number, commit_parent);
|
|
}
|
|
}
|
|
|
|
void
|
|
Transaction_dependency_tracker::tracking_mode_changed()
|
|
{
|
|
Logical_clock max_committed_transaction=
|
|
m_commit_order.get_max_committed_transaction();
|
|
int64 timestamp= max_committed_transaction.get_timestamp()
|
|
- max_committed_transaction.get_offset();
|
|
|
|
m_writeset.rotate(timestamp);
|
|
}
|
|
|
|
/**
|
|
The method is to be executed right before committing time.
|
|
It must be invoked even if the transaction does not commit
|
|
to engine being merely logged into the binary log.
|
|
max_committed_transaction is updated with a greater timestamp
|
|
value.
|
|
As a side effect, the transaction context's sequence_number
|
|
is reset.
|
|
|
|
@param thd a pointer to THD instance
|
|
*/
|
|
void
|
|
Transaction_dependency_tracker::update_max_committed(THD *thd)
|
|
{
|
|
Transaction_ctx *trn_ctx= thd->get_transaction();
|
|
m_commit_order.update_max_committed(trn_ctx->sequence_number);
|
|
/*
|
|
sequence_number timestamp isn't needed anymore, so it's cleared off.
|
|
*/
|
|
trn_ctx->sequence_number= SEQ_UNINIT;
|
|
|
|
DBUG_ASSERT(trn_ctx->last_committed == SEQ_UNINIT ||
|
|
thd->commit_error == THD::CE_FLUSH_ERROR);
|
|
}
|
|
|
|
/**
  Step the commit-order tracker.

  @return the value returned by stepping the commit-order tracker.
*/
int64
Transaction_dependency_tracker::step()
{
  const int64 stepped= m_commit_order.step();
  return stepped;
}
|
|
|
|
void
|
|
Transaction_dependency_tracker::rotate()
|
|
{
|
|
m_commit_order.rotate();
|
|
/*
|
|
To make slave appliers be able to execute transactions in parallel
|
|
after rotation, set the minimum commit_parent to 1 after rotation.
|
|
*/
|
|
m_writeset.rotate(1);
|
|
if (current_thd)
|
|
current_thd->get_transaction()->sequence_number= 2;
|
|
}
|
|
|
|
/**
  Report the timestamp of the max-committed clock of the commit-order
  tracker.

  @return the "absolute" clock value, without the offset subtracted.
*/
int64 Transaction_dependency_tracker::get_max_committed_timestamp()
{
  const int64 absolute_value=
    m_commit_order.get_max_committed_transaction().get_timestamp();
  return absolute_value;
}
|
|
|
|
|