/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License, Version 1.0 only * (the "License"). You may not use this file except in compliance * with the License. * * You can obtain a copy of the license at legal-notices/CDDLv1_0.txt * or http://forgerock.org/license/CDDLv1.0.html. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at legal-notices/CDDLv1_0.txt. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: * Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END * * * Copyright 2008-2010 Sun Microsystems, Inc. * Portions Copyright 2011-2016 ForgeRock AS */ package org.opends.server.replication.service; import static org.opends.messages.ReplicationMessages.*; import static org.opends.server.replication.common.AssuredMode.*; import static org.opends.server.replication.common.StatusMachine.*; import static org.opends.server.util.CollectionUtils.*; import java.io.BufferedOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.net.SocketTimeoutException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.Date; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; import net.jcip.annotations.Immutable; import org.forgerock.i18n.LocalizableMessage; import org.forgerock.i18n.slf4j.LocalizedLogger; import 
org.forgerock.opendj.config.server.ConfigException; import org.forgerock.opendj.ldap.ResultCode; import org.opends.server.admin.std.meta.ReplicationDomainCfgDefn.AssuredType; import org.opends.server.admin.std.server.ReplicationDomainCfg; import org.opends.server.api.DirectoryThread; import org.opends.server.backends.task.Task; import org.opends.server.replication.common.*; import org.opends.server.replication.protocol.*; import org.opends.server.tasks.InitializeTargetTask; import org.opends.server.tasks.InitializeTask; import org.opends.server.types.Attribute; import org.opends.server.types.DN; import org.opends.server.types.DirectoryException; /** * This class should be used as a base for Replication implementations. *
* It is intended that developer in need of a replication mechanism * subclass this class with their own implementation. *
* The startup phase of the ReplicationDomain subclass, * should read the list of replication servers from the configuration, * instantiate a {@link ServerState} then start the publish service * by calling {@link #startPublishService()}. * At this point it can start calling the {@link #publish(UpdateMsg)} * method if needed. *
* When the startup phase reach the point when the subclass is ready * to handle updates the Replication Domain implementation should call the * {@link #startListenService()} method. * At this point a Listener thread is created on the Replication Service * and which can start receiving updates. *
* When updates are received the Replication Service calls the * {@link #processUpdate(UpdateMsg)} method. * ReplicationDomain implementation should implement the appropriate code * for replaying the update on the local repository. * When fully done the subclass must call the * {@link #processUpdateDone(UpdateMsg, String)} method. * This allows to process the update asynchronously if necessary. * *
* To propagate changes to other replica, a ReplicationDomain implementation * must use the {@link #publish(UpdateMsg)} method. *
* If the Full Initialization process is needed then implementation * for {@code importBackend(InputStream)} and * {@code exportBackend(OutputStream)} must be * provided. *
* Full Initialization of a replica can be triggered by LDAP clients * by creating InitializeTasks or InitializeTargetTask. * Full initialization can also be triggered from the ReplicationDomain * implementation using methods {@link #initializeRemote(int, Task)} * or {@link #initializeFromRemote(int, Task)}. *
* At shutdown time, the {@link #disableService()} method should be called to
* cleanly stop the replication service.
*/
public abstract class ReplicationDomain
{
/** Contains all the attributes included for the ECL (External Changelog). */
@Immutable
private static final class ECLIncludes
{
final Map
* String format: <server id>:<number of failed updates>
*/
private final Map
* String format: <server id>:<number of failed updates>
*/
private final Map
* TODO: fill that with all currently opened urls if no urls configured
*
* @return The referrals URLs this domain publishes.
*/
public Set
* The {@code exportBackend(OutputStream)} will therefore be called
* on this server, and the {@code importBackend(InputStream)}
* will be called on the remote server.
*
* The InputStream and OutputStream given as a parameter to those
* methods will be connected through the replication protocol.
*
* @param target The server-id of the server that should be initialized.
* The target can be discovered using the
* {@link #getReplicaInfos()} method.
* @param initTask The task that triggers this initialization and that should
* be updated with its progress.
*
* @throws DirectoryException If it was not possible to publish the
* Initialization message to the Topology.
*/
public void initializeRemote(int target, Task initTask)
throws DirectoryException
{
// Delegate to the full variant: this server acts both as the exporter and
// as the owner of the task, and the configured init window is used for
// flow control between exporter and importer.
initializeRemote(target, getServerId(), initTask, getInitWindow());
}
/**
* Process the initialization of some other server or servers in the topology
* specified by the target argument when this initialization specifying the
* server that requests the initialization.
*
* @param serverToInitialize The target server that should be initialized.
* @param serverRunningTheTask The server that initiated the export. It can
* be the serverID of this server, or the serverID of a remote server.
* @param initTask The task in this server that triggers this initialization
* and that should be updated with its progress. Null when the export is done
* following a request coming from a remote server (task is remote).
* @param initWindow The value of the initialization window for flow control
* between the importer and the exporter.
*
* @exception DirectoryException When an error occurs. No exception raised
* means success.
*/
protected void initializeRemote(int serverToInitialize,
int serverRunningTheTask, Task initTask, int initWindow)
throws DirectoryException
{
// Acquire the (single) import/export context for this domain; it is
// released near the end of this method.
// NOTE(review): presumably acquireIEContext(false) throws if an
// import/export is already in progress — confirm against its definition.
final ImportExportContext ieCtx = acquireIEContext(false);
/*
We manage the list of servers to initialize in order :
- to test at the end that all expected servers have reconnected
after their import and with the right genId
- to update the task with the server(s) where this test failed
*/
if (serverToInitialize == RoutableMsg.ALL_SERVERS)
{
logger.info(NOTE_FULL_UPDATE_ENGAGED_FOR_REMOTE_START_ALL,
countEntries(), getBaseDN(), getServerId());
ieCtx.startList.addAll(getReplicaInfos().keySet());
// We manage the list of servers with which a flow control can be enabled
// (flow control requires replication protocol V4 or later).
for (DSInfo dsi : getReplicaInfos().values())
{
if (dsi.getProtocolVersion()>= ProtocolVersion.REPLICATION_PROTOCOL_V4)
{
ieCtx.setAckVal(dsi.getDsId(), 0);
}
}
}
else
{
logger.info(NOTE_FULL_UPDATE_ENGAGED_FOR_REMOTE_START, countEntries(),
getBaseDN(), getServerId(), serverToInitialize);
ieCtx.startList.add(serverToInitialize);
// We manage the list of servers with which a flow control can be enabled
for (DSInfo dsi : getReplicaInfos().values())
{
if (dsi.getDsId() == serverToInitialize &&
dsi.getProtocolVersion()>= ProtocolVersion.REPLICATION_PROTOCOL_V4)
{
ieCtx.setAckVal(dsi.getDsId(), 0);
}
}
}
DirectoryException exportRootException = null;
// loop for the case where the exporter is the initiator
// NOTE(review): with "++attempt < 2" the 'continue' in the NewAttempt case
// below bumps attempt to 2 and exits the loop, so the export body is never
// actually re-run after a resend — verify the intended retry semantics.
int attempt = 0;
boolean done = false;
while (!done && ++attempt < 2) // attempt loop
{
try
{
ieCtx.exportTarget = serverToInitialize;
if (initTask != null)
{
ieCtx.initializeTask = initTask;
}
ieCtx.initializeCounters(countEntries());
ieCtx.msgCnt = 0;
ieCtx.initNumLostConnections = broker.getNumLostConnections();
ieCtx.initWindow = initWindow;
// Send start message to the peer
InitializeTargetMsg initTargetMsg = new InitializeTargetMsg(
getBaseDN(), getServerId(), serverToInitialize,
serverRunningTheTask, ieCtx.entryCount, initWindow);
broker.publish(initTargetMsg);
// Wait for all servers to be ok
waitForRemoteStartOfInit(ieCtx);
// Servers that left in the list are those for which we could not test
// that they have been successfully initialized.
if (!ieCtx.failureList.isEmpty())
{
throw new DirectoryException(
ResultCode.OTHER,
ERR_INIT_NO_SUCCESS_START_FROM_SERVERS.get(getBaseDN(), ieCtx.failureList));
}
// Stream the whole backend to the importer(s) over the replication
// protocol; ReplOutputStream turns writes into EntryMsg publications.
exportBackend(new BufferedOutputStream(new ReplOutputStream(this)));
// Notify the peer of the success
broker.publish(
new DoneMsg(getServerId(), initTargetMsg.getDestination()));
}
catch(DirectoryException exportException)
{
// Give priority to the first exception raised - stored in the context
final DirectoryException ieEx = ieCtx.exception;
exportRootException = ieEx != null ? ieEx : exportException;
}
if (logger.isTraceEnabled())
{
logger.trace("[IE] In " + broker.getReplicationMonitorInstanceName()
+ " export ends with connected=" + broker.isConnected()
+ " exportRootException=" + exportRootException);
}
if (exportRootException != null)
{
try
{
/*
Handling the errors during export
Note: we could have lost the connection and another thread
the listener one) has already managed to reconnect.
So we MUST rely on the test broker.isConnected()
ONLY to do 'wait to be reconnected by another thread'
(if not yet reconnected already).
*/
if (!broker.isConnected())
{
// We are still disconnected, so we wait for the listener thread
// to reconnect - wait 10s
if (logger.isTraceEnabled())
{
logger.trace("[IE] Exporter wait for reconnection by the listener thread");
}
// Poll every 100ms, up to ~10s, unless the broker is shutting down.
int att=0;
while (!broker.shuttingDown()
&& !broker.isConnected()
&& ++att < 100)
{
try { Thread.sleep(100); }
catch(Exception e){ /* do nothing */ }
}
}
if (initTask != null
&& broker.isConnected()
&& serverToInitialize != RoutableMsg.ALL_SERVERS)
{
/*
NewAttempt case : In the case where
- it's not an InitializeAll
- AND the previous export attempt failed
- AND we are (now) connected
- and we own the task and this task is not an InitializeAll
Let's :
- sleep to let time to the other peer to reconnect if needed
- and launch another attempt
*/
try { Thread.sleep(1000); }
catch(Exception e){ /* do nothing */ }
logger.info(NOTE_RESENDING_INIT_TARGET, exportRootException.getLocalizedMessage());
continue;
}
// Not retrying: report the root failure to the target replica.
broker.publish(new ErrorMsg(
serverToInitialize, exportRootException.getMessageObject()));
}
catch(Exception e)
{
// Ignore the failure raised while proceeding the root failure
}
}
// We are always done for this export ...
// ... except in the NewAttempt case (see above)
done = true;
} // attempt loop
// Wait for all servers to be ok, and build the failure list
waitForRemoteEndOfInit(ieCtx);
// Servers that left in the list are those for which we could not test
// that they have been successfully initialized.
if (!ieCtx.failureList.isEmpty() && exportRootException == null)
{
exportRootException = new DirectoryException(ResultCode.OTHER,
ERR_INIT_NO_SUCCESS_END_FROM_SERVERS.get(getGenerationID(), ieCtx.failureList));
}
// Don't forget to release IEcontext acquired at beginning.
releaseIEContext(); // FIXME should not this be in a finally?
final String cause = exportRootException == null ? ""
: exportRootException.getLocalizedMessage();
if (serverToInitialize == RoutableMsg.ALL_SERVERS)
{
logger.info(NOTE_FULL_UPDATE_ENGAGED_FOR_REMOTE_END_ALL,
getBaseDN(), getServerId(), cause);
}
else
{
logger.info(NOTE_FULL_UPDATE_ENGAGED_FOR_REMOTE_END,
getBaseDN(), getServerId(), serverToInitialize, cause);
}
if (exportRootException != null)
{
throw exportRootException;
}
}
/**
* For all remote servers in the start list:
* - wait it has finished the import and present the expected generationID,
* - build the failureList.
*/
private void waitForRemoteStartOfInit(ImportExportContext ieCtx)
{
final Set
* When this method is called, a request for initialization is sent to the
* remote source server requesting initialization.
*
*
* @param source The server-id of the source from which to initialize.
* The source can be discovered using the
* {@link #getReplicaInfos()} method.
*
* @param initTask The task that launched the initialization
* and should be updated of its progress.
*
* @throws DirectoryException If it was not possible to publish the
* Initialization message to the Topology.
* The task state is updated.
*/
public void initializeFromRemote(int source, Task initTask)
throws DirectoryException
{
if (logger.isTraceEnabled())
{
logger.trace("[IE] Entering initializeFromRemote for " + this);
}
// Fail fast when the broker is not connected; errMsg doubles as the
// error accumulator for the catch blocks below.
LocalizableMessage errMsg = !broker.isConnected()
? ERR_INITIALIZATION_FAILED_NOCONN.get(getBaseDN())
: null;
/*
We must not test here whether the remote source is connected to
the topology by testing if it stands in the replicas list since.
In the case of a re-attempt of initialization, the listener thread is
running this method directly coming from initialize() method and did
not processed any topology message in between the failure and the
new attempt.
*/
try
{
/*
We must immediately acquire a context to store the task inside
The context will be used when we (the listener thread) will receive
the InitializeTargetMsg, process the import, and at the end
update the task.
*/
final ImportExportContext ieCtx = acquireIEContext(true);
ieCtx.initializeTask = initTask;
ieCtx.attemptCnt = 0;
// The request message is kept in the context so that initialize()
// can republish it on a retry.
ieCtx.initReqMsgSent = new InitializeRequestMsg(
getBaseDN(), getServerId(), source, getInitWindow());
broker.publish(ieCtx.initReqMsgSent);
/*
The normal success processing is now to receive InitTargetMsg then
entries from the remote server.
The error cases are :
- either local error immediately caught below
- a remote error we will receive as an ErrorMsg
*/
}
catch(DirectoryException de)
{
errMsg = de.getMessageObject();
}
catch(Exception e)
{
// Should not happen
errMsg = LocalizableMessage.raw(e.getLocalizedMessage());
logger.error(errMsg);
}
// When error, update the task and raise the error to the caller
if (errMsg != null)
{
// No need to call here updateTaskCompletionState - will be done
// by the caller
releaseIEContext();
throw new DirectoryException(ResultCode.OTHER, errMsg);
}
}
/**
* Processes an InitializeTargetMsg received from a remote server
* meaning processes an initialization from the entries expected to be
* received now.
*
* @param initTargetMsgReceived The message received from the remote server.
*
* @param requesterServerId The serverId of the server that requested the
* initialization meaning the server where the
* task has initially been created (this server,
* or the remote server).
*/
private void initialize(InitializeTargetMsg initTargetMsgReceived, int requesterServerId)
{
if (logger.isTraceEnabled())
{
logger.trace("[IE] Entering initialize - domain=" + this);
}
InitializeTask initFromTask = null;
int source = initTargetMsgReceived.getSenderID();
// NOTE(review): if this server is the initiator, the context installed by
// initializeFromRemote() is reused; if none is installed, ieCtx could be
// null and the catch/finally below would NPE — confirm the invariant.
ImportExportContext ieCtx = importExportContext.get();
try
{
// Log starting
logger.info(NOTE_FULL_UPDATE_ENGAGED_FROM_REMOTE_START, getBaseDN(),
initTargetMsgReceived.getSenderID(), getServerId());
// Go into full update status
setNewStatus(StatusMachineEvent.TO_FULL_UPDATE_STATUS_EVENT);
// Acquire an import context if no already done (and initialize).
if (initTargetMsgReceived.getInitiatorID() != getServerId())
{
/*
The initTargetMsgReceived is for an import initiated by the remote server.
Test and set if no import already in progress
*/
ieCtx = acquireIEContext(true);
}
// Initialize stuff
ieCtx.importSource = source;
ieCtx.initializeCounters(initTargetMsgReceived.getEntryCount());
ieCtx.initWindow = initTargetMsgReceived.getInitWindow();
ieCtx.exporterProtocolVersion = getProtocolVersion(source);
initFromTask = (InitializeTask) ieCtx.initializeTask;
// Launch the import; ReplInputStream feeds the entries received from
// the exporter into the backend import.
importBackend(new ReplInputStream(this));
}
catch (DirectoryException e)
{
/*
Store the exception raised. It will be considered if no other exception
has been previously stored in the context
*/
ieCtx.setExceptionIfNoneSet(e);
}
finally
{
if (logger.isTraceEnabled())
{
logger.trace("[IE] Domain=" + this
+ " ends import with exception=" + ieCtx.getException()
+ " connected=" + broker.isConnected());
}
/*
It is necessary to restart (reconnect to RS) for different reasons
- when everything went well, reconnect in order to exchange
new state, new generation ID
- when we have connection failure, reconnect to retry a new import
right here, right now
we never want retryOnFailure if we fails reconnecting in the restart.
*/
broker.reStart(false);
// Retry at most once, and only when this server owns the task and the
// broker managed to reconnect.
if (ieCtx.getException() != null
&& broker.isConnected()
&& initFromTask != null
&& ++ieCtx.attemptCnt < 2)
{
/*
Worth a new attempt
since initFromTask is in this server, connection is ok
*/
try
{
/*
Wait for the exporter to stabilize - eventually reconnect as
well if it was connected to the same RS than the one we lost ...
*/
Thread.sleep(1000);
/*
Restart the whole import protocol exchange by sending again
the request
*/
logger.info(NOTE_RESENDING_INIT_FROM_REMOTE_REQUEST,
ieCtx.getException().getLocalizedMessage());
broker.publish(ieCtx.initReqMsgSent);
// Reset the per-attempt state before the next InitializeTargetMsg.
ieCtx.initializeCounters(0);
ieCtx.exception = null;
ieCtx.msgCnt = 0;
// Processing of the received initTargetMsgReceived is done
// let's wait for the next one
return;
}
catch(Exception e)
{
/*
An error occurs when sending a new request for a new import.
This error is not stored, preferring to keep the initial one.
*/
logger.error(ERR_SENDING_NEW_ATTEMPT_INIT_REQUEST,
e.getLocalizedMessage(), ieCtx.getException().getLocalizedMessage());
}
}
// ===================
// No new attempt case
if (logger.isTraceEnabled())
{
logger.trace("[IE] Domain=" + this
+ " ends initialization with exception=" + ieCtx.getException()
+ " connected=" + broker.isConnected()
+ " task=" + initFromTask
+ " attempt=" + ieCtx.attemptCnt);
}
try
{
if (broker.isConnected() && ieCtx.getException() != null)
{
// Let's notify the exporter
ErrorMsg errorMsg = new ErrorMsg(requesterServerId,
ieCtx.getException().getMessageObject());
broker.publish(errorMsg);
}
/*
Update the task that initiated the import must be the last thing.
Particularly, broker.restart() after import success must be done
before some other operations/tasks to be launched,
like resetting the generation ID.
*/
if (initFromTask != null)
{
initFromTask.updateTaskCompletionState(ieCtx.getException());
}
}
finally
{
String errorMsg = ieCtx.getException() != null ? ieCtx.getException().getLocalizedMessage() : "";
logger.info(NOTE_FULL_UPDATE_ENGAGED_FROM_REMOTE_END,
getBaseDN(), initTargetMsgReceived.getSenderID(), getServerId(), errorMsg);
releaseIEContext();
} // finally
} // finally
}
/**
* Return the protocol version of the DS related to the provided serverId.
* Returns -1 when the protocol version is not known.
* @param dsServerId The provided serverId.
* @return The protocol version.
*/
private short getProtocolVersion(int dsServerId)
{
// Look up the replica by server id; unknown ids map to -1.
final DSInfo dsInfo = getReplicaInfos().get(dsServerId);
if (dsInfo == null)
{
return -1;
}
return dsInfo.getProtocolVersion();
}
/**
* Sets the status to a new value depending of the passed status machine
* event.
* @param event The event that may make the status be changed
*/
protected void signalNewStatus(StatusMachineEvent event)
{
// Apply the transition locally first (setNewStatus keeps the current
// status when the transition is invalid), then advertise the resulting
// status to the replication server through the broker.
setNewStatus(event);
broker.signalStatusChange(status);
}
private void setNewStatus(StatusMachineEvent event)
{
// Compute the target status from the current one; the status machine
// returns INVALID_STATUS for transitions it does not allow.
ServerStatus newStatus = StatusMachine.computeNewStatus(status, event);
if (newStatus == ServerStatus.INVALID_STATUS)
{
// Keep the current status and just report the rejected transition.
logger.error(ERR_DS_CANNOT_CHANGE_STATUS, getBaseDN(), getServerId(), status, event);
return;
}
if (newStatus != status)
{
// Reset status date
lastStatusChangeDate = new Date();
// Reset monitoring counters if reconnection
if (newStatus == ServerStatus.NOT_CONNECTED_STATUS)
{
resetMonitoringCounters();
}
status = newStatus;
if (logger.isTraceEnabled())
{
logger.trace("Replication domain " + getBaseDN()
+ " new status is: " + status);
}
// Perform whatever actions are needed to apply properties for being
// compliant with new status
updateDomainForNewStatus();
}
}
/**
* Returns a boolean indicating if an import or export is currently
* processed.
*
* @return The status
*/
public boolean ieRunning()
{
// An import or export is in progress exactly while a context is installed.
final ImportExportContext ctx = importExportContext.get();
return ctx != null;
}
/**
* Check the value of the Replication Servers generation ID.
*
* @param generationID The expected value of the generation ID.
*
* @throws DirectoryException When the generation ID of the Replication
* Servers is not the expected value.
*/
private void checkGenerationID(long generationID) throws DirectoryException
{
// Poll the connected RSes up to 50 times with a linearly growing back-off
// (i * 100 ms) until every RS either reports the expected generation id
// or is 'empty' (generationId == -1, considered a good citizen).
boolean allSet = true;
for (int i = 0; i < 50; i++)
{
allSet = true;
for (RSInfo rsInfo : getRsInfos())
{
final long rsGenId = rsInfo.getGenerationId();
if (rsGenId != -1 && rsGenId != generationID)
{
try
{
Thread.sleep(i * 100);
}
catch (InterruptedException e)
{
Thread.currentThread().interrupt();
}
allSet = false;
break;
}
}
if (allSet)
{
break;
}
}
if (!allSet)
{
throw new DirectoryException(ResultCode.OTHER,
ERR_RESET_GENERATION_ID_FAILED.get(getBaseDN()));
}
}
/**
* Reset the Replication Log.
* Calling this method will remove all the Replication information that
* was kept on all the Replication Servers currently connected in the
* topology.
*
* @throws DirectoryException If this ReplicationDomain is not currently
* connected to a Replication Server or it
* was not possible to contact it.
*/
void resetReplicationLog() throws DirectoryException
{
// Reset the Generation ID to -1 to clean the ReplicationServers.
resetGenerationId(-1L);
// check that at least one ReplicationServer did change its generation-id
checkGenerationID(-1);
// Reconnect to the Replication Server so that it adopts our GenerationID.
restartService();
// Wait for the domain to reconnect: poll every 100ms, at most 10 times.
// FIX: 'count' was never incremented, so the 'count < 10' bound was
// ineffective and the loop could spin until connection (or forever).
int count = 0;
while (!isConnected() && count < 10)
{
count++;
try
{
Thread.sleep(100);
} catch (InterruptedException e)
{
Thread.currentThread().interrupt();
}
}
resetGenerationId(getGenerationID());
// check that at least one ReplicationServer did change its generation-id
checkGenerationID(getGenerationID());
}
/**
* Reset the generationId of this domain in the whole topology.
* A message is sent to the Replication Servers for them to reset
* their change dbs.
*
* @param generationIdNewValue The new value of the generation Id.
* @throws DirectoryException When an error occurs
*/
public void resetGenerationId(Long generationIdNewValue)
throws DirectoryException
{
// Resolve the effective generation id once (null means "current value").
final long newGenId = getGenId(generationIdNewValue);
if (logger.isTraceEnabled())
{
logger.trace("Server id " + getServerId() + " and domain "
+ getBaseDN() + " resetGenerationId " + generationIdNewValue);
}
// Guard: publishing requires a live connection to a replication server.
if (!isConnected())
{
throw new DirectoryException(ResultCode.OTHER,
ERR_RESET_GENERATION_CONN_ERR_ID.get(getBaseDN(), getServerId(), newGenId));
}
broker.publish(new ResetGenerationIdMsg(newGenId));
// check that at least one ReplicationServer did change its generation-id
checkGenerationID(newGenId);
}
private long getGenId(Long generationIdNewValue)
{
// A null argument means "use this domain's current generation id".
return generationIdNewValue != null ? generationIdNewValue : getGenerationID();
}
/*
******** End of The total Update code *********
*/
/*
******* Start of Monitoring Code **********
*/
/**
* Get the maximum receive window size.
*
* @return The maximum receive window size.
*/
int getMaxRcvWindow()
{
// Monitoring accessor: report 0 when no broker session exists.
return broker == null ? 0 : broker.getMaxRcvWindow();
}
/**
* Get the current receive window size.
*
* @return The current receive window size.
*/
int getCurrentRcvWindow()
{
// Monitoring accessor: report 0 when no broker session exists.
return broker == null ? 0 : broker.getCurrentRcvWindow();
}
/**
* Get the maximum send window size.
*
* @return The maximum send window size.
*/
int getMaxSendWindow()
{
// Monitoring accessor: report 0 when no broker session exists.
return broker == null ? 0 : broker.getMaxSendWindow();
}
/**
* Get the current send window size.
*
* @return The current send window size.
*/
int getCurrentSendWindow()
{
// Monitoring accessor: report 0 when no broker session exists.
return broker == null ? 0 : broker.getCurrentSendWindow();
}
/**
* Get the number of times the replication connection was lost.
* @return The number of times the replication connection was lost.
*/
int getNumLostConnections()
{
// Monitoring accessor: report 0 when no broker session exists.
return broker == null ? 0 : broker.getNumLostConnections();
}
/**
* Determine whether the connection to the replication server is encrypted.
* @return true if the connection is encrypted, false otherwise.
*/
boolean isSessionEncrypted()
{
// No broker means no session, hence no encryption to report.
if (broker == null)
{
return false;
}
return broker.isSessionEncrypted();
}
/**
* Check if the domain is connected to a ReplicationServer.
*
* @return true if the server is connected, false if not.
*/
public boolean isConnected()
{
// Without a broker the domain cannot be connected to any RS.
if (broker == null)
{
return false;
}
return broker.isConnected();
}
/**
* Check if the domain has a connection error.
* A Connection error happens when the broker could not be created
* or when the broker could not find any ReplicationServer to connect to.
*
* @return true if the domain has a connection error.
*/
public boolean hasConnectionError()
{
// A missing broker is itself treated as a connection error.
if (broker == null)
{
return true;
}
return broker.hasConnectionError();
}
/**
* Get the name of the replicationServer to which this domain is currently
* connected.
*
* @return the name of the replicationServer to which this domain
* is currently connected.
*/
public String getReplicationServer()
{
// Report the placeholder name when no broker session exists.
return broker == null
? ReplicationBroker.NO_CONNECTED_SERVER
: broker.getReplicationServer();
}
/*
 * Monitoring getters for the assured safe-read counters. Each getter
 * performs a single read of its counter and requires no locking by callers.
 */
/**
* Gets the number of updates sent in assured safe read mode.
* @return The number of updates sent in assured safe read mode.
*/
public int getAssuredSrSentUpdates()
{
return assuredSrSentUpdates.get();
}
/**
* Gets the number of updates sent in assured safe read mode that have been
* acknowledged without errors.
* @return The number of updates sent in assured safe read mode that have been
* acknowledged without errors.
*/
public int getAssuredSrAcknowledgedUpdates()
{
return assuredSrAcknowledgedUpdates.get();
}
/**
* Gets the number of updates sent in assured safe read mode that have not
* been acknowledged.
* @return The number of updates sent in assured safe read mode that have not
* been acknowledged.
*/
public int getAssuredSrNotAcknowledgedUpdates()
{
return assuredSrNotAcknowledgedUpdates.get();
}
/**
* Gets the number of updates sent in assured safe read mode that have not
* been acknowledged due to timeout error.
* @return The number of updates sent in assured safe read mode that have not
* been acknowledged due to timeout error.
*/
public int getAssuredSrTimeoutUpdates()
{
return assuredSrTimeoutUpdates.get();
}
/**
* Gets the number of updates sent in assured safe read mode that have not
* been acknowledged due to wrong status error.
* @return The number of updates sent in assured safe read mode that have not
* been acknowledged due to wrong status error.
*/
public int getAssuredSrWrongStatusUpdates()
{
return assuredSrWrongStatusUpdates.get();
}
/**
* Gets the number of updates sent in assured safe read mode that have not
* been acknowledged due to replay error.
* @return The number of updates sent in assured safe read mode that have not
* been acknowledged due to replay error.
*/
public int getAssuredSrReplayErrorUpdates()
{
return assuredSrReplayErrorUpdates.get();
}
/**
* Gets the number of updates sent in assured safe read mode that have not
* been acknowledged per server.
* @return A copy of the map that contains the number of updates sent in
* assured safe read mode that have not been acknowledged per server.
*/
public Map
* After this method has been called, the Replication Service will start
* calling the {@link #processUpdate(UpdateMsg)}.
*
* This method must be called once and must be called after the
* {@link #startPublishService()}.
*/
public void startListenService()
{
synchronized (sessionLock)
{
// Idempotent: a second call while the listener is running does nothing.
if (listenerThread != null)
{
return;
}
final String threadName = "Replica DS(" + getServerId() + ") listener for domain \"" + getBaseDN() + "\"";
listenerThread = new DirectoryThread(new Runnable()
{
@Override
public void run()
{
if (logger.isTraceEnabled())
{
logger.trace("Replication Listener thread starting.");
}
// Loop processing any incoming update messages.
while (!listenerThread.isShutdownInitiated())
{
final UpdateMsg updateMsg = receive();
if (updateMsg == null)
{
// The server is shutting down.
listenerThread.initiateShutdown();
}
else if (processUpdate(updateMsg)
&& updateMsg.contributesToDomainState())
{
/*
* Warning: in synchronous mode, no way to tell the replay of an
* update went wrong Just put null in processUpdateDone so that if
* assured replication is used the ack is sent without error at
* replay flag.
*/
processUpdateDone(updateMsg, null);
// Record the CSN so the server state reflects the replayed update.
state.update(updateMsg.getCSN());
}
}
if (logger.isTraceEnabled())
{
logger.trace("Replication Listener thread stopping.");
}
}
}, threadName);
listenerThread.start();
}
}
/**
* Temporarily disable the Replication Service.
* The Replication Service can be enabled again using
* {@link #enableService()}.
*
* It can be useful to disable the Replication Service when the
* repository where the replicated information is stored becomes
* temporarily unavailable and replicated updates can therefore not
* be replayed during a while. This method is not MT safe.
*/
public void disableService()
{
synchronized (sessionLock)
{
/*
Stop the broker first in order to prevent the listener from
reconnecting - see OPENDJ-457.
*/
if (broker != null)
{
broker.stop();
}
// Stop the listener thread
if (listenerThread != null)
{
listenerThread.initiateShutdown();
try
{
listenerThread.join();
}
catch (InterruptedException e)
{
// Give up waiting, but restore the interrupt status so callers
// further up the stack can still observe the interruption.
// FIX: the interrupt was previously swallowed silently.
Thread.currentThread().interrupt();
}
listenerThread = null;
}
}
}
/**
* Returns {@code true} if the listener thread is shutting down or has
* shutdown.
*
* @return {@code true} if the listener thread is shutting down or has
* shutdown.
*/
protected final boolean isListenerShuttingDown()
{
// Snapshot the field once: another thread may null it out concurrently.
final DirectoryThread thread = listenerThread;
if (thread == null)
{
return true;
}
return thread.isShutdownInitiated();
}
/**
* Restart the Replication service after a {@link #disableService()}.
*
* The Replication Service will restart from the point indicated by the
* {@link ServerState} that was given as a parameter to the
* {@link #startPublishService()} at startup time.
*
* If some data have changed in the repository during the period of time when
* the Replication Service was disabled, this {@link ServerState} should
* therefore be updated by the Replication Domain subclass before calling this
* method. This method is not MT safe.
*/
public void enableService()
{
synchronized (sessionLock)
{
// Restart the broker session first, then make sure the listener thread
// is running again (startListenService is a no-op if already started).
broker.start();
startListenService();
}
}
/**
* Change some ReplicationDomain parameters.
*
* @param config
* The new configuration that this domain should now use.
*/
protected void changeConfig(ReplicationDomainCfg config)
{
// Nothing to reconfigure before the broker exists.
if (broker == null)
{
return;
}
// Only cycle the service when the broker reports an effective change.
if (broker.changeConfig(config))
{
restartService();
}
}
/**
* Applies a configuration change to the attributes which should be included
* in the ECL.
*
* @param includeAttributes
* attributes to be included with all change records.
* @param includeAttributesForDeletes
* additional attributes to be included with delete change records.
*/
public void changeConfig(Set
* This method will be called by a single thread and should therefore should
* not be blocking.
*
* @param updateMsg
* The {@link UpdateMsg} that was received.
* @return A boolean indicating if the processing is completed at return time.
* If
* It is useful for implementation needing to process the update in an
* asynchronous way or using several threads, but must be called even by
* implementation doing it in a synchronous, single-threaded way.
*
* @param msg
* The UpdateMsg whose processing was completed.
* @param replayErrorMsg
* if not null, this means an error occurred during the replay of
* this update, and this is the matching human readable message
* describing the problem.
*/
protected void processUpdateDone(UpdateMsg msg, String replayErrorMsg)
{
  broker.updateWindowAfterReplay();

  /*
   * Send an ack if it was requested and the group id is the same as the RS
   * one. Only Safe Read mode makes sense in DS for returning an ack.
   * The assured feature is supported starting from replication protocol V2.
   */
  if (msg.isAssured()
      && broker.getProtocolVersion() >= ProtocolVersion.REPLICATION_PROTOCOL_V2)
  {
    if (msg.getAssuredMode() == AssuredMode.SAFE_READ_MODE)
    {
      if (broker.getRsGroupId() == getGroupId())
      {
        // Send the ack
        AckMsg ackMsg = new AckMsg(msg.getCSN());
        if (replayErrorMsg != null)
        {
          // Mark the error in the ack
          // -> replay error occurred
          ackMsg.setHasReplayError(true);
          // -> replay error occurred in our server
          ackMsg.setFailedServers(newArrayList(getServerId()));
        }
        broker.publish(ackMsg);

        // Update the assured safe read monitoring counters.
        if (replayErrorMsg != null)
        {
          assuredSrReceivedUpdatesNotAcked.incrementAndGet();
        }
        else
        {
          assuredSrReceivedUpdatesAcked.incrementAndGet();
        }
      }
    }
    // FIXED: test the mode carried by the message, not the domain's
    // configured mode - the logged error reports the unknown mode of the
    // message itself, and SAFE_DATA messages must be silently accepted
    // regardless of the local configuration.
    else if (msg.getAssuredMode() != AssuredMode.SAFE_DATA_MODE)
    {
      logger.error(ERR_DS_UNKNOWN_ASSURED_MODE, getServerId(), msg.getAssuredMode(), getBaseDN(), msg);
    }
    // Nothing to do in Assured safe data mode, only RS ack updates.
  }

  incProcessedUpdates();
}
/**
* Prepare a message if it is to be sent in assured mode.
* If the assured mode is enabled, this method should be called before
* publish(UpdateMsg msg) method. This will configure the update accordingly
* before it is sent and will prepare the mechanism that will block until the
* matching ack is received. To wait for the ack after publish call, use
* the waitForAckIfAssuredEnabled() method.
* The expected typical usage in a service inheriting from this class is
* the following sequence:
* UpdateMsg msg = xxx;
* prepareWaitForAckIfAssuredEnabled(msg);
* publish(msg);
* waitForAckIfAssuredEnabled(msg);
*
* Note: prepareWaitForAckIfAssuredEnabled and waitForAckIfAssuredEnabled have
* no effect if assured replication is disabled.
* Note: this mechanism should not be used if using publish(byte[] msg)
* version as usage of these methods is already hidden inside.
*
* @param msg The update message to be sent soon.
*/
protected void prepareWaitForAckIfAssuredEnabled(UpdateMsg msg)
{
  /*
   * When assured replication is configured and the connected RS belongs to
   * our own group (same locality, i.e. a topology working in the same
   * geographical location), flag the message so that an ack is requested
   * in the configured assured mode. An RS with a different group id is
   * never asked for an ack.
   */
  if (!needsAck())
  {
    return;
  }

  msg.setAssured(true);
  final AssuredMode mode = getAssuredMode();
  msg.setAssuredMode(mode);
  if (mode == AssuredMode.SAFE_DATA_MODE)
  {
    msg.setSafeDataLevel(getAssuredSdLevel());
  }

  // Register the update in the list of messages awaiting acks, so that
  // waitForAckIfAssuredEnabled() can block until the matching ack arrives.
  waitingAckMsgs.put(msg.getCSN(), msg);
}
/** Indicates whether published updates must request an ack from the RS. */
private boolean needsAck()
{
  if (!isAssured())
  {
    return false;
  }
  // Only request acks from an RS that belongs to our own group.
  return broker.getRsGroupId() == getGroupId();
}
/**
* Wait for the processing of an assured message after it has been sent, if
* assured replication is configured, otherwise, do nothing.
* The prepareWaitForAckIfAssuredEnabled method should have been called
* before, see its comment for the full picture.
*
* @param msg The UpdateMsg for which we are waiting for an ack.
* @throws TimeoutException When the configured timeout occurs waiting for the
* ack.
*/
protected void waitForAckIfAssuredEnabled(UpdateMsg msg)
throws TimeoutException
{
  if (!needsAck())
  {
    // Not assured or bad group id, return immediately
    return;
  }

  // Increment assured replication monitoring counters
  switch (getAssuredMode())
  {
  case SAFE_READ_MODE:
    assuredSrSentUpdates.incrementAndGet();
    break;
  case SAFE_DATA_MODE:
    assuredSdSentUpdates.incrementAndGet();
    break;
  default:
    // Should not happen
  }

  // Wait for the ack to be received, timing out if necessary
  final long startTime = System.currentTimeMillis();
  synchronized (msg)
  {
    final CSN csn = msg.getCSN();
    // The matching ack handler removes the entry from waitingAckMsgs and
    // notifies on msg; we poll with a short wait as a safety net.
    while (waitingAckMsgs.containsKey(csn))
    {
      try
      {
        /*
         * WARNING: this timeout may be difficult to optimize: too low, it
         * may use too much CPU, too high, it may penalize performance...
         */
        msg.wait(10);
      }
      catch (InterruptedException e)
      {
        // FIXED: restore the interrupt status before giving up, so callers
        // further up the stack can observe the interruption.
        Thread.currentThread().interrupt();
        if (logger.isTraceEnabled())
        {
          logger.trace("waitForAck method interrupted for replication " +
              "baseDN: " + getBaseDN());
        }
        break;
      }
      // Timeout ?
      if (System.currentTimeMillis() - startTime >= getAssuredTimeout())
      {
        /*
         * Timeout occurred: make sure that the ack has not been received
         * in the meantime. If it has, the update is already gone from the
         * wait list and we can exit; otherwise remove it, update the
         * assured monitoring counters and report the timeout.
         */
        final UpdateMsg update = waitingAckMsgs.remove(csn);
        if (update == null)
        {
          // Ack received just before timeout limit: we can exit
          break;
        }
        // No luck, this is a real timeout
        // Increment assured replication monitoring counters
        switch (msg.getAssuredMode())
        {
        case SAFE_READ_MODE:
          assuredSrNotAcknowledgedUpdates.incrementAndGet();
          assuredSrTimeoutUpdates.incrementAndGet();
          // Increment number of errors for our RS
          updateAssuredErrorsByServer(assuredSrServerNotAcknowledgedUpdates,
              broker.getRsServerId());
          break;
        case SAFE_DATA_MODE:
          assuredSdTimeoutUpdates.incrementAndGet();
          // Increment number of errors for our RS
          updateAssuredErrorsByServer(assuredSdServerTimeoutUpdates,
              broker.getRsServerId());
          break;
        default:
          // Should not happen
        }
        throw new TimeoutException("No ack received for message csn: " + csn
            + " and replication domain: " + getBaseDN() + " after "
            + getAssuredTimeout() + " ms.");
      }
    }
  }
}
/**
* Publish an {@link UpdateMsg} to the Replication Service.
*
* The Replication Service will handle the delivery of this {@link UpdateMsg}
* to all the participants of this Replication Domain. These members will
* receive this {@link UpdateMsg} through a call to the
* {@link #processUpdate(UpdateMsg)} method.
*
* @param msg The UpdateMsg that should be published.
*/
public void publish(UpdateMsg msg)
{
  // Hand the update over to the broker for delivery to the topology.
  broker.publish(msg);

  // Only updates that carry domain changes advance the local server state.
  if (msg.contributesToDomainState())
  {
    state.update(msg.getCSN());
  }

  numSentUpdates.incrementAndGet();
}
/**
* Publishes a replica offline message if all pending changes for current
* replica have been sent out.
*/
public void publishReplicaOfflineMsg()
{
  // Intentionally empty: subclasses that track pending changes override
  // this hook to publish a replica-offline message once all pending
  // changes have been sent out.
}
/**
* This method should return the generationID to use for this
* ReplicationDomain.
* This method can be called at any time after the ReplicationDomain
* has been started.
*
* @return The GenerationID.
*/
public long getGenerationID()
{
  // Plain accessor: the value is maintained via setGenerationID().
  return generationId;
}
/**
* Sets the generationId for this replication domain.
*
* @param generationId
* the generationId to set
*/
public void setGenerationID(long generationId)
{
  // Plain mutator; no validation is performed on the supplied value.
  this.generationId = generationId;
}
/**
* Subclasses should use this method to add additional monitoring information
* in the ReplicationDomain.
*
* @return Additional monitoring attributes that will be added in the
* ReplicationDomain monitoring entry.
*/
public Collection true is returned, no further processing is
* necessary. If false is returned, the subclass should
* call the method {@link #processUpdateDone(UpdateMsg, String)} and
* update the ServerState When this processing is complete.
*/
public abstract boolean processUpdate(UpdateMsg updateMsg);
/**
* This method must be called after each call to
* {@link #processUpdate(UpdateMsg)} when the processing of the
* update is completed.
*