Fix for 2963: dsreplication status large value for missing changes
dsreplication status sometimes returns very unexpected high values. This
is due to the fact that the replication server monitoring algorithm
sometimes does not compute well the max server state available in the
whole topology and thus is sometimes calling
ChangeNumber.diffSeqNum(CN1,CN2) with a CN1 lower than CN2.
| | |
| | | { |
| | | Date date = new Date(timeStamp); |
| | | return String.format( |
| | | "%016x%04x%08x (%s,%d,%d)", |
| | | "%016x%04x%08x (sid=%d,tsd=%s,ts=%d,seqnum=%d)", |
| | | timeStamp, serverId, seqnum, |
| | | date.toString(), serverId, seqnum); |
| | | serverId, date.toString(), timeStamp, seqnum); |
| | | } |
| | | |
| | | /** |
| | |
| | | * @param op2 the second ChangeNumber |
| | | * @return the difference |
| | | */ |
| | | public static int diffSeqNum(ChangeNumber op1, ChangeNumber op2) |
| | | public static int diffSeqNum(ChangeNumber op1, ChangeNumber op2) |
| | | { |
| | | int totalCount = 0; |
| | | int max = op1.getSeqnum(); |
| | | long maxTimeStamp = op1.getTime(); |
| | | if (op1 != null) |
| | | if (op1 == null) |
| | | { |
| | | if (op2 != null) |
| | | return 0; |
| | | } |
| | | if (op2 == null) |
| | | { |
| | | return op1.getSeqnum(); |
| | | } |
| | | if (op2.newerOrEquals(op1)) |
| | | { |
| | | return 0; |
| | | } |
| | | |
| | | int seqnum1 = op1.getSeqnum(); |
| | | long time1 = op1.getTime(); |
| | | int seqnum2 = op2.getSeqnum(); |
| | | long time2 = op2.getTime(); |
| | | |
| | | if (time2 <= time1) |
| | | { |
| | | if (seqnum2 <= seqnum1) |
| | | { |
| | | int current = op2.getSeqnum(); |
| | | long currentTimestamp = op2.getTime(); |
| | | if (current != max) |
| | | { |
| | | if (currentTimestamp <= maxTimeStamp) |
| | | { |
| | | if (current < max) |
| | | { |
| | | totalCount += max - current; |
| | | } else |
| | | { |
| | | totalCount += Integer.MAX_VALUE - (current - max) + 1; |
| | | } |
| | | } |
| | | } |
| | | return seqnum1 - seqnum2; |
| | | } else |
| | | { |
| | | totalCount += max; |
| | | return Integer.MAX_VALUE - (seqnum2 - seqnum1) + 1; |
| | | } |
| | | } else |
| | | { |
| | | return 0; |
| | | } |
| | | return totalCount; |
| | | } |
| | | |
| | | /** |
| | |
| | | { |
| | | ChangeNumber change = list.get(key); |
| | | buffer.append(" "); |
| | | buffer.append(change); |
| | | buffer.append(change.toStringUI()); |
| | | } |
| | | } |
| | | |
| | |
| | | "+ diff("+lsjMaxCN+"-" |
| | | +lsiLastCN+")="+missingChangesLsiLsj; |
| | | |
| | | // Regarding a DS that is generating changes. If it is a local DS1, |
| | | // we get its server state, store it, then retrieve server states of |
| | | // remote DSs. When a remote server state is coming, it may contain |
| | | // a change number for DS1 which is newer than the one we locally |
| | | // stored in the server state of DS1. To prevent seeing DS1 has |
| | | // missing changes whereas it is wrong, we replace the value with 0 |
| | | // if it is a low value. We cannot overwrite big values as they may be |
| | | // useful for a local server retrieving changes it generated earlier, |
| | | // when it is recovering from an old snapshot and the local RS is |
| | | // sending him the changes it is missing. |
| | | if (lsjSid.equals(lsiSid)) { |
| | | if (missingChangesLsiLsj <= 50) |
| | | { |
| | | missingChangesLsiLsj = 0; |
| | | mds += " (diff replaced by 0 as for server id " + lsiSid + ")"; |
| | | } |
| | | } |
| | | |
| | | lsiMissingChanges += missingChangesLsiLsj; |
| | | } |
| | | } |
| | |
| | | "Complete monitor data : Missing changes ("+ lsiSid +")=" + mds); |
| | | } |
| | | this.setBuildDate(TimeThread.getTime()); |
| | | } |
| | | } |
| | | |
| | | /** |
| | | * Returns a <code>String</code> object representing this |
| | |
| | | while (rsite.hasNext()) |
| | | { |
| | | Short sid = rsite.next(); |
| | | mds += "\nRSData(" + sid + ")=\t "+ "afmd=" + fmRSDate.get(sid); |
| | | mds += "\nfmRSDate(" + sid + ")=\t "+ "afmd=" + fmRSDate.get(sid); |
| | | } |
| | | |
| | | // maxCNs |
| | |
| | | { |
| | | Short sid = itc.next(); |
| | | ChangeNumber cn = maxCNs.get(sid); |
| | | mds += "\nmaxCNs(" + sid + ")= " + cn.toString(); |
| | | mds += "\nmaxCNs(" + sid + ")= " + cn.toStringUI(); |
| | | } |
| | | |
| | | // LDAP data |
| | |
| | | } |
| | | mds +=" missingCount=" + missingChanges.get(sid); |
| | | } |
| | | |
| | | // RS data |
| | | rsite = RSStates.keySet().iterator(); |
| | | while (rsite.hasNext()) |
| | | { |
| | | Short sid = rsite.next(); |
| | | ServerState ss = RSStates.get(sid); |
| | | mds += "\nRSData(" + sid + ")=\t" + "state=[" + ss.toString() |
| | | + "] missingCount=" + missingChangesRS.get(sid); |
| | | } |
| | | |
| | | // |
| | | mds += "--"; |
| | | mds += "\n--"; |
| | | return mds; |
| | | } |
| | | |
| | |
| | | // - from our own local db state |
| | | // - whatever they are directly or undirectly connected |
| | | ServerState dbServerState = getDbServerState(); |
| | | wrkMonitorData.setRSState(replicationServer.getServerId(), dbServerState); |
| | | Iterator<Short> it = dbServerState.iterator(); |
| | | while (it.hasNext()) |
| | | { |
| | |
| | | while (lsidIterator.hasNext()) |
| | | { |
| | | short sid = lsidIterator.next(); |
| | | wrkMonitorData.setLDAPServerState(sid, |
| | | msg.getLDAPServerState(sid).duplicate()); |
| | | ServerState dsServerState = msg.getLDAPServerState(sid); |
| | | wrkMonitorData.setMaxCNs(dsServerState); |
| | | wrkMonitorData.setLDAPServerState(sid, dsServerState); |
| | | wrkMonitorData.setFirstMissingDate(sid, |
| | | msg.getLDAPApproxFirstMissingDate(sid)); |
| | | } |
| | |
| | | // 3-0 = 3 |
| | | CN2 = new ChangeNumber((long)0, 0, (short)0); |
| | | assertEquals(ChangeNumber.diffSeqNum(CN1, CN2), 3); |
| | | |
| | | |
| | | // 3-1 = 2 |
| | | CN2 = new ChangeNumber((long)0, 1, (short)0); |
| | | assertEquals(ChangeNumber.diffSeqNum(CN1, CN2), 2); |
| | | |
| | | |
| | | // 3-3 = 0 |
| | | CN2 = new ChangeNumber((long)0, 3, (short)0); |
| | | assertEquals(ChangeNumber.diffSeqNum(CN1, CN2), 0); |
| | | |
| | | // 3-4 == MAXINT (modulo) |
| | | // 3-4 = 0 (CN1 must be newer otherwise 0 should be returned) |
| | | CN2 = new ChangeNumber((long)0, 4, (short)0); |
| | | assertEquals(ChangeNumber.diffSeqNum(CN1, CN2), Integer.MAX_VALUE); |
| | | assertEquals(ChangeNumber.diffSeqNum(CN1, CN2), 0); |
| | | |
| | | CN1 = new ChangeNumber((long)0, 0, (short)0); |
| | | |
| | | // 0-0 = 0 |
| | | CN2 = new ChangeNumber((long)0, 0, (short)0); |
| | | assertEquals(ChangeNumber.diffSeqNum(CN1, CN2), 0); |
| | | |
| | | // 0-1 = MAXINT(modulo) |
| | | |
| | | // 0-1 = 0 (CN1 must be newer otherwise 0 should be returned) |
| | | CN2 = new ChangeNumber((long)0, 1, (short)0); |
| | | assertEquals(ChangeNumber.diffSeqNum(CN1, CN2), Integer.MAX_VALUE); |
| | | assertEquals(ChangeNumber.diffSeqNum(CN1, CN2), 0); |
| | | |
| | | CN1 = new ChangeNumber((long)0, 5, (short)0); |
| | | CN2 = new ChangeNumber((long)0, 2, (short)0); |
| | | |
| | | // 5-null = 5 |
| | | assertEquals(ChangeNumber.diffSeqNum(CN1, null), 5); |
| | | |
| | | // null-2 = 0 |
| | | assertEquals(ChangeNumber.diffSeqNum(null, CN2), 0); |
| | | |
| | | // null-null = 0 |
| | | assertEquals(ChangeNumber.diffSeqNum(null, null), 0); |
| | | |
| | | CN1 = new ChangeNumber((long)1111111, 2, (short)0); |
| | | CN2 = new ChangeNumber((long)3333333, 4, (short)0); |
| | | |
| | | // CN1 older than CN2 -> 0 |
| | | assertEquals(ChangeNumber.diffSeqNum(CN1, CN2), 0); |
| | | |
| | | CN1 = new ChangeNumber((long)3333333, 1, (short)0); |
| | | CN2 = new ChangeNumber((long)1111111, Integer.MAX_VALUE-1, (short)0); |
| | | |
| | | // CN1 seqnum looped |
| | | assertEquals(ChangeNumber.diffSeqNum(CN1, CN2), 3); |
| | | } |
| | | } |