Skip to content

Commit e68ffb9

Browse files
jiajunwangJiajun Wang
authored and committed
Refactor soft constraints to simplify the algorithm and fix potential issues. (#520)
* Refactor soft constraints to simplify the algorithm and fix potential issues. 1. Check for zero weight so as to avoid unnecessary calculations. 2. Simplify the soft constraint interfaces and implementations. Avoid duplicate code. 3. Adjust partition movements constraint logic to reduce the chance of moving partition when the baseline and best possible assignment diverge. 4. Estimate utilization in addition to the other usage estimation. The estimation will be used as a base when calculating the capacity usage score. This is to ensure the algorithm treats different clusters with different overall usage in the same way. 5. Fix the issue that high utilization calculation does not consider the current proposed replica usage. 6. Use Sigmoid to calculate usage-based soft constraints score. This enhances the assignment result of the algorithm. 7. Adjust the related test cases.
1 parent 7af2a4a commit e68ffb9

27 files changed

Lines changed: 330 additions & 240 deletions

helix-core/pom.xml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ under the License.
3737
org.I0Itec.zkclient*,
3838
org.apache.commons.cli*;version="[1.2,2)",
3939
org.apache.commons.io*;version="[1.4,2)",
40-
org.apache.commons.math*;version="[2.1,3)",
40+
org.apache.commons.math*;version="[2.1,4)",
4141
org.apache.jute*;resolution:=optional,
4242
org.apache.zookeeper.server.persistence*;resolution:=optional,
4343
org.apache.zookeeper.server.util*;resolution:=optional,
@@ -139,6 +139,11 @@ under the License.
139139
<artifactId>commons-math</artifactId>
140140
<version>2.1</version>
141141
</dependency>
142+
<dependency>
143+
<groupId>org.apache.commons</groupId>
144+
<artifactId>commons-math3</artifactId>
145+
<version>3.6.1</version>
146+
</dependency>
142147
<dependency>
143148
<groupId>commons-codec</groupId>
144149
<artifactId>commons-codec</artifactId>

helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ConstraintBasedAlgorithm.java

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -109,19 +109,22 @@ private Optional<AssignableNode> getNodeWithHighestPoints(AssignableReplica repl
109109
return Optional.empty();
110110
}
111111

112-
Function<AssignableNode, Float> calculatePoints =
112+
Function<AssignableNode, Double> calculatePoints =
113113
(candidateNode) -> getAssignmentNormalizedScore(candidateNode, replica, clusterContext);
114114

115115
return candidateNodes.stream().max(Comparator.comparing(calculatePoints));
116116
}
117117

118-
private float getAssignmentNormalizedScore(AssignableNode node, AssignableReplica replica,
118+
private double getAssignmentNormalizedScore(AssignableNode node, AssignableReplica replica,
119119
ClusterContext clusterContext) {
120-
float sum = 0;
120+
double sum = 0;
121121
for (Map.Entry<SoftConstraint, Float> softConstraintEntry : _softConstraints.entrySet()) {
122122
SoftConstraint softConstraint = softConstraintEntry.getKey();
123123
float weight = softConstraintEntry.getValue();
124-
sum += weight * softConstraint.getAssignmentNormalizedScore(node, replica, clusterContext);
124+
if (weight != 0) {
125+
// Skip calculating zero weighted constraints.
126+
sum += weight * softConstraint.getAssignmentNormalizedScore(node, replica, clusterContext);
127+
}
125128
}
126129
return sum;
127130
}

helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ConstraintBasedAlgorithmFactory.java

Lines changed: 14 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -24,31 +24,25 @@
2424
import java.util.Map;
2525
import java.util.Properties;
2626

27+
import com.google.common.collect.ImmutableList;
28+
import com.google.common.collect.Maps;
2729
import org.apache.helix.HelixManagerProperties;
2830
import org.apache.helix.SystemPropertyKeys;
2931
import org.apache.helix.controller.rebalancer.waged.RebalanceAlgorithm;
3032
import org.apache.helix.model.ClusterConfig;
3133

32-
import com.google.common.collect.ImmutableList;
33-
import com.google.common.collect.Maps;
34-
3534
/**
3635
* The factory class to create an instance of {@link ConstraintBasedAlgorithm}
3736
*/
3837
public class ConstraintBasedAlgorithmFactory {
39-
// Evenness constraints tend to score within a smaller range.
40-
// In order to let their scores cause enough difference in the final evaluation result, we need to
41-
// enlarge the overall weight of the evenness constraints compared with the movement constraint.
42-
// TODO: Tune or make the following factor configurable.
43-
private static final int EVENNESS_PREFERENCE_NORMALIZE_FACTOR = 50;
4438
private static final Map<String, Float> MODEL = new HashMap<String, Float>() {
4539
{
4640
// The default setting
47-
put(PartitionMovementConstraint.class.getSimpleName(), 1f);
48-
put(InstancePartitionsCountConstraint.class.getSimpleName(), 0.3f);
49-
put(ResourcePartitionAntiAffinityConstraint.class.getSimpleName(), 0.1f);
50-
put(ResourceTopStateAntiAffinityConstraint.class.getSimpleName(), 0.1f);
51-
put(MaxCapacityUsageInstanceConstraint.class.getSimpleName(), 0.5f);
41+
put(PartitionMovementConstraint.class.getSimpleName(), 2f);
42+
put(InstancePartitionsCountConstraint.class.getSimpleName(), 1f);
43+
put(ResourcePartitionAntiAffinityConstraint.class.getSimpleName(), 1f);
44+
put(ResourceTopStateAntiAffinityConstraint.class.getSimpleName(), 3f);
45+
put(MaxCapacityUsageInstanceConstraint.class.getSimpleName(), 5f);
5246
}
5347
};
5448

@@ -68,21 +62,19 @@ public static RebalanceAlgorithm getInstance(
6862
new ValidGroupTagConstraint(), new SamePartitionOnInstanceConstraint());
6963

7064
int evennessPreference =
71-
preferences.getOrDefault(ClusterConfig.GlobalRebalancePreferenceKey.EVENNESS, 1)
72-
* EVENNESS_PREFERENCE_NORMALIZE_FACTOR;
65+
preferences.getOrDefault(ClusterConfig.GlobalRebalancePreferenceKey.EVENNESS, 1);
7366
int movementPreference =
7467
preferences.getOrDefault(ClusterConfig.GlobalRebalancePreferenceKey.LESS_MOVEMENT, 1);
75-
float evennessRatio = (float) evennessPreference / (evennessPreference + movementPreference);
76-
float movementRatio = (float) movementPreference / (evennessPreference + movementPreference);
7768

78-
List<SoftConstraint> softConstraints = ImmutableList.of(new PartitionMovementConstraint(),
79-
new InstancePartitionsCountConstraint(), new ResourcePartitionAntiAffinityConstraint(),
80-
new ResourceTopStateAntiAffinityConstraint(), new MaxCapacityUsageInstanceConstraint());
69+
List<SoftConstraint> softConstraints = ImmutableList
70+
.of(new PartitionMovementConstraint(), new InstancePartitionsCountConstraint(),
71+
new ResourcePartitionAntiAffinityConstraint(),
72+
new ResourceTopStateAntiAffinityConstraint(), new MaxCapacityUsageInstanceConstraint());
8173
Map<SoftConstraint, Float> softConstraintsWithWeight = Maps.toMap(softConstraints, key -> {
8274
String name = key.getClass().getSimpleName();
8375
float weight = MODEL.get(name);
84-
return name.equals(PartitionMovementConstraint.class.getSimpleName()) ? movementRatio * weight
85-
: evennessRatio * weight;
76+
return name.equals(PartitionMovementConstraint.class.getSimpleName()) ?
77+
movementPreference * weight : evennessPreference * weight;
8678
});
8779

8880
return new ConstraintBasedAlgorithm(hardConstraints, softConstraintsWithWeight);

helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/InstancePartitionsCountConstraint.java

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -29,20 +29,13 @@
2929
* Discourage the assignment if the instance's occupancy rate is above average
3030
* The normalized score will be within [0, 1]
3131
*/
32-
class InstancePartitionsCountConstraint extends SoftConstraint {
33-
private static final float MAX_SCORE = 1f;
34-
private static final float MIN_SCORE = 0f;
35-
36-
InstancePartitionsCountConstraint() {
37-
super(MAX_SCORE, MIN_SCORE);
38-
}
32+
class InstancePartitionsCountConstraint extends UsageSoftConstraint {
3933

4034
@Override
41-
protected float getAssignmentScore(AssignableNode node, AssignableReplica replica,
35+
protected double getAssignmentScore(AssignableNode node, AssignableReplica replica,
4236
ClusterContext clusterContext) {
43-
float doubleEstimatedMaxPartitionCount = 2 * clusterContext.getEstimatedMaxPartitionCount();
44-
float currentPartitionCount = node.getAssignedReplicaCount();
45-
return Math.max((doubleEstimatedMaxPartitionCount - currentPartitionCount)
46-
/ doubleEstimatedMaxPartitionCount, 0);
37+
int estimatedMaxPartitionCount = clusterContext.getEstimatedMaxPartitionCount();
38+
int currentPartitionCount = node.getAssignedReplicaCount();
39+
return computeUtilizationScore(estimatedMaxPartitionCount, currentPartitionCount);
4740
}
4841
}

helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/MaxCapacityUsageInstanceConstraint.java

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -30,18 +30,13 @@
3030
* that it is that much less desirable to assign anything on the given node.
3131
* It is a greedy approach since it evaluates only on the most used capacity key.
3232
*/
33-
class MaxCapacityUsageInstanceConstraint extends SoftConstraint {
34-
private static final float MIN_SCORE = 0;
35-
private static final float MAX_SCORE = 1;
36-
37-
MaxCapacityUsageInstanceConstraint() {
38-
super(MAX_SCORE, MIN_SCORE);
39-
}
33+
class MaxCapacityUsageInstanceConstraint extends UsageSoftConstraint {
4034

4135
@Override
42-
protected float getAssignmentScore(AssignableNode node, AssignableReplica replica,
36+
protected double getAssignmentScore(AssignableNode node, AssignableReplica replica,
4337
ClusterContext clusterContext) {
44-
float maxCapacityUsage = node.getHighestCapacityUtilization();
45-
return 1.0f - maxCapacityUsage / 2.0f;
38+
float estimatedMaxUtilization = clusterContext.getEstimatedMaxUtilization();
39+
float projectedHighestUtilization = node.getProjectedHighestUtilization(replica.getCapacity());
40+
return computeUtilizationScore(estimatedMaxUtilization, projectedHighestUtilization);
4641
}
4742
}

helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/PartitionMovementConstraint.java

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -38,39 +38,33 @@
3838
* evaluated score will become lower.
3939
*/
4040
class PartitionMovementConstraint extends SoftConstraint {
41-
private static final float MAX_SCORE = 1f;
42-
private static final float MIN_SCORE = 0f;
41+
private static final double MAX_SCORE = 1f;
42+
private static final double MIN_SCORE = 0f;
4343
//TODO: these factors will be tuned based on user's preference
4444
// This factor indicates the default score that is evaluated if only partition allocation matches
4545
// (states are different).
46-
private static final float ALLOCATION_MATCH_FACTOR = 0.5f;
47-
// This factor indicates the contribution of the Baseline assignment matching to the final score.
48-
private static final float BASELINE_MATCH_FACTOR = 0.25f;
46+
private static final double ALLOCATION_MATCH_FACTOR = 0.5;
4947

5048
PartitionMovementConstraint() {
5149
super(MAX_SCORE, MIN_SCORE);
5250
}
5351

5452
@Override
55-
protected float getAssignmentScore(AssignableNode node, AssignableReplica replica,
53+
protected double getAssignmentScore(AssignableNode node, AssignableReplica replica,
5654
ClusterContext clusterContext) {
57-
Map<String, String> bestPossibleStateMap =
55+
// Prioritize the previous Best Possible assignment
56+
Map<String, String> bestPossibleAssignment =
5857
getStateMap(replica, clusterContext.getBestPossibleAssignment());
59-
Map<String, String> baselineStateMap =
58+
if (!bestPossibleAssignment.isEmpty()) {
59+
return calculateAssignmentScale(node, replica, bestPossibleAssignment);
60+
}
61+
// else, compare the baseline only if the best possible assignment does not contain the replica
62+
Map<String, String> baselineAssignment =
6063
getStateMap(replica, clusterContext.getBaselineAssignment());
61-
62-
// Prioritize the matching of the previous Best Possible assignment.
63-
float scale = calculateAssignmentScale(node, replica, bestPossibleStateMap);
64-
// If the baseline is also provided, adjust the final score accordingly.
65-
scale = scale * (1 - BASELINE_MATCH_FACTOR)
66-
+ calculateAssignmentScale(node, replica, baselineStateMap) * BASELINE_MATCH_FACTOR;
67-
68-
return scale;
69-
}
70-
71-
@Override
72-
NormalizeFunction getNormalizeFunction() {
73-
return score -> score * (getMaxScore() - getMinScore()) + getMinScore();
64+
if (!baselineAssignment.isEmpty()) {
65+
return calculateAssignmentScale(node, replica, baselineAssignment);
66+
}
67+
return 0;
7468
}
7569

7670
private Map<String, String> getStateMap(AssignableReplica replica,
@@ -83,14 +77,20 @@ private Map<String, String> getStateMap(AssignableReplica replica,
8377
return assignment.get(resourceName).getReplicaMap(new Partition(partitionName));
8478
}
8579

86-
private float calculateAssignmentScale(AssignableNode node, AssignableReplica replica,
80+
private double calculateAssignmentScale(AssignableNode node, AssignableReplica replica,
8781
Map<String, String> instanceToStateMap) {
8882
String instanceName = node.getInstanceName();
8983
if (!instanceToStateMap.containsKey(instanceName)) {
9084
return 0;
9185
} else {
92-
return (instanceToStateMap.get(instanceName).equals(replica.getReplicaState()) ? 1
93-
: ALLOCATION_MATCH_FACTOR);
86+
return (instanceToStateMap.get(instanceName).equals(replica.getReplicaState()) ? 1 :
87+
ALLOCATION_MATCH_FACTOR);
9488
}
9589
}
90+
91+
@Override
92+
protected NormalizeFunction getNormalizeFunction() {
93+
// PartitionMovementConstraint already scales the score properly.
94+
return (score) -> score;
95+
}
9696
}

helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ResourcePartitionAntiAffinityConstraint.java

Lines changed: 6 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -29,24 +29,15 @@
2929
* the same resource be assigned to the same node to minimize the impact of node failure scenarios.
3030
* The score is higher the fewer the partitions are on the node belonging to the same resource.
3131
*/
32-
class ResourcePartitionAntiAffinityConstraint extends SoftConstraint {
33-
private static final float MAX_SCORE = 1f;
34-
private static final float MIN_SCORE = 0f;
35-
36-
ResourcePartitionAntiAffinityConstraint() {
37-
super(MAX_SCORE, MIN_SCORE);
38-
}
39-
32+
class ResourcePartitionAntiAffinityConstraint extends UsageSoftConstraint {
4033
@Override
41-
protected float getAssignmentScore(AssignableNode node, AssignableReplica replica,
34+
protected double getAssignmentScore(AssignableNode node, AssignableReplica replica,
4235
ClusterContext clusterContext) {
4336
String resource = replica.getResourceName();
4437
int curPartitionCountForResource = node.getAssignedPartitionsByResource(resource).size();
45-
int doubleMaxPartitionCountForResource =
46-
2 * clusterContext.getEstimatedMaxPartitionByResource(resource);
47-
// The score measures the twice the max allowed count versus current counts
48-
// The returned value is a measurement of remaining quota ratio, in the case of exceeding allowed counts, return 0
49-
return Math.max(((float) doubleMaxPartitionCountForResource - curPartitionCountForResource)
50-
/ doubleMaxPartitionCountForResource, 0);
38+
int estimatedMaxPartitionCountForResource =
39+
clusterContext.getEstimatedMaxPartitionByResource(resource);
40+
return computeUtilizationScore(estimatedMaxPartitionCountForResource,
41+
curPartitionCountForResource);
5142
}
5243
}

helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ResourceTopStateAntiAffinityConstraint.java

Lines changed: 7 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -28,26 +28,17 @@
2828
* The higher the number of top state partitions assigned to the instance, the lower the
2929
* score, vice versa.
3030
*/
31-
class ResourceTopStateAntiAffinityConstraint extends SoftConstraint {
32-
private static final float MAX_SCORE = 1f;
33-
private static final float MIN_SCORE = 0f;
34-
35-
ResourceTopStateAntiAffinityConstraint() {
36-
super(MAX_SCORE, MIN_SCORE);
37-
}
38-
31+
class ResourceTopStateAntiAffinityConstraint extends UsageSoftConstraint {
3932
@Override
40-
protected float getAssignmentScore(AssignableNode node, AssignableReplica replica,
33+
protected double getAssignmentScore(AssignableNode node, AssignableReplica replica,
4134
ClusterContext clusterContext) {
4235
if (!replica.isReplicaTopState()) {
43-
return (getMaxScore() + getMinScore()) / 2.0f;
36+
// For non top state replica, this constraint is not applicable.
37+
// So return zero on any assignable node candidate.
38+
return 0;
4439
}
45-
4640
int curTopPartitionCountForResource = node.getAssignedTopStatePartitionsCount();
47-
int doubleMaxTopStateCount = 2 * clusterContext.getEstimatedMaxTopStateCount();
48-
49-
return Math.max(
50-
((float) doubleMaxTopStateCount - curTopPartitionCountForResource) / doubleMaxTopStateCount,
51-
0);
41+
int estimatedMaxTopStateCount = clusterContext.getEstimatedMaxTopStateCount();
42+
return computeUtilizationScore(estimatedMaxTopStateCount, curTopPartitionCountForResource);
5243
}
5344
}

helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/SoftConstraint.java

Lines changed: 10 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@
3030
* The lower the score, the worse the assignment; intuitively, the assignment is penalized.
3131
*/
3232
abstract class SoftConstraint {
33-
private float _maxScore = 1000f;
34-
private float _minScore = -1000f;
33+
private final double _maxScore;
34+
private final double _minScore;
3535

3636
interface NormalizeFunction {
3737
/**
@@ -40,47 +40,41 @@ interface NormalizeFunction {
4040
* @param originScore The origin score
4141
* @return The normalized value between (0, 1)
4242
*/
43-
float scale(float originScore);
44-
}
45-
46-
/**
47-
* Default constructor, uses default min/max scores
48-
*/
49-
SoftConstraint() {
43+
double scale(double originScore);
5044
}
5145

5246
/**
5347
* Child class customize the min/max score on its own
5448
* @param maxScore The max score
5549
* @param minScore The min score
5650
*/
57-
SoftConstraint(float maxScore, float minScore) {
51+
SoftConstraint(double maxScore, double minScore) {
5852
_maxScore = maxScore;
5953
_minScore = minScore;
6054
}
6155

62-
float getMaxScore() {
56+
protected double getMaxScore() {
6357
return _maxScore;
6458
}
6559

66-
float getMinScore() {
60+
protected double getMinScore() {
6761
return _minScore;
6862
}
6963

7064
/**
7165
* Evaluate and give a score for a potential assignment partition -> instance
7266
* Child class only needs to care about how the score is implemented
73-
* @return The score of the assignment in float value
67+
* @return The score of the assignment in double value
7468
*/
75-
protected abstract float getAssignmentScore(AssignableNode node, AssignableReplica replica,
69+
protected abstract double getAssignmentScore(AssignableNode node, AssignableReplica replica,
7670
ClusterContext clusterContext);
7771

7872
/**
7973
* Evaluate and give a score for a potential assignment partition -> instance
8074
* It's the only exposed method to the caller
8175
* @return The score is normalized to be within MinScore and MaxScore
8276
*/
83-
float getAssignmentNormalizedScore(AssignableNode node, AssignableReplica replica,
77+
double getAssignmentNormalizedScore(AssignableNode node, AssignableReplica replica,
8478
ClusterContext clusterContext) {
8579
return getNormalizeFunction().scale(getAssignmentScore(node, replica, clusterContext));
8680
}
@@ -90,7 +84,7 @@ float getAssignmentNormalizedScore(AssignableNode node, AssignableReplica replic
9084
* Child class could override the method and customize the method on its own
9185
* @return The MinMaxScaler instance by default
9286
*/
93-
NormalizeFunction getNormalizeFunction() {
87+
protected NormalizeFunction getNormalizeFunction() {
9488
return (score) -> (score - getMinScore()) / (getMaxScore() - getMinScore());
9589
}
9690
}

0 commit comments

Comments
 (0)