@@ -67,7 +67,6 @@ public static void setupCluster() throws Exception {
6767 cluster =
6868 configureCluster (NUM_NODES ).addConfig ("conf1" , configPath ).withEmbeddedZkQuorum ().build ();
6969 cluster .waitForAllNodes (60 );
70- log .info ("Cluster configured with {} nodes" , NUM_NODES );
7170 }
7271
7372 @ Test
@@ -77,16 +76,12 @@ public void testBasicQuorumFunctionality()
7776 JettySolrRunner node = cluster .getJettySolrRunner (i );
7877 assertTrue ("Node " + i + " should be running" , node .isRunning ());
7978 assertNotNull ("Node " + i + " should have a NodeName" , node .getNodeName ());
80- if (log .isInfoEnabled ()) {
81- log .info ("Node {} is running: {}" , i , node .getNodeName ());
82- }
8379 }
8480 }
8581
8682 @ Test
8783 public void testCollectionIndexing () throws Exception {
8884 try (CloudSolrClient client = cluster .getSolrClient (COLLECTION_NAME )) {
89- log .info ("Creating collection: {}" , COLLECTION_NAME );
9085 CollectionAdminRequest .Create createCmd =
9186 CollectionAdminRequest .createCollection (COLLECTION_NAME , "conf1" , 1 , 3 );
9287 createCmd .process (client );
@@ -145,9 +140,6 @@ public void testQuorumResilienceWithNodeFailure() throws Exception {
145140
146141 try {
147142 privateCluster .waitForAllNodes (60 );
148- log .info (
149- "Private cluster configured with {} nodes for testQuorumResilienceWithNodeFailure" ,
150- NUM_NODES );
151143
152144 // Create collection with replica on each node
153145 CollectionAdminRequest .createCollection (collectionName , "conf1" , 1 , 3 )
@@ -163,7 +155,9 @@ public void testQuorumResilienceWithNodeFailure() throws Exception {
163155 // Stop one node (quorum maintained with 2/3 nodes)
164156 JettySolrRunner stoppedNode = privateCluster .getJettySolrRunner (2 );
165157 String stoppedNodeName = stoppedNode .getNodeName ();
166- log .info ("Stopping node to test quorum resilience: {}" , stoppedNodeName );
158+ if (log .isInfoEnabled ()) {
159+ log .info ("Stopping node to test quorum resilience: {}" , stoppedNodeName );
160+ }
167161 privateCluster .stopJettySolrRunner (stoppedNode );
168162
169163 // Wait for ZK to detect node loss and verify cluster still operational
@@ -175,10 +169,10 @@ public void testQuorumResilienceWithNodeFailure() throws Exception {
175169 "documents while node down" ,
176170 120 ,
177171 TimeUnit .SECONDS );
178- log .info ("Cluster operational with 2/3 nodes (quorum maintained)" );
172+ if (log .isInfoEnabled ()) {
173+ log .info ("Starting node {} again and testing functionality" , stoppedNodeName );
174+ }
179175
180- // Restart node with same ports (critical for ZK quorum rejoining)
181- log .info ("Restarting node: {}" , stoppedNodeName );
182176 privateCluster .startJettySolrRunner (stoppedNode , true );
183177 privateCluster .waitForNode (stoppedNode , 120 );
184178
@@ -187,7 +181,7 @@ public void testQuorumResilienceWithNodeFailure() throws Exception {
187181
188182 // CRITICAL: Wait for collection to become active (replicas up, leader elected)
189183 // before attempting to index documents
190- privateCluster .waitForActiveCollection (collectionName , 1 , 3 );
184+ privateCluster .waitForActiveCollection (collectionName , 120 , TimeUnit . SECONDS , 1 , 3 );
191185
192186 privateCluster .waitForDocCount (
193187 collectionName ,
@@ -204,19 +198,10 @@ public void testQuorumResilienceWithNodeFailure() throws Exception {
204198 "all documents" ,
205199 120 ,
206200 TimeUnit .SECONDS );
207-
208- log .info (
209- "Node {} successfully rejoined quorum and cluster is fully operational" ,
210- stoppedNodeName );
211201 }
212202 } finally {
213- // Clean up collection and cluster
214- try {
215- CollectionAdminRequest .deleteCollection (collectionName )
216- .process (privateCluster .getSolrClient ());
217- } catch (Exception e ) {
218- log .warn ("Failed to delete collection {}: {}" , collectionName , e .getMessage ());
219- }
203+ CollectionAdminRequest .deleteCollection (collectionName )
204+ .process (privateCluster .getSolrClient ());
220205 privateCluster .shutdown ();
221206 }
222207 }
@@ -250,7 +235,6 @@ public void testQuorumLossAndRecovery() throws Exception {
250235
251236 try {
252237 privateCluster .waitForAllNodes (60 );
253- log .info ("Private cluster configured with {} nodes for testQuorumLossAndRecovery" , NUM_NODES );
254238
255239 // Create collection with 3 replicas (one on each node) to ensure at least
256240 // one replica survives when we stop 2 nodes
@@ -269,68 +253,55 @@ public void testQuorumLossAndRecovery() throws Exception {
269253 String node1Name = node1 .getNodeName ();
270254 String node2Name = node2 .getNodeName ();
271255
272- log .info ("Stopping 2 nodes to lose quorum: {}, {}" , node1Name , node2Name );
256+ if (log .isInfoEnabled ()) {
257+ log .info ("Stopping 2 nodes to lose quorum: {}, {}" , node1Name , node2Name );
258+ }
273259 privateCluster .stopJettySolrRunner (node1 );
274260 privateCluster .stopJettySolrRunner (node2 );
275261
276262 // Wait for ZK to detect quorum loss
277263 privateCluster .waitForLiveNodes (1 , 120 );
278- log .info ("Quorum lost - only 1/3 nodes remaining" );
279264
280265 // Restart both nodes to restore quorum
281- log .info ("Restarting nodes to restore quorum" );
266+ if (log .isInfoEnabled ()) {
267+ log .info ("Restarting nodes to restore quorum" );
268+ }
282269 privateCluster .startJettySolrRunner (node1 , true );
283270 privateCluster .startJettySolrRunner (node2 , true );
284271
285272 // Wait for both nodes to register with ZK (they should appear in live_nodes)
286273 // but we don't require them to be fully recovered immediately
287- try {
288- privateCluster .waitForNode (node1 , 120 );
289- privateCluster .waitForNode (node2 , 120 );
290- log .info ("Both nodes registered with ZooKeeper" );
291- } catch (Exception e ) {
292- log .warn (
293- "One or more nodes failed to fully register: {}. Continuing test to verify basic cluster operation." ,
294- e .getMessage ());
295- }
274+ privateCluster .waitForNode (node1 , 120 );
275+ privateCluster .waitForNode (node2 , 120 );
296276 privateCluster .waitForLiveNodes (3 , 120 );
297277
298278 // CRITICAL: Wait for collection to become active (replicas up, leader elected)
299279 // After catastrophic failure, we need to ensure at least one replica is active
300280 // before attempting operations
301- log .info ("Waiting for collection to become active..." );
302281 privateCluster .waitForActiveCollection (collectionName , 120 , TimeUnit .SECONDS , 1 , 1 );
303282
304283 // After catastrophic failure, the cluster should be operational with quorum restored
305284 // even if not all replicas are immediately active
306- log .info ("Verifying cluster can query existing data..." );
307285 try {
308286 privateCluster .waitForDocCount (
309287 collectionName , 1 , "document after recovery" , 120 , TimeUnit .SECONDS );
310288
311289 // Verify cluster accepts writes
312- log .info ("Verifying cluster accepts writes..." );
313290 indexDocuments (client , 1 , 1 , "after_recovery" );
314291 privateCluster .waitForDocCount (
315292 collectionName , 2 , "all documents after recovery" , 120 , TimeUnit .SECONDS );
316293
317- log .info ("Quorum restored successfully - cluster is operational" );
318294 } catch (Exception e ) {
319- log .error (
320- "Cluster failed to become operational after quorum restoration: {}" , e .getMessage ());
295+ if (log .isErrorEnabled ()) {
296+ log .error ("Cluster failed to become operational after quorum restoration" );
297+ }
321298 throw e ;
322299 }
323-
324- log .info ("Quorum restored and cluster fully operational" );
325300 }
326301 } finally {
327302 // Clean up collection and cluster
328- try {
329- CollectionAdminRequest .deleteCollection (collectionName )
330- .process (privateCluster .getSolrClient ());
331- } catch (Exception e ) {
332- log .warn ("Failed to delete collection {}: {}" , collectionName , e .getMessage ());
333- }
303+ CollectionAdminRequest .deleteCollection (collectionName )
304+ .process (privateCluster .getSolrClient ());
334305 privateCluster .shutdown ();
335306 }
336307 }
0 commit comments