Change parameter name to check_npartitions_every

chinmaychandak · chinmaychandak · commit 5ecf91857141 · 2020-10-19T11:36:37.000-07:00
diff --git a/streamz/sources.py b/streamz/sources.py
@@ -453,7 +453,7 @@ def _close_consumer(self):
 class FromKafkaBatched(Stream):
     """Base class for both local and cluster-based batched kafka processing"""
     def __init__(self, topic, consumer_params, poll_interval='1s',
-                 npartitions=None, refresh_cycles=None,
+                 npartitions=None, check_npartitions_every=None,
                  max_batch_size=10000, keys=False,
                  engine=None, **kwargs):
         self.consumer_params = consumer_params
@@ -463,7 +463,7 @@ def __init__(self, topic, consumer_params, poll_interval='1s',
             consumer_params['auto.offset.reset'] = 'latest'
         self.topic = topic
         self.npartitions = npartitions
-        self.refresh_cycles = refresh_cycles
+        self.check_npartitions_every = check_npartitions_every
         if self.npartitions is not None and self.npartitions <= 0:
             raise ValueError("Number of Kafka topic partitions must be > 0.")
         self.poll_interval = convert_interval(poll_interval)
@@ -511,12 +511,12 @@ def checkpoint_emit(_part):
                 break
 
         try:
-            if self.refresh_cycles is not None:
+            if self.check_npartitions_every is not None:
                 cycles = 0
             while not self.stopped:
                 out = []
 
-                if self.refresh_cycles is not None and cycles == 0:
+                if self.check_npartitions_every is not None and cycles == 0:
                     kafka_cluster_metadata = self.consumer.list_topics(self.topic)
                     if self.engine == "cudf":  # pragma: no cover
                         new_partitions = len(kafka_cluster_metadata[self.topic.encode('utf-8')])
@@ -548,8 +548,8 @@ def checkpoint_emit(_part):
                         self.positions[partition] = high
                 self.consumer_params['auto.offset.reset'] = 'earliest'
 
-                if self.refresh_cycles is not None:
-                    cycles = (cycles + 1) % self.refresh_cycles
+                if self.check_npartitions_every is not None:
+                    cycles = (cycles + 1) % self.check_npartitions_every
 
                 for part in out:
                     yield self.loop.add_callback(checkpoint_emit, part)
@@ -580,7 +580,7 @@ def start(self):
 
 @Stream.register_api(staticmethod)
 def from_kafka_batched(topic, consumer_params, poll_interval='1s',
-                       npartitions=None, refresh_cycles=None,
+                       npartitions=None, check_npartitions_every=None,
                        start=False, dask=False,
                        max_batch_size=10000, keys=False,
                        engine=None, **kwargs):
@@ -617,12 +617,12 @@ def from_kafka_batched(topic, consumer_params, poll_interval='1s',
     npartitions: int (None)
         | Number of partitions in the topic.
         | If None, streamz will poll Kafka to get the number of partitions.
-     refresh_cycles: int (None)
+     check_npartitions_every: int (None)
         | Useful if the user expects to increase the number of partitions on the fly,
         | maybe to handle spikes in load, etc. Streamz polls Kafka after every
-        | 'refresh cycles' number of batches to determine the current number of topic
-        | partitions. If partitions have been added, streamz will automatically start
-        | reading data from the new partitions as well.
+        | 'check_npartitions_every' number of batches/cycles to determine the current
+        | number of topic partitions. If partitions have been added, streamz will
+        | automatically start reading data from the new partitions as well.
         | If set to None, streamz will not accommodate changing partitions on the fly.
         | It is recommended to restart the stream after decreasing the number of partitions.
     start: bool (False)
@@ -675,7 +675,7 @@ def from_kafka_batched(topic, consumer_params, poll_interval='1s',
     source = FromKafkaBatched(topic, consumer_params,
                               poll_interval=poll_interval,
                               npartitions=npartitions,
-                              refresh_cycles=refresh_cycles,
+                              check_npartitions_every=check_npartitions_every,
                               max_batch_size=max_batch_size,
                               keys=keys,
                               engine=engine,
diff --git a/streamz/tests/test_kafka.py b/streamz/tests/test_kafka.py
@@ -290,7 +290,7 @@ def test_kafka_batch_npartitions():
         stream3.upstream.stopped = True
 
 
-def test_kafka_refresh_cycles():
+def test_kafka_check_npartitions_every():
     j1 = random.randint(0, 10000)
     ARGS = {'bootstrap.servers': 'localhost:9092',
             'group.id': 'streamz-test%i' % j1,
@@ -315,7 +315,7 @@ def test_kafka_refresh_cycles():
 
         stream = Stream.from_kafka_batched(TOPIC, ARGS,
                                            asynchronous=True,
-                                           refresh_cycles=1,
+                                           check_npartitions_every=1,
                                            poll_interval='2s')
         out = stream.gather().sink_to_list()
         stream.start()