44import os
55import pytest
66import random
7- import requests
87import shlex
98import subprocess
109import time
# Skip this entire test module at collection time when the optional
# confluent_kafka dependency is not installed.
ck = pytest .importorskip ('confluent_kafka' )
2322
2423
def download_kafka(target):
    """Download the Kafka 1.0.0 release tarball and unpack it.

    Parameters
    ----------
    target : str
        Local path the downloaded archive is written to.  The archive is
        extracted with ``tar`` in ``os.path.dirname(target)``.

    Raises
    ------
    requests.HTTPError
        If the mirror returns an error status for the download.
    subprocess.CalledProcessError
        If extraction fails.

    NOTE(review): the tarball is fetched as ``'%s.tgz' % KAFKA_FILE`` but
    ``tar`` is invoked on ``KAFKA_FILE`` (module-level constant not visible
    here) rather than on *target* -- presumably *target*'s basename matches;
    confirm against the caller.
    """
    r = requests.get('http://apache.mirror.globo.tech/kafka/1.0.0/'
                     '%s.tgz' % KAFKA_FILE, stream=True)
    # Fail loudly on a bad HTTP status instead of silently writing an
    # HTML error page into the archive file.
    r.raise_for_status()
    with open(target, 'wb') as f:
        for chunk in r.iter_content(2 ** 20):  # stream in 1 MiB chunks
            f.write(chunk)
    subprocess.check_call(['tar', 'xzf', KAFKA_FILE],
                          cwd=os.path.dirname(target))
34-
3524def stop_docker (name = 'streamz-kafka' , cid = None , let_fail = False ):
3625 """Stop docker container with given name tag
3726
@@ -61,6 +50,7 @@ def stop_docker(name='streamz-kafka', cid=None, let_fail=False):
6150
6251def launch_kafka ():
6352 stop_docker (let_fail = True )
53+ subprocess .call (shlex .split ("docker pull spotify/kafka" ))
6454 cmd = ("docker run -d -p 2181:2181 -p 9092:9092 --env "
6555 "ADVERTISED_HOST=127.0.0.1 --env ADVERTISED_PORT=9092 "
6656 "--name streamz-kafka spotify/kafka" )
@@ -244,6 +234,61 @@ def test_kafka_dask_batch(c, s, w1, w2):
244234 stream .upstream .stopped = True
245235
246236
def test_kafka_batch_npartitions():
    """Batched Kafka consumption from a 2-partition topic with
    npartitions set to 0 (invalid), 1 (half the data) and 4 (all data)."""
    group_a = random.randint(0, 10000)
    group_b = group_a + 1
    consumer_a = {'bootstrap.servers': 'localhost:9092',
                  'group.id': 'streamz-test%i' % group_a,
                  'enable.auto.commit': False,
                  'auto.offset.reset': 'earliest'}
    consumer_b = {'bootstrap.servers': 'localhost:9092',
                  'group.id': 'streamz-test%i' % group_b,
                  'enable.auto.commit': False,
                  'auto.offset.reset': 'earliest'}
    with kafka_service() as kafka:
        kafka, TOPIC = kafka

        TOPIC = "test-partitions"
        # Create a topic with exactly two partitions inside the container.
        subprocess.call(shlex.split("docker exec streamz-kafka "
                                    "/opt/kafka_2.11-0.10.1.0/bin/kafka-topics.sh "
                                    "--create --zookeeper localhost:2181 "
                                    "--replication-factor 1 --partitions 2 "
                                    "--topic test-partitions"))
        time.sleep(5)

        # Even-numbered values go to partition 0, odd-numbered to 1.
        for i in range(10):
            kafka.produce(TOPIC, b'value-%d' % i, partition=i % 2)
        kafka.flush()

        # npartitions must be positive.
        with pytest.raises(ValueError):
            stream1 = Stream.from_kafka_batched(TOPIC, consumer_a,
                                                asynchronous=True,
                                                npartitions=0)
            stream1.gather().sink_to_list()
            stream1.start()

        # A single consumer partition sees only half the messages, as one batch.
        stream2 = Stream.from_kafka_batched(TOPIC, consumer_a,
                                            asynchronous=True,
                                            npartitions=1)
        out2 = stream2.gather().sink_to_list()
        stream2.start()
        time.sleep(5)
        assert len(out2) == 1 and len(out2[0]) == 5
        stream2.upstream.stopped = True

        # More consumer slots than partitions: two batches, all 10 messages.
        stream3 = Stream.from_kafka_batched(TOPIC, consumer_b,
                                            asynchronous=True,
                                            npartitions=4)
        out3 = stream3.gather().sink_to_list()
        stream3.start()
        time.sleep(5)
        assert len(out3) == 2 and (len(out3[0]) + len(out3[1])) == 10
        stream3.upstream.stopped = True
290+
291+
247292def test_kafka_batch_checkpointing_sync_nodes ():
248293 '''
249294 Streams 1 and 3 have different consumer groups, while Stream 2
@@ -254,11 +299,13 @@ def test_kafka_batch_checkpointing_sync_nodes():
254299 j1 = random .randint (0 , 10000 )
255300 ARGS1 = {'bootstrap.servers' : 'localhost:9092' ,
256301 'group.id' : 'streamz-test%i' % j1 ,
257- 'enable.auto.commit' : False }
302+ 'enable.auto.commit' : False ,
303+ 'auto.offset.reset' : 'earliest' }
258304 j2 = j1 + 1
259305 ARGS2 = {'bootstrap.servers' : 'localhost:9092' ,
260306 'group.id' : 'streamz-test%i' % j2 ,
261- 'enable.auto.commit' : False }
307+ 'enable.auto.commit' : False ,
308+ 'auto.offset.reset' : 'earliest' }
262309 with kafka_service () as kafka :
263310 kafka , TOPIC = kafka
264311 for i in range (10 ):
@@ -291,11 +338,13 @@ def test_kafka_dask_checkpointing_sync_nodes(c, s, w1, w2):
291338 j1 = random .randint (0 , 10000 )
292339 ARGS1 = {'bootstrap.servers' : 'localhost:9092' ,
293340 'group.id' : 'streamz-test%i' % j1 ,
294- 'enable.auto.commit' : False }
341+ 'enable.auto.commit' : False ,
342+ 'auto.offset.reset' : 'earliest' }
295343 j2 = j1 + 1
296344 ARGS2 = {'bootstrap.servers' : 'localhost:9092' ,
297345 'group.id' : 'streamz-test%i' % j2 ,
298- 'enable.auto.commit' : False }
346+ 'enable.auto.commit' : False ,
347+ 'auto.offset.reset' : 'earliest' }
299348 with kafka_service () as kafka :
300349 kafka , TOPIC = kafka
301350 for i in range (10 ):
0 commit comments