22
33import operator
44from collections import OrderedDict
5- from time import time
65import numpy as np
76import pandas as pd
87import toolz
@@ -802,10 +801,35 @@ def _accumulate(self, Agg, **kwargs):
802801 return Streaming (outstream , example , stream_type = stream_type )
803802
804803
805- def _random_df (tup ):
806- last , now , freq = tup
807- index = pd .date_range (start = (last + freq .total_seconds ()) * 1e9 ,
808- end = now * 1e9 , freq = freq )
def random_datapoint(now, **kwargs):
    """Example callback that reports one current value.

    Parameters
    ----------
    now: index label (e.g. pd.Timestamp)
        Used as the single index entry of the returned frame.
    **kwargs:
        Accepted and ignored, so the function can be called with
        extra keyword arguments such as ``last``.

    Returns a one-row pd.DataFrame whose only column, 'a', holds a
    single sample drawn by ``np.random.random``.
    """
    sample = np.random.random(1)
    frame = pd.DataFrame({'a': sample}, index=[now])
    return frame
808+
809+
810+ def random_datablock (last , now , ** kwargs ):
811+ """
812+ Example of querying over a time range since last update
813+
814+ Parameters
815+ ----------
816+ last: pd.Timestamp
817+ Time of previous call to this function.
818+ now: pd.Timestamp
819+ Current time.
820+ freq: pd.Timedelta, optional
821+ The time interval between individual records to be returned.
822+ For good throughput, should be much smaller than the
823+ interval at which this function is called.
824+
825+ Returns a pd.DataFrame with random values where:
826+
827+ The x column is uniformly distributed.
828+ The y column is Poisson distributed.
829+ The z column is normally distributed.
830+ """
831+ freq = kwargs .get ("freq" , pd .Timedelta ("100ms" ))
832+ index = pd .date_range (start = last + freq , end = now , freq = freq )
809833
810834 df = pd .DataFrame ({'x' : np .random .random (len (index )),
811835 'y' : np .random .poisson (size = len (index )),
@@ -814,47 +838,50 @@ def _random_df(tup):
814838 return df
815839
816840
817- class Random (DataFrame ):
818- """ A streaming dataframe of random data
819-
820- The x column is uniformly distributed.
821- The y column is poisson distributed.
822- The z column is normally distributed.
823-
824- This class is experimental and will likely be removed in the future
841+ class PeriodicDataFrame (DataFrame ):
842+ """A streaming dataframe using the asyncio ioloop to poll a callback fn
825843
826844 Parameters
827845 ----------
828- freq: timedelta
829- The time interval between records
846+ datafn: callable
847+ Callback function accepting **kwargs and returning a
848+ pd.DataFrame. kwargs will include at least
849+ 'last' (pd.Timestamp of the previous datafn invocation), and
850+ 'now' (pd.Timestamp of the current invocation).
830851 interval: timedelta
831- The time interval between new dataframes, should be significantly
832- larger than freq
852+ The time interval between new dataframes.
853+ dask: boolean
854+ If true, uses a DaskStream instead of a regular Source.
855+ **kwargs:
856+ Optional keyword arguments to be passed into the callback function.
857+
858+ By default, returns a three-column random pd.DataFrame generated
859+ by the 'random_datablock' function.
833860
834861 Example
835862 -------
836- >>> source = Random(freq='100ms ', interval='1s' ) # doctest: +SKIP
863+ >>> df = PeriodicDataFrame(interval='1s ', datafn=random_datapoint ) # doctest: +SKIP
837864 """
838865
839- def __init__ (self , freq = '100ms' , interval = '500ms' , dask = False ):
866+ def __init__ (self , datafn = random_datablock , interval = '500ms' , dask = False , ** kwargs ):
840867 if dask :
841868 from streamz .dask import DaskStream
842869 source = DaskStream ()
843870 loop = source .loop
844871 else :
845872 source = Source ()
846873 loop = IOLoop .current ()
847- self .freq = pd .Timedelta (freq )
848874 self .interval = pd .Timedelta (interval ).total_seconds ()
849875 self .source = source
850876 self .continue_ = [True ]
877+ self .kwargs = kwargs
851878
852- stream = self .source .map (_random_df )
853- example = _random_df (( time ( ), time (), self . freq ) )
879+ stream = self .source .map (lambda x : datafn ( ** x , ** kwargs ) )
880+ example = datafn ( last = pd . Timestamp . now ( ), now = pd . Timestamp . now (), ** kwargs )
854881
855- super (Random , self ).__init__ (stream , example )
882+ super (PeriodicDataFrame , self ).__init__ (stream , example )
856883
857- loop .add_callback (self ._cb , self .interval , self .freq , self . source ,
884+ loop .add_callback (self ._cb , self .interval , self .source ,
858885 self .continue_ )
859886
860887 def __del__ (self ):
@@ -865,15 +892,34 @@ def stop(self):
865892
866893 @staticmethod
867894 @gen .coroutine
868- def _cb (interval , freq , source , continue_ ):
869- last = time ()
895+ def _cb (interval , source , continue_ ):
896+ last = pd . Timestamp . now ()
870897 while continue_ [0 ]:
871898 yield gen .sleep (interval )
872- now = time ()
873- yield source ._emit ((last , now , freq ))
899+ now = pd . Timestamp . now ()
900+ yield source ._emit (dict (last = last , now = now ))
874901 last = now
875902
876903
class Random(PeriodicDataFrame):
    """PeriodicDataFrame providing random values by default

    Accepts the same parameters as PeriodicDataFrame, plus `freq`.
    Useful mainly for examples and docs.

    Parameters
    ----------
    freq: str or pd.Timedelta
        Converted with ``pd.Timedelta(freq)`` and passed to 'datafn'
        as the ``freq`` keyword argument.
    interval: timedelta
        Forwarded unchanged to PeriodicDataFrame.
    dask: boolean
        Forwarded unchanged to PeriodicDataFrame.
    datafn: callable
        Callback forwarded to PeriodicDataFrame; defaults to
        'random_datablock'.
    **kwargs:
        Extra keyword arguments forwarded to PeriodicDataFrame
        alongside ``freq``.

    Example
    -------
    >>> source = Random(freq='100ms', interval='1s')  # doctest: +SKIP
    """

    def __init__(self, freq='100ms', interval='500ms', dask=False,
                 datafn=random_datablock, **kwargs):
        # ``freq`` is a named parameter, so it can never also appear in
        # ``kwargs``; forwarding both cannot produce a duplicate keyword.
        super(Random, self).__init__(datafn, interval, dask,
                                     freq=pd.Timedelta(freq), **kwargs)
921+
922+
877923_stream_types ['streaming' ].append ((is_dataframe_like , DataFrame ))
878924_stream_types ['streaming' ].append ((is_index_like , Index ))
879925_stream_types ['streaming' ].append ((is_series_like , Series ))
0 commit comments