33"""
44
# Standard library
import logging
import os.path
import re
import sys

# Set the current working directory (directory containing this script).
PATH_WORK_DIR = os.path.dirname(os.path.abspath(__file__))

# Backward-compatible alias: CWD previously recomputed the same value as
# PATH_WORK_DIR; keep the name but share the single computation.
CWD = PATH_WORK_DIR

# Set up the logger
LOG = logging.getLogger(__name__)
LOG.setLevel(logging.INFO)

# Define both the handler and the formatter
handler = logging.StreamHandler()
formatter = logging.Formatter(
    "%(asctime)s - %(levelname)s - %(name)s - %(message)s"
)

# Add formatter to the handler
handler.setFormatter(formatter)

# Add handler to the logger — but only once, so that re-importing this
# module does not attach duplicate handlers (which would double every line).
if not LOG.handlers:
    LOG.addHandler(handler)

# Log the start of the script execution
LOG.info("Script execution started.")
2750
2851def tags_frequency (csv_path , column_names ):
2952 """
@@ -36,6 +59,8 @@ def tags_frequency(csv_path, column_names):
3659 Example: ["tags", "description"]
3760
3861 """
62+ LOG .info ("Generating word cloud based on tags." )
63+
3964 df = pd .read_csv (csv_path )
4065 # Process each column containing tags
4166 for column_name in column_names :
@@ -54,7 +79,7 @@ def tags_frequency(csv_path, column_names):
5479 and str (row ) != ""
5580 and str (row ) != "nan"
5681 ):
57- print ( str ( row ) )
82+ LOG . debug ( f"Processing row: { row } " )
5883 if "ChineseinUS.org" in str (row ):
5984 row = "ChineseinUS"
6085 list2 += re .split (r"\s|(?<!\d)[,.](?!\d)" , str (row ))
@@ -143,6 +168,8 @@ def time_trend_helper(df):
143168 Returns:
144169 - DataFrame: DataFrame with counts of entries per year.
145170 """
171+ LOG .info ("Extracting year-wise count of entries." )
172+
146173 year_list = []
147174 for date_row in df ["dates" ][0 :]:
148175 date_list = str (date_row ).split ()
@@ -169,6 +196,8 @@ def time_trend(csv_path):
169196 Args:
170197 - csv_path (str): Path to the CSV file.
171198 """
199+ LOG .info ("Generating time trend line graph." )
200+
172201 df = pd .read_csv (csv_path )
173202 count_df = time_trend_helper (df )
174203
@@ -210,6 +239,8 @@ def time_trend_compile_helper(yearly_count):
210239 Returns:
211240 - DataFrame: Filtered yearly count data.
212241 """
242+ LOG .info ("Filtering yearly trend data." )
243+
213244 Years = np .arange (2018 , 2023 )
214245 yearly_count ["year" ] = list (yearly_count .index )
215246 counts = []
@@ -218,7 +249,7 @@ def time_trend_compile_helper(yearly_count):
218249 int (yearly_count ["year" ][num ]) >= 2018
219250 ):
220251 counts .append (yearly_count ["Counts" ][num ])
221- print ( counts )
252+ LOG . info ( f" { counts } " )
222253 final_yearly_count = pd .DataFrame (
223254 list (zip (Years , counts )), columns = ["Years" , "Yearly_counts" ]
224255 )
@@ -229,6 +260,8 @@ def time_trend_compile():
229260 """
230261 Compile yearly trends for different licenses and plot them.
231262 """
263+ LOG .info ("Compiling yearly trends for different licenses." )
264+
232265 license1 = pd .read_csv ("../flickr/dataset/cleaned_license1.csv" )
233266 license2 = pd .read_csv ("../flickr/dataset/cleaned_license2.csv" )
234267 license3 = pd .read_csv ("../flickr/dataset/cleaned_license3.csv" )
@@ -286,7 +319,7 @@ def time_trend_compile():
286319 yearly_count6 = time_trend_compile_helper (yearly_count6 )
287320 yearly_count9 = time_trend_compile_helper (yearly_count9 )
288321 yearly_count10 = time_trend_compile_helper (yearly_count10 )
289- print ( yearly_count1 )
322+ LOG . info ( f" { yearly_count1 } " )
290323
291324 # Plot yearly trend for all licenses
292325 plt .plot (
@@ -375,17 +408,21 @@ def view_compare_helper(df):
375408 Returns:
376409 - int: Maximum views.
377410 """
411+ LOG .info ("Calculating maximum views of pictures under a license." )
412+
378413 highest_view = int (max (df ["views" ]))
379414 df = df .sort_values ("views" , ascending = False )
415+ LOG .info (f"DataFrame sorted by views in descending order: { df } " )
416+ LOG .info (f"Maximum views found: { highest_view } " )
380417 return highest_view
381- print (df )
382- print (highest_view )
383418
384419
385420def view_compare ():
386421 """
387422 Compare maximum views of pictures under different licenses.
388423 """
424+ LOG .info ("Comparing maximum views of pictures under different licenses." )
425+
389426 license1 = pd .read_csv (
390427 os .path .join (PATH_WORK_DIR , "../flickr/dataset/cleaned_license1.csv" )
391428 )
@@ -424,7 +461,7 @@ def view_compare():
424461 maxs = []
425462 for lic in licenses :
426463 maxs .append (view_compare_helper (lic ))
427- print ( maxs )
464+ LOG . info ( f" { maxs } " )
428465 # Create DataFrame to store license and their maximum views
429466 temp_data = pd .DataFrame ()
430467 temp_data ["Licenses" ] = [
@@ -480,6 +517,8 @@ def total_usage():
480517 """
481518 Generate a bar plot showing the total usage of different licenses.
482519 """
520+ LOG .info ("Generating bar plot showing total usage of different licenses." )
521+
483522 # Reads the license total file as the input dataset
484523 df = pd .read_csv (
485524 os .path .join (PATH_WORK_DIR , "../flickr/dataset/license_total.csv" )
@@ -499,15 +538,15 @@ def main():
499538
500539
if __name__ == "__main__":
    # Exception Handling: run main() and translate interrupts/failures
    # into conventional process exit codes.
    try:
        main()
    except SystemExit as e:
        # Propagate an explicit sys.exit() from main(), logging its code.
        LOG.error("System exit with code: %s", e.code)
        sys.exit(e.code)
    except KeyboardInterrupt:
        # 130 = 128 + SIGINT, the conventional Ctrl-C exit status.
        LOG.info("(130) Halted via KeyboardInterrupt.")
        sys.exit(130)
    except Exception:
        # LOG.exception appends the full traceback to the message,
        # replacing the manual traceback.format_exc() interpolation.
        LOG.exception("(1) Unhandled exception:")
        sys.exit(1)
0 commit comments