Skip to content

Commit f691591

Browse files
committed
code cleanup
1 parent d4aafc2 commit f691591

1 file changed

Lines changed: 52 additions & 55 deletions

File tree

dateandtimeparser/core_date_parser.py

Lines changed: 52 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,15 @@
44

55
# formats = ['%b %d %Y','%b %d, %Y','%b %d, %Y','%B %d, %Y','%B %d %Y','%d/%m/%Y','%d/%m/%y','%b %Y','%B%Y','%b %d,%Y','%d%b%y','%d%b','%b%y','%d %b %y','%m/%d','%m/%d%y']
66

7+
78
class DateParser():
8-
def __init__(self, formats, start_year=1900, end_year=2100):
    """Store the parser configuration and precompute its lookup tables.

    Args:
        formats: strftime-style format strings the parser should recognise.
        start_year: first year used when expanding ``%Y``.
        end_year: last year (inclusive) used when expanding ``%Y``.
    """
    self.formats, self.start_year, self.end_year = (
        formats,
        start_year,
        end_year,
    )
    # Order matters: get_values() reads the year bounds set above, and
    # get_patterns() in turn reads self.values.
    self.values = self.get_values()
    self.patterns = self.get_patterns()
14-
15+
1516
def get_patterns(self):
1617
final_patterns = {}
1718
for fmat in self.formats:
@@ -21,84 +22,85 @@ def get_patterns(self):
2122
if v in fmat:
2223
present_sub_formats.append(v)
2324
i = 0
24-
while(i<len(present_sub_formats)):
25+
while(i < len(present_sub_formats)):
2526
v = present_sub_formats[i]
2627
if len(patterns) == 0:
2728
for val in self.values[v]:
28-
patterns.append(fmat.replace(v,val))
29+
patterns.append(fmat.replace(v, val))
2930
else:
3031
new_patterns = []
3132
for p in patterns:
3233
for val in self.values[v]:
33-
new_patterns.append(p.replace(v,val))
34+
new_patterns.append(p.replace(v, val))
3435
patterns = copy.deepcopy(new_patterns)
3536
i += 1
36-
final_patterns[fmat] = {k:None for k in sorted(patterns)}
37+
final_patterns[fmat] = {k: None for k in sorted(patterns)}
3738
return final_patterns
38-
39-
39+
4040
def get_values(self):
    """Return the literal strings each supported directive can expand to.

    Returns:
        dict: maps a strftime-style directive (``%b``, ``%d``, ``%-d``,
        ``%Y``, ``%B``, ``%m``, ``%-m``, ``%y``) to the list of lower-case
        strings that may appear in its place.
    """
    return {
        "%b": [
            "jan", "feb", "mar", "apr", "may", "jun",
            "jul", "aug", "sep", "oct", "nov", "dec",
        ],
        "%d": self.get_dates(n_digits=2),
        "%-d": self.get_dates(),
        "%Y": self.get_year_value(
            start_year=self.start_year, end_year=self.end_year),
        "%B": [
            "january", "february", "march", "april", "may", "june",
            "july", "august", "september", "october", "november",
            "december",
        ],
        "%m": self.get_months(),
        "%-m": self.get_months(n_digits=1),
        # The instance used to be passed positionally here as well, which
        # bound it to ``start_year``; that only worked because the
        # two-digit branch never looks at the year bounds.
        "%y": self.get_year_value(n_digits=2),
    }
5252

53-
def get_year_value(self, start_year=1900, end_year=2100, n_digits=4):
    """List the year strings a year directive may expand to.

    Args:
        start_year: first four-digit year to include.
        end_year: last four-digit year to include (inclusive).
        n_digits: 4 selects the ``start_year``..``end_year`` range; any
            other value selects the two-digit years ``"00"``..``"99"``.

    Returns:
        list of str.
    """
    if n_digits != 4:
        # Two-digit years ignore the bounds entirely.
        return [str(short_year).zfill(2) for short_year in range(100)]
    return list(map(str, range(start_year, end_year + 1)))
5858

59-
def get_dates(self, n_digits=1):
    """List the day-of-month strings for 1 through 31.

    Args:
        n_digits: 1 yields unpadded days (``"1"``..``"31"``); any other
            value yields zero-padded two-character days (``"01"``..``"31"``).

    Returns:
        list of str.
    """
    width = 1 if n_digits == 1 else 2
    return [str(day).zfill(width) for day in range(1, 32)]
6464

65-
def get_months(self, n_digits=2):
    """List the month-number strings for 1 through 12.

    Args:
        n_digits: 2 yields zero-padded months (``"01"``..``"12"``); any
            other value yields unpadded months (``"1"``..``"12"``).

    Returns:
        list of str.
    """
    months = [str(month) for month in range(1, 13)]
    if n_digits == 2:
        months = [month.rjust(2, "0") for month in months]
    return months
70+
71+
def find_repeat_matches(self, query_string, sub_string, pattern):
    """Locate every non-overlapping occurrence of ``sub_string``.

    The scan runs left to right and resumes right after each hit, so
    characters consumed by one match are never reused by another.  The
    earlier approach overwrote each hit with spaces and rescanned from the
    start, which produced phantom matches for substrings with a leading or
    trailing space and never terminated for an empty or all-space
    substring.

    Args:
        query_string: text to search.
        sub_string: literal text to look for.
        pattern: format string that produced ``sub_string``; it is copied
            unchanged into every result tuple.

    Returns:
        list of ``(sub_string, start, end, pattern)`` tuples in order of
        appearance, where ``query_string[start:end] == sub_string``.  Empty
        when ``sub_string`` is empty or does not occur.
    """
    if not sub_string:
        # An empty needle "matches" everywhere; report nothing instead.
        return []

    width = len(sub_string)
    found = []
    start = query_string.find(sub_string)
    while start != -1:
        end = start + width
        found.append((sub_string, start, end, pattern))
        start = query_string.find(sub_string, end)
    return found
8486

85-
def parse_string(self, query_string):
    """Find date-like substrings of ``query_string``.

    The input is lower-cased, every precomputed literal in
    ``self.patterns`` is searched for, and the hits are grouped by the
    format that produced them before being narrowed by
    ``priority_matches`` and mapped onto tokens by ``get_match_tokens``.

    Args:
        query_string: text to scan.

    Returns:
        Whatever ``get_match_tokens`` produces for the retained matches,
        or ``None`` when no format is retained.
    """
    lowered = query_string.lower()

    hits_by_format = {}
    for fmt, literals in self.patterns.items():
        for literal in literals:
            if literal not in lowered:
                continue
            hits_by_format.setdefault(fmt, []).extend(
                self.find_repeat_matches(lowered, literal, fmt))

    retained = self.priority_matches(hits_by_format)
    if not retained:
        return None

    spans = self.get_token_spans(lowered)
    return self.get_match_tokens(retained, spans)
100102

101-
def get_match_tokens(self,priority_matches,token_spans):
103+
def get_match_tokens(self, priority_matches, token_spans):
102104
ret_list = []
103105
for key in priority_matches:
104106
for pm in priority_matches[key]:
@@ -107,13 +109,14 @@ def get_match_tokens(self,priority_matches,token_spans):
107109
start_token = 1000
108110
end_token = -1
109111
for idx in token_spans:
110-
ls1 = LineString([(char_start,0),(char_end,0)])
111-
ls2 = LineString([(token_spans[idx][1],0),(token_spans[idx][2],0)])
112+
ls1 = LineString([(char_start, 0), (char_end, 0)])
113+
ls2 = LineString(
114+
[(token_spans[idx][1], 0), (token_spans[idx][2], 0)])
112115
if ls1.intersects(ls2):
113-
start_token = min(start_token,token_spans[idx][3])
114-
end_token = max(end_token,token_spans[idx][3])
115-
ret_list.append(list(pm) + [start_token,end_token])
116-
ret_list = sorted(ret_list,key=lambda x:x[2]-x[1],reverse=True)
116+
start_token = min(start_token, token_spans[idx][3])
117+
end_token = max(end_token, token_spans[idx][3])
118+
ret_list.append(list(pm) + [start_token, end_token])
119+
ret_list = sorted(ret_list, key=lambda x: x[2]-x[1], reverse=True)
117120
final_ret_list = []
118121
for rl in ret_list:
119122
flag = False
@@ -125,10 +128,11 @@ def get_match_tokens(self,priority_matches,token_spans):
125128
final_ret_list.append(rl)
126129
return final_ret_list
127130

128-
def priority_matches(self,matches):
131+
def priority_matches(self, matches):
129132
unique_found_formats = []
130133
found_formats = matches.keys()
131-
found_formats = sorted(found_formats,key=lambda x:len(x),reverse=True)
134+
found_formats = sorted(
135+
found_formats, key=lambda x: len(x), reverse=True)
132136
for f in found_formats:
133137
flag = False
134138
for uf in unique_found_formats:
@@ -137,22 +141,15 @@ def priority_matches(self,matches):
137141
break
138142
if not flag:
139143
unique_found_formats.append(f)
140-
return {k:matches[k] for k in unique_found_formats}
144+
return {k: matches[k] for k in unique_found_formats}
141145

142-
def get_token_spans(self, query_string):
    """Map each whitespace-separated token to its character span.

    The input is lower-cased before splitting, so both the token text and
    the offsets refer to the lower-cased string.

    Args:
        query_string: text to tokenise.

    Returns:
        dict: token index -> ``(token, start, end, token_index)`` with
        ``end`` exclusive.
    """
    lowered = query_string.lower()
    spans = {}
    cursor = 0
    for position, token in enumerate(lowered.split()):
        # Searching from the end of the previous token picks the same
        # occurrence as blanking out earlier tokens and searching from 0.
        begin = lowered.index(token, cursor)
        cursor = begin + len(token)
        spans[position] = (token, begin, cursor, position)
    return spans
150-
151-
# if __name__ == "__main__":
152-
# dp = DateParser(formats,start_year=2015,end_year=2015)
153-
# query_string = "Dec 01 2015-Dec 31 2015"
154-
# print(dp.parse_string(query_string))
155-
156-
# dp = DateParser(formats,start_year=1900,end_year=2100)
157-
# query_string = "Today is 10/12/16 and tomorrow is 10/12/16."
158-
# print(dp.parse_string(query_string))

0 commit comments

Comments
 (0)