Skip to content

Commit 37ccc9f

Browse files
committed
Attempt to debug macOS sevenzip issues
Add CI debug for 7zip on macOS Signed-off-by: Philippe Ombredanne <pombredanne@nexb.com>
1 parent a71f06a commit 37ccc9f

3 files changed

Lines changed: 252 additions & 62 deletions

File tree

src/extractcode/sevenzip.py

Lines changed: 37 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -94,9 +94,9 @@ def get_7z_errors(stdout, stderr):
9494
# FIXME: we should use only one pass over stdout for errors and warnings
9595
if not stdout or not stdout.strip():
9696
return
97-
97+
9898
# ERROR: Can not create symbolic link : A required privilege is not held by the client. : .\2-SYMTYPE
99-
find_7z_errors = re.compile('^Error:(.*)$', re.MULTILINE | re.DOTALL | re.IGNORECASE).findall
99+
find_7z_errors = re.compile('^Error:(.*)$', re.MULTILINE | re.DOTALL | re.IGNORECASE).findall # NOQA
100100

101101
stdlow = stderr.lower()
102102
for err, msg in sevenzip_errors:
@@ -153,7 +153,7 @@ def list_extracted_7z_files(stdout):
153153
static const char *kExtractingString = "Extracting ";
154154
"""
155155
# FIXME: handle Unicode paths with 7zip command line flags
156-
get_file_list = re.compile('Extracting ' + '(.*)$', re.M).findall
156+
get_file_list = re.compile('Extracting ' + '(.*)$', re.MULTILINE).findall # NOQA
157157
return get_file_list(stdout)
158158

159159

@@ -180,8 +180,7 @@ def get_bin_locations():
180180
return lib_dir, cmd_loc
181181

182182

183-
def extract(location, target_dir, arch_type='*', file_by_file=on_mac, log=on_mac,
184-
skip_symlinks=True):
183+
def extract(location, target_dir, arch_type='*', file_by_file=on_mac, skip_symlinks=True):
185184
"""
186185
Extract all files from a 7zip-supported archive file at location in the
187186
target_dir directory. `skip_symlinks` by default.
@@ -200,7 +199,7 @@ def extract(location, target_dir, arch_type='*', file_by_file=on_mac, log=on_mac
200199
if not os.path.exists(abs_location):
201200
raise ExtractErrorFailedToExtract(
202201
'The system cannot find the path specified: {}'.format(repr(abs_location)))
203-
202+
204203
if is_rar(location):
205204
raise ExtractErrorFailedToExtract(
206205
'RAR extraction disactivated: {}'.format(repr(location)))
@@ -213,11 +212,13 @@ def extract(location, target_dir, arch_type='*', file_by_file=on_mac, log=on_mac
213212

214213
extractor = extract_file_by_file if file_by_file else extract_all_files_at_once
215214
return extractor(
216-
location=abs_location, target_dir=abs_target_dir, arch_type=arch_type,
217-
log=log, skip_symlinks=skip_symlinks)
215+
location=abs_location,
216+
target_dir=abs_target_dir,
217+
arch_type=arch_type,
218+
skip_symlinks=skip_symlinks)
218219

219220

220-
def extract_all_files_at_once(location, target_dir, arch_type='*', log=on_mac, skip_symlinks=True):
221+
def extract_all_files_at_once(location, target_dir, arch_type='*', skip_symlinks=True):
221222
"""
222223
Extract all files from a 7zip-supported archive file at `location` in the
223224
`target_dir` directory.
@@ -234,7 +235,7 @@ def extract_all_files_at_once(location, target_dir, arch_type='*', log=on_mac, s
234235
# note: there are some issues with the extraction of debian .deb ar files
235236
# see sevenzip bug http://sourceforge.net/p/sevenzip/bugs/1472/
236237
ex_args = build_7z_extract_command(
237-
location=location, target_dir=target_dir, arch_type=arch_type, log=log)
238+
location=location, target_dir=target_dir, arch_type=arch_type)
238239

239240
rc, stdout, stderr = command.execute2(**ex_args)
240241

@@ -251,8 +252,7 @@ def extract_all_files_at_once(location, target_dir, arch_type='*', log=on_mac, s
251252
return convert_warnings_to_list(get_7z_warnings(stdout))
252253

253254

254-
def build_7z_extract_command(
255-
location, target_dir, single_entry=None, arch_type='*', log=on_mac):
255+
def build_7z_extract_command(location, target_dir, single_entry=None, arch_type='*'):
256256
"""
257257
Return a mapping of 7z command line arguments to extract the archive at
258258
`location` to `target_dir`.
@@ -327,7 +327,6 @@ def build_7z_extract_command(
327327
lib_dir=lib_dir,
328328
cwd=target_dir,
329329
env=timezone,
330-
log=log
331330
)
332331

333332
if TRACE:
@@ -337,7 +336,7 @@ def build_7z_extract_command(
337336
return ex_args
338337

339338

340-
def extract_file_by_file(location, target_dir, arch_type='*', log=on_mac, skip_symlinks=True):
339+
def extract_file_by_file(location, target_dir, arch_type='*', skip_symlinks=True):
341340
"""
342341
Extract all files using a one-by-one process from a 7zip-supported archive
343342
file at location in the `target_dir` directory.
@@ -354,30 +353,23 @@ def extract_file_by_file(location, target_dir, arch_type='*', log=on_mac, skip_s
354353
entries, errors_msgs = list_entries(location, arch_type)
355354
entries = list(entries)
356355

357-
# determine if we need a one-by-one approach:
358-
# we have files that are in the same dir and have the same name when the case is ignored
359-
360-
filenames_by_parent_dir = defaultdict(list)
361-
for ent in entries:
362-
if skip_symlinks and ent.is_symlink:
363-
continue
364-
pth = ent.path
365-
if pth:
366-
pth = pth.rstrip('/')
367-
else:
368-
raise Exception(ent.to_dict())
369-
370-
parent, filename = os.path.split(pth)
371-
filenames_by_parent_dir[parent].append(filename)
356+
# Determine if we need a one-by-one approach: technically the approach is to
357+
# check if we have files that are in the same dir and have the same name
358+
# when the case is ignored. We take a simpler approach: we check if all
359+
# paths are unique when we ignore the case: for that we only check that the
360+
# length of two paths sets are the same: one set as-is and the other
361+
# lowercased.
372362

373-
need_by_file = any(
374-
len(fns) != len(set(fns))
375-
for fns in filenames_by_parent_dir.values())
363+
paths_as_is = set(e.path for e in entries)
364+
paths_no_case = set(p.lower() for p in paths_as_is)
365+
need_by_file = len(paths_as_is) != len(paths_no_case)
376366

377367
if not need_by_file:
378368
# use regular extract
379369
return extract_all_files_at_once(
380-
location=location, target_dir=target_dir, arch_type=arch_type, log=log)
370+
location=location,
371+
target_dir=target_dir,
372+
arch_type=arch_type)
381373

382374
# now we are extracting one file at a time. this is a tad painful because we
383375
# are dealing with a full command execution at each time.
@@ -398,7 +390,6 @@ def extract_file_by_file(location, target_dir, arch_type='*', log=on_mac, skip_s
398390
target_dir=tmp_extract_dir,
399391
single_entry=entry,
400392
arch_type=arch_type,
401-
log=log
402393
)
403394
rc, stdout, stderr = command.execute2(**ex_args)
404395

@@ -441,7 +432,7 @@ def extract_file_by_file(location, target_dir, arch_type='*', log=on_mac, skip_s
441432
if TRACE:
442433
logger.debug('extract: unique_target_file_loc: from {} to {}'.format(
443434
target_file_loc, unique_target_file_loc))
444-
435+
445436
if os.path.isfile(source_file_loc):
446437
fileutils.copyfile(source_file_loc, unique_target_file_loc)
447438
else:
@@ -629,7 +620,7 @@ def parse_7z_listing(location, utf=False):
629620
header_sep = b'\n----------\n'
630621
empty = b''
631622
body_sep = b'\n\n\n'
632-
path_block_sep = b'\n\nPath ='
623+
path_block_sep = b'Path ='
633624
msg_sep = b':'
634625
equal_sep = b'='
635626
errror_line_starters = b'Open Warning:', b'Errors:', b'Warnings:'
@@ -641,7 +632,7 @@ def parse_7z_listing(location, utf=False):
641632
print(text)
642633
print('--------------------------------------')
643634

644-
header_tail = re.split(header_sep, text, flags=re.MULTILINE)
635+
header_tail = re.split(header_sep, text, flags=re.MULTILINE) # NOQA
645636
if len(header_tail) != 2:
646637
# we have either no header or more than one header: confusion ensues.
647638
raise ExtractWarningIncorrectEntry(
@@ -653,7 +644,7 @@ def parse_7z_listing(location, utf=False):
653644

654645
# FIXME: do something with header and footer?
655646
_header, body = header_tail
656-
body_and_footer = re.split(body_sep, body, flags=re.MULTILINE)
647+
body_and_footer = re.split(body_sep, body, flags=re.MULTILINE) # NOQA
657648
no_footer = len(body_and_footer) == 1
658649
multiple_footers = len(body_and_footer) > 2
659650
_footer = empty
@@ -673,7 +664,7 @@ def parse_7z_listing(location, utf=False):
673664
print(body)
674665

675666
path_blocks = [pb.strip() for pb in
676-
re.split(path_block_sep, body, flags=re.MULTILINE) if pb and pb.strip()]
667+
re.split(path_block_sep, body, flags=re.MULTILINE) if pb and pb.strip()] # NOQA
677668

678669
if TRACE_DEEP:
679670
logger.debug('parse_7z_listing: path_blocks:')
@@ -687,10 +678,15 @@ def parse_7z_listing(location, utf=False):
687678
infos = {}
688679

689680
lines = path_block.splitlines(False)
690-
# thfirst line is the Path line
681+
682+
if len(lines) == 1:
683+
# a temp macOS debug statement
684+
raise Exception(text)
685+
686+
# the first line is the Path line
691687
path_line = lines.pop(0).strip()
692688
if 'Path =' in path_line:
693-
_, _, path= path_line.partition('Path =')
689+
_, _, path = path_line.partition('Path =')
694690
path = path.lstrip()
695691
else:
696692
path = path_line

0 commit comments

Comments
 (0)