5454else :
5555 from pipes import quote as shlex_quote
5656
57-
5857"""
5958Low level support for p/7zip-based archive extraction.
6059"""
6160
62-
# Module-level logger; it stays silent unless one of the switches below is on.
logger = logging.getLogger(__name__)

# Debugging switches, all off by default:
# - TRACE: general tracing
# - TRACE_DEEP: more verbose tracing
# - TRACE_ENTRIES: tracing of the entries parsed from a listing
TRACE = False
TRACE_DEEP = False
TRACE_ENTRIES = False

# Turning on any switch routes debug-level log records to stdout.
if any((TRACE, TRACE_DEEP, TRACE_ENTRIES)):
    import sys
    logging.basicConfig(stream=sys.stdout)
    logger.setLevel(logging.DEBUG)
@@ -518,45 +517,6 @@ def list_entries(location, arch_type='*'):
518517 return parse_7z_listing (stdout , utf ), error_messages
519518
520519
521- def as_entry (infos ):
522- """
523- Return an Entry built from a 7zip path listing data in the `infos` mapping.
524- """
525- is_symlink = False
526- is_hardlink = False
527- link_target = None
528-
529- sl = infos .get ('Symbolic Link' )
530-
531- if sl :
532- is_symlink = True
533- link_target = sl
534-
535- hl = infos .get ('Hard Link' )
536- if hl :
537- is_hardlink = True
538- link_target = hl
539-
540- if sl and hl :
541- from pprint import pformat
542- raise ExtractWarningIncorrectEntry (
543- 'A symlink cannot be also a hardlink: {}' .format (pformat (infos )))
544-
545- is_dir = infos .get ('Folder' , False ) == '+'
546-
547- e = Entry (
548- path = infos .get ('Path' ),
549- size = infos .get ('Size' , 0 ),
550- date = infos .get ('Modified' , None ),
551- is_dir = is_dir ,
552- is_file = not is_dir ,
553- is_symlink = is_symlink ,
554- is_hardlink = is_hardlink ,
555- link_target = link_target ,
556- )
557- return e
558-
559-
560520def parse_7z_listing (location , utf = False ):
561521 """
562522 Return a list Entry objects from parsing a long format 7zip listing from a
@@ -567,16 +527,18 @@ def parse_7z_listing(location, utf=False):
567527
568528 The 7zip -slt format looks like this:
569529
530+ 1. a header with:
570531 - copyright and version details
571532 - '--' line
572533 - archive header info, varying based on the archive types and subtype
573534 - lines of key=value pairs
574- - Errors: followed by one or more message lines
575- - Warnings: followed by one or more message lines
576- - Open Warning: : followed by one or more message lines
577- - sometimes a '---' line
535+ - ERRORS: followed by one or more message lines
536+ - WARNINGS: followed by one or more message lines
578537 - blank line
579- - '----------' line
538+
539+ 2. blocks of path aka. entry data, one for each path with:
540+
541+ - '----------' line once as the indicator of path blocks starting
580542 - for each archive member:
581543 - lines of either
582544 - key = value pairs, with a possible twist that the Path may
@@ -585,162 +547,94 @@ def parse_7z_listing(location, utf=False):
585547 - Warnings: followed by one or more message lines
586548 - Open Warning: : followed by one or more message lines
587549 - blank line
588- - two blank lines
550+
551+ 3. a footer
552+ - blank line
589553 - footer sometimes with lines with summary stats
590554 such as Warnings: 1 Errors: 1
591555 - a line with two or more dashes or an empty line
556+
557+ We ignore the header and footer in a listing.
592558 """
593559
594560 if utf or py3 :
595561 # read to unicode
596562 with io .open (location , 'r' , encoding = 'utf-8' ) as listing :
597563 text = listing .read ()
598- if TRACE_DEEP :
599- print ('=====================================================' )
600- print (text )
601- print ('=====================================================' )
602-
603564 text = text .replace (u'\r \n ' , u'\n ' )
604565
605- header_sep = u'\n ----------\n '
606- empty = u''
607- body_sep = u'\n \n \n '
608- path_block_sep = u'Path ='
609- msg_sep = u':'
610- equal_sep = u'='
611- errror_line_starters = 'Open Warning:' , 'Errors:' , 'Warnings:'
612- line_sep = u'\n '
566+ end_of_header = u'----------\n '
567+ path_key = u'Path'
568+ kv_sep = u'='
569+ path_blocks_sep = u'\n \n '
570+ line_sep = u'\n '
613571
614572 else :
615573 # read to bytes
616574 with io .open (location , 'rb' ) as listing :
617575 text = listing .read ()
618576 text = text .replace (b'\r \n ' , b'\n ' )
619577
620- header_sep = b'\n ----------\n '
621- empty = b''
622- body_sep = b'\n \n \n '
623- path_block_sep = b'Path ='
624- msg_sep = b':'
625- equal_sep = b'='
626- errror_line_starters = b'Open Warning:' , b'Errors:' , b'Warnings:'
627- line_sep = b'\n '
578+ end_of_header = b'----------\n '
579+ path_key = b'Path'
580+ kv_sep = b'='
581+ path_blocks_sep = b'\n \n '
582+ line_sep = b'\n '
628583
629584 if TRACE :
630585 logger .debug ('parse_7z_listing: initial text: type: ' + repr (type (text )))
631586 print ('--------------------------------------' )
632587 print (text )
633588 print ('--------------------------------------' )
634589
635- header_tail = re .split (header_sep , text , flags = re .MULTILINE ) # NOQA
636- if len (header_tail ) != 2 :
637- # we more than one a header, confusion entails.
638- raise ExtractWarningIncorrectEntry (
639- 'Incorrect 7zip listing with multiple headers: {}' .format (repr (header_tail )))
590+ # for now we ignore the header
591+ _header , _ , paths = text .rpartition (end_of_header )
640592
641- if len ( header_tail ) == 1 :
593+ if not paths :
642594 # we have only a header, likely an error condition or an empty archive
643595 return []
644596
645- # FIXME: do something with header and footer?
646- _header , body = header_tail
647- body_and_footer = re .split (body_sep , body , flags = re .MULTILINE ) # NOQA
648- no_footer = len (body_and_footer ) == 1
649- multiple_footers = len (body_and_footer ) > 2
650- _footer = empty
651-
652- if no_footer :
653- body = body_and_footer [0 ]
654- elif multiple_footers :
655- raise ExtractWarningIncorrectEntry (
656- 'Incorrect 7zip listing with multiple footers: {}' .format (repr (body_and_footer )))
657- else :
658- body , _footer == body_and_footer
659-
660- entries = []
597+ # each block representing one path or file:
598+ # - starts with a "Path = <some/path>" key/value
599+ # - continues with key = value pairs each on a single line
600+ # (unless there is a \n in file name which is an error condition)
601+ # - ends with an empty line
602+ # then we have a global footer
661603
662- if TRACE :
663- logger .debug ('parse_7z_listing: body:' )
664- print (body )
604+ path_blocks = [pb for pb in paths .split (path_blocks_sep ) if pb and path_key in pb ]
665605
666- path_blocks = [pb .strip () for pb in
667- re .split (path_block_sep , body , flags = re .MULTILINE ) if pb and pb .strip ()] # NOQA
668-
669- if TRACE_DEEP :
670- logger .debug ('parse_7z_listing: path_blocks:' )
671- pprint .pprint (path_blocks )
606+ entries = []
672607
673608 for path_block in path_blocks :
674- if TRACE :
675- logger .debug ('parse_7z_listing: path_block: {}' .format (path_block ))
676-
677- errors = []
678- infos = {}
679-
680- lines = path_block .splitlines (False )
681-
682- if len (lines ) == 1 :
683- # a temp macOS debug statement
684- raise Exception (text )
685-
686- # the first line is the Path line
687- path_line = lines .pop (0 ).strip ()
688- if 'Path =' in path_line :
689- _ , _ , path = path_line .partition ('Path =' )
690- path = path .lstrip ()
691- else :
692- path = path_line
693-
694- second = lines [0 ]
695-
696- if equal_sep not in second :
697- # the path contain line breaks and the next line continues the name
698- path = line_sep .join ([path , second ])
699- lines .pop (0 )
700-
701- infos ['Path' ] = path
702-
703- is_err = False
704-
705- # process the remainining non-path lines
706- for line in lines :
707- if TRACE_DEEP :
708- logger .debug ('parse_7z_listing: line: "{}"' .format (line ))
709-
710- line = line .strip ()
711-
712- if not line :
713- continue
714-
715- if line .startswith (errror_line_starters ):
716- is_err = True
717- messages = line .split (msg_sep , 1 )
718- errors .append (messages )
719- continue
720-
721- if equal_sep not in line and is_err :
722- # not a key = value line, an error message
723- errors .append (line )
724- continue
725-
726- parts = line .split (equal_sep , 1 )
727-
728- if len (parts ) != 2 :
729- raise ExtractWarningIncorrectEntry (
730- 'Incorrect 7zip listing line with no key=value: {}' .format (repr (line )))
731-
732- is_err = False
733- key , value = parts
734- key = key .strip ()
735- value = value .strip ()
736- assert key not in infos , 'Duplicate keys in 7zip listing'
737- infos [key ] = value or empty
738-
739- if infos :
740- entr = as_entry (infos )
741- entries .append (entr )
742-
743- if TRACE_DEEP :
609+ # we ignore empty lines as well as lines that do not contain a key
610+ lines = [line .strip () for line in path_block .splitlines (False ) if line .strip ()]
611+ if not lines :
612+ continue
613+ # we have a weird case of path with line returns in the file name
614+ # we concatenate these in the first Path line
615+ while len (lines ) > 1 and lines [0 ].startswith (path_key ) and kv_sep not in lines [1 ]:
616+ first_line = lines [0 ]
617+ second_line = lines .pop (1 )
618+ first_line = line_sep .join ([first_line , second_line ])
619+ lines [0 ] = first_line
620+
621+ dangling_lines = [line for line in lines if kv_sep not in line ]
622+ entry_errors = []
623+ if dangling_lines :
624+ emsg = 'Invalid 7z listing path block missing "=" as key/value separator: {}' .format (repr (path_block ))
625+ entry_errors .append (emsg )
626+
627+ entry_attributes = {}
628+ key_lines = [line for line in lines if kv_sep in line ]
629+ for line in key_lines :
630+ k , _ , v = line .partition (kv_sep )
631+ k = k .strip ()
632+ v = v .strip ()
633+ entry_attributes [k ] = v
634+
635+ entries .append (Entry .from_dict (infos = entry_attributes , errors = entry_errors ))
636+
637+ if TRACE_ENTRIES :
744638 logger .debug ('parse_7z_listing: entries# {}\n ' .format (len (entries )))
745639 for entry in entries :
746640 logger .debug (' ' + repr (entry .to_dict ()))
@@ -777,12 +671,9 @@ class Entry(object):
777671 link_target = attr .ib (default = None )
778672 errors = attr .ib (default = attr .Factory (list ))
779673
780- def parent (self ):
781- return posixpath .dirname (self .path .rstrip ('/' ))
782-
def to_dict(self, full=False):
    """
    Return a mapping of this Entry attributes as built by `attr.asdict`.

    The `errors` attribute is kept in the returned mapping. The `date`
    attribute is dropped unless `full` is True.
    """
    data = attr.asdict(self)
    if full:
        return data
    # the date is only reported in the "full" form
    data.pop('date', None)
    return data
@@ -795,3 +686,56 @@ def is_relative_path(self):
795686
def is_empty(self):
    """
    Return True if this Entry `size` is falsy (such as 0, None or an empty
    string) and False otherwise.
    """
    # NOTE(review): this is a plain truthiness test. If `size` is kept as the
    # raw listing text, a '0' string is truthy and is not reported as empty
    # -- confirm how `size` is stored.
    return False if self.size else True
689+
@classmethod
def from_dict(cls, infos, errors=None):
    """
    Return a new Entry built from the `infos` mapping of key/value
    attributes collected for one path of a 7zip listing.

    `errors` is an optional list of error messages attached to the returned
    Entry; an empty list is used when it is None or empty.

    These `infos` keys are used: 'Path', 'Size', 'Modified',
    'Symbolic Link', 'Hard Link', 'Mode', 'Folder' and 'Attributes'.

    Raise an ExtractWarningIncorrectEntry if `infos` has both a symbolic
    link target and a hard link target.
    """
    symlink_target = infos.get('Symbolic Link')
    hardlink_target = infos.get('Hard Link')

    if symlink_target and hardlink_target:
        from pprint import pformat
        raise ExtractWarningIncorrectEntry(
            'A symlink cannot be also a hardlink: {}'.format(pformat(infos)))

    # at most one of the two targets is set at this stage
    link_target = hardlink_target or symlink_target or None

    # Depending on the type of archive, the file vs. directory flag is found
    # in different attributes. They are checked in this order, stopping at
    # the first one that designates a directory.
    if infos.get('Mode', '').lower().startswith('d'):
        # in some listings we have this: Mode = drwxrwxr-x
        is_dir = True
    elif infos.get('Folder', '').startswith('+'):
        # in cpio and a few more we have a Folder attribute
        is_dir = True
    else:
        # in 7z listings we have this: Attributes = D_ drwxrwxr-x
        is_dir = infos.get('Attributes', '').lower().startswith('d_')

    return cls(
        path=infos.get('Path'),
        size=infos.get('Size', 0),
        date=infos.get('Modified', None),
        is_dir=is_dir,
        is_file=not is_dir,
        is_symlink=bool(symlink_target),
        is_hardlink=bool(hardlink_target),
        link_target=link_target,
        errors=errors or [],
    )
0 commit comments