misc/libphysfs/lzma/7zFormat.txt
author nemo
Mon, 10 Apr 2017 12:06:43 -0400
changeset 12218 bb5522e88ab2
permissions -rw-r--r--
bulk copy of latest physfs to our misc/libphysfs since this seems to fix an off-by-1 error reliably hit in readln read of 1 byte probably introduced in the addition of the buffered read. Whether this is excessive or whether libphysfs should even be maintained by us is another matter. But at least we shouldn't crash

7z Format description (2.30 Beta 25)
-----------------------------------

This file contains description of 7z archive format. 
7z archive can contain files compressed with any method.
See "Methods.txt" for description for defined compressing methods.


Format structure Overview
-------------------------

Some fields can be optional.

Archive structure
~~~~~~~~~~~~~~~~~  
SignatureHeader
[PackedStreams]
[PackedStreamsForHeaders]
[
  Header 
  or 
  {
    Packed Header
    HeaderInfo
  }
]



Header structure
~~~~~~~~~~~~~~~~  
{
  ArchiveProperties
  AdditionalStreams
  {
    PackInfo
    {
      PackPos
      NumPackStreams
      Sizes[NumPackStreams]
      CRCs[NumPackStreams]
    }
    CodersInfo
    {
      NumFolders
      Folders[NumFolders]
      {
        NumCoders
        CodersInfo[NumCoders]
        {
          ID
          NumInStreams;
          NumOutStreams;
          PropertiesSize
          Properties[PropertiesSize]
        }
        NumBindPairs
        BindPairsInfo[NumBindPairs]
        {
          InIndex;
          OutIndex;
        }
        PackedIndices
      }
      UnPackSize[Folders][Folders.NumOutstreams]
      CRCs[NumFolders]
    }
    SubStreamsInfo
    {
      NumUnPackStreamsInFolders[NumFolders];
      UnPackSizes[]
      CRCs[]
    }
  }
  MainStreamsInfo
  {
    (Same as in AdditionalStreams)
  }
  FilesInfo
  {
    NumFiles
    Properties[]
    {
      ID
      Size
      Data
    }
  }
}

HeaderInfo structure
~~~~~~~~~~~~~~~~~~~~
{
  (Same as in AdditionalStreams)
}



Notes about Notation and encoding
---------------------------------

7z uses little endian encoding.

7z archive format has optional headers that are marked as
[]
Header
[]

REAL_UINT64 means real UINT64.

UINT64 means real UINT64 encoded with the following scheme:

  Size of encoding sequence depends from first byte:
  First_Byte  Extra_Bytes        Value
  (binary)   
  0xxxxxxx               : ( xxxxxxx           )
  10xxxxxx    BYTE y[1]  : (  xxxxxx << (8 * 1)) + y
  110xxxxx    BYTE y[2]  : (   xxxxx << (8 * 2)) + y
  ...
  1111110x    BYTE y[6]  : (       x << (8 * 6)) + y
  11111110    BYTE y[7]  :                         y
  11111111    BYTE y[8]  :                         y



Property IDs
------------

0x00 = kEnd,

0x01 = kHeader,

0x02 = kArchiveProperties,
    
0x03 = kAdditionalStreamsInfo,
0x04 = kMainStreamsInfo,
0x05 = kFilesInfo,
    
0x06 = kPackInfo,
0x07 = kUnPackInfo,
0x08 = kSubStreamsInfo,

0x09 = kSize,
0x0A = kCRC,

0x0B = kFolder,

0x0C = kCodersUnPackSize,
0x0D = kNumUnPackStream,

0x0E = kEmptyStream,
0x0F = kEmptyFile,
0x10 = kAnti,

0x11 = kName,
0x12 = kCreationTime,
0x13 = kLastAccessTime,
0x14 = kLastWriteTime,
0x15 = kWinAttributes,
0x16 = kComment,

0x17 = kEncodedHeader,


7z format headers
-----------------

SignatureHeader
~~~~~~~~~~~~~~~
  BYTE kSignature[6] = {'7', 'z', 0xBC, 0xAF, 0x27, 0x1C};

  ArchiveVersion
  {
    BYTE Major;   // now = 0
    BYTE Minor;   // now = 2
  };

  UINT32 StartHeaderCRC;

  StartHeader
  {
    REAL_UINT64 NextHeaderOffset
    REAL_UINT64 NextHeaderSize
    UINT32 NextHeaderCRC
  }


...........................


ArchiveProperties
~~~~~~~~~~~~~~~~~
BYTE NID::kArchiveProperties (0x02)
for (;;)
{
  BYTE PropertyType;
  if (aType == 0)
    break;
  UINT64 PropertySize;
  BYTE PropertyData[PropertySize];
}


Digests (NumStreams)
~~~~~~~~~~~~~~~~~~~~~
  BYTE AllAreDefined
  if (AllAreDefined == 0)
  {
    for(NumStreams)
      BIT Defined
  }
  UINT32 CRCs[NumDefined]


PackInfo
~~~~~~~~~~~~
  BYTE NID::kPackInfo  (0x06)
  UINT64 PackPos
  UINT64 NumPackStreams

  []
  BYTE NID::kSize    (0x09)
  UINT64 PackSizes[NumPackStreams]
  []

  []
  BYTE NID::kCRC      (0x0A)
  PackStreamDigests[NumPackStreams]
  []

  BYTE NID::kEnd


Folder
~~~~~~
  UINT64 NumCoders;
  for (NumCoders)
  {
    BYTE 
    {
      0:3 DecompressionMethod.IDSize
      4:
        0 - IsSimple
        1 - Is not simple
      5:
        0 - No Attributes
        1 - There Are Attributes
      7:
        0 - Last Method in Alternative_Method_List
        1 - There are more alternative methods
    } 
    BYTE DecompressionMethod.ID[DecompressionMethod.IDSize]
    if (!IsSimple)
    {
      UINT64 NumInStreams;
      UINT64 NumOutStreams;
    }
    if (DecompressionMethod[0] != 0)
    {
      UINT64 PropertiesSize
      BYTE Properties[PropertiesSize]
    }
  }
    
  NumBindPairs = NumOutStreamsTotal - 1;

  for (NumBindPairs)
  {
    UINT64 InIndex;
    UINT64 OutIndex;
  }

  NumPackedStreams = NumInStreamsTotal - NumBindPairs;
  if (NumPackedStreams > 1)
    for(NumPackedStreams)
    {
      UINT64 Index;
    };




Coders Info
~~~~~~~~~~~

  BYTE NID::kUnPackInfo  (0x07)


  BYTE NID::kFolder  (0x0B)
  UINT64 NumFolders
  BYTE External
  switch(External)
  {
    case 0:
      Folders[NumFolders]
    case 1:
      UINT64 DataStreamIndex
  }


  BYTE ID::kCodersUnPackSize  (0x0C)
  for(Folders)
    for(Folder.NumOutStreams)
     UINT64 UnPackSize;


  []
  BYTE NID::kCRC   (0x0A)
  UnPackDigests[NumFolders]
  []

  

  BYTE NID::kEnd



SubStreams Info
~~~~~~~~~~~~~~
  BYTE NID::kSubStreamsInfo; (0x08)

  []
  BYTE NID::kNumUnPackStream; (0x0D)
  UINT64 NumUnPackStreamsInFolders[NumFolders];
  []


  []
  BYTE NID::kSize  (0x09)
  UINT64 UnPackSizes[]
  []


  []
  BYTE NID::kCRC  (0x0A)
  Digests[Number of streams with unknown CRC]
  []

  
  BYTE NID::kEnd


Streams Info
~~~~~~~~~~~~

  []
  PackInfo
  []


  []
  CodersInfo
  []


  []
  SubStreamsInfo
  []

  BYTE NID::kEnd


FilesInfo
~~~~~~~~~
  BYTE NID::kFilesInfo;  (0x05)
  UINT64 NumFiles

  for (;;)
  {
    BYTE PropertyType;
    if (aType == 0)
      break;

    UINT64 Size;

    switch(PropertyType)
    {
      kEmptyStream:   (0x0E)
        for(NumFiles)
          BIT IsEmptyStream

      kEmptyFile:     (0x0F)
        for(EmptyStreams)
          BIT IsEmptyFile

      kAnti:          (0x10)
        for(EmptyStreams)
          BIT IsAntiFile
      
      case kCreationTime:   (0x12)
      case kLastAccessTime: (0x13)
      case kLastWriteTime:  (0x14)
        BYTE AllAreDefined
        if (AllAreDefined == 0)
        {
          for(NumFiles)
            BIT TimeDefined
        }
        BYTE External;
        if(External != 0)
          UINT64 DataIndex
        []
        for(Definded Items)
          UINT32 Time
        []
      
      kNames:     (0x11)
        BYTE External;
        if(External != 0)
          UINT64 DataIndex
        []
        for(Files)
        {
          wchar_t Names[NameSize];
          wchar_t 0;
        }
        []

      kAttributes:  (0x15)
        BYTE AllAreDefined
        if (AllAreDefined == 0)
        {
          for(NumFiles)
            BIT AttributesAreDefined
        }
        BYTE External;
        if(External != 0)
          UINT64 DataIndex
        []
        for(Definded Attributes)
          UINT32 Attributes
        []
    }
  }


Header
~~~~~~
  BYTE NID::kHeader (0x01)

  []
  ArchiveProperties
  []

  []
  BYTE NID::kAdditionalStreamsInfo; (0x03)
  StreamsInfo
  []

  []
  BYTE NID::kMainStreamsInfo;    (0x04)
  StreamsInfo
  []

  []
  FilesInfo
  []

  BYTE NID::kEnd


HeaderInfo
~~~~~~~~~~
  []
  BYTE NID::kEncodedHeader; (0x17)
  StreamsInfo for Encoded Header
  []


---
End of document