This commit is contained in:
Igor Pavlov
2021-07-22 23:00:14 +01:00
committed by Kornel
parent 4a960640a3
commit 585698650f
619 changed files with 34904 additions and 10859 deletions

View File

@@ -24,7 +24,9 @@ using namespace NWindows;
namespace NArchive {
namespace NTar {
static const UINT k_DefaultCodePage = CP_OEMCP; // it uses it if UTF8 check in names shows error
// 21.02: we use UTF8 code page by default, even if some files show error
// before 21.02 : CP_OEMCP;
// static const UINT k_DefaultCodePage = CP_UTF8;
static const Byte kProps[] =
@@ -39,13 +41,15 @@ static const Byte kProps[] =
kpidGroup,
kpidSymLink,
kpidHardLink,
kpidCharacts
// kpidLinkType
};
// Property IDs listed for the archive as a whole.
// kpidCharacts is answered by GetArchiveProperty() from _encodingCharacts.
static const Byte kArcProps[] =
{
  kpidHeadersSize,
  kpidCodePage,
  kpidCharacts
};
IMP_IInArchive_Props
@@ -67,8 +71,12 @@ STDMETHODIMP CHandler::GetArchiveProperty(PROPID propID, PROPVARIANT *value)
{
case k_ErrorType_UnexpectedEnd: flags = kpv_ErrorFlags_UnexpectedEnd; break;
case k_ErrorType_Corrupted: flags = kpv_ErrorFlags_HeadersError; break;
// case k_ErrorType_OK: break;
// case k_ErrorType_Warning: break;
default: break;
}
prop = flags;
if (flags != 0)
prop = flags;
break;
}
@@ -96,6 +104,13 @@ STDMETHODIMP CHandler::GetArchiveProperty(PROPID propID, PROPVARIANT *value)
prop = name;
break;
}
case kpidCharacts:
{
AString s = _encodingCharacts.GetCharactsString();
prop = s;
break;
}
}
prop.Detach(value);
return S_OK;
@@ -119,12 +134,64 @@ HRESULT CHandler::ReadItem2(ISequentialInStream *stream, bool &filled, CItemEx &
*/
if (item.IsPaxExtendedHeader())
_thereIsPaxExtendedHeader = true;
if (item.IsThereWarning())
_warning = true;
}
_phySize += item.HeaderSize;
_headersSize += item.HeaderSize;
return S_OK;
}
// Examines the string `s` and records the outcome in this object:
// IsAscii is set from s.IsAscii(), and a string that is not pure ASCII
// is additionally handed to the UTF-8 checker (UtfCheck).
void CEncodingCharacts::Check(const AString &s)
{
  const bool asciiOnly = s.IsAscii();
  IsAscii = asciiOnly;
  if (asciiOnly)
    return;

  /*
  Disabled OEM code page probe, kept for reference:
  {
    Oem_Checked = true;
    UString u;
    MultiByteToUnicodeString2(u, s, CP_OEMCP);
    Oem_Ok = (u.Find((wchar_t)0xfffd) <= 0);
  }
  Utf_Checked = true;
  */

  UtfCheck.Check_AString(s);
}
// Builds a short textual summary of the collected characteristics:
//   - "ASCII" when IsAscii is set;
//   - otherwise "UTF8" or "UTF8-ERROR" (depending on UtfCheck.IsOK()),
//     followed by the text produced by UtfCheck.PrintStatus().
AString CEncodingCharacts::GetCharactsString() const
{
  AString res;

  if (IsAscii)
  {
    res += "ASCII";
    return res;
  }

  /*
  Disabled OEM reporting, kept for reference:
  if (Oem_Checked)
  {
    s.Add_Space_if_NotEmpty();
    s += (Oem_Ok ? "oem-ok" : "oem-error");
  }
  if (Utf_Checked)
  */

  const char *verdict = UtfCheck.IsOK() ? "UTF8" : "UTF8-ERROR"; // "UTF8-error"
  res.Add_Space_if_NotEmpty();
  res += verdict;

  // Append the checker's own status text after the verdict.
  AString status;
  UtfCheck.PrintStatus(status);
  res.Add_Space_if_NotEmpty();
  res += status;

  return res;
}
HRESULT CHandler::Open2(IInStream *stream, IArchiveOpenCallback *callback)
{
UInt64 endPos = 0;
@@ -135,12 +202,7 @@ HRESULT CHandler::Open2(IInStream *stream, IArchiveOpenCallback *callback)
_phySizeDefined = true;
bool utf8_OK = true;
if (!_forceCodePage)
{
if (!utf8_OK)
_curCodePage = k_DefaultCodePage;
}
// bool utf8_OK = true;
for (;;)
{
@@ -151,8 +213,8 @@ HRESULT CHandler::Open2(IInStream *stream, IArchiveOpenCallback *callback)
break;
_isArc = true;
_items.Add(item);
/*
if (!_forceCodePage)
{
if (utf8_OK) utf8_OK = CheckUTF8(item.Name, item.NameCouldBeReduced);
@@ -160,8 +222,14 @@ HRESULT CHandler::Open2(IInStream *stream, IArchiveOpenCallback *callback)
if (utf8_OK) utf8_OK = CheckUTF8(item.User);
if (utf8_OK) utf8_OK = CheckUTF8(item.Group);
}
RINOK(stream->Seek(item.GetPackSizeAligned(), STREAM_SEEK_CUR, &_phySize));
*/
item.EncodingCharacts.Check(item.Name);
_encodingCharacts.Update(item.EncodingCharacts);
_items.Add(item);
RINOK(stream->Seek((Int64)item.GetPackSizeAligned(), STREAM_SEEK_CUR, &_phySize));
if (_phySize > endPos)
{
_error = k_ErrorType_UnexpectedEnd;
@@ -188,11 +256,13 @@ HRESULT CHandler::Open2(IInStream *stream, IArchiveOpenCallback *callback)
}
}
/*
if (!_forceCodePage)
{
if (!utf8_OK)
_curCodePage = k_DefaultCodePage;
}
*/
_openCodePage = _curCodePage;
if (_items.Size() == 0)
@@ -255,6 +325,7 @@ STDMETHODIMP CHandler::Close()
_latestIsRead = false;
// _isSparse = false;
_thereIsPaxExtendedHeader = false;
_encodingCharacts.Clear();
_items.Clear();
_seqStream.Release();
_stream.Release();
@@ -315,7 +386,8 @@ void CHandler::TarStringToUnicode(const AString &s, NWindows::NCOM::CPropVariant
else
MultiByteToUnicodeString2(dest, s, _curCodePage);
if (toOs)
NItemName::ReplaceToOsSlashes_Remove_TailSlash(dest);
NItemName::ReplaceToOsSlashes_Remove_TailSlash(dest,
true); // useBackslashReplacement
prop = dest;
}
@@ -358,6 +430,17 @@ STDMETHODIMP CHandler::GetProperty(UInt32 index, PROPID propID, PROPVARIANT *val
case kpidSymLink: if (item->LinkFlag == NFileHeader::NLinkFlag::kSymLink && !item->LinkName.IsEmpty()) TarStringToUnicode(item->LinkName, prop); break;
case kpidHardLink: if (item->LinkFlag == NFileHeader::NLinkFlag::kHardLink && !item->LinkName.IsEmpty()) TarStringToUnicode(item->LinkName, prop); break;
// case kpidLinkType: prop = (int)item->LinkFlag; break;
case kpidCharacts:
{
AString s = item->EncodingCharacts.GetCharactsString();
if (item->IsThereWarning())
{
s.Add_Space_if_NotEmpty();
s += "HEADER_ERROR";
}
prop = s;
break;
}
}
prop.Detach(value);
return S_OK;
@@ -407,7 +490,7 @@ HRESULT CHandler::Extract(const UInt32 *indices, UInt32 numItems,
Int32 askMode = testMode ?
NExtract::NAskMode::kTest :
NExtract::NAskMode::kExtract;
Int32 index = allFilesMode ? i : indices[i];
const UInt32 index = allFilesMode ? i : indices[i];
const CItemEx *item;
if (seqMode)
{
@@ -475,7 +558,7 @@ HRESULT CHandler::Extract(const UInt32 *indices, UInt32 numItems,
{
if (!seqMode)
{
RINOK(_stream->Seek(item->GetDataPosition(), STREAM_SEEK_SET, NULL));
RINOK(_stream->Seek((Int64)item->GetDataPosition(), STREAM_SEEK_SET, NULL));
}
streamSpec->Init(item->GetPackSizeAligned());
RINOK(copyCoder->Code(inStream2, outStream, NULL, NULL, progress));
@@ -566,7 +649,7 @@ STDMETHODIMP CSparseStream::Read(void *data, UInt32 size, UInt32 *processedSize)
UInt64 phyPos = PhyOffsets[left] + relat;
if (_needStartSeek || _phyPos != phyPos)
{
RINOK(Handler->_stream->Seek(item.GetDataPosition() + phyPos, STREAM_SEEK_SET, NULL));
RINOK(Handler->_stream->Seek((Int64)(item.GetDataPosition() + phyPos), STREAM_SEEK_SET, NULL));
_needStartSeek = false;
_phyPos = phyPos;
}
@@ -604,7 +687,7 @@ STDMETHODIMP CSparseStream::Seek(Int64 offset, UInt32 seekOrigin, UInt64 *newPos
}
if (offset < 0)
return HRESULT_WIN32_ERROR_NEGATIVE_SEEK;
_virtPos = offset;
_virtPos = (UInt64)offset;
if (newPosition)
*newPosition = _virtPos;
return S_OK;
@@ -650,7 +733,6 @@ STDMETHODIMP CHandler::GetStream(UInt32 index, ISequentialInStream **stream)
// Resets the code page settings and the pax-header flag to their defaults.
void CHandler::Init()
{
  _thereIsPaxExtendedHeader = false;
  _forceCodePage = false;

  // Both the specified and the current code page start as UTF-8
  // (the trailing note in the original line mentions CP_OEMCP as the alternative).
  _specifiedCodePage = CP_UTF8; // CP_OEMCP;
  _curCodePage = _specifiedCodePage;
}

View File

@@ -48,6 +48,8 @@ private:
UInt32 _curCodePage;
UInt32 _openCodePage;
CEncodingCharacts _encodingCharacts;
NCompress::CCopyCoder *copyCoderSpec;
CMyComPtr<ICompressCoder> copyCoder;

View File

@@ -25,8 +25,8 @@ STDMETHODIMP CHandler::GetFileTimeType(UInt32 *type)
return S_OK;
}
HRESULT GetPropString(IArchiveUpdateCallback *callback, UInt32 index, PROPID propId,
AString &res, UINT codePage, bool convertSlash = false)
HRESULT GetPropString(IArchiveUpdateCallback *callback, UInt32 index, PROPID propId, AString &res,
UINT codePage, unsigned utfFlags, bool convertSlash)
{
NCOM::CPropVariant prop;
RINOK(callback->GetProperty(index, propId, &prop));
@@ -39,7 +39,7 @@ HRESULT GetPropString(IArchiveUpdateCallback *callback, UInt32 index, PROPID pro
if (codePage == CP_UTF8)
{
ConvertUnicodeToUTF8(s, res);
ConvertUnicodeToUTF8_Flags(s, res, utfFlags);
// if (!ConvertUnicodeToUTF8(s, res)) // return E_INVALIDARG;
}
else
@@ -56,8 +56,8 @@ HRESULT GetPropString(IArchiveUpdateCallback *callback, UInt32 index, PROPID pro
static int CompareUpdateItems(void *const *p1, void *const *p2, void *)
{
const CUpdateItem &u1 = *(*((const CUpdateItem **)p1));
const CUpdateItem &u2 = *(*((const CUpdateItem **)p2));
const CUpdateItem &u1 = *(*((const CUpdateItem *const *)p1));
const CUpdateItem &u2 = *(*((const CUpdateItem *const *)p2));
if (!u1.NewProps)
{
if (u2.NewProps)
@@ -78,8 +78,15 @@ STDMETHODIMP CHandler::UpdateItems(ISequentialOutStream *outStream, UInt32 numIt
if ((_stream && (_error != k_ErrorType_OK || _warning /* || _isSparse */)) || _seqStream)
return E_NOTIMPL;
CObjectVector<CUpdateItem> updateItems;
UINT codePage = (_forceCodePage ? _specifiedCodePage : _openCodePage);
const UINT codePage = (_forceCodePage ? _specifiedCodePage : _openCodePage);
const unsigned utfFlags = g_Unicode_To_UTF8_Flags;
/*
// for debug only:
unsigned utfFlags = 0;
utfFlags |= UTF_FLAG__TO_UTF8__EXTRACT_BMP_ESCAPE;
utfFlags |= UTF_FLAG__TO_UTF8__SURROGATE_ERROR;
*/
for (UInt32 i = 0; i < numItems; i++)
{
CUpdateItem ui;
@@ -94,7 +101,7 @@ STDMETHODIMP CHandler::UpdateItems(ISequentialOutStream *outStream, UInt32 numIt
ui.NewProps = IntToBool(newProps);
ui.NewData = IntToBool(newData);
ui.IndexInArc = indexInArc;
ui.IndexInArc = (int)indexInArc;
ui.IndexInClient = i;
if (IntToBool(newProps))
@@ -138,11 +145,11 @@ STDMETHODIMP CHandler::UpdateItems(ISequentialOutStream *outStream, UInt32 numIt
ui.MTime = NTime::FileTimeToUnixTime64(prop.filetime);
}
RINOK(GetPropString(callback, i, kpidPath, ui.Name, codePage, true));
RINOK(GetPropString(callback, i, kpidPath, ui.Name, codePage, utfFlags, true));
if (ui.IsDir && !ui.Name.IsEmpty() && ui.Name.Back() != '/')
ui.Name += '/';
RINOK(GetPropString(callback, i, kpidUser, ui.User, codePage));
RINOK(GetPropString(callback, i, kpidGroup, ui.Group, codePage));
RINOK(GetPropString(callback, i, kpidUser, ui.User, codePage, utfFlags, false));
RINOK(GetPropString(callback, i, kpidGroup, ui.Group, codePage, utfFlags, false));
}
if (IntToBool(newData))
@@ -168,7 +175,7 @@ STDMETHODIMP CHandler::UpdateItems(ISequentialOutStream *outStream, UInt32 numIt
updateItems.Sort(CompareUpdateItems, NULL);
}
return UpdateArchive(_stream, outStream, _items, updateItems, codePage, callback);
return UpdateArchive(_stream, outStream, _items, updateItems, codePage, utfFlags, callback);
COM_TRY_END
}

View File

@@ -58,6 +58,7 @@ namespace NFileHeader
const char kGnu_LongLink = 'K';
const char kGnu_LongName = 'L';
const char kSparse = 'S';
const char kLabel = 'V';
const char kDumpDir = 'D'; /* GNUTYPE_DUMPDIR.
data: list of files created by the --incremental (-G) option
Each file name is preceded by either

View File

@@ -81,14 +81,14 @@ static void ReadString(const char *s, unsigned size, AString &result)
static bool ParseInt64(const char *p, Int64 &val)
{
UInt32 h = GetBe32(p);
val = GetBe64(p + 4);
val = (Int64)GetBe64(p + 4);
if (h == (UInt32)1 << 31)
return ((val >> 63) & 1) == 0;
if (h == (UInt32)(Int32)-1)
return ((val >> 63) & 1) != 0;
UInt64 uv;
bool res = OctalToNumber(p, 12, uv);
val = uv;
val = (Int64)uv;
return res;
}
@@ -112,7 +112,9 @@ static bool ParseSize(const char *p, UInt64 &val)
val = GetBe64(p + 4);
return ((val >> 63) & 1) == 0;
}
return OctalToNumber(p, 12, val);
return OctalToNumber(p, 12, val,
true // 20.03: allow empty size for 'V' Label entry
);
}
#define CHECK(x) { if (!(x)) return k_IsArc_Res_NO; }
@@ -201,8 +203,8 @@ static HRESULT GetNextItemReal(ISequentialInStream *stream, bool &filled, CItemE
// we allow empty Mode value for LongName prefix items
RIF(OctalToNumber32(p, 8, item.Mode, true)); p += 8;
if (!OctalToNumber32(p, 8, item.UID)) item.UID = 0; p += 8;
if (!OctalToNumber32(p, 8, item.GID)) item.GID = 0; p += 8;
if (!OctalToNumber32(p, 8, item.UID)) { item.UID = 0; } p += 8;
if (!OctalToNumber32(p, 8, item.GID)) { item.GID = 0; } p += 8;
RIF(ParseSize(p, item.PackSize));
item.Size = item.PackSize;
@@ -245,6 +247,15 @@ static HRESULT GetNextItemReal(ISequentialInStream *stream, bool &filled, CItemE
item.PackSize = 0;
item.Size = 0;
}
if (item.LinkFlag == NFileHeader::NLinkFlag::kDirectory)
{
// GNU tar ignores Size field, if LinkFlag is kDirectory
// 21.02 : we set PackSize = 0 to be more compatible with GNU tar
item.PackSize = 0;
// item.Size = 0;
}
/*
TAR standard requires sum of unsigned byte values.
But some TAR programs use sum of signed byte values.
@@ -269,7 +280,7 @@ static HRESULT GetNextItemReal(ISequentialInStream *stream, bool &filled, CItemE
if (item.LinkFlag == NFileHeader::NLinkFlag::kSparse)
{
Byte isExtended = buf[482];
Byte isExtended = (Byte)buf[482];
if (isExtended != 0 && isExtended != 1)
return S_OK;
RIF(ParseSize(buf + 483, item.Size));
@@ -309,7 +320,7 @@ static HRESULT GetNextItemReal(ISequentialInStream *stream, bool &filled, CItemE
}
item.HeaderSize += NFileHeader::kRecordSize;
isExtended = buf[21 * 24];
isExtended = (Byte)buf[21 * 24];
if (isExtended != 0 && isExtended != 1)
return S_OK;
for (unsigned i = 0; i < 21; i++)
@@ -442,9 +453,16 @@ HRESULT ReadItem(ISequentialInStream *stream, bool &filled, CItemEx &item, EErro
case 'x':
case 'X':
{
// pax Extended Header
if (item.Name.IsPrefixedBy("PaxHeader/")
|| item.Name.Find("PaxHeaders.4467/") >= 0)
const char *s = item.Name.Ptr();
if (IsString1PrefixedByString2(s, "./"))
s += 2;
if (IsString1PrefixedByString2(s, "./"))
s += 2;
if ( IsString1PrefixedByString2(s, "PaxHeader/")
|| IsString1PrefixedByString2(s, "PaxHeaders.X/")
|| IsString1PrefixedByString2(s, "PaxHeaders.4467/")
|| StringsAreEqual_Ascii(s, "@PaxHeader")
)
{
RINOK(ReadDataToString(stream, item, pax, error));
if (error != k_ErrorType_OK)

View File

@@ -4,6 +4,7 @@
#define __ARCHIVE_TAR_ITEM_H
#include "../../../Common/MyLinux.h"
#include "../../../Common/UTFConvert.h"
#include "../Common/ItemNameUtils.h"
@@ -108,8 +109,52 @@ struct CItem
}
UInt64 GetPackSizeAligned() const { return (PackSize + 0x1FF) & (~((UInt64)0x1FF)); }
// Reports a header warning for this item: true only for a directory entry
// whose PackSize is smaller than its Size.
// Such a warning is possible when (Size != 0) for a dir item.
bool IsThereWarning() const
{
  if (LinkFlag != NFileHeader::NLinkFlag::kDirectory)
    return false;
  return PackSize < Size;
}
};
// Encoding characteristics gathered from one string (via Check()) or merged
// from several CEncodingCharacts objects (via Update()).
struct CEncodingCharacts
{
  bool IsAscii;          // stays true until a non-ASCII result is recorded
  // Disabled OEM / UTF bookkeeping fields, kept for reference:
  // bool Oem_Checked;
  // bool Oem_Ok;
  // bool Utf_Checked;
  CUtf8Check UtfCheck;   // UTF-8 check state

  // A new object starts in the cleared state.
  CEncodingCharacts() { Clear(); }

  // Returns the object to its initial state: ASCII assumed, UTF-8 state reset.
  void Clear()
  {
    UtfCheck.Clear();
    IsAscii = true;
    // Oem_Checked = false;
    // Oem_Ok = false;
    // Utf_Checked = false;
  }

  // Merges the characteristics of `ec` into this object:
  // IsAscii survives only if both sides are ASCII; UTF-8 state is combined
  // by CUtf8Check::Update().
  void Update(const CEncodingCharacts &ec)
  {
    IsAscii = IsAscii && ec.IsAscii;
    // if (ec.Utf_Checked)
    UtfCheck.Update(ec.UtfCheck);
    // Utf_Checked = true;
  }

  void Check(const AString &s);
  AString GetCharactsString() const;
};
struct CItemEx: public CItem
{
UInt64 HeaderPos;
@@ -117,6 +162,8 @@ struct CItemEx: public CItem
bool NameCouldBeReduced;
bool LinkNameCouldBeReduced;
CEncodingCharacts EncodingCharacts;
UInt64 GetDataPosition() const { return HeaderPos + HeaderSize; }
UInt64 GetFullSize() const { return HeaderSize + PackSize; }
};

View File

@@ -62,7 +62,7 @@ static void WriteOctal_12_Signed(char *s, Int64 val)
{
if (val >= 0)
{
WriteOctal_12(s, val);
WriteOctal_12(s, (UInt64)val);
return;
}
s[0] = s[1] = s[2] = s[3] = (char)(Byte)0xFF;

View File

@@ -15,13 +15,10 @@
namespace NArchive {
namespace NTar {
HRESULT GetPropString(IArchiveUpdateCallback *callback, UInt32 index, PROPID propId,
AString &res, UINT codePage, bool convertSlash = false);
HRESULT UpdateArchive(IInStream *inStream, ISequentialOutStream *outStream,
const CObjectVector<NArchive::NTar::CItemEx> &inputItems,
const CObjectVector<CUpdateItem> &updateItems,
UINT codePage,
UINT codePage, unsigned utfFlags,
IArchiveUpdateCallback *updateCallback)
{
COutArchive outArchive;
@@ -43,7 +40,7 @@ HRESULT UpdateArchive(IInStream *inStream, ISequentialOutStream *outStream,
if (ui.NewData)
complexity += ui.Size;
else
complexity += inputItems[ui.IndexInArc].GetFullSize();
complexity += inputItems[(unsigned)ui.IndexInArc].GetFullSize();
}
RINOK(updateCallback->SetTotal(complexity));
@@ -95,12 +92,12 @@ HRESULT UpdateArchive(IInStream *inStream, ISequentialOutStream *outStream,
memcpy(item.Magic, NFileHeader::NMagic::kUsTar_00, 8);
}
else
item = inputItems[ui.IndexInArc];
item = inputItems[(unsigned)ui.IndexInArc];
AString symLink;
if (ui.NewData || ui.NewProps)
{
RINOK(GetPropString(updateCallback, ui.IndexInClient, kpidSymLink, symLink, codePage, true));
RINOK(GetPropString(updateCallback, ui.IndexInClient, kpidSymLink, symLink, codePage, utfFlags, true));
if (!symLink.IsEmpty())
{
item.LinkFlag = NFileHeader::NLinkFlag::kSymLink;
@@ -159,7 +156,7 @@ HRESULT UpdateArchive(IInStream *inStream, ISequentialOutStream *outStream,
{
AString hardLink;
RINOK(GetPropString(updateCallback, ui.IndexInClient, kpidHardLink, hardLink, codePage, true));
RINOK(GetPropString(updateCallback, ui.IndexInClient, kpidHardLink, hardLink, codePage, utfFlags, true));
if (!hardLink.IsEmpty())
{
item.LinkFlag = NFileHeader::NLinkFlag::kHardLink;
@@ -189,7 +186,7 @@ HRESULT UpdateArchive(IInStream *inStream, ISequentialOutStream *outStream,
outArchive.Pos = fileHeaderStartPos;
item.PackSize = copyCoderSpec->TotalSize;
RINOK(outArchive.WriteHeader(item));
RINOK(outSeekStream->Seek(item.PackSize, STREAM_SEEK_CUR, NULL));
RINOK(outSeekStream->Seek((Int64)item.PackSize, STREAM_SEEK_CUR, NULL));
outArchive.Pos += item.PackSize;
}
RINOK(outArchive.FillDataResidual(item.PackSize));
@@ -201,7 +198,7 @@ HRESULT UpdateArchive(IInStream *inStream, ISequentialOutStream *outStream,
}
else
{
const CItemEx &existItem = inputItems[ui.IndexInArc];
const CItemEx &existItem = inputItems[(unsigned)ui.IndexInArc];
UInt64 size;
if (ui.NewProps)
@@ -231,12 +228,12 @@ HRESULT UpdateArchive(IInStream *inStream, ISequentialOutStream *outStream,
item.GID = existItem.GID;
RINOK(outArchive.WriteHeader(item));
RINOK(inStream->Seek(existItem.GetDataPosition(), STREAM_SEEK_SET, NULL));
RINOK(inStream->Seek((Int64)existItem.GetDataPosition(), STREAM_SEEK_SET, NULL));
size = existItem.PackSize;
}
else
{
RINOK(inStream->Seek(existItem.HeaderPos, STREAM_SEEK_SET, NULL));
RINOK(inStream->Seek((Int64)existItem.HeaderPos, STREAM_SEEK_SET, NULL));
size = existItem.GetFullSize();
}

View File

@@ -13,7 +13,7 @@ namespace NTar {
struct CUpdateItem
{
int IndexInArc;
int IndexInClient;
unsigned IndexInClient;
UInt64 Size;
Int64 MTime;
UInt32 Mode;
@@ -30,9 +30,12 @@ struct CUpdateItem
HRESULT UpdateArchive(IInStream *inStream, ISequentialOutStream *outStream,
const CObjectVector<CItemEx> &inputItems,
const CObjectVector<CUpdateItem> &updateItems,
UINT codePage,
UINT codePage, unsigned utfFlags,
IArchiveUpdateCallback *updateCallback);
HRESULT GetPropString(IArchiveUpdateCallback *callback, UInt32 index, PROPID propId, AString &res,
UINT codePage, unsigned utfFlags, bool convertSlash);
}}
#endif