【问题标题】:What could cause "No mapping for the Unicode character exists in the target multi-byte code page"?什么可能导致“目标多字节代码页中不存在 Unicode 字符的映射”?
【发布时间】:2016-06-13 00:57:06
【问题描述】:

我收到一份显示 EEncodingError 的错误报告。日志指向 TFile.AppendAllText。我是在下面这个过程中调用 TFile.AppendAllText 的:

// Writes uString to the file named FileName.
//   WriteOp     - woOverwrite replaces the file contents through TFile.WriteAllText;
//                 any other value appends through TFile.AppendAllText.
//   ForceFolder - when TRUE, ForceDirectoriesMsg is called on the file's folder first
//                 and nothing is written if it returns FALSE.
//                 (ForceDirectoriesMsg is declared elsewhere; presumably it creates
//                 the folder and reports failures - confirm against its definition.)
procedure WriteToFile(CONST FileName: string; CONST uString: string; CONST WriteOp: WriteOpperation; ForceFolder: Boolean= FALSE);     // Works with UNC paths
begin
 // Guard: the caller asked for the folder to be forced and that step failed
 if ForceFolder AND NOT ForceDirectoriesMsg(ExtractFilePath(FileName)) then EXIT;

 if WriteOp = woOverwrite then
  begin
   // Replace whatever the file currently holds
   IOUtils.TFile.WriteAllText(FileName, uString);
  end
 else
  begin
   // Add the text after the existing contents
   IOUtils.TFile.AppendAllText(FileName, uString);
  end;
end;

这是来自 EurekaLog 的信息。

什么会导致这种情况发生?

【问题讨论】:

  • Delphi 10.2 的类似问题

标签: delphi unicode delphi-xe7


【解决方案1】:

此程序会重现您报告的错误:

{$APPTYPE CONSOLE}

uses
  System.SysUtils, System.IOUtils;

// Minimal reproduction: write a non-ASCII character as ANSI, then append to
// the same file without specifying an encoding.
var
  TempPath: string;

begin
  try
    // Work on a temporary file so no existing data is touched
    TempPath := TPath.GetTempFileName;

    // Step 1: store the character using the ANSI encoding
    TFile.WriteAllText(TempPath, 'é', TEncoding.ANSI);

    // Step 2: append through the overload that takes no encoding argument
    TFile.AppendAllText(TempPath, 'é');
  except
    // Print the class and message of whatever exception was raised
    on Err: Exception do
      Writeln(Err.ClassName, ': ', Err.Message);
  end;
end.

这里我把原始文件写成ANSI。然后调用AppendAllText,它会尝试写为UTF-8。发生的事情是我们最终进入了这个函数:

// Appends Contents to the file at Path, choosing the byte encoding of the
// appended text from the encoding detected for the existing file.
//   Path     - file to append to.
//   Contents - text to append.
// Raises EInOutError (carrying the original message) when an EFileStreamError
// occurs while working with the stream. Other exceptions are not caught here
// and propagate to the caller.
class procedure TFile.AppendAllText(const Path, Contents: string);
var
  LFileStream: TFileStream;
  LFileEncoding: TEncoding; // encoding of the file
  Buff: TBytes;             // bytes that are finally written to the stream
  Preamble: TBytes;         // UTF-8 preamble written when the file is converted
  UTFStr: TBytes;           // Contents encoded as ANSI (despite the name)
  UTF8Str: TBytes;          // Contents encoded as UTF-8
begin
  // Helper declared elsewhere; presumably validates the arguments - confirm.
  CheckAppendAllTextParameters(Path, nil, False);

  // Start from nil so the Free call in the finally section is safe even if
  // opening the file fails.
  LFileStream := nil;
  try
    try
      LFileStream := DoCreateOpenFile(Path);
      // detect the file encoding
      LFileEncoding := GetEncoding(LFileStream);

      // file is detected as the default ANSI code page
      if LFileEncoding = TEncoding.ANSI then
      begin
        // Decide whether Contents can be appended in ANSI:
        // encode it both ways and compare the results below.

        UTFStr := TEncoding.ANSI.GetBytes(Contents);
        UTF8Str := TEncoding.UTF8.GetBytes(Contents);

        // Both byte arrays are decoded as UTF-8 and compared. UTFStr holds
        // ANSI-encoded bytes, so for non-ASCII text (for example 'é') it is
        // not necessarily valid UTF-8 and this GetString call is where the
        // encoding exception is raised.
        if TEncoding.UTF8.GetString(UTFStr) = TEncoding.UTF8.GetString(UTF8Str) then
        begin
          // Keep the file as ANSI: position at the end and append ANSI bytes
          LFileStream.Seek(0, TSeekOrigin.soEnd);
          Buff := TEncoding.ANSI.GetBytes(Contents);
        end
        // Contents can be represented only in UTF-8;
        // convert file and Contents encodings to UTF-8
        else
        begin
          // convert file contents to UTF-8
          LFileStream.Seek(0, TSeekOrigin.soBeginning);
          SetLength(Buff, LFileStream.Size);
          LFileStream.ReadBuffer(Buff, Length(Buff));
          Buff := TEncoding.Convert(LFileEncoding, TEncoding.UTF8, Buff);

          // prepare the stream to rewrite the converted file contents:
          // resize it, rewind, then write the UTF-8 preamble followed by the
          // converted bytes
          LFileStream.Size := Length(Buff);
          LFileStream.Seek(0, TSeekOrigin.soBeginning);
          Preamble := TEncoding.UTF8.GetPreamble;
          LFileStream.WriteBuffer(Preamble, Length(Preamble));
          LFileStream.WriteBuffer(Buff, Length(Buff));

          // convert Contents to UTF-8; written by the common WriteBuffer below
          Buff := TEncoding.UTF8.GetBytes(Contents);
        end;
      end
      // file is written either in UTF-8 or Unicode (BE or LE);
      // append Contents encoded in UTF-8 to the file
      // NOTE(review): this branch emits UTF-8 bytes for every non-ANSI
      // detection result - confirm that is intended for UTF-16 files.
      else
      begin
        LFileStream.Seek(0, TSeekOrigin.soEnd);
        Buff := TEncoding.UTF8.GetBytes(Contents);
      end;

      // write Contents to the stream
      LFileStream.WriteBuffer(Buff, Length(Buff));
    except
      // Translate stream errors into EInOutError, keeping the message
      on E: EFileStreamError do
        raise EInOutError.Create(E.Message);
    end;
  finally
    // Always release the stream, whether or not an exception occurred
    LFileStream.Free;
  end;
end;

错误源于这一行:

if TEncoding.UTF8.GetString(UTFStr) = TEncoding.UTF8.GetString(UTF8Str) then

问题是UTFStr 实际上不是有效的UTF-8。因此TEncoding.UTF8.GetString(UTFStr) 会抛出异常。

这是 TFile.AppendAllText 中的一个缺陷。鉴于它非常清楚 UTFStr 是 ANSI 编码的,它调用 TEncoding.UTF8.GetString 毫无意义。

您应该针对 Delphi 10 Seattle 中仍然存在的这个缺陷向 Embarcadero 提交错误报告。同时,你不应该使用 TFile.AppendAllText。

【讨论】:

  • TStreamReader 怎么样?似乎是一个不错的选择,它不是基于 IOUtils。
  • 它的性能有点不可靠。我不想在不知道文件的生命周期以及还有谁修改它的情况下提出建议。
  • 这个缺陷在Delphi 10.4中仍然存在,它影响了解码Base64的DecodeStream等其他功能。
猜你喜欢
  • 2017-03-21
  • 2013-08-14
  • 2014-08-05
  • 2014-11-21
  • 1970-01-01
  • 2014-08-30
  • 1970-01-01
  • 1970-01-01
  • 1970-01-01
相关资源
最近更新 更多