如何在浏览器 JavaScript 中将文件传输到计算机或从计算机传输？答案

【问题标题】：How to stream files to and from the computer in browser JavaScript?如何在浏览器 JavaScript 中将文件传输到计算机或从计算机传输？
【发布时间】：2020-12-01 16:54:19
【问题描述】：

我正在开发一个对文件进行字节级处理的网络应用程序（纯 HTML/JavaScript，没有使用任何库），它是一个霍夫曼编码演示。它运行得相当不错（你不会想知道我花了多长时间才做到这一步），但我的完美主义让我有点不安，因为我必须把文件整个加载到 ArrayBuffer 中，而不是从硬盘流式读取。此外还有文件大小的限制，尽管压缩一个 4GB 的文件（我的数据结构所支持的最大值）确实会花很长时间。

不过，为了让这个应用程序能在低资源设备上运行，我该如何从文件 input 框流式读取文件（我需要多次遍历文件：频率计数、输出大小检测和实际写入），并把结果流式写入某种浏览器下载（谢天谢地，这一步至少只需要一遍）？

以下是现在处理它的相关函数（我为全局变量道歉：P）：

//Load the file
  /**
   * Reads the file picked in the "file" input into memory and starts processing.
   *
   * Reports "No file selected" through displayError when the input holds no file.
   * Otherwise the first file is read as an ArrayBuffer; on success the shared
   * `inp` DataView is pointed at the loaded bytes and boot() is called, and on
   * failure the reader's error is passed to displayError.
   */
  function startProcessingFile() {
    const fileInput = document.getElementById("file");
    if (fileInput.files.length === 0) {
      displayError("No file selected");
      return;
    }

    displayStatus("Loading File...");
    const loader = new FileReader();
    loader.onload = () => {
      // Publish the loaded bytes through the shared `inp` view before starting.
      inp = new DataView(loader.result);
      boot();
    };
    loader.onerror = () => {
      displayError(loader.error);
    };
    loader.readAsArrayBuffer(fileInput.files[0]);
  }

//A bit later on -- one of the functions that reads the data from the input file
/**
 * Counts byte frequencies of the input, one slice per call.
 *
 * Each call tallies up to ceil(inputSize / 100) bytes from `inp` into
 * `frequencies`, reports progress, and re-schedules itself with setTimeout
 * until every byte has been counted. When finished it advances
 * `system_state` and calls taskHandle().
 *
 * @param {number} c - Number of bytes already processed (index of the next byte).
 */
function countTypes(c) {
  // A pending `die` request is consumed here and ends the chain of slices.
  if (die) {
    die = false;
    return;
  }

  let remaining = Math.ceil(inputSize / 100.0);
  while (c < inputSize && remaining > 0) {
    const byteValue = inp.getUint8(c);
    frequencies[byteValue] = frequencies[byteValue] + 1;
    remaining--;
    c++;
  }

  // Guard the division: with an empty input (inputSize 0) the ratio would be
  // 0/0 and NaN would be reported as the progress value.
  const perc = inputSize > 0 ? 100.0 * c / inputSize : 100;
  updateProgress(perc);

  if (c < inputSize) {
    // Continue with the next slice in a later task instead of looping here.
    setTimeout(function () { countTypes(c); }, 0);
  } else {
    updateProgress(100);
    system_state++;
    taskHandle();
  }
}

//Here's where the file is read the last time and also where the bits come from that I want to save. If I could stream the data directly I could probably even get rid of the dry-run stage I currently need to count how many bytes to allocate for the output ArrayBuffer. I mean, Google Drive can download files without telling the browser the size, just whether it's done yet or not, so I'd assume that's a feature I could access here too. I'm just not sure how you actually gain access to a download from JS in the first place.
/**
 * Performs the Huffman encoding, one slice per call.
 *
 * Each call encodes up to ceil(inputSize / 250) input bytes: the byte's code
 * is looked up in `table` and handed to writeBit() one element at a time.
 * The function re-schedules itself with setTimeout until all bytes are done,
 * then advances `system_state` and calls taskHandle().
 *
 * @param {number} c - Number of input bytes already processed (index of the next byte).
 * @param {boolean} d - Dry-run flag forwarded to writeBit(); if true, nothing is actually written.
 * @throws {Error} If an input byte has no usable code in `table`.
 */
function encode(c, d) {
  // A pending `die` request is consumed here and ends the chain of slices.
  if (die) {
    die = false;
    return;
  }

  let remaining = Math.ceil(inputSize / 250.0);
  if (c < inputSize) {
    // Build the value -> code lookup once per slice instead of scanning the
    // table for every byte. Later entries overwrite earlier ones, matching
    // the previous "last match wins" scan.
    const codeByValue = new Map();
    for (let k = 0; k < table.length; k++) {
      codeByValue.set(table[k].value, table[k].code);
    }

    while (c < inputSize && remaining > 0) {
      const b = inp.getUint8(c);
      const seq = codeByValue.get(b);
      if (seq === undefined) {
        // Fail loudly: emitting some other byte's code would silently corrupt the output.
        throw new Error("No Huffman code for byte value " + b);
      }
      for (let j = 0; j < seq.length; j++) {
        writeBit(seq[j], d);
      }
      remaining--;
      c++;
    }
  }

  // Guard the division: with an empty input (inputSize 0) the ratio would be NaN.
  const perc = inputSize > 0 ? 100.0 * c / inputSize : 100;
  updateProgress(perc);

  if (c < inputSize) {
    // Continue with the next slice in a later task instead of looping here.
    setTimeout(function () { encode(c, d); }, 0);
  } else {
    updateProgress(100);
    system_state++;
    taskHandle();
  }
}

//Finally, bit-level access for unaligned read/write so I can actually take advantage of the variable word size of the Huffman encoding (the read is used for decoding).
/**
 * Reads one bit from the `inp` DataView at the current bit cursor.
 *
 * The cursor is the pair (byte_index, bit_index); bits are taken starting at
 * the least significant bit of each byte. The byte offset of 4 skips the
 * file-size int stored at the start of the data.
 *
 * @returns {number} The bit that was read, 0 or 1.
 */
function readBit() {
  const sourceByte = inp.getUint8(byte_index + 4);
  const bit = (sourceByte >>> bit_index) & 1;

  // Advance the cursor, rolling over to the next byte after bit 7.
  bit_index += 1;
  if (bit_index > 7) {
    bit_index = 0;
    byte_index++;
  }

  return bit;
}

/**
 * Writes one bit to the `output` DataView at the current bit cursor.
 *
 * The cursor is the pair (byte_index, bit_index) and is advanced on every
 * call. The byte is only updated and stored when `d` is exactly false;
 * any other value is treated as a dry run that just moves the cursor.
 * Stores go to byte_index + 4.
 *
 * @param {number} b - The bit to write.
 * @param {boolean} d - Dry-run flag; if true, does not actually write.
 */
function writeBit(b, d) {
  if (d === false) {
    // Clear the target bit in the working byte, then merge in the new bit.
    const cleared = current_byte & (0xff ^ (1 << bit_index));
    current_byte = cleared | (b << bit_index);
    output.setUint8(byte_index + 4, current_byte);
  }

  // Advance the cursor, rolling over to the next byte after bit 7.
  bit_index += 1;
  if (bit_index > 7) {
    bit_index = 0;
    byte_index++;
  }
}

/**
 * Reads a byte through eight readBit() calls, so it works at unaligned positions.
 * The first bit read becomes the least significant bit of the result.
 *
 * @returns {number} The assembled byte value.
 */
function readByte() {
  let value = 0;
  let position = 0;
  while (position < 8) {
    value |= readBit() << position;
    position += 1;
  }
  return value;
}

/**
 * Writes a byte through eight writeBit() calls, so it works at unaligned positions.
 * Bits are emitted from the least significant to the most significant.
 *
 * @param {number} b - The byte value to write.
 * @param {boolean} d - Dry-run flag forwarded unchanged to writeBit().
 */
function writeByte(b, d) {
  for (let shift = 0; shift < 8; shift += 1) {
    writeBit((b >>> shift) & 1, d);
  }
}

//And finally the download mechanism I'm using.
/**
 * Offers the processed `output` to the user as a file download.
 *
 * The download name is the base name of `source_name`: `fext` is appended
 * when encoding, and a trailing `fext` is removed when decoding. Afterwards
 * `running` is cleared and clearRes() is called if the "ac" element carries
 * the "activeNav" class.
 */
function downloadResult() {
  const blobObject = new Blob([output], { type: 'application/octet-stream' });

  // Keep only the file name: drop any backslash- or slash-separated path prefix.
  let n = source_name.split('\\').pop().split('/').pop();
  if (doEncode) {
    n = n + fext;
  } else if (n.endsWith(fext)) {
    // Strip the extension only where it is the suffix; replacing the first
    // occurrence would mangle names that contain it earlier on.
    n = n.slice(0, n.length - fext.length);
  }

  const url = URL.createObjectURL(blobObject);
  const a = document.createElement("a");
  a.setAttribute("href", url);
  a.setAttribute("download", n);
  a.click();

  // Release the object URL so the blob can be freed. This is deferred so the
  // download triggered by click() has a chance to start first.
  setTimeout(function () { URL.revokeObjectURL(url); }, 1000);

  running = false;
  const b = document.getElementById("ac");
  if (b.classList.contains("activeNav")) {
    clearRes();
  }
}

我基本上想把上面的大部分代码拆掉，换成能够从用户所选文件中逐字节或按中等大小的数据块读取的东西；然后在进入实际输出阶段时，通过一个基本算是普通（vanilla）的浏览器下载，把数据一个字节一个字节地、像涓涓细流一样写入用户的下载文件夹。

我确实知道文件输入框可以选择多个文件，所以如果能够下载到子文件夹，我也许会纯粹出于好玩去研究一下该怎么实现。那岂不是很有趣！……不过请注意，我很确定这是不可能的（我不明白为什么网页不能在浏览器的下载文件夹里创建子目录，但这可能是出于安全原因）。

如果您需要查看更多代码，请告诉我，但由于这是一个课堂项目，我不想被指责抄袭我自己的应用程序...

【问题讨论】：

标签： javascript html

【解决方案1】：

以流的形式从磁盘读取

您可以使用 Blob.stream() 方法从该 Blob（或文件）返回 ReadableStream。

// Streams the selected file chunk by chunk instead of loading it whole.
inp.onchange = async (evt) => {
  const reader = inp.files[ 0 ].stream().getReader();
  // Pull chunks until the reader reports that the stream is exhausted.
  let result = await reader.read();
  while( !result.done ) {
    handleChunk( result.value );
    result = await reader.read();
  }
  console.log( "all done" );
};

// Placeholder consumer: logs the size in bytes of each chunk it is handed.
function handleChunk( buf ) {
  const size = buf.byteLength;
  console.log( "received a new buffer", size );
}

<input type="file" id="inp">

对于不支持此方法的旧浏览器，您仍然可以仅使用其.slice() 方法分块读取文件：

// Fallback for browsers without Blob.stream(): reads the selected file in
// fixed-size slices so that only one chunk is held in memory at a time.
inp.onchange = async (evt) => {
  const file = inp.files[ 0 ];
  const chunksize = 64 * 1024;
  for( let offset = 0; offset < file.size; offset += chunksize ) {
    // Blob.slice() is synchronous and returns a Blob, so there is nothing to await here.
    const chunkfile = file.slice( offset, offset + chunksize );
    // Blob.arrayBuffer() can be polyfilled with a FileReader
    const chunk = await chunkfile.arrayBuffer();
    handleChunk( chunk );
  }
  console.log( "all done" );
};

// Placeholder consumer: logs how many bytes the received chunk contains.
function handleChunk( buf ) {
  const { byteLength } = buf;
  console.log( "received a new buffer", byteLength );
}

<input type="file" id="inp">

然而，以流的形式写入磁盘有点困难。

Jimmy Wärting 有一个 伟大的 hack，名为 StreamSaver.js，它使用了 Service Worker。我不确定它的浏览器支持能走多远，虽然很棒，但它仍然是一个“hack”，需要一个 Service Worker 才能运行。

一个更简单的方法是使用正在定义的File System API，它目前仅在 Chrome 中可用。代码示例见this Q/A。

【讨论】：

你能解释一下StreamSaver中使用的机制吗？我尝试查看它，但它非常复杂，而且我的 JS 不是那么好！另外，它支持哪些输入数据类型？我需要一些可以写入单个字节的东西（如前所述，如果我有字节，我已经有办法获得位级访问）。至于 blob.stream()，什么控制缓冲区大小（我假设缓冲区是一个 ArrayBuffer 以便我可以运行 DataView）？不过，最重要的是，是否可以多次读取输入流？在我有一个频率表之前我无法对其进行编码，这需要阅读它......
How StreamSaver works。对于它接受的输入类型，它的标准输入是 WritableStreamDefaultWriter，因此它以相同的方式接受任何类型和任何长度的二进制数据块。（不过逐个字节单独写入会比写入大块数据慢，所以您可能更愿意先缓冲一些数据（例如几 KB），再写入这些较大的缓冲区。）
对于 Blob.stream()，规范目前尚未定义块大小；实际上 Chrome 现在的块大小似乎是可变的，不过第一个块仍然是 64KB，这似乎也是 Firefox 中的默认值。因此，如果这对您很重要，您可能需要自行检查收到的缓冲区的大小。另外，同一个输入流不能被多次读取，但如果需要，您可以把收到的块存入一个更大的 ArrayBuffer，也可以向同一个文件再请求一个新的流，或者只请求其中的一部分（使用它的 .slice() 方法）。
这是基于@kaiido 上面的回答的 Blob.stream 的 polyfill，它似乎对我有用 gist.github.com/dumbmatter/3b1d90bf7cfb6c5951bfffeefecf40aa - 这假设您已经加载了 ReadableStream polyfill，如果需要的话

【解决方案2】：

现在，现代浏览器中的 JavaScript 已经支持 Streams API。

Mozilla Streams MDN with samples

// Set up your stream with the options; they help handle size limitations etc.
// The second argument, queuingStrategy, is optional. Reference docs mark it
// with square brackets, but that notation is not valid JavaScript.
var readableStream = new ReadableStream(underlyingSource, queuingStrategy);

// Fetches a page and re-exposes its body through a custom ReadableStream,
// forwarding every chunk from the response reader to the new stream.
fetch("https://www.example.org/")
  .then((response) => {
    const reader = response.body.getReader();
    const stream = new ReadableStream({
      start(controller) {
        // The following function handles each data chunk
        function push() {
          // "done" is a Boolean and value a "Uint8Array"
          reader.read().then(({ done, value }) => {
            // Is there no more data to read?
            if (done) {
              // Tell the browser that we have finished sending data
              controller.close();
              return;
            }

            // Get the data and send it to the browser via the controller
            controller.enqueue(value);
            push();
          }).catch((err) => {
            // Surface read failures to the consumer; otherwise the stream
            // would stay open forever without delivering anything more.
            controller.error(err);
          });
        }

        push();
      },
      cancel(reason) {
        // Stop reading from the response when the consumer cancels the stream.
        return reader.cancel(reason);
      }
    });

    return new Response(stream, { headers: { "Content-Type": "text/html" } });
  })
  .catch((err) => {
    // Do not leave a failed request as an unhandled rejection.
    console.error("Streaming fetch failed:", err);
  });

【讨论】：