【发布时间】:2020-12-01 16:54:19
【问题描述】:
我正在开发一个对文件进行字节级处理的 Web 应用程序(纯 HTML/JavaScript,不使用任何库),用来演示霍夫曼编码。它运行得很好(你不会想知道我花了多长时间才做到这一步),但出于完美主义,有一点让我不太满意:我必须把整个文件加载到 ArrayBuffer 中,而不是从硬盘流式读取。此外还存在文件大小限制,不过压缩一个 4GB 的文件(我的数据结构所支持的最大值)本来也确实要花很长时间。
尽管如此,为了让这个应用程序能在低资源设备上运行,我该如何从文件 input 框流式读取文件(我需要对文件进行多遍处理:频率统计、输出大小检测以及实际写入),并把结果流式写入某种浏览器下载(谢天谢地,至少这一步只需一遍)?
以下是现在处理它的相关函数(我为全局变量道歉:P):
//Load the file
/**
 * Reads the file chosen in the "file" input element into memory.
 *
 * When no file is selected an error is displayed and nothing else happens.
 * Otherwise a status message is shown and the first selected file is read as
 * an ArrayBuffer; on success the global `inp` is set to a DataView over the
 * result and `boot()` is called, on failure the reader's error is displayed.
 */
function startProcessingFile(){
  const fileInput = document.getElementById("file");
  if (fileInput.files.length === 0) {
    displayError("No file selected");
    return;
  }

  displayStatus("Loading File...");
  const reader = new FileReader();
  // Both handlers are installed before the read starts.
  reader.onload = () => {
    inp = new DataView(reader.result);
    boot();
  };
  reader.onerror = () => {
    displayError(reader.error);
  };
  reader.readAsArrayBuffer(fileInput.files[0]);
}
//A bit later on -- one of the functions that reads the data from the input file
/**
 * Counts byte frequencies of the input, one slice per invocation.
 *
 * Each call handles at most ceil(inputSize / 100) bytes, reports progress and
 * then either reschedules itself with setTimeout (more bytes remain) or bumps
 * `system_state` and calls `taskHandle()` (all bytes counted).
 *
 * If the global `die` flag is set, the flag is cleared and the call returns
 * without doing or scheduling any work.
 *
 * @param {number} c - Number of input bytes already processed, i.e. the index
 *   of the next byte to read from `inp`.
 */
function countTypes(c){
  if (die){
    die = false;
    return;
  }

  // Never read past the end of the input, and never more than one slice.
  const sliceEnd = Math.min(inputSize, c + Math.ceil(inputSize / 100.0));
  let pos = c;
  for (; pos < sliceEnd; pos++){
    const value = inp.getUint8(pos);
    frequencies[value] = frequencies[value] + 1;
  }

  // An empty input would give 0/0 = NaN; report it as complete instead.
  const perc = inputSize > 0 ? 100.0 * pos / inputSize : 100;
  updateProgress(perc);

  if (pos < inputSize){
    // Yield to the event loop between slices.
    setTimeout(function () { countTypes(pos); }, 0);
  }
  else{
    updateProgress(100);
    system_state++;
    taskHandle();
  }
}
//Here's where the file is read the last time and also where the bits come from that I want to save. If I could stream the data directly I could probably even get rid of the dry-run stage I currently need to count how many bytes to allocate for the output ArrayBuffer. I mean, Google Drive can download files without telling the browser the size, just whether it's done yet or not, so I'd assume that's a feature I could access here too. I'm just not sure how you actually gain access to a download from JS in the first place.
/**
 * Performs the Huffman encoding of the input, one slice per invocation.
 *
 * Each call encodes at most ceil(inputSize / 250) bytes by looking up the
 * byte's code in `table` and passing every element of that code to
 * `writeBit`. It then reports progress and either reschedules itself with
 * setTimeout or bumps `system_state` and calls `taskHandle()`.
 *
 * If the global `die` flag is set, the flag is cleared and the call returns
 * without doing or scheduling any work.
 *
 * @param {number} c - Number of input BYTES already processed, i.e. the index
 *   of the next byte to read from `inp`.
 * @param {boolean} d - Passed through to `writeBit`; when true the pass is a
 *   dry run that does not actually write.
 * @throws {Error} If an input byte has no entry in `table`.
 */
function encode(c,d){
  if (die){
    die = false;
    return;
  }

  // Build the byte -> code lookup once per slice instead of scanning the
  // table for every byte. Later entries overwrite earlier ones, which keeps
  // the "last match wins" result of the original scan.
  const codeByValue = new Map();
  for (let k = 0; k < table.length; k++){
    codeByValue.set(table[k].value, table[k].code);
  }

  const sliceEnd = Math.min(inputSize, c + Math.ceil(inputSize / 250.0));
  let pos = c;
  for (; pos < sliceEnd; pos++){
    const b = inp.getUint8(pos);
    const seq = codeByValue.get(b);
    if (seq === undefined){
      // Previously the code of the preceding byte was silently reused here,
      // producing corrupt output; fail loudly instead.
      throw new Error("No Huffman code for byte value " + b);
    }
    for (let j = 0; j < seq.length; j++){
      writeBit(seq[j], d);
    }
  }

  // An empty input would give 0/0 = NaN; report it as complete instead.
  const perc = inputSize > 0 ? 100.0 * pos / inputSize : 100;
  updateProgress(perc);

  if (pos < inputSize){
    // Yield to the event loop between slices.
    setTimeout(function () { encode(pos, d); }, 0);
  }
  else{
    updateProgress(100);
    system_state++;
    taskHandle();
  }
}
//Finally, bit-level access for unaligned read/write so I can actually take advantage of the variable word size of the Huffman encoding (the read is used for decoding).
/**
 * Reads a single bit from the input DataView `inp` at the current global
 * cursor (`byte_index`, `bit_index`) and advances the cursor by one bit.
 *
 * Bits are taken starting from the least significant bit of each byte. The
 * byte offset of 4 skips the file-size integer stored in front of the data.
 *
 * @returns {number} The bit that was read, 0 or 1.
 */
function readBit(){
  const bit = (inp.getUint8(byte_index + 4) >>> bit_index) & 1;

  // Move to the next bit, rolling over into the next byte after bit 7.
  if (bit_index + 1 > 7){
    bit_index = 0;
    byte_index++;
  } else {
    bit_index = bit_index + 1;
  }
  return bit;
}
/**
 * Writes a single bit at the current global cursor (`byte_index`,
 * `bit_index`) and advances the cursor by one bit.
 *
 * Only when `d` is exactly `false` is the bit merged into `current_byte` and
 * stored into the `output` DataView (at byte offset `byte_index + 4`); for any
 * other value of `d` the call is a dry run that merely advances the cursor.
 *
 * @param {number} b - The bit to write (0 or 1).
 * @param {boolean} d - Dry-run flag; pass `false` to actually write.
 */
function writeBit(b,d){
  if (d === false){
    // Mask that keeps every bit of the low byte except the target bit.
    const keepOthers = 0xff ^ (1 << bit_index);
    current_byte = (current_byte & keepOthers) | (b << bit_index);
    output.setUint8(byte_index + 4, current_byte);
  }

  // Move to the next bit, rolling over into the next byte after bit 7.
  if (bit_index + 1 > 7){
    bit_index = 0;
    byte_index++;
  } else {
    bit_index = bit_index + 1;
  }
}
/**
 * Reads eight consecutive bits with `readBit` and assembles them into one
 * byte, first bit read becoming the least significant bit. Because it goes
 * through `readBit`, the read does not have to start on a byte boundary.
 *
 * @returns {number} The assembled byte value.
 */
function readByte(){
  let value = 0;
  let shift = 0;
  while (shift < 8){
    value |= readBit() << shift;
    shift++;
  }
  return value;
}
/**
 * Writes one byte as eight calls to `writeBit`, least significant bit first.
 * Because it goes through `writeBit`, the write does not have to start on a
 * byte boundary.
 *
 * @param {number} b - The byte value to write.
 * @param {boolean} d - Dry-run flag, forwarded unchanged to `writeBit`.
 */
function writeByte(b,d){
  let shift = 0;
  while (shift < 8){
    const bit = (b >>> shift) & 1;
    writeBit(bit, d);
    shift++;
  }
}
//And finally the download mechanism I'm using.
/**
 * Offers the processed data in `output` to the user as a file download.
 *
 * The download name is the base name of `source_name` (everything after the
 * last "\" or "/"). When encoding, `fext` is appended; otherwise a trailing
 * `fext` is stripped. Afterwards `running` is reset to false and, if the
 * element with id "ac" has the class "activeNav", `clearRes()` is called.
 */
function downloadResult(){
  const blobObject = new Blob([output], {type: 'application/octet-stream'});

  let n = source_name.split('\\').pop().split('/').pop();
  if (doEncode){
    n = n + fext;
  } else if (n.endsWith(fext)){
    // Strip the extension only at the end of the name; replacing the first
    // occurrence would mangle names that contain it elsewhere.
    n = n.slice(0, n.length - fext.length);
  }

  const objectUrl = URL.createObjectURL(blobObject);
  const a = document.createElement("a");
  a.setAttribute("href", objectUrl);
  a.setAttribute("download", n);
  a.click();
  // Release the blob once the browser has had time to start the download;
  // without this the whole output stays in memory until the page is closed.
  setTimeout(function () { URL.revokeObjectURL(objectUrl); }, 40000);

  running = false;
  const b = document.getElementById("ac");
  if (b.classList.contains("activeNav")){
    clearRes();
  }
}
我基本上想把上面的大部分代码推倒重来,换成一种能从用户所选文件中逐字节或按中等大小的数据块读取的机制;等到实际输出阶段时,再通过基本原生(vanilla)的浏览器下载,把数据逐字节地一点点写入用户的下载文件夹。
我知道文件输入框可以一次选择多个文件,所以如果能够下载到子文件夹,我也许会纯粹出于好玩去研究一下该怎么做。那岂不是很有趣!……不过请注意,我很确定这是做不到的(我不明白为什么网页不能在浏览器的下载文件夹中创建子目录,但可能是出于安全原因)。
如果您需要查看更多代码,请告诉我,但由于这是一个课堂项目,我不想被指责抄袭我自己的应用程序...
【问题讨论】:
标签: javascript html