【发布时间】:2014-09-08 11:24:27
【问题描述】:
我正在开发一个将 PDF 文档转换为 Excel 文件的程序，大约有 1000 个 PDF 文件需要转换。但是在处理到第 234 个文件时，程序抛出了内存不足（OutOfMemory）错误。以下是关键代码：
/// <summary>
/// Converts every "*.pdf" file in the folder selected in
/// <c>folderBrowserDialog1</c> to text and writes one row per file into the
/// first worksheet of a new Excel workbook: column A receives the file name,
/// column B the extracted text. Progress is reported through
/// <c>label4</c> (file just processed), <c>label6</c> (files remaining) and
/// <c>label5</c> (elapsed time). The workbook is finally saved as
/// "Training Data.xlsx" and a confirmation message box is shown.
/// </summary>
/// <remarks>
/// Exceptions raised by Excel, by the PDF conversion or by the save
/// operation propagate to the caller unchanged.
/// </remarks>
private void getFullFileContent()
{
    Excel.Application objExcel = new Excel.Application();
    objExcel.Visible = true;
    Excel.Workbook objBook = objExcel.Workbooks.Add(System.Reflection.Missing.Value);

    // The target sheet never changes, so fetch it once instead of asking
    // Excel for it again on every file.
    Excel.Worksheet objSheet = (Excel.Worksheet)objBook.Worksheets.get_Item(1);

    DateTime startDate = System.DateTime.Now;
    string[] pdfFiles = Directory.GetFiles(folderBrowserDialog1.SelectedPath, "*.pdf");

    for (int index = 0; index < pdfFiles.Length; index++)
    {
        // Excel rows are 1-based while the array is 0-based.
        int row = index + 1;
        string pdfPath = pdfFiles[index];
        string pdfName = Path.GetFileName(pdfPath);

        WriteCellAndRelease(objSheet, "A" + row, pdfName);

        // NOTE(review): Excel documents a limit of 32,767 characters per
        // cell; text extracted from a large PDF may exceed it and make this
        // write fail - confirm and truncate or split if that is the case.
        WriteCellAndRelease(objSheet, "B" + row, ConvertPdfToText(pdfPath));

        // Report the file that was just processed. The previous code read
        // pdfFiles[row], which named the wrong file and ran past the end of
        // the array on the last iteration.
        label4.Text = pdfName;
        label6.Text = "Remaining File: " + (pdfFiles.Length - row).ToString();

        TimeSpan elapsed = System.DateTime.Now - startDate;
        label5.Text = "Total time: " + elapsed.Hours.ToString() + ":" + elapsed.Minutes.ToString() + ":" + elapsed.Seconds.ToString();

        // No GC.Collect() here: each Range wrapper is released explicitly in
        // WriteCellAndRelease, so forcing a collection per file is not needed.
    }

    objBook.SaveAs("Training Data.xlsx");
    MessageBox.Show("Your PDF files converted and printed into \"Training Data.xlsx\"");
}

/// <summary>
/// Writes <paramref name="value"/> into the single cell at
/// <paramref name="address"/> of <paramref name="sheet"/> and releases the
/// temporary Range COM wrapper, even when the write throws.
/// </summary>
/// <param name="sheet">Worksheet that owns the cell.</param>
/// <param name="address">A1-style cell address, e.g. "B12".</param>
/// <param name="value">Text to store in the cell.</param>
private static void WriteCellAndRelease(Excel.Worksheet sheet, string address, string value)
{
    Excel.Range cell = sheet.get_Range(address, System.Reflection.Missing.Value);
    try
    {
        cell.set_Value(System.Reflection.Missing.Value, value);
    }
    finally
    {
        System.Runtime.InteropServices.Marshal.ReleaseComObject(cell);
    }
}
/// <summary>
/// Extracts the text of every page of the PDF at <paramref name="path"/>
/// and returns it as one string, pages concatenated in order with no
/// separator.
/// </summary>
/// <param name="path">Path of the PDF file to read.</param>
/// <returns>
/// The concatenated page text, or an empty string when the file does not
/// exist.
/// </returns>
private string ConvertPdfToText(string path)
{
    if (!File.Exists(path))
    {
        return "";
    }

    // Accumulate in a StringBuilder only; the previous code also grew a
    // string with += per page, re-copying all earlier text every time.
    StringBuilder text = new StringBuilder();
    PdfReader pdfReader = new PdfReader(path);
    try
    {
        for (int page = 1; page <= pdfReader.NumberOfPages; page++)
        {
            ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();
            string currentText = PdfTextExtractor.GetTextFromPage(pdfReader, page, strategy);

            // Kept as in the original: round-trips the text through the
            // system default encoding before decoding it as UTF-8.
            // NOTE(review): with a non-Unicode default code page this
            // presumably replaces characters outside that code page -
            // confirm whether the round-trip is still wanted.
            currentText = Encoding.UTF8.GetString(Encoding.Convert(Encoding.Default, Encoding.UTF8, Encoding.Default.GetBytes(currentText)));

            text.Append(currentText);
        }
    }
    finally
    {
        // Close the reader even when extraction of a page throws.
        pdfReader.Close();
    }

    return text.ToString();
}
【问题讨论】:
-
那你试过分批运行吗？比如每批处理 200 个文件。
标签: c# excel pdf out-of-memory