【发布时间】:2016-08-31 22:53:45
【问题描述】:
我尝试在 C# 中读取 .docx 和 .txt 文件。ABC.docx 的内容是:
测试1
测试2
我的代码确实读取了 ABC.docx,但有一个问题:数据存入 SQL Server 之后,输出是这样的:
下面是我的代码:
void WalkDirectoryTree(System.IO.DirectoryInfo root)
{
//System.IO.FileInfo[] files = null;
System.IO.DirectoryInfo[] subDirs = null;
//need to add-in more extension file such as .doc, .ppt, .xlsx
//files = root.GetFiles("*.txt");
var files = root.GetFiles().Where(a => a.Extension.Contains(".docx") || a.Extension.Contains(".txt"));
// files = new string[] { "*.txt", "*.docx" }
//.SelectMany(i => root.GetFiles(i, SearchOption.AllDirectories))
//.ToArray();
//if file is not null, read filename & file extension
if (files != null)
{
foreach (System.IO.FileInfo fi in files)
{
StringBuilder text = new StringBuilder();
Microsoft.Office.Interop.Word.Application word = new Microsoft.Office.Interop.Word.Application();
object miss = System.Reflection.Missing.Value;
//object path = @"I:\def.docx";
object path = fi.FullName;
object readOnly = true;
Microsoft.Office.Interop.Word.Document docs = word.Documents.Open(ref path, ref miss, ref readOnly, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss);
for (int i = 0; i < docs.Paragraphs.Count; i++)
{
text.Append(" \r\n " + docs.Paragraphs[i + 1].Range.Text.ToString());
}
//Get the full patch of the file extension
string[] lines = System.IO.File.ReadAllLines(fi.FullName);
//TextReader reader = new FilterReader(fi.FullName);
//StreamReader m = new StreamReader(fi.FullName);
foreach (string line in lines)
{
String[] substrings = fi.FullName.Split('\\');
string strFileName = string.Empty;
string strFileExtension = string.Empty;
if (substrings.Length > 0)
{
strFileName = substrings[ substrings.Length -1 ];
if( !string.IsNullOrEmpty(strFileName) )
{
string[] extensionSplit = strFileName.Split('.');
if (extensionSplit.Length > 0)
{
strFileExtension = extensionSplit[extensionSplit.Length - 1];
}
}
}
else
{
strFileName = fi.FullName;
}
InsertData(strFileName, line.Replace("'",""), fi.FullName,strFileExtension);
}
}
//After searched from root, continue search from subDirectories
subDirs = root.GetDirectories();
#region Exclude all the hidden files from drives
foreach (System.IO.DirectoryInfo dirInfo in subDirs)
{
if ((dirInfo.Attributes & FileAttributes.Hidden) == 0)
{
WalkDirectoryTree(dirInfo);
}
}
#endregion
}
}
请指教如何在 sql server 中存储。 谢谢。
【问题讨论】:
-
Content 列的类型是什么?您正在为文件中的每一行文本创建一条数据库记录,这看起来不正确。您可能希望将文件作为字节数组读取,并为每个文件只创建一条记录。 -
.docx 文件的内容是:Test1 Test2
-
您希望它显示什么?
docx文件是二进制文件,而不是文本文件(实际上它们是 XML 文件的压缩集合)。 -
您正试图将 Word 文档视为文本文件。在记事本中打开一个,你会发现它不像一个文本文件。
-
@coder 我问的是表中 Content 列的类型。
标签: c#