【发布时间】:2011-12-19 16:47:39
【问题描述】:
我遇到了问题。我有一个用不同类型的问题(列表、复选框、下拉列表...)编写的调查生成器,还有一个要填写表格的调查生成器。现在的问题是要存储表,我的数据库上有 3 个不同的表,稍后创建表有点复杂。我正在尝试使用 JavaScript 编辑器来执行此操作,以便用户可以像在 word 中一样创建表格。在我后面的代码中,我将表标记作为字符串接收,我想将其作为 JSON 存储在数据库中。
我在这里有这段代码来将表转换为 DataSet 对象,然后我可以使用 JSON.NET 库从这里转换为 XML 并从这里转换为 JSON。所有这些都工作正常,但问题是当表有“colspan”和“rowspan”参数时,不能使用它们。您能帮我完成这段代码,以便每个人都可以以此为例,在 C# 中将 HTML 表解析为 JSON。
protected void Page_Load(object sender, EventArgs e)
{
string table = @"<table>
<tbody><tr>
<th rowspan='4'>Project</th><th rowspan='4'>Country</th><th colspan='3' rowspan='1'>Header 1</th><th colspan='2' rowspan='1'>Header 2</th>
</tr><tr>
<th colspan='1' rowspan='1'>Child Header 1</th><th colspan='2' rowspan='1'>Child Header 2</th><th colspan='1' rowspan='3'>Child Header 3</th><th colspan='1' rowspan='3'>Child Header 4</th>
</tr><tr>
<th colspan='1' rowspan='2'>Child Child Header 1</th><th colspan='1' rowspan='1'>tee</th><th colspan='1' rowspan='2'>ssss</th>
</tr><tr>
<th colspan='1' rowspan='1'>aas</th>
</tr><tr>
<td>EUS</td><td>ES</td><td> </td><td> </td><td> </td><td> </td><td> </td>
</tr><tr>
<td>ARP</td><td>IE</td><td> </td><td> </td><td> </td><td> </td><td> </td>
</tr><tr>
<td>ARM</td><td>UK</td><td> </td><td> </td><td> </td><td> </td><td> </td>
</tr><tr>
<td>SMRT</td><td>US</td><td> </td><td> </td><td> </td><td> </td><td> </td>
</tr><tr>
<td>CM</td><td></td><td> </td><td> </td><td> </td><td> </td><td> </td>
</tr>
</tbody></table>";
DataSet dataSet = HtmlTableParser.ParseDataSet(table);
StringWriter sw = new StringWriter();
dataSet.WriteXml(sw, XmlWriteMode.IgnoreSchema);
XmlDocument xd = new XmlDocument();
xd.LoadXml(sw.ToString());
string jsonText = JsonConvert.SerializeXmlNode(xd).Replace("_x0020_", " ");
}
/// <summary>
/// HtmlTableParser parses the contents of an html string into a System.Data DataSet or DataTable.
/// </summary>
public class HtmlTableParser
{
private const RegexOptions ExpressionOptions = RegexOptions.Singleline | RegexOptions.Multiline | RegexOptions.IgnoreCase;
private const string CommentPattern = "<!--(.*?)-->";
private const string TablePattern = "<table[^>]*>(.*?)</table>";
private const string HeaderPattern = "<th[^>]*>(.*?)</th>";
private const string RowPattern = "<tr[^>]*>(.*?)</tr>";
private const string CellPattern = "<td[^>]*>(.*?)</td>";
/// <summary>
/// Given an HTML string containing n table tables, parse them into a DataSet containing n DataTables.
/// </summary>
/// <param name="html">An HTML string containing n HTML tables</param>
/// <returns>A DataSet containing a DataTable for each HTML table in the input HTML</returns>
public static DataSet ParseDataSet(string html)
{
DataSet dataSet = new DataSet();
MatchCollection tableMatches = Regex.Matches(
WithoutComments(html),
TablePattern,
ExpressionOptions);
foreach (Match tableMatch in tableMatches)
dataSet.Tables.Add(ParseTable(tableMatch.Value));
return dataSet;
}
/// <summary>
/// Given an HTML string containing a single table, parse that table to form a DataTable.
/// </summary>
/// <param name="tableHtml">An HTML string containing a single HTML table</param>
/// <returns>A DataTable which matches the input HTML table</returns>
public static DataTable ParseTable(string tableHtml)
{
string tableHtmlWithoutComments = WithoutComments(tableHtml);
DataTable dataTable = new DataTable();
MatchCollection rowMatches = Regex.Matches(
tableHtmlWithoutComments,
RowPattern,
ExpressionOptions);
dataTable.Columns.AddRange(tableHtmlWithoutComments.Contains("<th")
? ParseColumns(tableHtml)
: GenerateColumns(rowMatches));
ParseRows(rowMatches, dataTable);
return dataTable;
}
/// <summary>
/// Strip comments from an HTML stirng
/// </summary>
/// <param name="html">An HTML string potentially containing comments</param>
/// <returns>The input HTML string with comments removed</returns>
private static string WithoutComments(string html)
{
return Regex.Replace(html, CommentPattern, string.Empty, ExpressionOptions);
}
/// <summary>
/// Add a row to the input DataTable for each row match in the input MatchCollection
/// </summary>
/// <param name="rowMatches">A collection of all the rows to add to the DataTable</param>
/// <param name="dataTable">The DataTable to which we add rows</param>
private static void ParseRows(MatchCollection rowMatches, DataTable dataTable)
{
foreach (Match rowMatch in rowMatches)
{
// if the row contains header tags don't use it - it is a header not a row
if (!rowMatch.Value.Contains("<th"))
{
DataRow dataRow = dataTable.NewRow();
MatchCollection cellMatches = Regex.Matches(
rowMatch.Value,
CellPattern,
ExpressionOptions);
for (int columnIndex = 0; columnIndex < cellMatches.Count; columnIndex++)
dataRow[columnIndex] = cellMatches[columnIndex].Groups[1].ToString();
dataTable.Rows.Add(dataRow);
}
}
}
/// <summary>
/// Given a string containing an HTML table, parse the header cells to create a set of DataColumns
/// which define the columns in a DataTable.
/// </summary>
/// <param name="tableHtml">An HTML string containing a single HTML table</param>
/// <returns>A set of DataColumns based on the HTML table header cells</returns>
private static DataColumn[] ParseColumns(string tableHtml)
{
MatchCollection headerMatches = Regex.Matches(
tableHtml,
HeaderPattern,
ExpressionOptions);
return (from Match headerMatch in headerMatches
select new DataColumn(headerMatch.Groups[1].ToString())).ToArray();
}
/// <summary>
/// For tables which do not specify header cells we must generate DataColumns based on the number
/// of cells in a row (we assume all rows have the same number of cells).
/// </summary>
/// <param name="rowMatches">A collection of all the rows in the HTML table we wish to generate columns for</param>
/// <returns>A set of DataColumns based on the number of celss in the first row of the input HTML table</returns>
private static DataColumn[] GenerateColumns(MatchCollection rowMatches)
{
int columnCount = Regex.Matches(
rowMatches[0].ToString(),
CellPattern,
ExpressionOptions).Count;
return (from index in Enumerable.Range(0, columnCount)
select new DataColumn("Column " + Convert.ToString(index))).ToArray();
}
}
【问题讨论】:
-
我从kw-share2learn.blogspot.com/2011/01/…获得了将DataSet解析为XML的代码
-
在数据库中存储 html 标记有什么问题?
-
不像我想要的那样干净。以后可以用JSON作为对象,用起来更方便。
-
数据存入数据库后如何处理?
-
您从哪里接收表格标记?如果您在客户端创建表,然后将标记传递给代码隐藏 - 这可能不是最简单的方法。在客户端上创建 JSON 对象并将其(而不是表字符串)传递给代码隐藏可能更容易。
标签: c# asp.net json xml html-table