【发布时间】:2022-01-19 06:36:19
【问题描述】:
我有一个包含以下数据的文本文件:
#294448
ORDER_STATUS1098988 VALID
24.09.2021 05:17 AM
Customer_ID: 5524335312265537
MMYY: 08/23
Txn_ID: 74627
Name: Krystal Flowers
E-mail: abc@gmail.com
Phone: 9109153030
Address_original: 1656 W Alvarado dr, Pueblo West, Colorado, 81007, United States
ZIP_City_State_Country: -
Type: -
Subtype: -
#294448
ORDER_STATUS1097728 VALID
24.09.2021 05:17 AM
Customer_ID: 5524331591654699
MMYY: 11/23
Txn_ID: 45617
Name: Allen E Prieto
E-mail: xyz@gmail.com
Phone: 5056994899
Address_original: 655 Ives Dairy Rd, Miami, Florida, 33179, United States
ZIP_City_State_Country: -
Type: -
Subtype: -
#294445
ORDER_STATUS537099 VALID
24.09.2021 05:01 AM
Customer_ID: 4118230087730234
MMYY: 09/25
Txn_ID: 24430
Name: tera casey
Phone: 7405863997
Address_original: 13705 Neptune Lane, New Concord, Ohio State, 43762, PE
ZIP_City_State_Country: 43762, New Concord, Ohio State, UNITED STATES
Subtype: N/A
#294445
ORDER_STATUS489401 VALID
24.09.2021 05:01 AM
Customer_ID: 4118230054806983
MMYY: 07/24
Txn_ID: 13183
Name: Nancy Lambert
Address_original: 2600 loop drive, N, N, 44113, PE
ZIP_City_State_Country: 44113, N, N, UNITED STATES
Subtype: N/A
#294445
ORDER_STATUS437355 VALID
24.09.2021 05:01 AM
Customer_ID: 4118230061412668
MMYY: 05/24
Txn_ID: 55474
Name: Sheets Sherry
E-mail: tyd@gmail.com
Phone: (567) 241-5074
Address_original: 37 Martha Avenue, Mansfield, Ohio, 44905, US
ZIP_City_State_Country: 44905, Mansfield, Ohio, UNITED STATES
Subtype: N/A
数据需要重新整理:每条记录只保留 Customer_ID、MMYY 和 Txn_ID,并以 | 符号分隔显示在同一行中。应忽略此文本文件中的所有其他内容。
例子:
5524335312265537 | 08/23 | 74627
5524331591654699 | 11/23 | 45617
4118230087730234 | 09/25 | 24430
4118230054806983 | 07/24 | 13183
4118230061412668 | 05/24 | 55474
这是我尝试过的代码,但打开文本文件后会弹出 "Invalid file!" 消息。(代码参考自另一篇帖子。)
// Click handler: lets the user pick a text file, then scans it line by line looking for
// the Customer_ID / MMYY / Txn_ID fields.
// NOTE(review): this is the failing attempt described in the question. Any exception
// raised inside the try block is reported only as "Invalid file!", and the values that
// are extracted are never written to the text box (see the comments below).
private void openFile_Click(object sender, EventArgs e)
{
OpenFileDialog ofdtmp = new OpenFileDialog();
if (ofdtmp.ShowDialog() == System.Windows.Forms.DialogResult.OK)
{
try
{
using (StreamReader sr = File.OpenText(ofdtmp.FileName))
{
// Read until no more characters are available.
while (sr.Peek() >= 0)
{
string line = sr.ReadLine();
line = line.Trim();
// Skip blank lines and every line that carries one of the unwanted markers.
// The "." test is meant for the date/time line, but it also skips any other
// line that happens to contain a dot.
if (line.ToString() == "" || line.Contains("#") || line.Contains("ORDER_STATUS") || /*Exclude Date & Time*/ line.Contains(".") || line.Contains("Name:") || line.Contains("E-mail:") || line.Contains("Phone:") || line.Contains("Address_original:") || line.Contains("ZIP_City_State_Country:") || line.Contains("Type:") || line.Contains("Subtype:"))
continue; //skip
// NOTE(review): the sample data spells this label "Customer_ID:", so this
// test does not match those lines.
if (line.Contains("CustomerID: "))
{
// Substring(start, length): the second argument is a length, not an end index,
// so this throws whenever the line is shorter than 12 + 29 characters.
// The value is stored in a local that goes out of scope at the closing brace.
string customID = line.Substring(12, 29).Trim();
continue;
}
if (line.Contains("MMYY: "))
{
// A line such as "MMYY: 08/23" is 11 characters long, so asking for 11
// characters starting at index 6 runs past the end and throws; this is the
// "index and length" error reported in the discussion.
string mmyy = line.Substring(6, 11).Trim();
continue;
}
if (line.Contains("Txn_ID: "))
{
// "Txn_ID: " is 8 characters long, so starting at index 10 skips the first
// two characters of the value; the length of 16 also exceeds the sample lines.
string txnID = line.Substring(10, 16).Trim();
continue;
}
}
// sr.ToString() yields the reader's type name ("System.IO.StreamReader", as
// reported in the discussion), not the extracted values.
richTextBox.Text = sr.ToString();
}
}
// A bare catch discards the exception, so the real cause is never shown.
catch
{
MessageBox.Show("Invalid file!");
}
}
}
我在类似的在线帖子中查找了替代解决方案,看来应用正则表达式是正确的方法。困难在于弄清楚如何跳过文本文件中所有不必要的字符和符号,只提取所需的数据。这个问题的最佳解决方案是什么?
解决方案更新:
using System;
using System.IO;
using System.Security.Cryptography;
using System.Text;
using System.Text.RegularExpressions;
using System.Windows.Forms;
namespace RegExTool
{
/// <summary>
/// Main window of the RegEx tool. Loads a text file into <c>richTextBox1</c>, writes one
/// "CustomerId|MMYY|TxnId" line per record into <c>richTextBox2</c>, and can save that
/// result to a file.
/// </summary>
public partial class Form1 : Form
{
    // One alternative per wanted label. The lookbehinds consume nothing, so each match
    // value is only the text that follows the label.
    //  * "Customer_?ID" accepts the "Customer_ID:" label used in the data file as well as
    //    the "CustomerID:" spelling the pattern used before.
    //  * "[^\r\n]*" is used instead of ".*" because "." only stops at '\n'; with CRLF
    //    files ".*" would leave a stray '\r' at the end of every value.
    private const string FieldPattern =
        @"(?<=Customer_?ID:)[^\r\n]*|(?<=MMYY:)[^\r\n]*|(?<=Txn_ID:)[^\r\n]*";

    public Form1()
    {
        InitializeComponent();
    }

    // Intentionally empty. Presumably referenced from the designer file, so it is kept.
    private void Form1_Load(object sender, EventArgs e)
    {
    }

    /// <summary>
    /// Builds the output text: one "CustomerId|MMYY|TxnId" line per record, with all
    /// spaces removed from the values.
    /// </summary>
    /// <param name="data">Full content of the input file.</param>
    /// <returns>The formatted lines, each terminated by <see cref="Environment.NewLine"/>.</returns>
    private static string ExtractRecords(string data)
    {
        MatchCollection fields = Regex.Matches(data, FieldPattern, RegexOptions.IgnoreCase);
        StringBuilder output = new StringBuilder();

        // Matches arrive in file order and are grouped three at a time. A trailing,
        // incomplete group is dropped. A record that lacks one of the three labels would
        // shift the grouping of every record after it.
        for (int i = 0; i + 2 < fields.Count; i += 3)
        {
            output.Append(StripSpaces(fields[i].Value)).Append('|')
                  .Append(StripSpaces(fields[i + 1].Value)).Append('|')
                  .Append(StripSpaces(fields[i + 2].Value))
                  .Append(Environment.NewLine);
        }

        return output.ToString();
    }

    // Removes the space after the label (and any other space) from a captured value.
    private static string StripSpaces(string value)
    {
        return value.Replace(" ", string.Empty);
    }

    /// <summary>
    /// Lets the user choose a file, shows its content in <c>richTextBox1</c> and the
    /// extracted records in <c>richTextBox2</c>. Errors are shown in a message box.
    /// </summary>
    private void openFile_Click(object sender, EventArgs e)
    {
        // Dialogs shown with ShowDialog are not disposed automatically.
        using (OpenFileDialog ofdtmp = new OpenFileDialog())
        {
            if (ofdtmp.ShowDialog() != System.Windows.Forms.DialogResult.OK)
            {
                return;
            }

            try
            {
                string data = File.ReadAllText(ofdtmp.FileName);
                richTextBox1.Text = data;
                // Assigned once instead of appending per record.
                richTextBox2.Text = ExtractRecords(data);
            }
            catch (Exception ex)
            {
                MessageBox.Show(ex.Message);
            }
        }
    }

    /// <summary>
    /// Saves the content of <c>richTextBox2</c> to a file chosen by the user.
    /// </summary>
    private void saveFile_Click(object sender, EventArgs e)
    {
        using (SaveFileDialog svdtmp = new SaveFileDialog())
        {
            if (svdtmp.ShowDialog() != System.Windows.Forms.DialogResult.OK)
            {
                return;
            }

            try
            {
                File.WriteAllText(svdtmp.FileName, richTextBox2.Text);
                MessageBox.Show("File Saved!");
            }
            catch (Exception ex)
            {
                // Keep the original message but add the cause, as openFile_Click does.
                MessageBox.Show("Cannot save text to file." + Environment.NewLine + ex.Message);
            }
        }
    }
}
}
最终解决方案:
using System;
using System.Collections.Generic;
using System.IO;
using System.Text.RegularExpressions;
using System.Windows.Forms;
namespace RegExTool
{
/// <summary>
/// Main window of the RegEx tool. Loads a text file into <c>richTextBox1</c>, writes one
/// "CustomerId | MMYY | TxnId" line per record into <c>richTextBox2</c>, and can save that
/// result to a file.
/// </summary>
public partial class Form1 : Form
{
    // Matches one record: the Customer_ID, MMYY and Txn_ID lines must follow each other
    // directly. "[ \t]*" tolerates missing, extra or trailing horizontal whitespace, so
    // every input the stricter single-space form matched still matches.
    // Built once and reused. RegexOptions.Multiline was dropped because the pattern has
    // no ^ or $ anchors, so it had no effect. The one-second timeout is kept.
    private static readonly Regex RecordPattern = new Regex(
        @"Customer_ID:[ \t]*(?<CustomerId>\d+)[ \t]*[\r\n]+" +
        @"MMYY:[ \t]*(?<ExpiryDate>\d{2}/\d{2})[ \t]*[\r\n]+" +
        @"Txn_ID:[ \t]*(?<TxnId>\d+)",
        RegexOptions.None,
        TimeSpan.FromSeconds(1));

    public Form1()
    {
        InitializeComponent();
    }

    // Intentionally empty. Presumably referenced from the designer file, so it is kept.
    private void Form1_Load(object sender, EventArgs e)
    {
    }

    /// <summary>
    /// Extracts every record from <paramref name="input"/> and formats it as
    /// "CustomerId | MMYY | TxnId".
    /// </summary>
    /// <param name="input">Full content of the input file.</param>
    /// <returns>One formatted string per record, in file order; empty when nothing matches.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
    /// <exception cref="RegexMatchTimeoutException">Matching took longer than one second.</exception>
    private static List<string> GetStrings(string input)
    {
        List<string> strings = new List<string>();
        foreach (Match match in RecordPattern.Matches(input))
        {
            strings.Add($"{match.Groups["CustomerId"].Value} | {match.Groups["ExpiryDate"].Value} | {match.Groups["TxnId"].Value}");
        }
        return strings;
    }

    /// <summary>
    /// Lets the user choose a file, shows its content in <c>richTextBox1</c> and the
    /// extracted records in <c>richTextBox2</c>. Errors are shown in a message box.
    /// </summary>
    private void openFile_Click(object sender, EventArgs e)
    {
        // Dialogs shown with ShowDialog are not disposed automatically.
        using (OpenFileDialog ofdtmp = new OpenFileDialog())
        {
            if (ofdtmp.ShowDialog() != System.Windows.Forms.DialogResult.OK)
            {
                return;
            }

            try
            {
                string input = File.ReadAllText(ofdtmp.FileName);
                richTextBox1.Text = input;

                // Build the whole result first and assign it once. Every record line,
                // including the last, ends with a newline, as before.
                List<string> records = GetStrings(input);
                richTextBox2.Text = records.Count == 0
                    ? string.Empty
                    : string.Join(Environment.NewLine, records) + Environment.NewLine;
            }
            catch (Exception ex)
            {
                MessageBox.Show(ex.Message);
            }
        }
    }

    /// <summary>
    /// Saves the content of <c>richTextBox2</c> to a file chosen by the user.
    /// </summary>
    private void saveFile_Click(object sender, EventArgs e)
    {
        using (SaveFileDialog svdtmp = new SaveFileDialog())
        {
            if (svdtmp.ShowDialog() != System.Windows.Forms.DialogResult.OK)
            {
                return;
            }

            try
            {
                File.WriteAllText(svdtmp.FileName, richTextBox2.Text);
                MessageBox.Show("File Saved!");
            }
            catch (Exception ex)
            {
                // Keep the original message but add the cause, as openFile_Click does.
                MessageBox.Show("Cannot save text to file." + Environment.NewLine + ex.Message);
            }
        }
    }
}
}
【问题讨论】:
-
让我们从实际显示您得到的异常而不是 Invalid File 消息开始。将 catch (Exception ex) { MessageBox.Show(ex.Message) } 添加到您的 catch 子句中...或使用 Debug.WriteLine()
-
@Bola 好的,错误提示为“索引和长度必须引用字符串中的位置。参数名称:长度”。更新:我把每个部分中的
line.Substring 改成了 line.Substring(12, line.Length - 12).Trim(); 和 line.Substring(5, line.Length - 5).Trim();,但这次 richTextBox 中显示的是文本 "System.IO.StreamReader"。是哪里出错了吗? -
您应该单步调试您的代码,这样就能确切地看到它为什么没有按预期工作。提示一下:在
while (sr.Peek() >= 0) 循环中,代码会从文件中逐行读取;问题在于,您如何知道“何时”刚读到的那一行是要添加到文本框的那组数据的最后一行?代码在读取若干行后设置了三个变量(例如 string customID = line.Substring(12, 29).Trim();),但之后并没有用这些值做任何事情。 -
我认为您可能想要类似...
richTextBox1.Text += customID + " | " + mmyy + " | " + txnID + Environment.NewLine;。正如我之前提到的,您需要弄清楚“何时”该把这一行添加到文本框中。一种做法是先把三个变量都置为空,当三个变量都不为空时,就把该行添加到文本框,然后再把所有变量重置为空。方法比较粗糙,但应该可行。 -
@JohnG 我已将
richTextBox.Text = sr.ToString();的行替换为richTextBox.Text += customID + " | " + mmyy + " | " + txnID + Environment.NewLine;但错误提示“当前上下文中不存在名称customID/mmyy/txnID。”
标签: c# .net parsing desktop-application text-parsing