这段代码:
/// <summary>
/// Records which of the two compared sentences a word was found in.
/// Declare this at namespace or class scope somewhere in your source file;
/// C# does not allow an enum declaration inside a method body.
/// </summary>
enum Where
{
    /// <summary>The word has not been seen in either sentence.</summary>
    None = 0,

    /// <summary>The word was seen only in the first sentence.</summary>
    First = 1,

    /// <summary>The word was seen only in the second sentence.</summary>
    Second = 2,

    /// <summary>The word was seen in both sentences.</summary>
    Both = 3
}
//...
// Two sample sentences whose words are compared.
var val1 = "Have a good calm day calm calm calm";
var val2 = "Have a very good day, Joe Joe Joe Joe";

// Tokenizer shared by both inputs: a run of word characters, or -- when a match
// starts on a non-word character such as ',' -- two non-whitespace runs joined by
// whitespace (which is how ", Joe" ends up as a single token).
const string tokenPattern = @"(\w+)|(\S+\s+\S+)";

// Regex.Matches only returns successful matches, so no Success filter is needed.
// ToLowerInvariant makes the comparison case-insensitive without depending on the
// current culture (ToLower() would, for example, treat "I" differently under tr-TR).
var words1 = Regex.Matches(val1, tokenPattern).Cast<Match>()
    .Select(m => m.Value.ToLowerInvariant());
var words2 = Regex.Matches(val2, tokenPattern).Cast<Match>()
    .Select(m => m.Value.ToLowerInvariant());

// Maps each lower-cased token to the sentence(s) it was seen in.
var dic = new Dictionary<string, Where>();
foreach (var s in words1)
{
    dic[s] = Where.First;
}
foreach (var s in words2)
{
    Where b;
    if (!dic.TryGetValue(s, out b))
    {
        b = Where.None;
    }
    switch (b)
    {
        case Where.None:
            // Not seen before: the token exists only in the second sentence.
            dic[s] = Where.Second;
            break;
        case Where.First:
            // Already seen in the first sentence: the token is shared.
            dic[s] = Where.Both;
            break;
        // Where.Second / Where.Both: a repeat within the second sentence; nothing to update.
    }
}

// Print every token that is not shared by both sentences, once each.
foreach (var kv in dic.Where(x => x.Value != Where.Both))
{
    Console.WriteLine(kv.Key);
}
会输出 'calm'、'very'、', joe' 和 'joe',即这两个字符串之间的差异:'calm' 只出现在第一个字符串中,而 'very'、', joe' 和 'joe' 只出现在第二个字符串中(代码在比较前会把单词转换为小写,所以输出均为小写)。重复出现的单词也只会输出一次。
如果还想得到两个单独的列表,以区分每个单词来自哪一个字符串,可以这样写:
// Split the recorded words by origin: list1 holds the entries marked Where.First,
// list2 the entries marked Where.Second.
var list1 = (from entry in dic
             where entry.Value == Where.First
             select entry).ToList();
var list2 = (from entry in dic
             where entry.Value == Where.Second
             select entry).ToList();

// Print each entry as "key: value", all of list1 first and then all of list2.
foreach (var entry in list1.Concat(list2))
{
    Console.WriteLine("{0}: {1}", entry.Key, entry.Value);
}