【问题标题】:Create array of strings from a string that contains certain (dual) characters从包含某些(双)字符的字符串创建字符串数组
【发布时间】:2021-06-30 20:14:46
【问题描述】:

我有一串单词和数学/编程使用的符号。例如,像这样:

let source = "a + b + 3 == c"

(注意:不能依赖空格)

我还有一个字符串数组,需要从源字符串中过滤掉:

let symbols = ["+", "-", "==", "!="]

现在,我需要创建一个由匹配项组成的数组(可以有重复项)。在本例中为 ["+", "+", "=="]。

根据我的尝试,== 是两个字符,所以我不能执行以下操作:

let source = "a + b + 3 == c"
// A CharacterSet stores single scalars only, so "==" and "!=" cannot be expressed;
// "=" and "≠" are used as stand-ins.
let symbols = CharacterSet(charactersIn: "+-=≠")

// Walk the source one character at a time and keep the characters found in the set.
let operations = source.reduce(into: [String]()) { found, character in
    let text = String(character)
    if symbols.contains(UnicodeScalar(text)!) {
        found.append(text)
    }
}

print(operations)
// Output: ["+", "+", "=", "="]
// Needed: ["+", "+", "=="]

非常感谢任何帮助

【问题讨论】:

  • ==真的用在数学里吗,我以为只用在编程语言里?
  • 我应该添加那个细节,谢谢

标签: arrays swift string


【解决方案1】:

这个问题是一个解析问题,也就是把输入的字符串转换成某种结构化的数据。一个很好的方法是使用解析器组合器(parser combinator)。虽然内部实现比较复杂,但是一旦搭建好,示例中的 opStrings 函数就很容易理解,也很容易扩展。

/// A parser that tries to read a value of type `A` from the front of a `Substring`.
struct Parser<A> {
    /// The parsing function: receives the remaining input as `inout` and returns the
    /// parsed value, or `nil` when the input does not match.
    let run: (inout Substring) -> A?

    /// Creates a parser that ignores its input and always succeeds with `a`.
    ///
    /// The method's own generic parameter is named `B` so that it does not shadow
    /// the struct's generic parameter `A`.
    static func always<B>(_ a: B) -> Parser<B> {
        Parser<B> { _ in a }
    }

    /// A parser that ignores its input and always fails.
    static var never: Parser {
        Parser { _ in nil }
    }

    /// Returns a parser that transforms a successful result with `f`.
    func map<B>(_ f: @escaping (A) -> B) -> Parser<B> {
        Parser<B> { str -> B? in
            self.run(&str).map(f)
        }
    }

    /// Returns a parser that runs this parser, then the parser `f` builds from its result.
    ///
    /// If either step fails, the input is restored to what it was before this parser ran.
    func flatMap<B>(_ f: @escaping (A) -> Parser<B>) -> Parser<B> {
        Parser<B> { str -> B? in
            let original = str
            guard let matchA = self.run(&str),
                  let matchB = f(matchA).run(&str) else {
                // Backtrack so the caller sees the untouched input.
                str = original
                return nil
            }
            return matchB
        }
    }

    /// Runs the parser on a whole `String`.
    ///
    /// - Parameter str: The text to parse.
    /// - Returns: The parsed value (`nil` on failure) and the input left after parsing.
    func run(_ str: String) -> (match: A?, rest: Substring) {
        var str = str[...]
        let match = self.run(&str)
        return (match, str)
    }
}

/// Builds a parser that consumes the longest leading run of characters satisfying `p`.
///
/// The parser always succeeds; when the first character fails `p` the result is empty
/// and nothing is consumed.
func prefix(while p: @escaping (Character) -> Bool) -> Parser<Substring> {
    Parser<Substring> { input in
        let matched = input.prefix(while: p)
        // Advance the input past the matched run.
        input = input[matched.endIndex...]
        return matched
    }
}

/// Builds a parser that matches the exact text `p` at the front of the input.
///
/// On a match the text is consumed and `p` is returned as the value; otherwise the
/// parser fails and the input is left untouched.
func literalString(_ p: String) -> Parser<String?> {
    Parser<String?> { input in
        if input.hasPrefix(p) {
            input = input.dropFirst(p.count)
            return p
        }
        return nil
    }
}

/// Builds a parser that matches the exact text `str` and discards the matched value.
func literal(_ str: String) -> Parser<Void> {
    let matcher = literalString(str)
    return matcher.map { (_: String?) -> Void in () }
}

/// Builds a parser that requires at least one leading character satisfying `predicate`
/// and yields the matched run transformed by `f`.
///
/// Fails when the first character does not satisfy `predicate`.
func oneOrMore<A>(_ predicate: @escaping (Character) -> Bool,
                 mapped f: @escaping (Substring) -> A) -> Parser<A> {
    let run = prefix(while: predicate)
    return run.flatMap { matched -> Parser<A> in
        guard !matched.isEmpty else { return .never }
        return .always(f(matched))
    }
}

/// Parses a non-empty run of numeric characters into a `Double`.
/// Fails when the run cannot be converted by `Double`'s string initializer.
let oneOrMoreDecimals = oneOrMore(\.isNumber, mapped: { digits in Double(digits) })
    .flatMap { parsed -> Parser<Double> in
        switch parsed {
        case let value?:
            return .always(value)
        case nil:
            return .never
        }
    }

/// Builds a parser that tries each parser in `ps` in order and yields the first success.
///
/// Fails when every alternative fails (including when `ps` is empty).
func oneOf<A>(_ ps: [Parser<A>]) -> Parser<A> {
    Parser<A> { input -> A? in
        for candidate in ps {
            guard let hit = candidate.run(&input) else { continue }
            return hit
        }
        return nil
    }
}

/// Matches a single space, tab, or newline character.
let whitespace = oneOf([" ", "\t", "\n"].map(literal))

/// Parses an identifier: a run of ASCII letters and numeric characters
/// that does not start with a numeric character.
let identifier = prefix(while: { $0.isNumber || ($0.isASCII && $0.isLetter) })
    .flatMap { candidate -> Parser<String> in
        // Reject an empty run and anything that begins with a number.
        if let head = candidate.first, !head.isNumber {
            return .always(String(candidate))
        }
        return .never
    }

/// Parses a leading run of numeric characters into a `Double`.
/// Fails when the run (possibly empty) cannot be converted by `Double`'s string initializer.
let literalNumber = prefix(while: \.isNumber)
    .flatMap { digits -> Parser<Double> in
        if let value = Double(digits) {
            return .always(value)
        }
        return .never
    }

/// Wraps `p` so that a successful match consumes input but yields a `nil` value,
/// letting callers drop the match from their results.
func skip<T, U>(_ p: Parser<T>) -> Parser<U?> {
    p.map { (_: T) -> U? in .none }
}

/// Demonstrates scanning `input` with the parser combinators defined in this file.
struct Example {
    /// A lexical token recognized by `tokenize()`.
    enum Token {
        case minus
        case plus
        case equal
        case equalEqual
        case notEqual
        case identifier(String)
        case number(Double)
    }

    /// Errors thrown while scanning `input`.
    enum Error: Swift.Error {
        /// Input remained that none of the supplied parsers could consume.
        case unexpectedInput
    }
    
    /// The text to scan.
    let input: String

    /// Scans `input` from the start, repeatedly applying the first parser in `parsers` that matches.
    ///
    /// A parser that succeeds with a `nil` value consumes input without adding a result.
    ///
    /// - Parameter parsers: The alternatives to try, in priority order.
    /// - Returns: The non-`nil` values produced, in input order.
    /// - Throws: `Error.unexpectedInput` when scanning stops before the input is exhausted.
    func run<T>(_ parsers: [Parser<T?>]) throws -> [T] {
        var source = Substring(input)
        var results: [T] = []
        // Combine the alternatives once; the combined parser is the same on every iteration.
        let anyOf = oneOf(parsers)
        
        while true {
            let remainingBefore = source.utf8.count
            guard let token = anyOf.run(&source) else { break }
            if let token = token {
                results.append(token)
            }
            // A parser that succeeds without consuming input would succeed forever;
            // stop so the loop always terminates and the check below can report leftovers.
            if source.utf8.count == remainingBefore { break }
        }
        
        guard source.isEmpty else {
            throw Error.unexpectedInput
        }
        
        return results
    }
    
    /// Splits `input` into typed tokens, dropping whitespace.
    ///
    /// - Throws: `Error.unexpectedInput` when part of the input matches no tokenizer.
    func tokenize() throws -> [Token] {
        // Order matters: "==" must be tried before "=" so the longer operator wins.
        let tokenizers: [Parser<Token?>] = [
            literal("-").map { .minus },
            literal("+").map { .plus },
            literal("==").map { .equalEqual },
            literal("!=").map { .notEqual },
            literal("=").map { .equal },
            identifier.map { .identifier($0) },
            oneOrMoreDecimals.map { .number($0) },
            skip(whitespace)
        ]
        
        return try run(tokenizers)
    }
    
    /// Returns only the operator strings found in `input`, in order of appearance.
    /// Identifiers, numbers and whitespace are consumed but skipped.
    ///
    /// - Throws: `Error.unexpectedInput` when part of the input matches no parser.
    func opStrings() throws -> [String] {
        // Order matters: "==" must be tried before "=" so the longer operator wins.
        try run([
            literalString("-"),
            literalString("+"),
            literalString("=="),
            literalString("!="),
            literalString("="),
            skip(identifier),
            skip(oneOrMoreDecimals),
            skip(whitespace),
        ] as [Parser<String?>])
    }
}

// Demo: extract the operator strings from a sample expression.
// (`Example.tokenize()` can be used instead to obtain typed tokens.)
do {
    let operators = try Example(input: "a + b + 3 == c").opStrings()
    print(operators) // ["+", "+", "=="]
} catch {
    print(error)
}

【讨论】:

    【解决方案2】:

    您需要将符号当作普通字符串来处理。依次遍历每个符号,并在源字符串中搜索它。如果找到了一个范围,就把它追加到集合中,取出该位置的子字符串,然后从该范围 upperBound 之后的位置继续搜索字符串。当针对该符号搜索到字符串末尾时,再从源字符串中删除这些子字符串。可以试试这样:

    // Collects every occurrence of each symbol in `source`, one symbol at a time.
    // Matches are grouped in the order of `symbols`, not by their position in `source`.
    var source = "a + b + 3 == c"
    let symbols = ["+", "-", "==", "!="]
    
    var results: [Substring] = []
    
    for symbol in symbols {
        var searchStart = source.startIndex
        var ranges: [Range<String.Index>] = []
        while searchStart < source.endIndex,
              let range = source[searchStart...].range(of: symbol) {
            ranges.append(range)
            results.append(source[range])
            // Resume exactly where the match ended. Stepping one index further would
            // skip the next character (missing adjacent matches such as "++") and would
            // trap when the match ends at the end of the string.
            searchStart = range.upperBound
        }
        // Remove the matches back to front so the remaining ranges stay valid, and so
        // later symbols cannot match text that belonged to this symbol.
        for range in ranges.reversed() {
            source.removeSubrange(range)
        }
    }
    
    print(results)  
    

    这将打印出来

    ["+", "+", "=="]

    【讨论】:

      猜你喜欢
      • 2020-05-01
      • 1970-01-01
      • 2012-03-24
      • 2023-01-10
      • 1970-01-01
      • 2016-08-09
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      相关资源
      最近更新 更多