【发布时间】:2017-09-28 00:03:15
【问题描述】:
给定一种二进制格式:文件头部包含记录数,其后紧跟若干条如下格式的记录:
{ type : Int8, timestamp : UInt32, user_id : UInt64 }
0000 0004 0153 0927 d139 6747 c045 d991
2100 53d1 6287 4fd2 69fd 8e5f 0475 0153
f323 a72b 4984 a40b 8d54 db00 53a0 78d4
1db8 b1a6 4129 1651
我之前主要写 Ruby,下面是我目前可行的解决方案;但既然这是结构化数据,我想也许有一种更优雅、更符合 Crystal 习惯的方式来读取这些字节?
# A single record decoded from the binary file: a user kind, a timestamp
# and a numeric user id.
class User
  # Maps the numeric type code stored in a record to a symbolic user kind.
  USER_TYPES = {0 => :admin, 1 => :user}

  property user_type : Symbol
  property timestamp : UInt32
  property user_id : UInt64

  # Builds a user from the raw field values of one record.
  #
  # *user_type* is the numeric code from the record; it is translated
  # through `USER_TYPES`. A code that is not a key of `USER_TYPES` makes
  # the hash lookup raise `KeyError`.
  def initialize(user_type : Int8, timestamp : UInt32, user_id : UInt64)
    @user_type = USER_TYPES[user_type]
    @timestamp = timestamp
    @user_id = user_id
  end
end
# Reads `User` records from a big-endian binary file.
#
# Layout, as consumed by `#parse`: a UInt32 record count, followed by that
# many 13-byte records, each made of an Int8 type code, a UInt32 timestamp
# and a UInt64 user id.
class Parser
  property users : Array(User)

  def initialize
    @users = [] of User
  end

  # Parses the file at *file_path*, appends every decoded record to `users`
  # and returns `users`.
  #
  # Raises `IO::EOFError` if the file ends before the announced number of
  # records has been read, and `KeyError` (from `User.new`) when a record
  # carries an unknown type code.
  def parse(file_path : String)
    File.open(file_path) do |file|
      count = read_big_endian(file, UInt32)

      # Exactly `count` records follow the header. The previous inclusive
      # range `0..count` attempted one read too many and ran past the end
      # of a well-formed file.
      count.times do
        # Fields are laid out back to back, so sequential reads land on the
        # same bytes the former absolute seeks (offset, +1, +5) addressed.
        user_type = read_big_endian(file, Int8)
        timestamp = read_big_endian(file, UInt32)
        user_id = read_big_endian(file, UInt64)
        @users << User.new(user_type, timestamp, user_id)
      end

      @users
    end
  end

  # Reads one value of *read_type* from the current position of *io*,
  # interpreting the bytes as big-endian.
  private def read_big_endian(io : IO, read_type)
    io.read_bytes(read_type, IO::ByteFormat::BigEndian)
  end
end
# Parse the sample file and print the decoded users.
parser = Parser.new
puts parser.parse("my_file.dat")
# Sample output for the four-record file shown above:
# [#<User:0x102805fe0 @user_type=:user, @timestamp=1393108945, @user_id=4136353673894269217>,
# #<User:0x102805fc0 @user_type=:admin, @timestamp=1406231175, @user_id=5751776211841778805>,
# #<User:0x102805fa0 @user_type=:user, @timestamp=1408443303, @user_id=3119170057034093787>,
# #<User:0x102805f80 @user_type=:admin, @timestamp=1403025620, @user_id=2141656950430570065>]
【问题讨论】:
-
对我来说看起来不错。
#seek调用有时是多余的(只需使用IO#skip),但看起来其余的都很好。 -
@RX14 好的,谢谢!不知道
IO#skip
标签: crystal-lang