tl;dr:将函数内存增加到至少 1024MB,请参阅更新 2
我很好奇,所以我做了一些测量。我创建了一个脚本,用于在新表中创建一个大小几乎正好为 400KB 的大 boi 项。
然后我从 Python 测试了两种读取方式——一种使用资源(resource)API,另一种使用较低级别的客户端(client)——两种情况下都是最终一致性读取。
这是我测量的:
Reading Big Boi from a Table Resource took 0.366508s and consumed 50.0 RCUs
Reading Big Boi from a Client took 0.301585s and consumed 50.0 RCUs
如果我们从 RCU 反推,读取的项目大小约为 50 × 2 × 4KB = 400KB(最终一致性读取每 4KB 只消耗 0.5 个 RCU,所以要乘以 2)。
我在德国本地针对eu-central-1(德国法兰克福)运行了几次,我看到的最高延迟约为 900 毫秒。 (这没有 DAX。)
因此,我认为您应该向我们展示您是如何进行测量的。
import uuid
from datetime import datetime, timedelta
import boto3
import boto3.dynamodb.conditions as conditions
# Table that holds the single large benchmark item.
TABLE_NAME = "big-boi-test"
# Partition-key value of the large item.
BIG_BOI_PK = "f0ba8d6c"
# Low-level DynamoDB client (typed attribute values such as {"S": ...}).
DDB_CLIENT = boto3.client("dynamodb")
# Resource-API handle bound to the benchmark table.
TABLE_RESOURCE = boto3.resource("dynamodb").Table(TABLE_NAME)
def create_table():
    """Create the benchmark table and block until it can be used.

    The table has a single string hash key named ``PK`` and uses
    ``PAY_PER_REQUEST`` billing.

    Table creation is asynchronous: the ``create_table`` call returns before
    the table is ready. Waiting here means ``store_big_boi()`` can be called
    immediately afterwards without failing on a table that does not exist yet.
    """
    DDB_CLIENT.create_table(
        AttributeDefinitions=[{"AttributeName": "PK", "AttributeType": "S"}],
        TableName=TABLE_NAME,
        KeySchema=[{"AttributeName": "PK", "KeyType": "HASH"}],
        BillingMode="PAY_PER_REQUEST",
    )
    # Poll until the table exists and is ready for reads and writes.
    DDB_CLIENT.get_waiter("table_exists").wait(TableName=TABLE_NAME)
def create_big_boi_item() -> dict:
    """Build an item of 400 KB in the low-level client format.

    The item has the partition key ``PK`` (value ``BIG_BOI_PK``) and one
    string attribute ``bigBoi`` padded with ``"X"`` characters so that the
    attribute names plus values add up to 1024 * 400 bytes.

    Returns:
        The item as a dict of typed attribute values, ready to be passed as
        ``Item`` to the client's ``put_item``.
    """
    # based on calculations here: https://zaccharles.github.io/dynamodb-calculator/
    # Size budget follows that calculator: attribute names and string values
    # each count with their UTF-8 byte length.
    target_size = 1024 * 400
    # 16 bytes for the default key: "PK" (2) + "f0ba8d6c" (8) + "bigBoi" (6).
    # Derived from the actual key so the total stays exact if it changes.
    overhead = sum(
        len(text.encode("utf-8")) for text in ("PK", BIG_BOI_PK, "bigBoi")
    )
    return {
        "PK": {"S": BIG_BOI_PK},
        "bigBoi": {"S": "X" * (target_size - overhead)},
    }
def store_big_boi():
    """Generate the big item and write it to the table with the client."""
    DDB_CLIENT.put_item(
        TableName=TABLE_NAME,
        Item=create_big_boi_item(),
    )
def get_big_boi_with_table_resource():
    """Read the big item once through the Table resource and report latency.

    Issues a single ``get_item`` (no ``ConsistentRead`` argument is passed)
    and prints the elapsed time and the consumed read capacity units.

    Returns:
        The elapsed time of the request in seconds, so callers can aggregate
        several measurements.
    """
    # Local import keeps this function self-contained.
    import time

    # perf_counter() is monotonic and high-resolution; the wall clock can
    # jump (NTP or DST adjustments) and distort a latency measurement.
    start = time.perf_counter()
    response = TABLE_RESOURCE.get_item(
        Key={"PK": BIG_BOI_PK},
        ReturnConsumedCapacity="TOTAL",
    )
    # Rounded to microseconds so the printed value keeps its usual precision.
    seconds = round(time.perf_counter() - start, 6)
    capacity_units = response["ConsumedCapacity"]["CapacityUnits"]
    print(f"Reading Big Boi from a Table Resource took {seconds}s and consumed {capacity_units} RCUs")
    return seconds
def get_big_boi_with_client():
    """Read the big item once through the low-level client and report latency.

    Issues a single ``get_item`` (no ``ConsistentRead`` argument is passed)
    and prints the elapsed time and the consumed read capacity units.

    Returns:
        The elapsed time of the request in seconds, so callers can aggregate
        several measurements.
    """
    # Local import keeps this function self-contained.
    import time

    # perf_counter() is monotonic and high-resolution; the wall clock can
    # jump (NTP or DST adjustments) and distort a latency measurement.
    start = time.perf_counter()
    response = DDB_CLIENT.get_item(
        Key={"PK": {"S": BIG_BOI_PK}},
        ReturnConsumedCapacity="TOTAL",
        TableName=TABLE_NAME,
    )
    # Rounded to microseconds so the printed value keeps its usual precision.
    seconds = round(time.perf_counter() - start, 6)
    capacity_units = response["ConsumedCapacity"]["CapacityUnits"]
    print(f"Reading Big Boi from a Client took {seconds}s and consumed {capacity_units} RCUs")
    return seconds
if __name__ == "__main__":
    # One-time setup: uncomment on the first run to create the table and
    # store the item before measuring.
    # create_table()
    # store_big_boi()
    # Measure the resource API first, then the low-level client.
    for read_big_boi in (get_big_boi_with_table_resource, get_big_boi_with_client):
        read_big_boi()
更新
我再次对一个看起来更像您正在使用的项目进行了相同的测量,无论我以哪种方式请求它们,我的平均时间仍然低于 1000 毫秒:
Reading Big Boi from a Table Resource took 1.492829s and consumed 50.0 RCUs
Reading Big Boi from a Table Resource took 0.871583s and consumed 50.0 RCUs
Reading Big Boi from a Table Resource took 0.857513s and consumed 50.0 RCUs
Reading Big Boi from a Table Resource took 0.769432s and consumed 50.0 RCUs
Reading Big Boi from a Table Resource took 0.690172s and consumed 50.0 RCUs
Reading Big Boi from a Table Resource took 0.670099s and consumed 50.0 RCUs
Reading Big Boi from a Table Resource took 0.633489s and consumed 50.0 RCUs
Reading Big Boi from a Table Resource took 0.605999s and consumed 50.0 RCUs
Reading Big Boi from a Table Resource took 0.598635s and consumed 50.0 RCUs
Reading Big Boi from a Table Resource took 0.606553s and consumed 50.0 RCUs
Reading Big Boi from a Client took 1.66636s and consumed 50.0 RCUs
Reading Big Boi from a Client took 0.921605s and consumed 50.0 RCUs
Reading Big Boi from a Client took 0.831735s and consumed 50.0 RCUs
Reading Big Boi from a Client took 0.707082s and consumed 50.0 RCUs
Reading Big Boi from a Client took 0.668602s and consumed 50.0 RCUs
Reading Big Boi from a Client took 0.648401s and consumed 50.0 RCUs
Reading Big Boi from a Client took 0.5695s and consumed 50.0 RCUs
Reading Big Boi from a Client took 0.592073s and consumed 50.0 RCUs
Reading Big Boi from a Client took 0.611436s and consumed 50.0 RCUs
Reading Big Boi from a Client took 0.553827s and consumed 50.0 RCUs
Average latency over 10 requests with the table resource: 0.7796304s
Average latency over 10 requests with the client: 0.7770621s
这是该项目(item)的样子:
这是供您验证的完整测试脚本:
import statistics
import uuid
from datetime import datetime, timedelta
import boto3
import boto3.dynamodb.conditions as conditions
# Table that holds the single large benchmark item.
TABLE_NAME = "big-boi-test"
# Partition-key value of the nested large item.
BIG_BOI_PK = "NestedBoi"
# Low-level DynamoDB client (typed attribute values such as {"S": ...}).
DDB_CLIENT = boto3.client("dynamodb")
# Resource-API handle bound to the benchmark table.
TABLE_RESOURCE = boto3.resource("dynamodb").Table(TABLE_NAME)
def create_table():
    """Create the benchmark table and block until it can be used.

    The table has a single string hash key named ``PK`` and uses
    ``PAY_PER_REQUEST`` billing.

    Table creation is asynchronous: the ``create_table`` call returns before
    the table is ready. Waiting here means ``store_big_boi()`` can be called
    immediately afterwards without failing on a table that does not exist yet.
    """
    DDB_CLIENT.create_table(
        AttributeDefinitions=[{"AttributeName": "PK", "AttributeType": "S"}],
        TableName=TABLE_NAME,
        KeySchema=[{"AttributeName": "PK", "KeyType": "HASH"}],
        BillingMode="PAY_PER_REQUEST",
    )
    # Poll until the table exists and is ready for reads and writes.
    DDB_CLIENT.get_waiter("table_exists").wait(TableName=TABLE_NAME)
def create_big_boi_item() -> dict:
    """Build a nested item of just under 400 KB in the low-level client format.

    The item has the partition key ``PK`` = ``"NestedBoi"`` and a map
    ``bigBoiContainer`` holding a list ``bigBoiList``. The list is filled
    with as many identical small event maps (keys ``t``, ``a``, ``i``) as fit
    into the 1024 * 400 byte budget, using the per-part sizes noted below.

    Returns:
        The item as a dict of typed attribute values, ready to be passed as
        ``Item`` to the client's ``put_item``. Every list entry is its own
        dict, so changing one entry does not affect the others.
    """
    # based on calculations here: https://zaccharles.github.io/dynamodb-calculator/
    target_size = 1024 * 400
    template_size = 43  # size of the item with an empty list
    entry_size = 36  # size of one list entry
    number_of_items = (target_size - template_size) // entry_size

    # Build a fresh dict per entry instead of appending one shared object.
    entries = [
        {
            "M": {
                "t": {"S": "1614712316"},
                "a": {"S": "product_view"},
                "i": {"S": "1275"},
            }
        }
        for _ in range(number_of_items)
    ]
    return {
        "PK": {"S": "NestedBoi"},
        "bigBoiContainer": {"M": {"bigBoiList": {"L": entries}}},
    }
def store_big_boi():
    """Generate the big item and write it to the table with the client."""
    DDB_CLIENT.put_item(
        TableName=TABLE_NAME,
        Item=create_big_boi_item(),
    )
def get_big_boi_with_table_resource():
    """Read the big item once through the Table resource and report latency.

    Issues a single ``get_item`` (no ``ConsistentRead`` argument is passed)
    and prints the elapsed time and the consumed read capacity units.

    Returns:
        The elapsed time of the request in seconds.
    """
    # Local import keeps this function self-contained.
    import time

    # perf_counter() is monotonic and high-resolution; the wall clock can
    # jump (NTP or DST adjustments) and distort a latency measurement.
    start = time.perf_counter()
    response = TABLE_RESOURCE.get_item(
        Key={"PK": BIG_BOI_PK},
        ReturnConsumedCapacity="TOTAL",
    )
    # Rounded to microseconds so the printed value keeps its usual precision.
    seconds = round(time.perf_counter() - start, 6)
    capacity_units = response["ConsumedCapacity"]["CapacityUnits"]
    print(f"Reading Big Boi from a Table Resource took {seconds}s and consumed {capacity_units} RCUs")
    return seconds
def get_big_boi_with_client():
    """Read the big item once through the low-level client and report latency.

    Issues a single ``get_item`` (no ``ConsistentRead`` argument is passed)
    and prints the elapsed time and the consumed read capacity units.

    Returns:
        The elapsed time of the request in seconds.
    """
    # Local import keeps this function self-contained.
    import time

    # perf_counter() is monotonic and high-resolution; the wall clock can
    # jump (NTP or DST adjustments) and distort a latency measurement.
    start = time.perf_counter()
    response = DDB_CLIENT.get_item(
        Key={"PK": {"S": BIG_BOI_PK}},
        ReturnConsumedCapacity="TOTAL",
        TableName=TABLE_NAME,
    )
    # Rounded to microseconds so the printed value keeps its usual precision.
    seconds = round(time.perf_counter() - start, 6)
    capacity_units = response["ConsumedCapacity"]["CapacityUnits"]
    print(f"Reading Big Boi from a Client took {seconds}s and consumed {capacity_units} RCUs")
    return seconds
if __name__ == "__main__":
    # One-time setup: uncomment on the first run to create the table and
    # store the item before measuring.
    # create_table()
    # store_big_boi()
    n_experiments = 10

    # All resource-API reads run first, then all low-level client reads.
    experiments_with_table_resource = []
    for _ in range(n_experiments):
        experiments_with_table_resource.append(get_big_boi_with_table_resource())

    experiments_with_client = []
    for _ in range(n_experiments):
        experiments_with_client.append(get_big_boi_with_client())

    mean_with_table_resource = statistics.mean(experiments_with_table_resource)
    print(f"Average latency over {n_experiments} requests with the table resource: {mean_with_table_resource}s")
    mean_with_client = statistics.mean(experiments_with_client)
    print(f"Average latency over {n_experiments} requests with the client: {mean_with_client}s")
如果我增加 n_experiments,它往往会变得更快,可能是因为 DDB 在内部缓存。
仍然:无法重现。
更新 2
在得知您正在运行 Lambda 函数后,我在 Lambda 内部使用不同的内存配置再次运行了测试。
| Memory | n_experiments | average time with resource | average time with client |
| --- | --- | --- | --- |
| 128MB | 10 | 6.28s | 5.06s |
| 256MB | 10 | 3.26s | 2.61s |
| 512MB | 10 | 1.62s | 1.33s |
| 1024MB | 10 | 0.84s | 0.68s |
| 2048MB | 10 | 0.52s | 0.43s |
| 4096MB | 10 | 0.51s | 0.41s |
如评论中所述,CPU 和网络性能随分配给函数的内存量而变化。
你可以通过砸钱来解决你的问题:-)