【发布时间】:2018-10-19 00:39:05
【问题描述】:
我的 Apache 服务器上有大量图像,我想把它们放到 Azure 中。按顺序逐个处理太慢,我负担不起,所以之后我会加入多线程。我可以通过给定的 URL 访问这些图像,并据此构建一个列表,这很简单。但是我没有足够的磁盘空间来先下载图像、再上传、最后删除,我想要更干净的做法。
现在有没有办法做到这一点?
类似:
block_blob_service.AZURECOMMAND(container, source_URL, target_blob_name)
如果不可能,是否有解决方法?
这是我目前的完整代码(先下载再上传,这正是我想避免的): 编辑:感谢 Gaurav Mantri,我现在明白了,并已更新代码。
import requests
from bs4 import BeautifulSoup
from os.path import basename
import os
import sys
import urllib
import urllib2
import urlparse
import argparse
import json
import config
import random
import base64
import datetime
import time
import string
from azure.storage import CloudStorageAccount, AccessPolicy
from azure.storage.blob import BlockBlobService, PageBlobService, AppendBlobService
from azure.storage.models import CorsRule, Logging, Metrics, RetentionPolicy, ResourceTypes, AccountPermissions
from azure.storage.blob.models import BlobBlock, ContainerPermissions, ContentSettings
#from azure.storage.blob import BlobService
from azure.storage import *
#from azure.storage.blob.blobservice import BlobService
# Local staging folder for scraped images, located under the current
# working directory. It is created on start-up when it does not exist yet.
CURRENT_DIR = os.getcwd()
STORING_DIRECTORY_NAME = "stroage_scrapped_images"
STORING_DIRECTORY = "%s/%s" % (CURRENT_DIR, STORING_DIRECTORY_NAME)
if os.path.exists(STORING_DIRECTORY) is False:
    os.makedirs(STORING_DIRECTORY)
def randomword(length):
    """Return a string made of `length` randomly chosen lowercase ASCII letters.

    Each letter is drawn independently with ``random.choice``; a `length`
    of zero (or less) yields the empty string.
    """
    alphabet = string.ascii_lowercase
    picked = []
    for _ in range(length):
        picked.append(random.choice(alphabet))
    return ''.join(picked)
# Clock reading taken once at start-up and reused in metadata_loaded.
# NOTE(review): time.clock() is a processor/elapsed-time counter, not a
# calendar date, so 'Date_Of_Upload' does not hold a real date -- confirm
# what value is actually wanted here.
startdate = time.clock()
metadata_loaded = {
    'Owner': 'ToBeAddedSoon',
    'Date_Of_Upload': startdate,
    'VAR_2': 'VAL_VAR_2',
    'VAR_3': 'VAL_VAR_3',
    'VAR_4': 'VAL_VAR_4',
}

# Storage account settings are read from credentials.json in the current
# working directory, under the "storagacc" key.
with open("credentials.json", 'r') as f:
    data = json.load(f)

_storage_settings = data["storagacc"]
StoAcc_var_name = _storage_settings["Accountname"]
StoAcc_var_key = _storage_settings["AccountKey"]
StoAcc_var_container = _storage_settings["Container"]
def copy_azure_files(source_url, destination_object, destination_container):
    """Ask Azure to copy the resource at `source_url` into a blob.

    The copy is requested through ``BlockBlobService.copy_blob``, so the
    data is not routed through this machine or its disk.

    Args:
        source_url: URL of the resource to copy.
        destination_object: Name of the blob to create in the container.
        destination_container: Name of the target container.

    Returns:
        The value returned by ``copy_blob``. The SDK documents this as the
        copy operation's properties (id, status); the copy may still be in
        progress when this function returns, so callers that need to know
        the outcome should inspect or poll that status.
    """
    blob_service = BlockBlobService(account_name=StoAcc_var_name, account_key=StoAcc_var_key)
    # Previously the result was discarded, which hid the copy status from callers.
    return blob_service.copy_blob(destination_container, destination_object, source_url)
# Module-level blob client built from the account name and key read from
# credentials.json; upload_func uses it for its uploads.
block_blob_service = BlockBlobService(account_name=StoAcc_var_name, account_key=StoAcc_var_key)
def upload_func(container, blobname, filename):
    """Upload a local file to a blob and print how long the upload took.

    Args:
        container: Name of the target container.
        blobname: Name of the blob to create.
        filename: Path of the local file to upload.
    """
    # Wall-clock time is used on purpose: time.clock() reports CPU time on
    # Unix, which stays close to zero while the process waits on the network.
    start = time.time()
    block_blob_service.create_blob_from_path(
        container,
        blobname,
        filename)
    elapsed = time.time() - start
    # Single-argument print keeps the output identical to the original
    # comma-separated print statement and also parses on Python 3.
    print("%s %s %s %s %s %s" % (
        "*** DEBUG *** Time spent uploading API ", filename,
        " is : ", elapsed,
        " in Bucket/container : ", container))
# Page whose <img> tags are scanned; the commented-out URL is an alternative target.
#URL_TARGET = "https://mouradcloud.westeurope.cloudapp.azure.com/blog/blog/category/food/"
URL_TARGET = "https://www.cdiscount.com/search/10/telephone.html"
base_url = URL_TARGET
out_folder = '/tmp'

# Fetch the page and parse it. Note that `data` is rebound here: from this
# point on it holds the page HTML, no longer the parsed credentials.
r = requests.get(URL_TARGET)
data = r.text
soup = BeautifulSoup(data, "lxml")

for link in soup.find_all('img'):
    image_url = link.get("src")

    # <img> tags without a src attribute are ignored silently.
    if image_url is None:
        continue

    # Only absolute http(s) URLs are passed on as a copy source. A plain
    # substring test for 'http' would also accept relative paths that merely
    # contain that text.
    if not image_url.startswith(('http://', 'https://')):
        print(" ->>>>>>>>>>>>>> THIS IS A LOCAL IMAGE ... SKIPPING ")
        continue

    if not image_url.endswith(('.png', '.jpg', '.jpeg')):
        print(" ->>>>>>>>>>>>>> THIS NOT AN IMAGE ... SKIPPING ")
        continue

    print(" ->>>>>>>>>>>>>> THIS IS AN IMAGE ... PROCESSING ")
    file_name_downloaded = basename(image_url)
    filename_in_clouddir = "uploads" + "/" + file_name_downloaded
    # The image is deliberately not downloaded to local disk any more:
    # copy_azure_files hands the URL itself to the blob service, and the
    # local copy was never read afterwards.
    copy_azure_files(image_url, filename_in_clouddir, StoAcc_var_container)
【问题讨论】:
标签: python azure azure-blob-storage