【问题标题】:Increase speed creation for masked xarray file提高掩码 xarray 文件的创建速度
【发布时间】:2020-05-13 22:03:53
【问题描述】:

我目前正在尝试使用蒙版网格将矩形 xarray 文件裁剪为国家的形状。您可以在下面找到我当前的解决方案(使用更简单和更小的数组)。代码有效,我得到了基于 1 和 0 的所需掩码。问题在于代码在真实国家(更大更复杂)上运行时需要 30 多分钟才能运行。由于我在这里使用了非常基本的操作,例如嵌套 for 循环,因此我还尝试了不同的替代方法,例如列表方法。但是,在计时过程时,它并没有改进下面的代码。我想知道是否有更快的方法来获得这个掩码(矢量化?)或者我是否应该以不同的方式解决这个问题(尝试探索 xarray 的属性,但还没有找到任何解决这个问题的方法)。

代码如下:

import geopandas as gpd
from shapely.geometry import Polygon, Point
import pandas as pd
import numpy as np 
import xarray as xr

df = pd.read_csv('Brazil_borders.csv',index_col=0)
lats = np.array([-20, -5, -5, -20,])
lons = np.array([-60, -60, -30, -30])
lats2 = np.array([-10.25, -10.75, -11.25, -11.75, -12.25, -12.75, -13.25, -13.75,
       -14.25, -14.75, -15.25, -15.75, -16.25, -16.75, -17.25, -17.75,
       -18.25, -18.75, -19.25, -19.75, -20.25, -20.75, -21.25, -21.75,
       -22.25, -22.75, -23.25, -23.75, -24.25, -24.75, -25.25, -25.75,
       -26.25, -26.75, -27.25, -27.75, -28.25, -28.75, -29.25, -29.75,
       -30.25, -30.75, -31.25, -31.75, -32.25, -32.75])
lons2 = np.array([-61.75, -61.25, -60.75, -60.25, -59.75, -59.25, -58.75, -58.25,
       -57.75, -57.25, -56.75, -56.25, -55.75, -55.25, -54.75, -54.25,
       -53.75, -53.25, -52.75, -52.25, -51.75, -51.25, -50.75, -50.25,
       -49.75, -49.25, -48.75, -48.25, -47.75, -47.25, -46.75, -46.25,
       -45.75, -45.25, -44.75, -44.25])
points = []
for i in range(len(lats)):
        _= [lats[i],lons[i]]
        points.append(_)
poly_proj = Polygon(points)    

mask = np.zeros((len(lats2),len(lons2)))     # Mask with the dataset's shape and size.                                       

for i in range(len(lats2)):                  # Iteration to verify if a given coordinate is within the polygon's area                                    
    for j in range(len(lons2)):                                                  
        grid_point = Point(lats2[i], lons2[j])                  
        if grid_point.within(poly_proj):  
            mask[i][j] = 1    
bool_final = mask
bool_final

基于列表方法的替代方案,但处理时间更差(根据 timeit):

lats = np.array([-20, -5, -5, -20,])
lons = np.array([-60, -60, -30, -30])
lats2 = np.array([-10.25, -10.75, -11.25, -11.75, -12.25, -12.75, -13.25, -13.75,
       -14.25, -14.75, -15.25, -15.75, -16.25, -16.75, -17.25, -17.75,
       -18.25, -18.75, -19.25, -19.75, -20.25, -20.75, -21.25, -21.75,
       -22.25, -22.75, -23.25, -23.75, -24.25, -24.75, -25.25, -25.75,
       -26.25, -26.75, -27.25, -27.75, -28.25, -28.75, -29.25, -29.75,
       -30.25, -30.75, -31.25, -31.75, -32.25, -32.75])
lons2 = np.array([-61.75, -61.25, -60.75, -60.25, -59.75, -59.25, -58.75, -58.25,
       -57.75, -57.25, -56.75, -56.25, -55.75, -55.25, -54.75, -54.25,
       -53.75, -53.25, -52.75, -52.25, -51.75, -51.25, -50.75, -50.25,
       -49.75, -49.25, -48.75, -48.25, -47.75, -47.25, -46.75, -46.25,
       -45.75, -45.25, -44.75, -44.25])
points = []
for i in range(len(lats)):
        _= [lats[i],lons[i]]
        points.append(_)
poly_proj = Polygon(points)    

grid_point = [Point(lats2[i],lons2[j]) for i in range(len(lats2)) for j in range(len(lons2))]                 
mask = [1 if grid_point[i].within(poly_proj) else 0 for i in range(len(grid_point))] 
bool_final2 = np.reshape(mask,(((len(lats2)),(len(lons2)))))

提前谢谢你!

【问题讨论】:

    标签: arrays numpy for-loop grid python-xarray


    【解决方案1】:

    基于snowman2 的回答,我创建了这个简单的函数,它通过使用geopandasrioxarray 提供了一个更快的解决方案。不必使用纬度和经度列表,而是必须使用带有所需形状的 shapefile(Instructions 用于从坐标列表创建 GeoDataFrame)。

    import xarray as xr
    import geopandas as gpd
    import rioxarray
    from shapely.geometry import mapping
    
    def mask_shape_border (DS,shape_shp): #Inputs are the dataset to be cropped and the address of the mask file (.shp )
        if 'lat' in DS:                     #Some datasets use lat/lon, others latitude/longitude
                DS.rio.set_spatial_dims(x_dim="lon", y_dim="lat", inplace=True)
        elif 'latitude' in DS:
                DS.rio.set_spatial_dims(x_dim="longitude", y_dim="latitude", inplace=True)
        else:
            print("Error: check latitude and longitude variable names.")
    
        DS.rio.write_crs("epsg:4326", inplace=True)
        mask = gpd.read_file(shape_shp, crs="epsg:4326")
        DS_clipped = DS.rio.clip(mask.geometry.apply(mapping), mask.crs, drop=False)
    
        return(DS_clipped)
    

    【讨论】:

      猜你喜欢
      • 1970-01-01
      • 2017-11-03
      • 1970-01-01
      • 2013-01-25
      • 2012-12-01
      • 2011-10-31
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      相关资源
      最近更新 更多