DICOM格式
Digital Imaging and Communications in Medicine (DICOM) 是医学图像存储与传输的标准格式。
1.加载第三方包
# Environment switch: True imports the DICOM reader as `pydicom`, False as
# `dicom` (the import name used by pydicom releases before 1.0).
IS_LOCAL = False

import numpy as np
import pandas as pd
from skimage.io import imread
import seaborn as sns
import matplotlib.pyplot as plt
from glob import glob

# Both branches bind the reader to the name `dicom`, so the rest of the code
# can call `dicom.read_file` regardless of which package is installed.
if IS_LOCAL:
    import pydicom as dicom
else:
    import dicom
import os
2.参数设置和文件路径
# Pick the dataset root for the current environment. The trailing slash
# matters: other statements build paths by plain string concatenation.
if IS_LOCAL:
    PATH = "../input/siim-medical-image/"
else:
    PATH = "../input/"

# List the dataset root as a quick check that the path exists.
print(os.listdir(PATH))
3.读取数据
# Load overview.csv from the dataset root and report its dimensions.
data_df = pd.read_csv(os.path.join(PATH, "overview.csv"))

print("CT Medical images - rows:", data_df.shape[0], " columns:", data_df.shape[1])
4.读取TIFF格式数据
# Count the entries in the TIFF folder, then collect the path of every .tif
# file into a one-column DataFrame.
print("Number of TIFF images:", len(os.listdir(os.path.join(PATH, "tiff_images"))))
tiff_data = pd.DataFrame([{'path': filepath} for filepath in glob(PATH + 'tiff_images/*.tif')])
5.处理TIFF数据
def process_data(path, base_path=None):
    """Build a table of image files with metadata parsed from their names.

    Each matched file name is split on underscores; fields 1, 3, 5 and 6 are
    read as ID, age, contrast flag and modality respectively (for example
    ``ID_0000_AGE_0060_CONTRAST_1_CT.tif``).

    Args:
        path: Glob pattern relative to the dataset root, e.g.
            ``'tiff_images/*.tif'``.
        base_path: Directory the pattern is resolved against. Defaults to
            the module-level ``PATH``.

    Returns:
        A DataFrame with the columns ``path``, ``file``, ``ID`` (str),
        ``Age`` (int), ``Contrast`` (bool) and ``Modality`` (str), one row
        per matched file. When nothing matches, the frame is empty but still
        has all six columns.

    Raises:
        IndexError: If a file name has fewer underscore-separated fields
            than expected.
        ValueError: If the age or contrast field is not an integer.
    """
    root = PATH if base_path is None else base_path
    # Building from a dict keeps the 'path' column even when the glob matches
    # nothing; a list of row dicts would yield a column-less frame and the
    # next line would fail with KeyError.
    data = pd.DataFrame({'path': glob(os.path.join(root, path))})
    data['file'] = data['path'].map(os.path.basename)
    data['ID'] = data['file'].map(lambda x: str(x.split('_')[1]))
    data['Age'] = data['file'].map(lambda x: int(x.split('_')[3]))
    data['Contrast'] = data['file'].map(lambda x: bool(int(x.split('_')[5])))
    # The last field still carries the extension ("CT.tif"); take the part
    # immediately before the final dot.
    data['Modality'] = data['file'].map(lambda x: str(x.split('_')[6].split('.')[-2]))
    return data
# Rebuild the TIFF table, this time with the metadata parsed from file names.
tiff_data = process_data('tiff_images/*.tif')
6.检查数据集信息
# Preview the first ten rows of the TIFF table.
tiff_data.head(10)
7.读取DICOM数据集
# Count the entries in the DICOM folder; os.path.join keeps this working even
# if PATH is given without a trailing slash.
print("Number of DICOM files:", len(os.listdir(os.path.join(PATH, "dicom_dir"))))
8.处理DICOM数据
# Build the DICOM table with the same file-name parsing used for the TIFFs.
dicom_data = process_data('dicom_dir/*.dcm')
9.检查信息
# Preview the first ten rows of the DICOM table.
dicom_data.head(10)
10.检查数据的一致性
def countplot_comparison(feature):
    """Plot the value counts of one column for all three tables side by side.

    Draws one count plot each for the module-level ``data_df``,
    ``tiff_data`` and ``dicom_data`` frames so their distributions can be
    compared visually, then shows the figure.

    Args:
        feature: Name of a column present in all three frames.
    """
    sources = (
        ("Overview data", data_df),
        ("Tiff files data", tiff_data),
        ("Dicom files data", dicom_data),
    )
    _, axes = plt.subplots(1, 3, figsize=(16, 4))
    for axis, (title, frame) in zip(axes, sources):
        # Pass the column as x= explicitly: the first positional parameter of
        # countplot is `data` in current seaborn, so a positional Series is
        # not interpreted as the variable to count.
        sns.countplot(x=frame[feature], ax=axis)
        axis.set_title(title)
    plt.show()
11.显示TIFF图像
def show_images(data, dim=16, imtype='TIFF'):
    """Display the first ``dim`` images of ``data`` in a four-column grid.

    Each panel is titled with the Modality, Age, ID and Contrast values of
    its row.

    Args:
        data: DataFrame with the columns ``path``, ``Modality``, ``Age``,
            ``ID`` and ``Contrast``.
        dim: Maximum number of images to show.
        imtype: ``'TIFF'`` to read files with ``imread``, or ``'DICOM'`` to
            read them with ``dicom.read_file`` and plot ``pixel_array``.

    Raises:
        ValueError: If ``imtype`` is neither ``'TIFF'`` nor ``'DICOM'``.
    """
    if imtype not in ('TIFF', 'DICOM'):
        raise ValueError("imtype must be 'TIFF' or 'DICOM', got {!r}".format(imtype))

    img_data = data[:dim].to_dict('records')
    columns = 4
    # Size the grid from the number of images instead of a fixed 4x4, so that
    # dim > 16 does not index past the axes array. Each row keeps the original
    # height of 5 inches (16 images -> figsize (16, 20) as before).
    rows = max(1, (len(img_data) + columns - 1) // columns)
    f, ax = plt.subplots(rows, columns, figsize=(16, 5 * rows), squeeze=False)
    for i, data_row in enumerate(img_data):
        axis = ax[i // columns, i % columns]
        if imtype == 'TIFF':
            axis.matshow(imread(data_row['path']), cmap='gray')
        else:
            data_row_img = dicom.read_file(data_row['path'])
            axis.imshow(data_row_img.pixel_array, cmap=plt.cm.bone)
        axis.axis('off')
        axis.set_title('Modality: {Modality} Age: {Age}\nSlice: {ID} Contrast: {Contrast}'.format(**data_row))
    # Hide the panels of the last row that received no image.
    for axis in ax.ravel()[len(img_data):]:
        axis.axis('off')
    plt.show()
应用函数,显示图像
# Show the first 16 TIFF images.
show_images(tiff_data, 16, 'TIFF')
12.显示DICOM数据
# extract voxel data
def extract_voxel_data(list_of_dicom_files):
    """Read DICOM slice files and combine them into a single voxel array.

    Args:
        list_of_dicom_files: Iterable of paths to DICOM files.

    Returns:
        The voxel array returned by ``dicom_numpy.combine_slices``. The
        ijk-to-xyz transform it also returns is discarded.

    Raises:
        dicom_numpy.DicomImportException: Propagated unchanged when the
            slices cannot be combined (invalid DICOM data).
    """
    # dicom_numpy is not imported at the top of the file, so import it here;
    # without this the function fails with NameError on first use.
    import dicom_numpy

    datasets = [dicom.read_file(f) for f in list_of_dicom_files]
    voxel_ndarray, _ijk_to_xyz = dicom_numpy.combine_slices(datasets)
    return voxel_ndarray
13.查看DICOM的更多信息
# Take the path of the first DICOM row and read that file.
dicom_file_path = dicom_data['path'].iloc[0]
dicom_file_dataset = dicom.read_file(dicom_file_path)
# Bare expression: displays the dataset when run as a notebook cell.
dicom_file_dataset
DICOM 文件中还包含更多的信息:所属医院、病人的年龄、性别、姓名、病人ID,以及成像方式(Modality)等。
14.我们可以修改可视化函数,来显示参数
def show_dicom_images(data):
    """Display up to 16 DICOM images titled with values from the file itself.

    Modality and patient age are read from each loaded DICOM dataset; the
    slice ID and contrast flag come from the corresponding row of ``data``.

    Args:
        data: DataFrame with the columns ``path``, ``ID`` and ``Contrast``.
    """
    img_data = data[:16].to_dict('records')
    f, ax = plt.subplots(4, 4, figsize=(16, 20))
    for i, data_row in enumerate(img_data):
        axis = ax[i // 4, i % 4]
        data_row_img = dicom.read_file(data_row['path'])
        modality = data_row_img.Modality
        age = data_row_img.PatientAge
        axis.imshow(data_row_img.pixel_array, cmap=plt.cm.bone)
        axis.axis('off')
        axis.set_title('Modality: {} Age: {}\nSlice: {} Contrast: {}'.format(
            modality, age, data_row['ID'], data_row['Contrast']))
    # Hide the panels that received no image when fewer than 16 rows exist.
    for axis in ax.ravel()[len(img_data):]:
        axis.axis('off')
    plt.show()
参考资料:https://www.kaggle.com/gpreda/visualize-ct-dicom-data