-
Notifications
You must be signed in to change notification settings - Fork 0
/
segmentation_HC.py
142 lines (118 loc) · 4.97 KB
/
segmentation_HC.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#Import Dependencies
import numpy as np
import dicom
import os
import matplotlib.pyplot as plt
from glob import glob
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
import scipy.ndimage
from skimage import morphology
from skimage import measure
from skimage.transform import resize
from sklearn.cluster import KMeans
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly.tools import FigureFactory as FF
from plotly.graph_objs import *
# Configuration Variables
# TODO : set the configuration in a conf file
data_path = "C:\\Users\\ludovic.renferme\\Documents\\Kaggle\\Patients\\00cba091fa4ad62cc3200a657aeb957e"
output_path = working_path = "C:\\Users\\ludovic.renferme\\Documents\\Kaggle\\Output\\"
g = glob(data_path + '/*.dcm')
# Function to load a scan images
# Opens folder and get every images
# IN : path (from configuration file)
# OUT : All the DICOM objects files from the patient in path
def load_scan(path):
slices = [dicom.read_file(path + '/' + s) for s in os.listdir(path)]
slices.sort(key = lambda x: int(x.InstanceNumber))
try:
slice_thickness = np.abs(slices[0].ImagePositionPatient[2] - slices[1].ImagePositionPatient[2])
except:
slice_thickness = np.abs(slices[0].SliceLocation - slices[1].SliceLocation)
for s in slices:
s.SliceThickness = slice_thickness
return slices
#Function
# IN : All the DICOM objects files from the patient in path
# OUT : Hounsfield unit array for every CT Scan
def get_pixels_hu(scans):
image = np.stack([s.pixel_array for s in scans])
# Convert to int16 (from sometimes int16),
# Should be possible as values should always be low enough (<32k)
image = image.astype(np.int16)
# Set outside-of-scan pixels to 1
# The intercept is usually -1024, so air is approximately 0
image[image == -2000] = 0
# Convert to Hounsfield units (HU)
intercept = scans[0].RescaleIntercept
slope = scans[0].RescaleSlope
if slope != 1:
image = slope * image.astype(np.float64)
image = image.astype(np.int16)
image += np.int16(intercept)
return np.array(image, dtype=np.int16)
#Function
# IN : Hounsfield unit array for every CT Scan
# OUT : resampled inputs according to new spacing paramter, new spacing
def resample(image, scan, new_spacing=[1,1,1]):
# Determine current pixel spacing
spacing = map(float, ([scan[0].SliceThickness] + scan[0].PixelSpacing))
spacing = np.array(list(spacing))
resize_factor = spacing / new_spacing
new_real_shape = image.shape * resize_factor
new_shape = np.round(new_real_shape)
real_resize_factor = new_shape / image.shape
new_spacing = spacing / real_resize_factor
image = scipy.ndimage.interpolation.zoom(image, real_resize_factor)
return image, new_spacing
#Function segmentation
# IN : Image from the preprocessing scheme
# OUT : lung segmented images destinated to the CNN
def make_lungmask(img):
row_size= img.shape[0]
col_size = img.shape[1]
mean = np.mean(img)
std = np.std(img)
img = img-mean
img = img/std
# Find the average pixel value near the lungs
# to renormalize washed out images
middle = img[int(col_size/5):int(col_size/5*4),int(row_size/5):int(row_size/5*4)]
mean = np.mean(middle)
max = np.max(img)
min = np.min(img)
# To improve threshold finding, I'm moving the
# underflow and overflow on the pixel spectrum
img[img==max]=mean
img[img==min]=mean
#
# Using Kmeans to separate foreground (soft tissue / bone) and background (lung/air)
#
kmeans = KMeans(n_clusters=2).fit(np.reshape(middle,[np.prod(middle.shape),1]))
centers = sorted(kmeans.cluster_centers_.flatten())
threshold = np.mean(centers)
thresh_img = np.where(img<threshold,1.0,0.0) # threshold the image
# First erode away the finer elements, then dilate to include some of the pixels surrounding the lung.
# We don't want to accidentally clip the lung.
eroded = morphology.erosion(thresh_img,np.ones([3,3]))
dilation = morphology.dilation(eroded,np.ones([8,8]))
labels = measure.label(dilation) # Different labels are displayed in different colors
label_vals = np.unique(labels)
regions = measure.regionprops(labels)
good_labels = []
for prop in regions:
B = prop.bbox
if B[2]-B[0]<row_size/10*9 and B[3]-B[1]<col_size/10*9 and B[0]>row_size/5 and B[2]<col_size/5*4:
good_labels.append(prop.label)
mask = np.ndarray([row_size,col_size],dtype=np.int8)
mask[:] = 0
#
# After just the lungs are left, we do another large dilation
# in order to fill in and out the lung mask
#
for N in good_labels:
mask = mask + np.where(labels==N,1,0)
mask = morphology.dilation(mask,np.ones([10,10])) # one last dilation
plt.show()
return mask*img