from dask.threaded import get
def add(a, b):
    """Return ``a + b``.

    Works for any pair of operands that support the ``+`` operator.
    """
    return a + b
# Minimal task graph: two literal values and one task that adds them.
# The tuple (add, 'x', 'y') is a task; its string arguments name other keys.
dsk = dict(
    x=3,
    y=2,
    z=(add, 'x', 'y'),
)
# Ask the threaded scheduler for the value of key 'z'.
get(dsk, 'z')
5
def add(a, b):
    """Return the combined length of ``a`` and ``b``.

    Both arguments must support ``len()``; despite the name, the elements
    themselves are not added together.
    """
    return len(a) + len(b)
# Same graph shape as before, but the task arguments are lists of keys,
# so `add` is handed lists rather than scalars.
dsk = {}
dsk['x'] = 3
dsk['y'] = 2
dsk['z'] = (add, ['x'], ['y', 'y'])
# Ask the threaded scheduler for the value of key 'z'.
get(dsk, 'z')
3
def add(a, b):
    """Return the result of ``a + b`` for any operands supporting ``+``."""
    return a + b
# Start from the two literal values ...
dsk = dict(x=1, y=2)
# ... then add a task ('z') and a list-valued key ('q') that mixes a task
# with a plain key reference.
dsk.update(
    z=(add, 'x', 'y'),
    q=[(sum, ['x', 'y', 'z']), 'z'],
)
# Ask the threaded scheduler for the value of key 'q'.
get(dsk, 'q')
[6, 3]
# Custom Graphs
def load(chunk_name):
    """Stand-in loader: announce the chunk and return a placeholder string.

    Nothing is read from disk; the returned string simply embeds
    ``chunk_name``.
    """
    print(f"Loading chunk {chunk_name}")
    return f"chunk --- --- {chunk_name}"
def sanitize(chunk):
    """Stand-in cleaning step: announce the chunk and return it unchanged."""
    print(f"Sanitizing chunk {chunk}")
    return chunk
def mean(chunks):
    """Stand-in aggregation: announce the inputs and return the first one.

    No average is computed; ``chunks`` must be a non-empty indexable
    sequence, otherwise indexing raises ``IndexError``.
    """
    print(f"mean {chunks}")
    return chunks[0]
def save(result):
    """Stand-in output step: print a message; ``result`` is not persisted.

    Returns ``None``.
    """
    print("saving result")
# Task graph for the load -> sanitize -> mean -> save pipeline.
# Each value is a task tuple ``(callable, *args)``; a string argument that
# matches another key in the graph refers to that key's computed result.
dsk = {
    'load-chunk-1': (load, 'chunk1.dat'),
    'load-chunk-2': (load, 'chunk2.dat'),
    'sanitize-1': (sanitize, 'load-chunk-1'),
    'sanitize-2': (sanitize, 'load-chunk-2'),
    # Aggregate the *sanitized* chunks. Pointing this at the raw
    # 'load-chunk-*' keys would leave the sanitize tasks with no consumer,
    # so they would never contribute to 'save'.
    'mean': (mean, [f"sanitize-{i}" for i in [1, 2]]),
    'save': (save, 'mean'),
}
from dask.distributed import Client
# Create a distributed client, requesting two workers.
client = Client(n_workers=2)
# Bare expression: in a notebook this renders the client's summary.
client
Client
Client-d418603c-8ef1-11ef-a143-acde48001122
<table style="width: 100%; text-align: left;">
<tr>
<td style="text-align: left;"><strong>Connection method:</strong> Cluster object</td>
<td style="text-align: left;"><strong>Cluster type:</strong> distributed.LocalCluster</td>
</tr>
<tr>
<td style="text-align: left;">
<strong>Dashboard: </strong> <a href="http://127.0.0.1:8787/status" target="_blank">http://127.0.0.1:8787/status</a>
</td>
<td style="text-align: left;"></td>
</tr>
</table>
<details>
<summary style="margin-bottom: 20px;"><h3 style="display: inline;">Cluster Info</h3></summary>
<div class="jp-RenderedHTMLCommon jp-RenderedHTML jp-mod-trusted jp-OutputArea-output">
<div style="width: 24px; height: 24px; background-color: #e1e1e1; border: 3px solid #9D9D9D; border-radius: 5px; position: absolute;">
</div>
<div style="margin-left: 48px;">
<h3 style="margin-bottom: 0px; margin-top: 0px;">LocalCluster</h3>
<p style="color: #9D9D9D; margin-bottom: 0px;">9e696e30</p>
<table style="width: 100%; text-align: left;">
<tr>
<td style="text-align: left;">
<strong>Dashboard:</strong> <a href="http://127.0.0.1:8787/status" target="_blank">http://127.0.0.1:8787/status</a>
</td>
<td style="text-align: left;">
<strong>Workers:</strong> 2
</td>
</tr>
<tr>
<td style="text-align: left;">
<strong>Total threads:</strong> 12
</td>
<td style="text-align: left;">
<strong>Total memory:</strong> 16.00 GiB
</td>
</tr>
<tr>
<td style="text-align: left;"><strong>Status:</strong> running</td>
<td style="text-align: left;"><strong>Using processes:</strong> True</td>
</tr>
</table>
<details>
<summary style="margin-bottom: 20px;">
<h3 style="display: inline;">Scheduler Info</h3>
</summary>
<div style="">
<div>
<div style="width: 24px; height: 24px; background-color: #FFF7E5; border: 3px solid #FF6132; border-radius: 5px; position: absolute;"> </div>
<div style="margin-left: 48px;">
<h3 style="margin-bottom: 0px;">Scheduler</h3>
<p style="color: #9D9D9D; margin-bottom: 0px;">Scheduler-8f432180-0dfb-4565-ac7a-8d40f0b6a709</p>
<table style="width: 100%; text-align: left;">
<tr>
<td style="text-align: left;">
<strong>Comm:</strong> tcp://127.0.0.1:59079
</td>
<td style="text-align: left;">
<strong>Workers:</strong> 2
</td>
</tr>
<tr>
<td style="text-align: left;">
<strong>Dashboard:</strong> <a href="http://127.0.0.1:8787/status" target="_blank">http://127.0.0.1:8787/status</a>
</td>
<td style="text-align: left;">
<strong>Total threads:</strong> 12
</td>
</tr>
<tr>
<td style="text-align: left;">
<strong>Started:</strong> Just now
</td>
<td style="text-align: left;">
<strong>Total memory:</strong> 16.00 GiB
</td>
</tr>
</table>
</div>
</div>
<details style="margin-left: 48px;">
<summary style="margin-bottom: 20px;">
<h3 style="display: inline;">Workers</h3>
</summary>
<div style="margin-bottom: 20px;">
<div style="width: 24px; height: 24px; background-color: #DBF5FF; border: 3px solid #4CC9FF; border-radius: 5px; position: absolute;"> </div>
<div style="margin-left: 48px;">
<details>
<summary>
<h4 style="margin-bottom: 0px; display: inline;">Worker: 0</h4>
</summary>
<table style="width: 100%; text-align: left;">
<tr>
<td style="text-align: left;">
<strong>Comm: </strong> tcp://127.0.0.1:59086
</td>
<td style="text-align: left;">
<strong>Total threads: </strong> 6
</td>
</tr>
<tr>
<td style="text-align: left;">
<strong>Dashboard: </strong> <a href="http://127.0.0.1:59090/status" target="_blank">http://127.0.0.1:59090/status</a>
</td>
<td style="text-align: left;">
<strong>Memory: </strong> 8.00 GiB
</td>
</tr>
<tr>
<td style="text-align: left;">
<strong>Nanny: </strong> tcp://127.0.0.1:59082
</td>
<td style="text-align: left;"></td>
</tr>
<tr>
<td colspan="2" style="text-align: left;">
<strong>Local directory: </strong> /var/folders/7y/91xrb3bs5g96r0pvlvq04yx00000gn/T/dask-scratch-space/worker-kpcc0yu9
</td>
</tr>
</table>
</details>
</div>
</div>
<div style="margin-bottom: 20px;">
<div style="width: 24px; height: 24px; background-color: #DBF5FF; border: 3px solid #4CC9FF; border-radius: 5px; position: absolute;"> </div>
<div style="margin-left: 48px;">
<details>
<summary>
<h4 style="margin-bottom: 0px; display: inline;">Worker: 1</h4>
</summary>
<table style="width: 100%; text-align: left;">
<tr>
<td style="text-align: left;">
<strong>Comm: </strong> tcp://127.0.0.1:59087
</td>
<td style="text-align: left;">
<strong>Total threads: </strong> 6
</td>
</tr>
<tr>
<td style="text-align: left;">
<strong>Dashboard: </strong> <a href="http://127.0.0.1:59088/status" target="_blank">http://127.0.0.1:59088/status</a>
</td>
<td style="text-align: left;">
<strong>Memory: </strong> 8.00 GiB
</td>
</tr>
<tr>
<td style="text-align: left;">
<strong>Nanny: </strong> tcp://127.0.0.1:59083
</td>
<td style="text-align: left;"></td>
</tr>
<tr>
<td colspan="2" style="text-align: left;">
<strong>Local directory: </strong> /var/folders/7y/91xrb3bs5g96r0pvlvq04yx00000gn/T/dask-scratch-space/worker-lmiace3z
</td>
</tr>
</table>
</details>
</div>
</div>
</details>
</details>
</div>
</div>
2024-10-21 08:13:44,417 - distributed.scheduler - WARNING - Worker failed to heartbeat for 3656s; attempting restart: <WorkerState 'tcp://127.0.0.1:59086', name: 0, status: running, memory: 0, processing: 0>
2024-10-21 08:13:44,622 - distributed.scheduler - WARNING - Worker failed to heartbeat for 3656s; attempting restart: <WorkerState 'tcp://127.0.0.1:59087', name: 1, status: running, memory: 0, processing: 0>
2024-10-21 08:13:45,460 - distributed.nanny - WARNING - Restarting worker
2024-10-21 08:13:45,475 - distributed.nanny - WARNING - Restarting worker
2024-10-21 10:37:54,647 - distributed.nanny - WARNING - Restarting worker
2024-10-21 10:37:54,656 - distributed.nanny - WARNING - Restarting worker
2024-10-21 10:37:54,734 - distributed.nanny - WARNING - Restarting worker
2024-10-21 10:37:54,856 - distributed.nanny - WARNING - Restarting worker
2024-10-21 10:37:54,860 - distributed.nanny - WARNING - Restarting worker
2024-10-21 10:37:55,186 - distributed.nanny - WARNING - Restarting worker
2024-10-21 10:37:55,189 - distributed.nanny - WARNING - Restarting worker
2024-10-21 10:37:56,213 - distributed.nanny - WARNING - Restarting worker
2024-10-21 10:37:56,217 - distributed.nanny - WARNING - Restarting worker
# Execute the graph through the distributed client, requesting key 'save'.
client.get(dsk, 'save')