import dask
from dask.distributed import Client
import dask.array as da
import xarray as xr
import numpy as np
from dask.threaded import get
from dask.highlevelgraph import HighLevelGraph
from dask.optimization import fuse
# Lazy 1-D dask array holding the integers 0 .. 9_999_999, split into
# chunks of 10_000 elements each.
a = da.arange(10000000, chunks=(10000,))
# Bare expression: as the last statement of a notebook cell it displays the array.
a
|
</details>
<div style="">
<svg width="24" height="24" viewBox="0 0 32 32" fill="none" xmlns="http://www.w3.org/2000/svg" style="position: absolute;">
<circle cx="16" cy="16" r="14" style="stroke: var(--jp-ui-font-color2, #1D1D1D); fill: var(--jp-layout-color1, #F2F2F2);" stroke-width="2" />
</svg>
<details style="margin-left: 32px;">
<summary style="margin-bottom: 10px; margin-top: 10px;">
<h4 style="display: inline;">Layer3: mul</h4>
</summary>
<p style="color: var(--jp-ui-font-color2, #5D5851); margin: -0.25em 0px 0px 0px;">
mul-70cb8ee7f6799eb0f209bd6e00ebc68d
</p>
<table>
<tr>
<td>
<table>
<tr>
<th style="text-align: left; width: 150px;">layer_type</th>
<td style="text-align: left;">Blockwise</td>
</tr>
<tr>
<th style="text-align: left; width: 150px;">is_materialized</th>
<td style="text-align: left;">False</td>
</tr>
<tr>
<th style="text-align: left; width: 150px;">number of outputs</th>
<td style="text-align: left;">10</td>
</tr>
<tr>
<th style="text-align: left; width: 150px;">shape</th>
<td style="text-align: left;">(1000000,)</td>
</tr>
<tr>
<th style="text-align: left; width: 150px;">dtype</th>
<td style="text-align: left;">int64</td>
</tr>
<tr>
<th style="text-align: left; width: 150px;">chunksize</th>
<td style="text-align: left;">(100000,)</td>
</tr>
<tr>
<th style="text-align: left; width: 150px;">type</th>
<td style="text-align: left;">dask.array.core.Array</td>
</tr>
<tr>
<th style="text-align: left; width: 150px;">chunk_type</th>
<td style="text-align: left;">numpy.ndarray</td>
</tr>
<tr>
<th style="text-align: left; width: 150px;"> depends on </th>
<td style="text-align: left;">pow-c8de4c922347f0a9f74eb6ba60c96f67</td>
</tr>
</table>
</td>
<td>
<svg width="250" height="92" style="stroke:rgb(0,0,0);stroke-width:1" >
</svg>
</td>
</tr>
</table>
</details>
<div style="">
<svg width="24" height="24" viewBox="0 0 32 32" fill="none" xmlns="http://www.w3.org/2000/svg" style="position: absolute;">
<circle cx="16" cy="16" r="14" fill="#8F8F8F" style="stroke: var(--jp-ui-font-color2, #1D1D1D);" stroke-width="2"/>
</svg>
<details style="margin-left: 32px;">
<summary style="margin-bottom: 10px; margin-top: 10px;">
<h4 style="display: inline;">Layer4: finalize</h4>
</summary>
<p style="color: var(--jp-ui-font-color2, #5D5851); margin: -0.25em 0px 0px 0px;">
finalize-84989d70-ccd9-4027-a139-f55697ae0c7b
</p>
<table>
<tr>
<td>
<table>
<tr>
<th style="text-align: left; width: 150px;">layer_type</th>
<td style="text-align: left;">MaterializedLayer</td>
</tr>
<tr>
<th style="text-align: left; width: 150px;">is_materialized</th>
<td style="text-align: left;">True</td>
</tr>
<tr>
<th style="text-align: left; width: 150px;">number of outputs</th>
<td style="text-align: left;">1</td>
</tr>
<tr>
<th style="text-align: left; width: 150px;"> depends on </th>
<td style="text-align: left;">mul-70cb8ee7f6799eb0f209bd6e00ebc68d</td>
</tr>
</table>
</td>
<td>
</td>
</tr>
</table>
</details>
<div style="">
<svg width="24" height="24" viewBox="0 0 32 32" fill="none" xmlns="http://www.w3.org/2000/svg" style="position: absolute;">
<circle cx="16" cy="16" r="14" style="stroke: var(--jp-ui-font-color2, #1D1D1D); fill: var(--jp-layout-color1, #F2F2F2);" stroke-width="2" />
</svg>
<details style="margin-left: 32px;">
<summary style="margin-bottom: 10px; margin-top: 10px;">
<h4 style="display: inline;">Layer5: sub</h4>
</summary>
<p style="color: var(--jp-ui-font-color2, #5D5851); margin: -0.25em 0px 0px 0px;">
sub-6cee8e7d70db7adda2bce5bc26a8713f
</p>
<table>
<tr>
<td>
<table>
<tr>
<th style="text-align: left; width: 150px;">layer_type</th>
<td style="text-align: left;">Blockwise</td>
</tr>
<tr>
<th style="text-align: left; width: 150px;">is_materialized</th>
<td style="text-align: left;">False</td>
</tr>
<tr>
<th style="text-align: left; width: 150px;">number of outputs</th>
<td style="text-align: left;">10</td>
</tr>
<tr>
<th style="text-align: left; width: 150px;">shape</th>
<td style="text-align: left;">(1000000,)</td>
</tr>
<tr>
<th style="text-align: left; width: 150px;">dtype</th>
<td style="text-align: left;">int64</td>
</tr>
<tr>
<th style="text-align: left; width: 150px;">chunksize</th>
<td style="text-align: left;">(100000,)</td>
</tr>
<tr>
<th style="text-align: left; width: 150px;">type</th>
<td style="text-align: left;">dask.array.core.Array</td>
</tr>
<tr>
<th style="text-align: left; width: 150px;">chunk_type</th>
<td style="text-align: left;">numpy.ndarray</td>
</tr>
<tr>
<th style="text-align: left; width: 150px;"> depends on </th>
<td style="text-align: left;">arange-f3e29e89bdefcec09c94416d7cc877d4</td>
</tr>
</table>
</td>
<td>
<svg width="250" height="92" style="stroke:rgb(0,0,0);stroke-width:1" >
</svg>
</td>
</tr>
</table>
</details>
<div style="">
<svg width="24" height="24" viewBox="0 0 32 32" fill="none" xmlns="http://www.w3.org/2000/svg" style="position: absolute;">
<circle cx="16" cy="16" r="14" fill="#8F8F8F" style="stroke: var(--jp-ui-font-color2, #1D1D1D);" stroke-width="2"/>
</svg>
<details style="margin-left: 32px;">
<summary style="margin-bottom: 10px; margin-top: 10px;">
<h4 style="display: inline;">Layer6: finalize</h4>
</summary>
<p style="color: var(--jp-ui-font-color2, #5D5851); margin: -0.25em 0px 0px 0px;">
finalize-253e7a3b-93d7-4f21-9935-024669196fec
</p>
<table>
<tr>
<td>
<table>
<tr>
<th style="text-align: left; width: 150px;">layer_type</th>
<td style="text-align: left;">MaterializedLayer</td>
</tr>
<tr>
<th style="text-align: left; width: 150px;">is_materialized</th>
<td style="text-align: left;">True</td>
</tr>
<tr>
<th style="text-align: left; width: 150px;">number of outputs</th>
<td style="text-align: left;">1</td>
</tr>
<tr>
<th style="text-align: left; width: 150px;"> depends on </th>
<td style="text-align: left;">sub-6cee8e7d70db7adda2bce5bc26a8713f</td>
</tr>
</table>
</td>
<td>
</td>
</tr>
</table>
</details>
<div style="">
<svg width="24" height="24" viewBox="0 0 32 32" fill="none" xmlns="http://www.w3.org/2000/svg" style="position: absolute;">
<circle cx="16" cy="16" r="14" fill="#8F8F8F" style="stroke: var(--jp-ui-font-color2, #1D1D1D);" stroke-width="2"/>
</svg>
<details style="margin-left: 32px;">
<summary style="margin-bottom: 10px; margin-top: 10px;">
<h4 style="display: inline;">Layer7: add_unit</h4>
</summary>
<p style="color: var(--jp-ui-font-color2, #5D5851); margin: -0.25em 0px 0px 0px;">
add_unit-e1c78a5a-9bf8-4ba6-96a5-f7434cd21288
</p>
<table>
<tr>
<td>
<table>
<tr>
<th style="text-align: left; width: 150px;">layer_type</th>
<td style="text-align: left;">MaterializedLayer</td>
</tr>
<tr>
<th style="text-align: left; width: 150px;">is_materialized</th>
<td style="text-align: left;">True</td>
</tr>
<tr>
<th style="text-align: left; width: 150px;">number of outputs</th>
<td style="text-align: left;">1</td>
</tr>
</table>
</td>
<td>
</td>
</tr>
</table>
</details>
<div style="">
<svg width="24" height="24" viewBox="0 0 32 32" fill="none" xmlns="http://www.w3.org/2000/svg" style="position: absolute;">
<circle cx="16" cy="16" r="14" fill="#8F8F8F" style="stroke: var(--jp-ui-font-color2, #1D1D1D);" stroke-width="2"/>
</svg>
<details style="margin-left: 32px;">
<summary style="margin-bottom: 10px; margin-top: 10px;">
<h4 style="display: inline;">Layer8: add</h4>
</summary>
<p style="color: var(--jp-ui-font-color2, #5D5851); margin: -0.25em 0px 0px 0px;">
add-61bd32823b46ab2e579f597bdfcd0c8c
</p>
<table>
<tr>
<td>
<table>
<tr>
<th style="text-align: left; width: 150px;">layer_type</th>
<td style="text-align: left;">MaterializedLayer</td>
</tr>
<tr>
<th style="text-align: left; width: 150px;">is_materialized</th>
<td style="text-align: left;">True</td>
</tr>
<tr>
<th style="text-align: left; width: 150px;">number of outputs</th>
<td style="text-align: left;">1</td>
</tr>
<tr>
<th style="text-align: left; width: 150px;"> depends on </th>
<td style="text-align: left;">finalize-253e7a3b-93d7-4f21-9935-024669196fec</td>
</tr>
<tr>
<th style="text-align: left; width: 150px;"></th>
<td style="text-align: left;">add_unit-e1c78a5a-9bf8-4ba6-96a5-f7434cd21288</td>
</tr>
</table>
</td>
<td>
</td>
</tr>
</table>
</details>
<div style="">
<svg width="24" height="24" viewBox="0 0 32 32" fill="none" xmlns="http://www.w3.org/2000/svg" style="position: absolute;">
<circle cx="16" cy="16" r="14" fill="#8F8F8F" style="stroke: var(--jp-ui-font-color2, #1D1D1D);" stroke-width="2"/>
</svg>
<details style="margin-left: 32px;">
<summary style="margin-bottom: 10px; margin-top: 10px;">
<h4 style="display: inline;">Layer9: add</h4>
</summary>
<p style="color: var(--jp-ui-font-color2, #5D5851); margin: -0.25em 0px 0px 0px;">
add-0bd3775a8fff364fe2ccb896ef6147d9
</p>
<table>
<tr>
<td>
<table>
<tr>
<th style="text-align: left; width: 150px;">layer_type</th>
<td style="text-align: left;">MaterializedLayer</td>
</tr>
<tr>
<th style="text-align: left; width: 150px;">is_materialized</th>
<td style="text-align: left;">True</td>
</tr>
<tr>
<th style="text-align: left; width: 150px;">number of outputs</th>
<td style="text-align: left;">1</td>
</tr>
<tr>
<th style="text-align: left; width: 150px;"> depends on </th>
<td style="text-align: left;">add-61bd32823b46ab2e579f597bdfcd0c8c</td>
</tr>
<tr>
<th style="text-align: left; width: 150px;"></th>
<td style="text-align: left;">finalize-84989d70-ccd9-4027-a139-f55697ae0c7b</td>
</tr>
</table>
</td>
<td>
</td>
</tr>
</table>
</details>
</div>
</div>
from dask.optimization import fuse
def op(dsk, key):
    """Fuse linear task chains of a graph; usable as a ``compute`` optimization.

    Intended to be passed as ``collection.compute(optimizations=[op])``.

    Parameters
    ----------
    dsk : HighLevelGraph or Mapping
        The task graph to optimize. Objects exposing ``to_dict()`` are
        flattened with it; any other mapping is copied into a plain dict.
    key : hashable or (nested) list of hashables
        The requested output key(s). They are forwarded to ``fuse`` so that
        they are kept in the fused graph.

    Returns
    -------
    dict
        The fused low-level task graph, i.e. the first element of the
        ``(graph, dependencies)`` pair returned by ``fuse``.
    """
    # Accept a graph that is already a plain mapping, not only a HighLevelGraph.
    graph = dsk.to_dict() if hasattr(dsk, "to_dict") else dict(dsk)
    # Protect the requested keys: without ``keys`` a requested task that sits
    # in the middle of a linear chain may be fused into its dependent.
    fused, _dependencies = fuse(graph, keys=key)
    return fused
# IPython magic: time one compute of `result` with `op` applied as an extra
# graph-optimization pass.
%time result.compute(optimizations=[op])
CPU times: user 96.4 ms, sys: 215 ms, total: 312 ms
Wall time: 1.26 s
array([ 0, 4, 12, ...,
199999900000012, 199999940000004, 199999980000000])
# Render the graph attached to `result`.
result.dask.visualize()
# Flatten the graph to a plain dict and fuse it; `fuse` returns a
# (graph, dependencies) pair.
dsk, dep = fuse(result.dask.to_dict())
# HighLevelGraph(dsk, dep).visualize()
from dask.delayed import Delayed
# Bare expression: shows the output key of `result` (private attribute).
result._key
# Wrap the fused graph in a Delayed targeting the same key and compute it.
Delayed(result._key, dsk).compute()
/Users/nitinsharma/miniconda3/envs/spec_line_sandbox/lib/python3.10/site-packages/distributed/client.py:3357: UserWarning: Sending large graph of size 76.47 MiB.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
warnings.warn(
array([ 0, 4, 12, ...,
199999900000012, 199999940000004, 199999980000000])
print('hi')
# Flatten the graph to a plain dict and fuse it; `fuse` returns a
# (graph, dependencies) pair keyed by task, not by layer.
dsk, dep = fuse(result.dask.to_dict())
# `dsk` is a flat task dict and `dep` maps task keys to task keys, so they are
# not valid `layers` / `dependencies` arguments for the HighLevelGraph
# constructor. Draw the low-level graph directly instead.
dask.visualize(dsk)
# NOTE(review): `merge` is handed the layer mapping itself here — confirm this
# is the intended argument.
result.dask.merge(result.dask.layers).__dict__
# Wraps the whole layer mapping as a single layer named 'test' with no
# dependencies; the recorded output reports 1 layer and 9 keys.
HighLevelGraph({'test': result.dask.layers}, {'test': set()})
HighLevelGraph
HighLevelGraph with 1 layers and 9 keys from all layers.
<div style="">
<svg width="24" height="24" viewBox="0 0 32 32" fill="none" xmlns="http://www.w3.org/2000/svg" style="position: absolute;">
<circle cx="16" cy="16" r="14" fill="#8F8F8F" style="stroke: var(--jp-ui-font-color2, #1D1D1D);" stroke-width="2"/>
</svg>
<details style="margin-left: 32px;">
<summary style="margin-bottom: 10px; margin-top: 10px;">
<h4 style="display: inline;">Layer1: test</h4>
</summary>
<p style="color: var(--jp-ui-font-color2, #5D5851); margin: -0.25em 0px 0px 0px;">
test
</p>
<table>
<tr>
<td>
<table>
<tr>
<th style="text-align: left; width: 150px;">layer_type</th>
<td style="text-align: left;">MaterializedLayer</td>
</tr>
<tr>
<th style="text-align: left; width: 150px;">is_materialized</th>
<td style="text-align: left;">True</td>
</tr>
<tr>
<th style="text-align: left; width: 150px;">number of outputs</th>
<td style="text-align: left;">9</td>
</tr>
</table>
</td>
<td>
</td>
</tr>
</table>
</details>
</div>
</div>
# Connect to a scheduler at a hard-coded LAN address.
client = Client("tcp://192.168.0.107:8786")
# Bare expression: displays the client summary in the notebook.
client
Client
Client-4f621360-ad69-11ef-8550-acde48001122
<tr>
<td style="text-align: left;"><strong>Connection method:</strong> Direct</td>
<td style="text-align: left;"></td>
</tr>
<tr>
<td style="text-align: left;">
<strong>Dashboard: </strong> <a href="http://192.168.0.107:8787/status" target="_blank">http://192.168.0.107:8787/status</a>
</td>
<td style="text-align: left;"></td>
</tr>
</table>
<details>
<summary style="margin-bottom: 20px;"><h3 style="display: inline;">Scheduler Info</h3></summary>
<div style="">
<div>
<div style="width: 24px; height: 24px; background-color: #FFF7E5; border: 3px solid #FF6132; border-radius: 5px; position: absolute;"> </div>
<div style="margin-left: 48px;">
<h3 style="margin-bottom: 0px;">Scheduler</h3>
<p style="color: #9D9D9D; margin-bottom: 0px;">Scheduler-73c4acfa-aa00-4189-9430-691903058756</p>
<table style="width: 100%; text-align: left;">
<tr>
<td style="text-align: left;">
<strong>Comm:</strong> tcp://192.168.0.107:8786
</td>
<td style="text-align: left;">
<strong>Workers:</strong> 2
</td>
</tr>
<tr>
<td style="text-align: left;">
<strong>Dashboard:</strong> <a href="http://192.168.0.107:8787/status" target="_blank">http://192.168.0.107:8787/status</a>
</td>
<td style="text-align: left;">
<strong>Total threads:</strong> 2
</td>
</tr>
<tr>
<td style="text-align: left;">
<strong>Started:</strong> Just now
</td>
<td style="text-align: left;">
<strong>Total memory:</strong> 8.00 GiB
</td>
</tr>
</table>
</div>
</div>
<details style="margin-left: 48px;">
<summary style="margin-bottom: 20px;">
<h3 style="display: inline;">Workers</h3>
</summary>
<div style="margin-bottom: 20px;">
<div style="width: 24px; height: 24px; background-color: #DBF5FF; border: 3px solid #4CC9FF; border-radius: 5px; position: absolute;"> </div>
<div style="margin-left: 48px;">
<details>
<summary>
<h4 style="margin-bottom: 0px; display: inline;">Worker: tcp://192.168.0.107:52741</h4>
</summary>
<table style="width: 100%; text-align: left;">
<tr>
<td style="text-align: left;">
<strong>Comm: </strong> tcp://192.168.0.107:52741
</td>
<td style="text-align: left;">
<strong>Total threads: </strong> 1
</td>
</tr>
<tr>
<td style="text-align: left;">
<strong>Dashboard: </strong> <a href="http://192.168.0.107:52742/status" target="_blank">http://192.168.0.107:52742/status</a>
</td>
<td style="text-align: left;">
<strong>Memory: </strong> 4.00 GiB
</td>
</tr>
<tr>
<td style="text-align: left;">
<strong>Nanny: </strong> tcp://192.168.0.107:52738
</td>
<td style="text-align: left;"></td>
</tr>
<tr>
<td colspan="2" style="text-align: left;">
<strong>Local directory: </strong> /var/folders/7y/91xrb3bs5g96r0pvlvq04yx00000gn/T/dask-scratch-space/worker-is8n2ls_
</td>
</tr>
<tr>
<td style="text-align: left;">
<strong>Tasks executing: </strong>
</td>
<td style="text-align: left;">
<strong>Tasks in memory: </strong>
</td>
</tr>
<tr>
<td style="text-align: left;">
<strong>Tasks ready: </strong>
</td>
<td style="text-align: left;">
<strong>Tasks in flight: </strong>
</td>
</tr>
<tr>
<td style="text-align: left;">
<strong>CPU usage:</strong> 4.2%
</td>
<td style="text-align: left;">
<strong>Last seen: </strong> Just now
</td>
</tr>
<tr>
<td style="text-align: left;">
<strong>Memory usage: </strong> 117.12 MiB
</td>
<td style="text-align: left;">
<strong>Spilled bytes: </strong> 0 B
</td>
</tr>
<tr>
<td style="text-align: left;">
<strong>Read bytes: </strong> 10.00 kiB
</td>
<td style="text-align: left;">
<strong>Write bytes: </strong> 20.00 kiB
</td>
</tr>
</table>
</details>
</div>
</div>
<div style="margin-bottom: 20px;">
<div style="width: 24px; height: 24px; background-color: #DBF5FF; border: 3px solid #4CC9FF; border-radius: 5px; position: absolute;"> </div>
<div style="margin-left: 48px;">
<details>
<summary>
<h4 style="margin-bottom: 0px; display: inline;">Worker: tcp://192.168.0.107:52749</h4>
</summary>
<table style="width: 100%; text-align: left;">
<tr>
<td style="text-align: left;">
<strong>Comm: </strong> tcp://192.168.0.107:52749
</td>
<td style="text-align: left;">
<strong>Total threads: </strong> 1
</td>
</tr>
<tr>
<td style="text-align: left;">
<strong>Dashboard: </strong> <a href="http://192.168.0.107:52750/status" target="_blank">http://192.168.0.107:52750/status</a>
</td>
<td style="text-align: left;">
<strong>Memory: </strong> 4.00 GiB
</td>
</tr>
<tr>
<td style="text-align: left;">
<strong>Nanny: </strong> tcp://192.168.0.107:52745
</td>
<td style="text-align: left;"></td>
</tr>
<tr>
<td colspan="2" style="text-align: left;">
<strong>Local directory: </strong> /var/folders/7y/91xrb3bs5g96r0pvlvq04yx00000gn/T/dask-scratch-space/worker-052suecj
</td>
</tr>
<tr>
<td style="text-align: left;">
<strong>Tasks executing: </strong>
</td>
<td style="text-align: left;">
<strong>Tasks in memory: </strong>
</td>
</tr>
<tr>
<td style="text-align: left;">
<strong>Tasks ready: </strong>
</td>
<td style="text-align: left;">
<strong>Tasks in flight: </strong>
</td>
</tr>
<tr>
<td style="text-align: left;">
<strong>CPU usage:</strong> 4.3%
</td>
<td style="text-align: left;">
<strong>Last seen: </strong> Just now
</td>
</tr>
<tr>
<td style="text-align: left;">
<strong>Memory usage: </strong> 117.16 MiB
</td>
<td style="text-align: left;">
<strong>Spilled bytes: </strong> 0 B
</td>
</tr>
<tr>
<td style="text-align: left;">
<strong>Read bytes: </strong> 8.01 kiB
</td>
<td style="text-align: left;">
<strong>Write bytes: </strong> 34.03 kiB
</td>
</tr>
</table>
</details>
</div>
</div>
</details>
</div>
2024-11-28 17:46:13,681 - distributed.client - ERROR -
ConnectionRefusedError: [Errno 61] Connection refused
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/Users/nitinsharma/miniconda3/envs/spec_line_sandbox/lib/python3.10/site-packages/distributed/comm/core.py", line 342, in connect
comm = await wait_for(
File "/Users/nitinsharma/miniconda3/envs/spec_line_sandbox/lib/python3.10/site-packages/distributed/utils.py", line 1957, in wait_for
return await asyncio.wait_for(fut, timeout)
File "/Users/nitinsharma/miniconda3/envs/spec_line_sandbox/lib/python3.10/asyncio/tasks.py", line 445, in wait_for
return fut.result()
File "/Users/nitinsharma/miniconda3/envs/spec_line_sandbox/lib/python3.10/site-packages/distributed/comm/tcp.py", line 559, in connect
convert_stream_closed_error(self, e)
File "/Users/nitinsharma/miniconda3/envs/spec_line_sandbox/lib/python3.10/site-packages/distributed/comm/tcp.py", line 140, in convert_stream_closed_error
raise CommClosedError(f"in {obj}: {exc.__class__.__name__}: {exc}") from exc
distributed.comm.core.CommClosedError: in <distributed.comm.tcp.TCPConnector object at 0x15d137310>: ConnectionRefusedError: [Errno 61] Connection refused
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/Users/nitinsharma/miniconda3/envs/spec_line_sandbox/lib/python3.10/site-packages/distributed/utils.py", line 837, in wrapper
return await func(*args, **kwargs)
File "/Users/nitinsharma/miniconda3/envs/spec_line_sandbox/lib/python3.10/site-packages/distributed/client.py", line 1549, in _reconnect
await self._ensure_connected(timeout=timeout)
File "/Users/nitinsharma/miniconda3/envs/spec_line_sandbox/lib/python3.10/site-packages/distributed/client.py", line 1579, in _ensure_connected
comm = await connect(
File "/Users/nitinsharma/miniconda3/envs/spec_line_sandbox/lib/python3.10/site-packages/distributed/comm/core.py", line 366, in connect
await asyncio.sleep(backoff)
File "/Users/nitinsharma/miniconda3/envs/spec_line_sandbox/lib/python3.10/asyncio/tasks.py", line 605, in sleep
return await future
asyncio.exceptions.CancelledError
# NOTE(review): the recorded output for this cell is the repr of the bound
# method (`client.get` without a call), whereas this line calls it with no
# arguments — confirm which was intended and supply the graph/keys if a call
# is wanted.
client.get()
<bound method Client.get of <Client: 'tcp://192.168.0.107:8786' processes=1 threads=1, memory=4.00 GiB>>
from dask.distributed import performance_report
# Compute `result` inside a performance_report context that is given the
# filename "report.html".
with performance_report(filename="report.html"):
    dask.compute(result)