
Commit c997434

[GraphBolt][CUDA] Update copy_to. (#7332)

Authored by yxy235 and Ubuntu
Co-authored-by: Ubuntu <[email protected]>
1 parent: 324fd97

File tree

12 files changed: +18 -68 lines changed

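Every file below makes the same change: the extra_attrs argument is removed from copy_to, so callers now pass only the target device and rely on the attributes that are automatically inferred from each MiniBatch. A minimal before/after sketch (illustrative only; datapipe and device stand for any GraphBolt datapipe and CUDA device):

    # Before this commit: extra attributes had to be listed explicitly.
    datapipe = datapipe.copy_to(device, extra_attrs=["seeds"])

    # After this commit: only the device is given; the attributes to transfer
    # are inferred from the MiniBatch automatically.
    datapipe = datapipe.copy_to(device)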

examples/multigpu/graphbolt/node_classification.py

+1 -1

@@ -133,7 +133,7 @@ def create_dataloader(
     # A CopyTo object copying data in the datapipe to a specified device.
     ############################################################################
     if args.storage_device != "cpu":
-        datapipe = datapipe.copy_to(device, extra_attrs=["seed_nodes"])
+        datapipe = datapipe.copy_to(device)
     datapipe = datapipe.sample_neighbor(graph, args.fanout)
     datapipe = datapipe.fetch_feature(features, node_feature_keys=["feat"])
     if args.storage_device == "cpu":

examples/sampling/graphbolt/node_classification.py

+1 -2

@@ -95,13 +95,12 @@ def create_dataloader(
     # self.copy_to()
     # [Input]:
     # 'device': The device to copy the data to.
-    # 'extra_attrs': The extra attributes to copy.
     # [Output]:
     # A CopyTo object to copy the data to the specified device. Copying here
     # ensures that the rest of the operations run on the GPU.
     ############################################################################
     if args.storage_device != "cpu":
-        datapipe = datapipe.copy_to(device=device, extra_attrs=["seeds"])
+        datapipe = datapipe.copy_to(device=device)

     ############################################################################
     # [Step-3]:

examples/sampling/graphbolt/pyg/node_classification.py

+1 -1

@@ -122,7 +122,7 @@ def create_dataloader(
         graph, fanout if job != "infer" else [-1]
     )
     # Copy the data to the specified device.
-    datapipe = datapipe.copy_to(device=device, extra_attrs=["input_nodes"])
+    datapipe = datapipe.copy_to(device=device)
     # Fetch node features for the sampled subgraph.
     datapipe = datapipe.fetch_feature(feature, node_feature_keys=["feat"])
     # Create and return a DataLoader to handle data loading.

examples/sampling/graphbolt/pyg/node_classification_advanced.py

+2 -2

@@ -179,14 +179,14 @@ def create_dataloader(
     )
     # Copy the data to the specified device.
     if args.graph_device != "cpu":
-        datapipe = datapipe.copy_to(device=device, extra_attrs=["seeds"])
+        datapipe = datapipe.copy_to(device=device)
     # Sample neighbors for each node in the mini-batch.
     datapipe = getattr(datapipe, args.sample_mode)(
         graph, fanout if job != "infer" else [-1]
     )
     # Copy the data to the specified device.
     if args.feature_device != "cpu":
-        datapipe = datapipe.copy_to(device=device, extra_attrs=["input_nodes"])
+        datapipe = datapipe.copy_to(device=device)
     # Fetch node features for the sampled subgraph.
     datapipe = datapipe.fetch_feature(features, node_feature_keys=["feat"])
     # Copy the data to the specified device.

examples/sampling/graphbolt/quickstart/node_classification.py

+1 -1

@@ -18,7 +18,7 @@ def create_dataloader(dataset, itemset, device):
     datapipe = gb.ItemSampler(itemset, batch_size=16)

     # Copy the mini-batch to the designated device for sampling and training.
-    datapipe = datapipe.copy_to(device, extra_attrs=["seeds"])
+    datapipe = datapipe.copy_to(device)

     # Sample neighbors for the seed nodes.
     datapipe = datapipe.sample_neighbor(dataset.graph, fanouts=[4, 2])

examples/sampling/graphbolt/rgcn/hetero_rgcn.py

+1 -1

@@ -117,7 +117,7 @@ def create_dataloader(
     # Move the mini-batch to the appropriate device.
     # `device`:
     #   The device to move the mini-batch to.
-    datapipe = datapipe.copy_to(device, extra_attrs=["seeds"])
+    datapipe = datapipe.copy_to(device)

     # Sample neighbors for each seed node in the mini-batch.
     # `graph`:

notebooks/stochastic_training/multigpu_node_classification.ipynb

+1 -1

@@ -140,7 +140,7 @@
 " shuffle=is_train,\n",
 " drop_uneven_inputs=is_train,\n",
 " )\n",
-" datapipe = datapipe.copy_to(device, extra_attrs=[\"seed_nodes\"])\n",
+" datapipe = datapipe.copy_to(device)\n",
 " # Now that we have moved to device, sample_neighbor and fetch_feature steps\n",
 " # will be executed on GPUs.\n",
 " datapipe = datapipe.sample_neighbor(graph, [10, 10, 10])\n",

notebooks/stochastic_training/node_classification.ipynb

+1 -1

@@ -143,7 +143,7 @@
 "source": [
 "def create_dataloader(itemset, shuffle):\n",
 " datapipe = gb.ItemSampler(itemset, batch_size=1024, shuffle=shuffle)\n",
-" datapipe = datapipe.copy_to(device, extra_attrs=[\"seeds\"])\n",
+" datapipe = datapipe.copy_to(device)\n",
 " datapipe = datapipe.sample_neighbor(graph, [4, 4])\n",
 " datapipe = datapipe.fetch_feature(feature, node_feature_keys=[\"feat\"])\n",
 " return gb.DataLoader(datapipe)"

python/dgl/graphbolt/base.py

+2 -45

@@ -195,8 +195,7 @@ def apply_to(x, device):
 class CopyTo(IterDataPipe):
     """DataPipe that transfers each element yielded from the previous DataPipe
     to the given device. For MiniBatch, only the related attributes
-    (automatically inferred) will be transferred by default. If you want to
-    transfer any other attributes, indicate them in the ``extra_attrs``.
+    (automatically inferred) will be transferred by default.

     Functional name: :obj:`copy_to`.

@@ -208,64 +207,22 @@ class CopyTo(IterDataPipe):
        for data in datapipe:
           yield data.to(device)

-    For :class:`~dgl.graphbolt.MiniBatch`, only a part of attributes will be
-    transferred to accelerate the process by default:
-
-    - When ``seed_nodes`` is not None and ``node_pairs`` is None, node related
-      task is inferred. Only ``labels``, ``sampled_subgraphs``, ``node_features``
-      and ``edge_features`` will be transferred.
-
-    - When ``node_pairs`` is not None and ``seed_nodes`` is None, edge/link
-      related task is inferred. Only ``labels``, ``compacted_node_pairs``,
-      ``compacted_negative_srcs``, ``compacted_negative_dsts``,
-      ``sampled_subgraphs``, ``node_features`` and ``edge_features`` will be
-      transferred.
-
-    - When ``seeds`` is not None, only ``labels``, ``compacted_seeds``,
-      ``sampled_subgraphs``, ``node_features`` and ``edge_features`` will be
-      transferred.
-
-    - Otherwise, all attributes will be transferred.
-
-    - If you want some other attributes to be transferred as well, please
-      specify the name in the ``extra_attrs``. For instance, the following code
-      will copy ``seed_nodes`` to the GPU as well:
-
-    .. code:: python
-
-       datapipe = datapipe.copy_to(device="cuda", extra_attrs=["seed_nodes"])
-
     Parameters
     ----------
     datapipe : DataPipe
         The DataPipe.
     device : torch.device
         The PyTorch CUDA device.
-    extra_attrs: List[string]
-        The extra attributes of the data in the DataPipe you want to be carried
-        to the specific device. The attributes specified in the ``extra_attrs``
-        will be transferred regardless of the task inferred. It could also be
-        applied to classes other than :class:`~dgl.graphbolt.MiniBatch`.
     """

-    def __init__(self, datapipe, device, extra_attrs=None):
+    def __init__(self, datapipe, device):
         super().__init__()
         self.datapipe = datapipe
         self.device = device
-        self.extra_attrs = extra_attrs

     def __iter__(self):
         for data in self.datapipe:
             data = recursive_apply(data, apply_to, self.device)
-            if self.extra_attrs is not None:
-                for attr in self.extra_attrs:
-                    setattr(
-                        data,
-                        attr,
-                        recursive_apply(
-                            getattr(data, attr), apply_to, self.device
-                        ),
-                    )
             yield data


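With the parameter gone, both ways of constructing the transfer step take only the datapipe and the device, as the updated tests below exercise. A minimal sketch, assuming a CUDA-enabled build; the ItemSet/ItemSampler setup is illustrative and mirrors the tests:

    import torch
    import dgl.graphbolt as gb

    # A tiny node ItemSet keyed by "seeds", batched into MiniBatches.
    itemset = gb.ItemSet(torch.arange(8), names="seeds")
    datapipe = gb.ItemSampler(itemset, batch_size=4)

    # Class constructor form.
    copied = gb.CopyTo(datapipe, "cuda")

    # Equivalent functional form (functional name: copy_to).
    copied = datapipe.copy_to("cuda")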
tests/python/pytorch/graphbolt/test_base.py

+3 -9

@@ -36,14 +36,13 @@ def test_CopyTo():
         "node_inference",
         "link_prediction",
         "edge_classification",
-        "extra_attrs",
     ],
 )
 @unittest.skipIf(F._default_context_str == "cpu", "CopyTo needs GPU to test")
 def test_CopyToWithMiniBatches(task):
     N = 16
     B = 2
-    if task == "node_classification" or task == "extra_attrs":
+    if task == "node_classification":
         itemset = gb.ItemSet(
             (torch.arange(N), torch.arange(N)), names=("seeds", "labels")
         )

@@ -114,16 +113,11 @@ def test_data_device(datapipe):
             else:
                 assert var.device.type == "cpu", attr

-    if task == "extra_attrs":
-        extra_attrs = ["seed_nodes"]
-    else:
-        extra_attrs = None
-
     # Invoke CopyTo via class constructor.
-    test_data_device(gb.CopyTo(datapipe, "cuda", extra_attrs))
+    test_data_device(gb.CopyTo(datapipe, "cuda"))

     # Invoke CopyTo via functional form.
-    test_data_device(datapipe.copy_to("cuda", extra_attrs))
+    test_data_device(datapipe.copy_to("cuda"))


 def test_etype_tuple_to_str():

tests/python/pytorch/graphbolt/test_dataloader.py

+3 -3

@@ -15,7 +15,7 @@
 def test_DataLoader():
     N = 40
     B = 4
-    itemset = dgl.graphbolt.ItemSet(torch.arange(N), names="seed_nodes")
+    itemset = dgl.graphbolt.ItemSet(torch.arange(N), names="seeds")
     graph = gb_test_utils.rand_csc_graph(200, 0.15, bidirection_edge=True)
     features = {}
     keys = [("node", None, "a"), ("node", None, "b")]

@@ -62,7 +62,7 @@ def test_gpu_sampling_DataLoader(
     N = 40
     B = 4
     num_layers = 2
-    itemset = dgl.graphbolt.ItemSet(torch.arange(N), names="seed_nodes")
+    itemset = dgl.graphbolt.ItemSet(torch.arange(N), names="seeds")
     graph = gb_test_utils.rand_csc_graph(200, 0.15, bidirection_edge=True).to(
         F.ctx()
     )

@@ -77,7 +77,7 @@ def test_gpu_sampling_DataLoader(
     feature_store = dgl.graphbolt.BasicFeatureStore(features)

     datapipe = dgl.graphbolt.ItemSampler(itemset, batch_size=B)
-    datapipe = datapipe.copy_to(F.ctx(), extra_attrs=["seed_nodes"])
+    datapipe = datapipe.copy_to(F.ctx())
     datapipe = getattr(dgl.graphbolt, sampler_name)(
         datapipe,
         graph,

tutorials/multi/2_node_classification.py

+1 -1

@@ -108,7 +108,7 @@ def create_dataloader(
         shuffle=is_train,
         drop_uneven_inputs=is_train,
     )
-    datapipe = datapipe.copy_to(device, extra_attrs=["seed_nodes"])
+    datapipe = datapipe.copy_to(device)
     # Now that we have moved to device, sample_neighbor and fetch_feature steps
     # will be executed on GPUs.
     datapipe = datapipe.sample_neighbor(graph, [10, 10, 10])
