@@ -156,6 +156,7 @@ import _pickle as pickle
 from copy import deepcopy
 from typing import Optional, List, Callable, Union, Dict, Any, Mapping
 from types import LambdaType
+from subprocess import DEVNULL
 import sys
 import time
@@ -231,7 +232,7 @@ def _multi_proc(ds, _apply_field, func, counter, queue):
     """
     idx = -1
     import contextlib
-    with contextlib.redirect_stdout(None):  # keep prints from contending for rich's lock
+    with contextlib.redirect_stdout(DEVNULL):  # keep prints from contending for rich's lock
         logger.set_stdout(stdout='raw')
         results = []
         try:
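The hunk above changes the stdout-silencing target in the multiprocessing worker from `None` to the imported `DEVNULL`, so that stray prints inside the worker cannot contend for rich's console lock in the parent. For reference, a minimal self-contained sketch of the same silencing idea; it uses a file object opened on `os.devnull` rather than the constant from the diff, and `noisy_worker` is a hypothetical stand-in, not part of the patched code base:

```python
import contextlib
import os

def noisy_worker():
    # Hypothetical worker body: print() writes to sys.stdout, which the
    # surrounding context manager has redirected.
    print("progress update that should not reach the parent console")

def run_silenced():
    # Point sys.stdout at the OS null device for the duration of the block;
    # output is discarded instead of racing with the parent's console.
    with open(os.devnull, "w") as devnull, contextlib.redirect_stdout(devnull):
        noisy_worker()

run_silenced()
```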
@@ -86,12 +86,12 @@ def _torch2paddle(torch_tensor: 'torch.Tensor', device: str = None, no_gradient:
     if not no_gradient:
         # keep the gradient so backpropagation still works
         # paddle's stop_gradient is the opposite of torch's requires_grad
-        paddle_tensor = paddle.to_tensor(torch_tensor.detach().numpy(), stop_gradient=False)
+        paddle_tensor = paddle.to_tensor(torch_tensor.detach().cpu().numpy(), stop_gradient=False)
         hook = paddle_tensor.register_hook(
             lambda grad: torch.autograd.backward(torch_tensor, torch.tensor(grad.numpy()))
         )
     else:
-        paddle_tensor = paddle.to_tensor(torch_tensor.detach().numpy(), stop_gradient=True)
+        paddle_tensor = paddle.to_tensor(torch_tensor.detach().cpu().numpy(), stop_gradient=True)

     paddle_tensor = paddle_to(paddle_tensor, device)
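The `.cpu()` inserted before `.numpy()` matters because PyTorch will not convert a CUDA tensor to NumPy directly. A small sketch of the pattern on its own, independent of `_torch2paddle` (the `to_numpy_safe` name is ours, purely illustrative):

```python
import torch

def to_numpy_safe(t: torch.Tensor):
    # detach() drops the autograd graph (numpy() rejects tensors that
    # require grad), and cpu() copies the data to host memory first --
    # calling numpy() on a CUDA tensor raises a TypeError.
    return t.detach().cpu().numpy()

x = torch.randn(2, 3, requires_grad=True)
if torch.cuda.is_available():
    x = x.cuda()                   # exercise the GPU path when a device is present
print(to_numpy_safe(x).shape)      # (2, 3)
```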
@@ -2179,7 +2179,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin):
         if padding is True:
             if verbose:
                 if max_length is not None and (truncation is False or truncation == "do_not_truncate"):
-                    logger.warn(
+                    logger.warning_once(
                         "`max_length` is ignored when `padding`=`True` and there is no truncation strategy. "
                         "To pad to max length, use `padding='max_length'`."
                     )
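Replacing `logger.warn` (in the standard library a deprecated alias of `warning`) with `logger.warning_once` keeps this padding advice from being emitted on every call. A hedged sketch of how such a deduplicating helper can be built with `functools.lru_cache`; this only illustrates the idea and is not the implementation behind the `logger` used in the diff:

```python
import functools
import logging

logger = logging.getLogger("tokenization")

@functools.lru_cache(maxsize=None)
def warning_once(message: str) -> None:
    # lru_cache keys on the message text, so the warning is logged the
    # first time only; later calls with an identical message are no-ops.
    logger.warning(message)

for _ in range(3):
    warning_once("`max_length` is ignored when `padding`=`True` ...")  # logged once
```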