From 4080f8071e96d4dbcc5ae8af10b051e14fea30ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8F=AD=E6=89=AC?= Date: Tue, 1 Nov 2022 12:57:04 +0800 Subject: [PATCH 1/5] temp --- modelscope/hub/api.py | 11 +++++++++++ modelscope/msdatasets/ms_dataset.py | 14 ++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py index 7468e5e3..0262fc1d 100644 --- a/modelscope/hub/api.py +++ b/modelscope/hub/api.py @@ -646,6 +646,17 @@ class HubApi: def check_local_cookies(self, use_cookies) -> CookieJar: return self._check_cookie(use_cookies=use_cookies) + def count_uv_by_channel(self, dataset_name: str, namespace: str, channel: str): + # todo: 1. check args 2. + + url = f'{self.endpoint}/api/v1/datasets/{namespace}/{dataset_name}/download/uv/{channel}' + cookies = ModelScopeConfig.get_cookies() + r = requests.post(url, cookies=cookies, headers=self.headers) + resp = r.json() + raise_on_error(resp) + print(resp) + return resp['Message'] + class ModelScopeConfig: path_credential = expanduser(DEFAULT_CREDENTIALS_PATH) diff --git a/modelscope/msdatasets/ms_dataset.py b/modelscope/msdatasets/ms_dataset.py index 0c537df7..a7d29990 100644 --- a/modelscope/msdatasets/ms_dataset.py +++ b/modelscope/msdatasets/ms_dataset.py @@ -727,3 +727,17 @@ class MsDataset: resp_msg = _delete_manager.delete(object_name=object_name) logger.info(f'Object {object_name} successfully removed!') return resp_msg + + +if __name__ == '__main__': + from modelscope.hub.api import HubApi + api = HubApi() + # api.login('c252d64a-ce7b-4c0c-b583-7bedf628c7da') # online + # api.login('aa14716f-e2de-4f26-bf49-254d81eb8ac6') # test + + channel = 'local' # dsw + dataset_name = 'small_coco_for_test' + namespace = 'wangxingjun778test' + resp = api.count_uv_by_channel( + dataset_name=dataset_name, namespace=namespace, channel=channel) + print(resp) From 84032f90e3f2b4a183725ceda16a4b1dc204c2f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8F=AD=E6=89=AC?= Date: Tue, 1 Nov 2022 
15:34:58 +0800 Subject: [PATCH 2/5] add event tracking --- modelscope/hub/api.py | 20 ++++++++++++++------ modelscope/msdatasets/ms_dataset.py | 16 ++-------------- modelscope/utils/constant.py | 8 ++++++++ requirements/framework.txt | 2 +- 4 files changed, 25 insertions(+), 21 deletions(-) diff --git a/modelscope/hub/api.py b/modelscope/hub/api.py index 0262fc1d..f2ff822d 100644 --- a/modelscope/hub/api.py +++ b/modelscope/hub/api.py @@ -39,8 +39,8 @@ from modelscope.utils.constant import (DEFAULT_DATASET_REVISION, DEFAULT_MODEL_REVISION, DEFAULT_REPOSITORY_REVISION, MASTER_MODEL_BRANCH, DatasetFormations, - DatasetMetaFormats, DownloadMode, - ModelFile) + DatasetMetaFormats, DownloadChannel, + DownloadMode, ModelFile) from modelscope.utils.logger import get_logger from .utils.utils import (get_endpoint, get_release_datetime, model_id_to_group_owner_name) @@ -646,15 +646,23 @@ class HubApi: def check_local_cookies(self, use_cookies) -> CookieJar: return self._check_cookie(use_cookies=use_cookies) - def count_uv_by_channel(self, dataset_name: str, namespace: str, channel: str): - # todo: 1. check args 2. 
+ def dataset_download_uv(self, dataset_name: str, namespace: str): + if not dataset_name or not namespace: + raise ValueError('dataset_name or namespace cannot be empty!') - url = f'{self.endpoint}/api/v1/datasets/{namespace}/{dataset_name}/download/uv/{channel}' + # get channel and user_name + channel = DownloadChannel.LOCAL.value + user_name = '' + if MODELSCOPE_ENVIRONMENT in os.environ: + channel = os.environ[MODELSCOPE_ENVIRONMENT] + if MODELSCOPE_USERNAME in os.environ: + user_name = os.environ[MODELSCOPE_USERNAME] + + url = f'{self.endpoint}/api/v1/datasets/{namespace}/{dataset_name}/download/uv/{channel}?user={user_name}' cookies = ModelScopeConfig.get_cookies() r = requests.post(url, cookies=cookies, headers=self.headers) resp = r.json() raise_on_error(resp) - print(resp) return resp['Message'] diff --git a/modelscope/msdatasets/ms_dataset.py b/modelscope/msdatasets/ms_dataset.py index a7d29990..5c8ea59f 100644 --- a/modelscope/msdatasets/ms_dataset.py +++ b/modelscope/msdatasets/ms_dataset.py @@ -274,6 +274,8 @@ class MsDataset: try: api.on_dataset_download( dataset_name=download_dataset, namespace=namespace) + api.dataset_download_uv( + dataset_name=download_dataset, namespace=namespace) except Exception as e: logger.error(e) @@ -727,17 +729,3 @@ class MsDataset: resp_msg = _delete_manager.delete(object_name=object_name) logger.info(f'Object {object_name} successfully removed!') return resp_msg - - -if __name__ == '__main__': - from modelscope.hub.api import HubApi - api = HubApi() - # api.login('c252d64a-ce7b-4c0c-b583-7bedf628c7da') # online - # api.login('aa14716f-e2de-4f26-bf49-254d81eb8ac6') # test - - channel = 'local' # dsw - dataset_name = 'small_coco_for_test' - namespace = 'wangxingjun778test' - resp = api.count_uv_by_channel( - dataset_name=dataset_name, namespace=namespace, channel=channel) - print(resp) diff --git a/modelscope/utils/constant.py b/modelscope/utils/constant.py index 2729b75a..f0a97dbd 100644 --- a/modelscope/utils/constant.py 
+++ b/modelscope/utils/constant.py @@ -238,6 +238,14 @@ class DownloadMode(enum.Enum): FORCE_REDOWNLOAD = 'force_redownload' +class DownloadChannel(enum.Enum): + """ Channels of datasets downloading for uv/pv counting. + """ + LOCAL = 'local' + DSW = 'dsw' + EAIS = 'eais' + + class UploadMode(enum.Enum): """ How to upload object to remote. """ diff --git a/requirements/framework.txt b/requirements/framework.txt index 17fbd8a3..e78bc9a9 100644 --- a/requirements/framework.txt +++ b/requirements/framework.txt @@ -1,7 +1,7 @@ addict attrs # version beyond 2.6.0 introduces compatbility issue and is being resolved -datasets<=2.6.0 +datasets<=2.5.2 easydict einops filelock>=3.3.0 From 79c44a68102e182b3194e3b9e6244d4891859274 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8F=AD=E6=89=AC?= Date: Tue, 1 Nov 2022 15:41:01 +0800 Subject: [PATCH 3/5] add event tracking --- requirements/framework.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/framework.txt b/requirements/framework.txt index e78bc9a9..a86c0cc5 100644 --- a/requirements/framework.txt +++ b/requirements/framework.txt @@ -1,6 +1,6 @@ addict attrs -# version beyond 2.6.0 introduces compatbility issue and is being resolved +# version beyond 2.5.2 introduces compatibility issue and is being resolved datasets<=2.5.2 easydict einops From 63a08e7be68bce218eb6ca755ecbc821017d83b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8F=AD=E6=89=AC?= Date: Tue, 1 Nov 2022 15:49:21 +0800 Subject: [PATCH 4/5] add event tracking --- tests/msdatasets/test_dataset_upload.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/msdatasets/test_dataset_upload.py b/tests/msdatasets/test_dataset_upload.py index 3d35d480..b67c2ebb 100644 --- a/tests/msdatasets/test_dataset_upload.py +++ b/tests/msdatasets/test_dataset_upload.py @@ -104,7 +104,11 @@ class DatasetUploadTest(unittest.TestCase): @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def
test_ds_download_dir(self): - test_ds = MsDataset.load(self.dataset_name, self.namespace) + from modelscope.utils.constant import DownloadMode + test_ds = MsDataset.load( + self.dataset_name, + namespace=self.namespace, + download_mode=DownloadMode.FORCE_REDOWNLOAD) assert test_ds.config_kwargs['split_config'].values() @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') From e45ab2c32d66a3ae8014be045d773719b82cb0cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8F=AD=E6=89=AC?= Date: Tue, 1 Nov 2022 15:51:00 +0800 Subject: [PATCH 5/5] add event tracking --- tests/msdatasets/test_dataset_upload.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/msdatasets/test_dataset_upload.py b/tests/msdatasets/test_dataset_upload.py index b67c2ebb..d91f24d7 100644 --- a/tests/msdatasets/test_dataset_upload.py +++ b/tests/msdatasets/test_dataset_upload.py @@ -8,7 +8,8 @@ import zipfile from modelscope.msdatasets import MsDataset from modelscope.msdatasets.utils.dataset_utils import list_dataset_objects from modelscope.utils import logger as logging -from modelscope.utils.constant import DEFAULT_DATASET_REVISION, ModelFile +from modelscope.utils.constant import (DEFAULT_DATASET_REVISION, DownloadMode, + ModelFile) from modelscope.utils.test_utils import test_level logger = logging.get_logger(__name__) @@ -104,7 +105,6 @@ class DatasetUploadTest(unittest.TestCase): @unittest.skipUnless(test_level() >= 1, 'skip test in current test level') def test_ds_download_dir(self): - from modelscope.utils.constant import DownloadMode test_ds = MsDataset.load( self.dataset_name, namespace=self.namespace,