Browse Source

Merge branch dev/msdataset_event_tracking into master

Title: [to #42322933] add event tracking 

1. add event tracking for dataset downloading pv/uv
2. change datasets version: <=2.5.2
        Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/10593016
master
mulin.lyh 2 years ago
parent
commit
4429991646
5 changed files with 39 additions and 6 deletions
  1. +21
    -2
      modelscope/hub/api.py
  2. +2
    -0
      modelscope/msdatasets/ms_dataset.py
  3. +8
    -0
      modelscope/utils/constant.py
  4. +2
    -2
      requirements/framework.txt
  5. +6
    -2
      tests/msdatasets/test_dataset_upload.py

+ 21
- 2
modelscope/hub/api.py View File

@@ -39,8 +39,8 @@ from modelscope.utils.constant import (DEFAULT_DATASET_REVISION,
DEFAULT_MODEL_REVISION,
DEFAULT_REPOSITORY_REVISION,
MASTER_MODEL_BRANCH, DatasetFormations,
DatasetMetaFormats, DownloadMode,
ModelFile)
DatasetMetaFormats, DownloadChannel,
DownloadMode, ModelFile)
from modelscope.utils.logger import get_logger
from .utils.utils import (get_endpoint, get_release_datetime,
model_id_to_group_owner_name)
@@ -646,6 +646,25 @@ class HubApi:
def check_local_cookies(self, use_cookies) -> CookieJar:
    """Forward ``use_cookies`` to ``self._check_cookie`` and return its result.

    Args:
        use_cookies: Passed through unchanged as the ``use_cookies``
            keyword argument of ``self._check_cookie``.

    Returns:
        Whatever ``self._check_cookie`` returns (annotated as a CookieJar).
    """
    cookies = self._check_cookie(use_cookies=use_cookies)
    return cookies

def dataset_download_uv(self, dataset_name: str, namespace: str):
    """Post a dataset-download uv event to the hub endpoint.

    The request goes to
    ``{endpoint}/api/v1/datasets/{namespace}/{dataset_name}/download/uv/{channel}``
    with the user name sent as the ``user`` query parameter.

    Args:
        dataset_name (str): Name of the dataset being downloaded.
        namespace (str): Namespace the dataset belongs to.

    Returns:
        The ``Message`` field of the JSON response.

    Raises:
        ValueError: If ``dataset_name`` or ``namespace`` is empty.
    """
    if not dataset_name or not namespace:
        raise ValueError('dataset_name or namespace cannot be empty!')

    # Channel and user name come from the environment when set; otherwise
    # the channel falls back to LOCAL and the user name to an empty string.
    channel = os.environ.get(MODELSCOPE_ENVIRONMENT,
                             DownloadChannel.LOCAL.value)
    user_name = os.environ.get(MODELSCOPE_USERNAME, '')

    url = (f'{self.endpoint}/api/v1/datasets/{namespace}/{dataset_name}'
           f'/download/uv/{channel}')
    cookies = ModelScopeConfig.get_cookies()
    # Send the user name through ``params`` so it is URL-encoded; a raw
    # f-string would break the query on '&', '#', spaces or non-ASCII text.
    r = requests.post(
        url,
        params={'user': user_name},
        cookies=cookies,
        headers=self.headers)
    resp = r.json()
    # NOTE(review): raise_on_error presumably raises when the response
    # reports a failure -- confirm against its definition.
    raise_on_error(resp)
    return resp['Message']


class ModelScopeConfig:
path_credential = expanduser(DEFAULT_CREDENTIALS_PATH)


+ 2
- 0
modelscope/msdatasets/ms_dataset.py View File

@@ -274,6 +274,8 @@ class MsDataset:
try:
api.on_dataset_download(
dataset_name=download_dataset, namespace=namespace)
api.dataset_download_uv(
dataset_name=download_dataset, namespace=namespace)
except Exception as e:
logger.error(e)



+ 8
- 0
modelscope/utils/constant.py View File

@@ -238,6 +238,14 @@ class DownloadMode(enum.Enum):
FORCE_REDOWNLOAD = 'force_redownload'


class DownloadChannel(enum.Enum):
    """Download channels distinguished when counting dataset uv/pv."""

    # Each member's value is the channel string itself.
    LOCAL = 'local'
    DSW = 'dsw'
    EAIS = 'eais'


class UploadMode(enum.Enum):
""" How to upload object to remote.
"""


+ 2
- 2
requirements/framework.txt View File

@@ -1,7 +1,7 @@
addict
attrs
# versions beyond 2.6.0 introduce a compatibility issue that is being resolved
datasets<=2.6.0
# versions beyond 2.5.2 introduce a compatibility issue that is being resolved
datasets<=2.5.2
easydict
einops
filelock>=3.3.0


+ 6
- 2
tests/msdatasets/test_dataset_upload.py View File

@@ -8,7 +8,8 @@ import zipfile
from modelscope.msdatasets import MsDataset
from modelscope.msdatasets.utils.dataset_utils import list_dataset_objects
from modelscope.utils import logger as logging
from modelscope.utils.constant import DEFAULT_DATASET_REVISION, ModelFile
from modelscope.utils.constant import (DEFAULT_DATASET_REVISION, DownloadMode,
ModelFile)
from modelscope.utils.test_utils import test_level

logger = logging.get_logger(__name__)
@@ -104,7 +105,10 @@ class DatasetUploadTest(unittest.TestCase):

@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')
def test_ds_download_dir(self):
test_ds = MsDataset.load(self.dataset_name, self.namespace)
test_ds = MsDataset.load(
self.dataset_name,
namespace=self.namespace,
download_mode=DownloadMode.FORCE_REDOWNLOAD)
assert test_ds.config_kwargs['split_config'].values()

@unittest.skipUnless(test_level() >= 1, 'skip test in current test level')


Loading…
Cancel
Save