From c4bfd6cceddec741bb6d5aeb22b8b236ed22bcd2 Mon Sep 17 00:00:00 2001
From: "wenmeng.zwm"
Date: Tue, 31 May 2022 11:49:46 +0800
Subject: [PATCH] [to #41999503] refine doc and requirements for linux and mac

1. refine quick start and pipeline doc
2. remove tf pytorch easynlp from requirements
3. lazy import for torch and tensorflow
4. test successfully on linux and mac intel cpu
5. update api doc

Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/8882373
---
 docs/source/api/maas_lib.pipelines.audio.rst |  7 ++
 docs/source/api/maas_lib.trainers.nlp.rst    | 18 +++++
 docs/source/api/maas_lib.trainers.rst        | 34 ++++++++
 docs/source/faq.md                           | 31 +++++++
 docs/source/quick_start.md                   | 81 +++++++++++++------
 docs/source/tutorials/pipeline.md            | 19 +++--
 maas_lib/models/__init__.py                  |  1 +
 .../nlp/sequence_classification_model.py     |  2 +-
 maas_lib/pipelines/cv/image_matting.py       |  7 +-
 maas_lib/version.py                          |  2 +-
 requirements/maas.txt                        |  2 +
 requirements/pipeline.txt                    |  8 +-
 requirements/runtime.txt                     |  3 +-
 13 files changed, 171 insertions(+), 44 deletions(-)
 create mode 100644 docs/source/api/maas_lib.pipelines.audio.rst
 create mode 100644 docs/source/api/maas_lib.trainers.nlp.rst
 create mode 100644 docs/source/api/maas_lib.trainers.rst
 create mode 100644 docs/source/faq.md
 create mode 100644 requirements/maas.txt

diff --git a/docs/source/api/maas_lib.pipelines.audio.rst b/docs/source/api/maas_lib.pipelines.audio.rst
new file mode 100644
index 00000000..71e29b42
--- /dev/null
+++ b/docs/source/api/maas_lib.pipelines.audio.rst
@@ -0,0 +1,7 @@
+maas\_lib.pipelines.audio package
+=================================
+
+.. automodule:: maas_lib.pipelines.audio
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/source/api/maas_lib.trainers.nlp.rst b/docs/source/api/maas_lib.trainers.nlp.rst
new file mode 100644
index 00000000..71f484ca
--- /dev/null
+++ b/docs/source/api/maas_lib.trainers.nlp.rst
@@ -0,0 +1,18 @@
+maas\_lib.trainers.nlp package
+==============================
+
+.. automodule:: maas_lib.trainers.nlp
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+Submodules
+----------
+
+maas\_lib.trainers.nlp.sequence\_classification\_trainer module
+---------------------------------------------------------------
+
+.. automodule:: maas_lib.trainers.nlp.sequence_classification_trainer
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/source/api/maas_lib.trainers.rst b/docs/source/api/maas_lib.trainers.rst
new file mode 100644
index 00000000..eb90ee4f
--- /dev/null
+++ b/docs/source/api/maas_lib.trainers.rst
@@ -0,0 +1,34 @@
+maas\_lib.trainers package
+==========================
+
+.. automodule:: maas_lib.trainers
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+Subpackages
+-----------
+
+.. toctree::
+   :maxdepth: 4
+
+   maas_lib.trainers.nlp
+
+Submodules
+----------
+
+maas\_lib.trainers.base module
+------------------------------
+
+.. automodule:: maas_lib.trainers.base
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+maas\_lib.trainers.builder module
+---------------------------------
+
+.. automodule:: maas_lib.trainers.builder
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/source/faq.md b/docs/source/faq.md
new file mode 100644
index 00000000..a93fafdc
--- /dev/null
+++ b/docs/source/faq.md
@@ -0,0 +1,31 @@
+# FAQ
+
+
+
+### 1. Installing tokenizers with pip fails on macOS
+
+PyPI does not ship a prebuilt wheel of the tokenizers library for `macOS`, so a source-build toolchain has to be set up before it can be installed. The steps are as follows:
+
+1. Install Rust
+   ```shell
+   curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
+   pip install setuptools_rust
+   ```
+
+2. Update the Rust environment variables
+
+   ```shell
+   source $HOME/.cargo/env
+   ```
+3. Install tokenizers
+   ```shell
+   pip install tokenizers
+   ```
+Reference: [https://huggingface.co/docs/tokenizers/installation#installation-from-sources](https://huggingface.co/docs/tokenizers/installation#installation-from-sources)
+
+### 2. pip package conflicts
+
+> ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
+
+Because some dependencies require mutually incompatible versions, pip may report version conflicts; in most cases this does not affect normal operation.
diff --git a/docs/source/quick_start.md b/docs/source/quick_start.md
index 3b483081..de5f8da8 100644
--- a/docs/source/quick_start.md
+++ b/docs/source/quick_start.md
@@ -1,17 +1,53 @@
 # Quick Start
-## Environment setup
+## Python environment setup
+First, install and configure Anaconda by following its [documentation](https://docs.anaconda.com/anaconda/install/).
 
-Option 1: install the whl package by running the following command
+Once Anaconda is installed, run the following commands to create a dedicated Python environment for the MaaS library.
 ```shell
-pip install http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/release/maas_lib-0.1.0-py3-none-any.whl
+conda create -n maas python=3.6
+conda activate maas
 ```
+Check that the python and pip commands now point to the conda environment.
+```shell
+which python
+# ~/workspace/anaconda3/envs/maas/bin/python
+
+which pip
+# ~/workspace/anaconda3/envs/maas/bin/pip
+```
+Note: this project only supports `python3`; do not use a Python 2 environment.
+
+## Installing third-party dependencies
+
+The MaaS library supports both TensorFlow and PyTorch for model training and inference, and has been tested on Python 3.6+, PyTorch 1.8+ and TensorFlow 2.6. Install whichever framework the model you plan to use requires, following the links below:
+
+* [PyTorch installation guide](https://pytorch.org/get-started/locally/)
+* [TensorFlow installation guide](https://www.tensorflow.org/install/pip)
+
 
-Option 2: use the source tree directly; this suits local development and debugging, and source changes take effect immediately
+## Installing the MaaS library
+
+Note: if you run into an error during installation, check the [FAQ](faq.md) for a solution.
+
+### Install with pip
+```shell
+pip install -r http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/release/maas/maas.txt
+```
+
+After a successful installation, run the following command to verify that everything works:
+```shell
+python -c "from maas_lib.pipelines import pipeline;print(pipeline('image-matting',model='damo/image-matting-person')('http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png'))"
+```
+
+
+### Install from source
+
+This approach suits local development and debugging; source changes take effect without reinstalling.
 ```shell
 git clone git@gitlab.alibaba-inc.com:Ali-MaaS/MaaS-lib.git maaslib
-git fetch origin release/0.1
-git checkout release/0.1
+git fetch origin master
+git checkout master
 
 cd maaslib
@@ -22,7 +58,11 @@
 pip install -r requirements.txt
 export PYTHONPATH=`pwd`
 ```
-Note: the requirements currently cannot be installed on Mac ARM CPUs because of dependency version issues; please test on Mac Intel CPU or Linux CPU/GPU machines.
+After a successful installation, run the following command to verify that everything works:
+```shell
+python -c "from maas_lib.pipelines import pipeline;print(pipeline('image-matting',model='damo/image-matting-person')('http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png'))"
+```
+
 
 ## Training
@@ -34,31 +74,22 @@
 to be done
 
 to be done
 
 ## Inference
-to be done
-
+print(f'result file path is {osp.abspath("result.png")}')
+```
diff --git a/docs/source/tutorials/pipeline.md b/docs/source/tutorials/pipeline.md
index ad73c773..a91d15bd 100644
--- a/docs/source/tutorials/pipeline.md
+++ b/docs/source/tutorials/pipeline.md
@@ -27,7 +27,7 @@
    Run the following Python code
    ```python
    >>> from maas_lib.pipelines import pipeline
-   >>> img_matting = pipeline(task='image-matting', model_path='matting_person.pb')
+   >>> img_matting = pipeline(task='image-matting', model='damo/image-matting-person')
    ```
 
 2. Pass a single image URL to be processed
@@ -35,6 +35,8 @@
    >>> import cv2
    >>> result = img_matting('http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png')
    >>> cv2.imwrite('result.png', result['output_png'])
+   >>> import os.path as osp
+   >>> print(f'result file path is {osp.abspath("result.png")}')
    ```
 
 The pipeline object also accepts a list of inputs and returns a list of outputs, one element per input sample.
@@ -57,10 +59,12 @@
 The pipeline function also accepts instantiated preprocessor and model objects, so users can customize preprocessing and the model during inference.
 The following walks through a text sentiment classification example.
 
+Because the demo model is provided by EasyNLP, first install EasyNLP:
+```shell
+pip install https://atp-modelzoo-sh.oss-cn-shanghai.aliyuncs.com/release/package/whl/easynlp-0.0.4-py2.py3-none-any.whl
+```
-Note: the current release does not yet provide the AutoModel syntactic sugar, so the model has to be instantiated manually; the corresponding sugar will be added later to simplify the call.
-
 Download the model files
 ```shell
 wget https://atp-modelzoo-sh.oss-cn-shanghai.aliyuncs.com/release/easynlp_modelzoo/alibaba-pai/bert-base-sst2.zip && unzip bert-base-sst2.zip
 ```
@@ -68,18 +72,17 @@ wget https://atp-modelzoo-sh.oss-cn-shanghai.aliyuncs.com/release/easynlp_modelz
 
 Create the tokenizer and the model
 ```python
->>> from maas_lib.models.nlp import SequenceClassificationModel
->>> path = 'bert-base-sst2'
->>> model = SequenceClassificationModel(path)
+>>> from maas_lib.models import Model
 >>> from maas_lib.preprocessors import SequenceClassificationPreprocessor
+>>> model = Model.from_pretrained('damo/bert-base-sst2')
 >>> tokenizer = SequenceClassificationPreprocessor(
-        path, first_sequence='sentence', second_sequence=None)
+        model.model_dir, first_sequence='sentence', second_sequence=None)
 ```
 
 Create a pipeline from the tokenizer and model objects
 ```python
 >>> from maas_lib.pipelines import pipeline
->>> semantic_cls = pipeline('text-classification', model=model, preprocessor=tokenizer)
+>>> semantic_cls = pipeline('text-classification', model=model, preprocessor=tokenizer)
 >>> semantic_cls("Hello world!")
 ```
diff --git a/maas_lib/models/__init__.py b/maas_lib/models/__init__.py
index f1ba8980..aa1b3f14 100644
--- a/maas_lib/models/__init__.py
+++ b/maas_lib/models/__init__.py
@@ -2,3 +2,4 @@
 
 from .base import Model
 from .builder import MODELS, build_model
+from .nlp import SequenceClassificationModel
diff --git a/maas_lib/models/nlp/sequence_classification_model.py b/maas_lib/models/nlp/sequence_classification_model.py
index dbb86105..d29587a0 100644
--- a/maas_lib/models/nlp/sequence_classification_model.py
+++ b/maas_lib/models/nlp/sequence_classification_model.py
@@ -1,7 +1,6 @@
 from typing import Any, Dict, Optional, Union
 
 import numpy as np
-import torch
 
 from maas_lib.utils.constant import Tasks
 from ..base import Model
@@ -26,6 +25,7 @@ class SequenceClassificationModel(Model):
         super().__init__(model_dir, *args, **kwargs)
         from easynlp.appzoo import SequenceClassification
         from easynlp.core.predictor import get_model_predictor
+        import torch
         self.model = get_model_predictor(
             model_dir=self.model_dir,
             model_cls=SequenceClassification,
diff --git a/maas_lib/pipelines/cv/image_matting.py b/maas_lib/pipelines/cv/image_matting.py
index 73796552..fdb443f9 100644
--- a/maas_lib/pipelines/cv/image_matting.py
+++ b/maas_lib/pipelines/cv/image_matting.py
@@ -4,7 +4,6 @@ from typing import Any, Dict, List, Tuple, Union
 import cv2
 import numpy as np
 import PIL
-import tensorflow as tf
 from cv2 import COLOR_GRAY2RGB
 
 from maas_lib.pipelines.base import Input
@@ -14,9 +13,6 @@ from maas_lib.utils.logger import get_logger
 from ..base import Pipeline
 from ..builder import PIPELINES
 
-if tf.__version__ >= '2.0':
-    tf = tf.compat.v1
-
 logger = get_logger()
 
 
@@ -26,6 +22,9 @@ class ImageMatting(Pipeline):
 
     def __init__(self, model: str):
         super().__init__(model=model)
+        import tensorflow as tf
+        if tf.__version__ >= '2.0':
+            tf = tf.compat.v1
         model_path = osp.join(self.model, 'matting_person.pb')
 
         config = tf.ConfigProto(allow_soft_placement=True)
diff --git a/maas_lib/version.py b/maas_lib/version.py
index b794fd40..df9144c5 100644
--- a/maas_lib/version.py
+++ b/maas_lib/version.py
@@ -1 +1 @@
-__version__ = '0.1.0'
+__version__ = '0.1.1'
diff --git a/requirements/maas.txt b/requirements/maas.txt
new file mode 100644
index 00000000..3b64c375
--- /dev/null
+++ b/requirements/maas.txt
@@ -0,0 +1,2 @@
+http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/release/maas/maas_lib-0.1.1-py3-none-any.whl
+https://maashub.oss-cn-hangzhou.aliyuncs.com/releases/maas_hub-0.1.0.dev0-py2.py3-none-any.whl
diff --git a/requirements/pipeline.txt b/requirements/pipeline.txt
index 259bbb1b..64500a6b 100644
--- a/requirements/pipeline.txt
+++ b/requirements/pipeline.txt
@@ -1,6 +1,6 @@
 #https://atp-modelzoo-sh.oss-cn-shanghai.aliyuncs.com/release/package/whl/easynlp-0.0.4-py2.py3-none-any.whl
-tensorflow
+# tensorflow
 #--find-links https://download.pytorch.org/whl/torch_stable.html
-torch<1.10,>=1.8.0
-torchaudio
-torchvision
+# torch<1.10,>=1.8.0
+# torchaudio
+# torchvision
diff --git a/requirements/runtime.txt b/requirements/runtime.txt
index 303a084c..8f74e780 100644
--- a/requirements/runtime.txt
+++ b/requirements/runtime.txt
@@ -5,5 +5,6 @@ opencv-python-headless
 Pillow
 pyyaml
 requests
-transformers
+tokenizers<=0.10.3
+transformers<=4.16.2
 yapf
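
For convenience, the snippet below consolidates the two pipeline examples documented by this patch (image matting and text classification) into a single runnable sketch. It is illustrative only: it assumes maas_lib, a supported deep learning framework and EasyNLP are installed, and that the `damo/image-matting-person` and `damo/bert-base-sst2` models referenced in the docs can be downloaded.

```python
# Minimal usage sketch assembled from the examples in the patched docs.
# Assumes maas_lib, its framework dependencies and EasyNLP are installed
# and that the referenced models are reachable.
import os.path as osp

import cv2

from maas_lib.models import Model
from maas_lib.pipelines import pipeline
from maas_lib.preprocessors import SequenceClassificationPreprocessor

# Image matting: pass an image URL and write the matted PNG locally.
img_matting = pipeline(task='image-matting', model='damo/image-matting-person')
result = img_matting('http://pai-vision-data-hz.oss-cn-zhangjiakou.aliyuncs.com/data/test/maas/image_matting/test.png')
cv2.imwrite('result.png', result['output_png'])
print(f'result file path is {osp.abspath("result.png")}')

# Text classification: instantiate the model and preprocessor explicitly,
# then wire them into a pipeline.
model = Model.from_pretrained('damo/bert-base-sst2')
tokenizer = SequenceClassificationPreprocessor(
    model.model_dir, first_sequence='sentence', second_sequence=None)
semantic_cls = pipeline('text-classification', model=model, preprocessor=tokenizer)
print(semantic_cls('Hello world!'))
```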