
[MNT] modify details and format code

tags/v0.3.2
Gene committed 2 years ago
parent commit edf6b76bf5
11 changed files with 21 additions and 44 deletions
 1. +0 -5  .github/workflows/install_learnware_with_pip.yaml
 2. +0 -5  .github/workflows/install_learnware_with_source.yaml
 3. +1 -3  examples/dataset_m5_workflow/main.py
 4. +1 -3  examples/dataset_pfs_workflow/main.py
 5. +6 -4  examples/dataset_text_workflow2/get_data.py
 6. +2 -2  examples/dataset_text_workflow2/main.py
 7. +8 -8  examples/dataset_text_workflow2/utils.py
 8. +1 -1  learnware/market/easy/checker.py
 9. +2 -3  learnware/specification/regular/table/rkme.py
10. +0 -9  setup.py
11. +0 -1  tests/test_learnware_client/test_check_learnware.py

+0 -5  .github/workflows/install_learnware_with_pip.yaml

@@ -39,11 +39,6 @@ jobs:
          conda run -n learnware python -m pip install --upgrade pip
          conda run -n learnware python -m pip install pytest
 
-      - name: Install faiss for MacOS
-        if: ${{ matrix.os == 'macos-11' || matrix.os == 'macos-latest' }}
-        run: |
-          conda run -n learnware conda install -c pytorch faiss
-
       - name: Install learnware
         run: |
           conda run -n learnware python -m pip install learnware


+0 -5  .github/workflows/install_learnware_with_source.yaml

@@ -44,11 +44,6 @@ jobs:
          # stop the build if there are Python syntax errors or undefined names
          conda run -n learnware python -m flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
 
-      - name: Install faiss for MacOS
-        if: ${{ matrix.os == 'macos-11' || matrix.os == 'macos-latest' }}
-        run: |
-          conda run -n learnware conda install -c pytorch faiss
-
       - name: Install learnware
         run: |
           conda run -n learnware python -m pip install .


+1 -3  examples/dataset_m5_workflow/main.py

@@ -8,7 +8,6 @@ from shutil import copyfile, rmtree
 
 import learnware
 from learnware.market import instantiate_learnware_market, BaseUserInfo
-# from learnware.market import database_ops
 from learnware.reuse import JobSelectorReuser, AveragingReuser
 from learnware.specification import generate_rkme_spec
 from m5 import DataLoader
@@ -51,6 +50,7 @@ user_semantic = {
     "Output": output_description,
 }
 
+
 class M5DatasetWorkflow:
     def _init_m5_dataset(self):
         m5 = DataLoader()
@@ -82,8 +82,6 @@ class M5DatasetWorkflow:
         easy_market.add_learnware(zip_path, semantic_spec)
 
         print("Total Item:", len(easy_market))
-        # curr_inds = easy_market._get_ids()
-        # print("Available ids:", curr_inds)
 
     def prepare_learnware(self, regenerate_flag=False):
         if regenerate_flag:


+1 -3  examples/dataset_pfs_workflow/main.py

@@ -55,7 +55,7 @@ class PFSDatasetWorkflow:
         pfs = Dataloader()
         pfs.regenerate_data()
 
-        algo_list = ["ridge"] # "ridge", "lgb"
+        algo_list = ["ridge"]  # "ridge", "lgb"
         for algo in algo_list:
             pfs.set_algo(algo)
             pfs.retrain_models()
@@ -79,8 +79,6 @@ class PFSDatasetWorkflow:
         easy_market.add_learnware(zip_path, semantic_spec)
 
         print("Total Item:", len(easy_market))
-        # curr_inds = easy_market._get_ids()
-        # print("Available ids:", curr_inds)
 
     def prepare_learnware(self, regenerate_flag=False):
         if regenerate_flag:


+6 -4  examples/dataset_text_workflow2/get_data.py

@@ -8,7 +8,9 @@ def get_data(data_root="./data"):
     dtest = pd.read_csv(os.path.join(data_root, "test.csv"))
 
     # returned X(DataFrame), y(Series)
-    return (dtrain[['discourse_text', 'discourse_type']],
-            dtrain["discourse_effectiveness"],
-            dtest[['discourse_text', 'discourse_type']],
-            dtest["discourse_effectiveness"])
+    return (
+        dtrain[["discourse_text", "discourse_type"]],
+        dtrain["discourse_effectiveness"],
+        dtest[["discourse_text", "discourse_type"]],
+        dtest["discourse_effectiveness"],
+    )

+2 -2  examples/dataset_text_workflow2/main.py

@@ -78,10 +78,10 @@ def prepare_model():
         modelv_save_path = os.path.join(model_save_root, "uploader_v_%d.pth" % (i))
         modell_save_path = os.path.join(model_save_root, "uploader_l_%d.pth" % (i))
 
-        with open(modelv_save_path, 'wb') as f:
+        with open(modelv_save_path, "wb") as f:
             pickle.dump(vectorizer, f)
 
-        with open(modell_save_path, 'wb') as f:
+        with open(modell_save_path, "wb") as f:
             pickle.dump(lgbm, f)
 
         logger.info("Model saved to '%s' and '%s'" % (modelv_save_path, modell_save_path))


+8 -8  examples/dataset_text_workflow2/utils.py

@@ -39,11 +39,11 @@ def generate_uploader(data_x: pd.Series, data_y: pd.Series, n_uploaders=50, data
         return
     os.makedirs(data_save_root, exist_ok=True)
 
-    types = data_x['discourse_type'].unique()
+    types = data_x["discourse_type"].unique()
 
     for i in range(n_uploaders):
-        indices = data_x['discourse_type'] == types[i]
-        selected_X = data_x[indices]['discourse_text'].to_list()
+        indices = data_x["discourse_type"] == types[i]
+        selected_X = data_x[indices]["discourse_text"].to_list()
         selected_y = data_y[indices].to_list()
 
         X_save_dir = os.path.join(data_save_root, "uploader_%d_X.pkl" % (i))
@@ -61,11 +61,11 @@ def generate_user(data_x, data_y, n_users=50, data_save_root=None):
         return
     os.makedirs(data_save_root, exist_ok=True)
 
-    types = data_x['discourse_type'].unique()
+    types = data_x["discourse_type"].unique()
 
     for i in range(n_users):
-        indices = data_x['discourse_type'] == types[i]
-        selected_X = data_x[indices]['discourse_text'].to_list()
+        indices = data_x["discourse_type"] == types[i]
+        selected_X = data_x[indices]["discourse_text"].to_list()
         selected_y = data_y[indices].to_list()
 
         X_save_dir = os.path.join(data_save_root, "user_%d_X.pkl" % (i))
@@ -80,10 +80,10 @@
 
 # Train Uploaders' models
 def train(X, y, out_classes):
-    vectorizer = TfidfVectorizer(stop_words='english')
+    vectorizer = TfidfVectorizer(stop_words="english")
     X_tfidf = vectorizer.fit_transform(X)
 
-    lgbm = LGBMClassifier(boosting_type='dart', n_estimators=500, num_leaves=21)
+    lgbm = LGBMClassifier(boosting_type="dart", n_estimators=500, num_leaves=21)
     lgbm.fit(X_tfidf, y)
 
     return vectorizer, lgbm
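
For context, a hypothetical end-to-end run of the train() helper from the last hunk, wired to the same pickling pattern main.py uses; the toy texts, labels, and output filename are invented for illustration and assume scikit-learn and lightgbm are installed:

import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from lightgbm import LGBMClassifier

# Same helper as in the hunk above (out_classes is accepted but unused there too).
def train(X, y, out_classes):
    vectorizer = TfidfVectorizer(stop_words="english")
    X_tfidf = vectorizer.fit_transform(X)
    lgbm = LGBMClassifier(boosting_type="dart", n_estimators=500, num_leaves=21)
    lgbm.fit(X_tfidf, y)
    return vectorizer, lgbm

texts = ["one claim about cats", "strong evidence on dogs", "weak rebuttal on birds"] * 20
labels = [0, 1, 2] * 20
vectorizer, lgbm = train(texts, labels, out_classes=3)

# "wb" mode matches the quote-normalized open() calls in main.py above.
with open("uploader_v_0.pth", "wb") as f:
    pickle.dump(vectorizer, f)
print(lgbm.predict(vectorizer.transform(["a brand-new claim"])))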


+1 -1  learnware/market/easy/checker.py

@@ -116,7 +116,7 @@ class EasyStatChecker(BaseChecker):
             elif spec_type == "RKMEImageSpecification":
                 inputs = np.random.randint(0, 255, size=(10, *input_shape))
             else:
-                raise ValueError(f"not supported spec type for spec_type = {spec_type}")
+                raise ValueError(f"not supported spec type for spec_type = {spec_type}")
 
         # Check output
         try:


+2 -3  learnware/specification/regular/table/rkme.py

@@ -1,14 +1,12 @@
 from __future__ import annotations
 
 import os
-
 import copy
 import torch
 import json
 import codecs
 import random
 import numpy as np
-# from cvxopt import solvers, matrix
 from qpsolvers import solve_qp, Problem, solve_problem
 from collections import Counter
 from typing import Tuple, Any, List, Union, Dict
@@ -20,6 +18,7 @@ from ....logger import get_module_logger
 
 logger = get_module_logger("rkme")
 
+
 class RKMETableSpecification(RegularStatsSpecification):
     """Reduced Kernel Mean Embedding (RKME) Specification"""
 
@@ -137,7 +136,7 @@ class RKMETableSpecification(RegularStatsSpecification):
             Size of the construced reduced set.
         """
         X = X.astype("float32")
-        kmeans = MiniBatchKMeans(n_clusters=K, max_iter=100, verbose=False, n_init='auto')
+        kmeans = MiniBatchKMeans(n_clusters=K, max_iter=100, verbose=False, n_init="auto")
         kmeans.fit(X)
         center = torch.from_numpy(kmeans.cluster_centers_).double()
         self.z = center
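
A side note on the two dependency-sensitive edits in rkme.py: the QP solver now comes from qpsolvers rather than the commented-out cvxopt, and MiniBatchKMeans is pinned to n_init="auto". A minimal sketch of both APIs follows; the toy matrices and data are invented for illustration, not taken from the commit:

import numpy as np
from qpsolvers import solve_qp
from sklearn.cluster import MiniBatchKMeans

# Toy convex QP: minimize 1/2 x^T P x + q^T x  subject to  G x <= h.
P = np.eye(3)
q = np.array([-1.0, -2.0, -3.0])
G = np.ones((1, 3))
h = np.array([3.0])
x = solve_qp(P, q, G, h, solver="clarabel")  # backend from qpsolvers[clarabel] in setup.py
print("QP solution:", x)

# Passing n_init="auto" explicitly, as the hunk does, avoids the FutureWarning
# that recent scikit-learn releases emit when n_init is left at its old default.
X = np.random.rand(100, 4).astype("float32")
kmeans = MiniBatchKMeans(n_clusters=5, max_iter=100, verbose=False, n_init="auto")
kmeans.fit(X)
print("centers shape:", kmeans.cluster_centers_.shape)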


+0 -9  setup.py

@@ -54,12 +54,6 @@ REQUIRED = [
     "numpy>=1.20.0",
     "pandas>=0.25.1",
     "scipy>=1.0.0",
-<<<<<<< HEAD
-    "matplotlib>=3.1.3",
-    "torch>=1.11.0",
-=======
-    "cvxopt>=1.3.0",
->>>>>>> 93df27b2a16a169ecfb93e3a2e149b6c1ea56902
     "tqdm>=4.65.0",
     "scikit-learn>=0.22",
     "joblib>=1.2.0",
@@ -76,9 +70,6 @@ REQUIRED = [
     "qpsolvers[clarabel]>=4.0.1",
 ]
 
-# if get_platform() != MACOS:
-#     REQUIRED.append("faiss-cpu>=1.7.1")
-
 here = os.path.abspath(os.path.dirname(__file__))
 with open(os.path.join(here, "README.md"), encoding="utf-8") as f:
     long_description = f.read()
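
Unlike the cosmetic hunks elsewhere, this deletion is a correctness fix: the removed <<<<<<< / ======= / >>>>>>> lines appear to be leftover merge-conflict markers, which make setup.py syntactically invalid, so a source build would fail before the dependency list is ever read. A quick sketch of the failure mode, using a fabricated miniature of the conflicted file rather than the real one:

import ast

# Made-up snippet standing in for the pre-fix setup.py; conflict markers are not valid Python.
conflicted = '''REQUIRED = [
    "scipy>=1.0.0",
<<<<<<< HEAD
    "torch>=1.11.0",
=======
    "cvxopt>=1.3.0",
>>>>>>> somecommit
]
'''
try:
    ast.parse(conflicted)
except SyntaxError as err:
    print("setup.py would not even parse:", err.msg)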


+0 -1  tests/test_learnware_client/test_check_learnware.py

@@ -32,4 +32,3 @@ class TestCheckLearnware(unittest.TestCase):
 
 if __name__ == "__main__":
     unittest.main()
-

