
config.py 17 kB

from learnware.tests.benchmarks import BenchmarkConfig

homo_n_labeled_list = [100, 200, 500, 1000, 2000, 4000, 6000, 8000, 10000]
homo_n_repeat_list = [10, 10, 10, 3, 3, 3, 3, 3, 3]
hetero_n_labeled_list = [10, 30, 50, 75, 100, 200, 500, 1000, 2000]
hetero_n_repeat_list = [10, 10, 10, 10, 10, 10, 3, 3, 3]

user_semantic = {
    "Data": {"Values": ["Table"], "Type": "Class"},
    "Task": {"Values": ["Regression"], "Type": "Class"},
    "Library": {"Values": ["Others"], "Type": "Class"},
    "Scenario": {"Values": ["Business"], "Type": "Tag"},
    "Description": {"Values": "", "Type": "String"},
    "Name": {"Values": "", "Type": "String"},
}

styles = {
    "user_model": {"color": "navy", "marker": "o", "linestyle": "-"},
    "select_score": {"color": "gold", "marker": "s", "linestyle": "--"},
    "oracle_score": {"color": "darkorange", "marker": "^", "linestyle": "-."},
    "mean_score": {"color": "gray", "marker": "x", "linestyle": ":"},
    "single_aug": {"color": "gold", "marker": "s", "linestyle": "--"},
    "multiple_avg": {"color": "blue", "marker": "*", "linestyle": "-"},
    "multiple_aug": {"color": "purple", "marker": "d", "linestyle": "--"},
    "ensemble_pruning": {"color": "magenta", "marker": "d", "linestyle": "-."},
}

labels = {
    "user_model": "User Model",
    "single_aug": "Single Learnware Reuse (FeatAug)",
    "select_score": "Single Learnware Reuse (FeatAug)",
    "multiple_aug": "Multiple Learnware Reuse (FeatAug)",
    "ensemble_pruning": "Multiple Learnware Reuse (EnsemblePrune)",
    "multiple_avg": "Multiple Learnware Reuse (Averaging)",
}
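
# --- Hedged usage sketch (not part of the original config) -------------------
# The `styles` and `labels` dicts above are shared plotting settings. The helper
# below shows one plausible way to consume them: it assumes a `results` dict
# mapping each method key to a list of scores, one per entry of a labeled-data
# size list such as `homo_n_labeled_list`. The function name and arguments are
# illustrative only.
import matplotlib.pyplot as plt


def plot_score_curves(results, n_labeled_list):
    """Plot one score curve per method, reusing the shared styles and labels."""
    fig, ax = plt.subplots(figsize=(6, 4))
    for method, scores in results.items():
        ax.plot(
            n_labeled_list[: len(scores)],
            scores,
            label=labels.get(method, method),
            **styles.get(method, {}),
        )
    ax.set_xlabel("Amount of labeled user data")
    ax.set_ylabel("RMSE")
    ax.legend()
    fig.tight_layout()
    return fig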
align_model_params = {
    "network_type": "ArbitraryMapping",  # ["ArbitraryMapping", "BaseMapping", "BaseMapping_BN", "BaseMapping_Dropout"]
    "num_epoch": 50,
    "lr": 1e-5,
    "dropout_ratio": 0.2,
    "activation": "relu",
    "use_bn": True,
    "hidden_dims": [128, 256, 128, 256],
}
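
# --- Hedged sketch (not the library's implementation) ------------------------
# One plausible reading of `align_model_params` as a feed-forward alignment
# network: a stack of Linear -> (BatchNorm) -> activation -> Dropout blocks
# sized by `hidden_dims`. The helper name, layer ordering, and input/output
# dimensions are assumptions for illustration; the actual "ArbitraryMapping"
# network in the learnware codebase may differ.
import torch.nn as nn


def build_alignment_mlp(in_dim, out_dim, params=align_model_params):
    layers, prev = [], in_dim
    for hidden in params["hidden_dims"]:
        layers.append(nn.Linear(prev, hidden))
        if params["use_bn"]:
            layers.append(nn.BatchNorm1d(hidden))
        layers.append(nn.ReLU() if params["activation"] == "relu" else nn.LeakyReLU())
        layers.append(nn.Dropout(params["dropout_ratio"]))
        prev = hidden
    layers.append(nn.Linear(prev, out_dim))
    return nn.Sequential(*layers)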
market_mapping_params = {
    "lr": 1e-4,
    "num_epoch": 50,
    "batch_size": 64,
    "num_partition": 2,  # number of column partitions for pos/neg sampling
    "overlap_ratio": 0.7,  # overlap ratio of column partitions during contrastive learning
    "hidden_dim": 256,  # dimension of the hidden embeddings
    "num_layer": 6,  # number of transformer layers in the encoder
    "num_attention_head": 8,  # number of heads in the multi-head self-attention layers; must evenly divide hidden_dim
    "hidden_dropout_prob": 0.5,  # dropout ratio in the transformer encoder
    "ffn_dim": 512,  # dimension of the feed-forward layer in each transformer layer
    "activation": "leakyrelu",
}
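
# --- Hedged sketch (not the library's implementation) ------------------------
# `market_mapping_params` describes a transformer encoder; the key constraint
# is that `hidden_dim` must be divisible by `num_attention_head`. The builder
# below uses torch.nn.TransformerEncoder purely as a stand-in: the real market
# mapping model is a tabular transformer and is likely structured differently.
import torch.nn.functional as F


def build_market_encoder(params=market_mapping_params):
    assert params["hidden_dim"] % params["num_attention_head"] == 0, (
        "hidden_dim must be divisible by num_attention_head"
    )
    layer = nn.TransformerEncoderLayer(
        d_model=params["hidden_dim"],
        nhead=params["num_attention_head"],
        dim_feedforward=params["ffn_dim"],
        dropout=params["hidden_dropout_prob"],
        activation=F.leaky_relu,  # torch has no "leakyrelu" string alias, so pass the callable
        batch_first=True,
    )
    return nn.TransformerEncoder(layer, num_layers=params["num_layer"])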
user_model_params = {
    "M5": {
        "lgb": {
            "params": {
                "boosting_type": "gbdt",
                "objective": "rmse",
                "metric": "rmse",
                "learning_rate": 0.015,
                "num_leaves": 300,
                "max_depth": 500,
                "n_estimators": 100000,
                "boost_from_average": False,
                "num_threads": 32,
                "verbose": -1,
            },
            "MAX_ROUNDS": 1000,
            "early_stopping_rounds": 1000,
        }
    },
    "PFS": {
        "lgb": {
            "params": {
                "boosting_type": "gbdt",
                "num_leaves": 2**7 - 1,
                "learning_rate": 0.01,
                "objective": "rmse",
                "metric": "rmse",
                "feature_fraction": 0.75,
                "bagging_fraction": 0.75,
                "bagging_freq": 5,
                "seed": 1,
                "verbose": -100,
                "n_estimators": 100000,
            },
            "MAX_ROUNDS": 1000,
            "early_stopping_rounds": 1000,
        }
    },
    "PFS_HOMO": {
        "lgb": {
            "params": {
                "boosting_type": "gbdt",
                "num_leaves": 2**7 - 1,
                "learning_rate": 0.01,
                "objective": "rmse",
                "metric": "rmse",
                "feature_fraction": 0.75,
                "bagging_fraction": 0.75,
                "bagging_freq": 5,
                "seed": 1,
                "verbose": -100,
                "n_estimators": 100000,
            },
            "MAX_ROUNDS": 1000,
            "early_stopping_rounds": 1000,
        }
    },
}
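
# --- Hedged sketch (not part of the original config) --------------------------
# How the per-benchmark LightGBM settings above could be used to fit a user
# model with early stopping. The data arguments are placeholders; the actual
# training code in the benchmark scripts may differ.
import lightgbm as lgb


def train_user_model(X_train, y_train, X_valid, y_valid, benchmark="PFS"):
    cfg = user_model_params[benchmark]["lgb"]
    train_set = lgb.Dataset(X_train, label=y_train)
    valid_set = lgb.Dataset(X_valid, label=y_valid, reference=train_set)
    return lgb.train(
        cfg["params"],
        train_set,
        num_boost_round=cfg["MAX_ROUNDS"],
        valid_sets=[valid_set],
        callbacks=[lgb.early_stopping(cfg["early_stopping_rounds"], verbose=False)],
    )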
homo_table_benchmark_config = BenchmarkConfig(
    name="PFS_HOMO",
    user_num=53,
    learnware_ids=[
        "00002265", "00002266", "00002267", "00002268", "00002269", "00002270", "00002271", "00002272", "00002273", "00002274",
        "00002275", "00002276", "00002277", "00002278", "00002279", "00002280", "00002281", "00002282", "00002283", "00002284",
        "00002285", "00002286", "00002287", "00002288", "00002289", "00002290", "00002291", "00002292", "00002293", "00002294",
        "00002295", "00002296", "00002297", "00002298", "00002299", "00002300", "00002301", "00002302", "00002303", "00002304",
        "00002305", "00002306", "00002307", "00002308", "00002309", "00002310", "00002311", "00002312", "00002313", "00002314",
        "00002315", "00002316", "00002317",
    ],
    test_data_path="PFS_HOMO/test_data.zip",
    train_data_path="PFS_HOMO/train_data.zip",
    extra_info_path="PFS_HOMO/extra_info.zip",
)
hetero_cross_feat_eng_benchmark_config = BenchmarkConfig(
    name="PFS",
    user_num=41,
    learnware_ids=[
        "00000342", "00000343", "00000344", "00000345", "00000346", "00000347", "00000348", "00000349", "00000350", "00000351",
        "00000352", "00000353", "00000354", "00000355", "00000356", "00000357", "00000358", "00000359", "00000360", "00000361",
        "00000362", "00000363", "00000364", "00000365", "00000366", "00000367", "00000368", "00000369", "00000370", "00000371",
        "00000372", "00000373", "00000374", "00000375", "00000376", "00000377", "00000378", "00000379", "00000380", "00000381",
        "00000382", "00000383", "00000384", "00000385", "00000386", "00000387", "00000388", "00000389", "00000390", "00000391",
        "00000392", "00000393", "00000394", "00000395", "00000396", "00000397", "00000398", "00000399", "00000400", "00000401",
        "00000402", "00000403", "00000404", "00000405", "00000406", "00000407", "00000408", "00000409", "00000410", "00000411",
        "00000412", "00000413", "00000414", "00000415", "00000416", "00000417", "00000418", "00000419", "00000420", "00000421",
        "00000422", "00000423", "00000424", "00000425", "00000426", "00000427", "00000428", "00000429", "00000430", "00000431",
        "00000432", "00000433", "00000434", "00000435", "00000436", "00000437", "00000438", "00000439", "00000440", "00000441",
        "00000442", "00000443", "00000444",
        "00000730", "00000731", "00000732", "00000733", "00000734", "00000735", "00000736", "00000737", "00000738", "00000739",
        "00000740", "00000741", "00000742", "00000743", "00000744", "00000745", "00000746", "00000747", "00000748", "00000749",
        "00000750", "00000751", "00000752", "00000753", "00000754", "00000755", "00000756", "00000757", "00000758", "00000759",
        "00000760", "00000761", "00000762", "00000763", "00000764", "00000765", "00000766", "00000767", "00000768", "00000769",
        "00000770", "00000771", "00000772", "00000773", "00000774", "00000775", "00000776", "00000777", "00000778", "00000779",
        "00000780", "00000781", "00000782", "00000783",
        "00000786", "00000787", "00000788", "00000789", "00000790", "00000791", "00000792", "00000793", "00000794", "00000795",
        "00000796", "00000797", "00000798", "00000799", "00000800", "00000801", "00000802", "00000803", "00000804", "00000805",
        "00000806", "00000807", "00000808", "00000809", "00000810", "00000811", "00000812", "00000813", "00000814", "00000815",
        "00000816", "00000817", "00000818", "00000819", "00000820", "00000821", "00000822", "00000823", "00000824", "00000825",
        "00000826", "00000827", "00000828", "00000829", "00000830", "00000831", "00000832", "00000833", "00000834", "00000835",
        "00000836", "00000837", "00000838", "00000839",
        "00000859", "00000860", "00000861", "00000862", "00000863", "00000864", "00000865", "00000866", "00000867", "00000868",
        "00000869", "00000870", "00000871", "00000872", "00000873", "00000874", "00000875", "00000876", "00000877", "00000878",
        "00000879", "00000880", "00000881", "00000882", "00000883", "00000884", "00000885", "00000886", "00000887", "00000888",
        "00000889", "00000890", "00000891", "00000892", "00000893", "00000894", "00000895", "00000896", "00000897", "00000898",
        "00000899", "00000900", "00000901", "00000902", "00000903", "00000904", "00000905", "00000906", "00000907", "00000908",
        "00000909", "00000910", "00000911", "00000912",
    ],
    test_data_path="PFS/test_data.zip",
    train_data_path="PFS/train_data.zip",
    extra_info_path="PFS/extra_info.zip",
)
hetero_cross_task_benchmark_config = BenchmarkConfig(
    name="M5",
    user_num=30,
    learnware_ids=[
        "00000342", "00000343", "00000344", "00000345", "00000346", "00000347", "00000348", "00000349", "00000350", "00000351",
        "00000352", "00000353", "00000354", "00000355", "00000356", "00000357", "00000358", "00000359", "00000360", "00000361",
        "00000362", "00000363", "00000364", "00000365", "00000366", "00000367", "00000368", "00000369", "00000370", "00000371",
        "00000372", "00000373", "00000374", "00000375", "00000376", "00000377", "00000378", "00000379", "00000380", "00000381",
        "00000382", "00000383", "00000384", "00000385", "00000386", "00000387", "00000388", "00000389", "00000390", "00000391",
        "00000392", "00000393", "00000394", "00000395", "00000396", "00000397", "00000398", "00000399", "00000400", "00000401",
        "00000402", "00000403", "00000404", "00000405", "00000406", "00000407", "00000408", "00000409", "00000410", "00000411",
        "00000412", "00000413", "00000414", "00000415", "00000416", "00000417", "00000418", "00000419", "00000420", "00000421",
        "00000422", "00000423", "00000424", "00000425", "00000426", "00000427", "00000428", "00000429", "00000430", "00000431",
        "00000432", "00000433", "00000434", "00000435", "00000436", "00000437", "00000438", "00000439", "00000440", "00000441",
        "00000442", "00000443", "00000444",
        "00000730", "00000731", "00000732", "00000733", "00000734", "00000735", "00000736", "00000737", "00000738", "00000739",
        "00000740", "00000741", "00000742", "00000743", "00000744", "00000745", "00000746", "00000747", "00000748", "00000749",
        "00000750", "00000751", "00000752", "00000753", "00000754", "00000755", "00000756", "00000757", "00000758", "00000759",
        "00000760", "00000761", "00000762", "00000763", "00000764", "00000765", "00000766", "00000767", "00000768", "00000769",
        "00000770", "00000771", "00000772", "00000773", "00000774", "00000775", "00000776", "00000777", "00000778", "00000779",
        "00000780", "00000781", "00000782", "00000783",
        "00000786", "00000787", "00000788", "00000789", "00000790", "00000791", "00000792", "00000793", "00000794", "00000795",
        "00000796", "00000797", "00000798", "00000799", "00000800", "00000801", "00000802", "00000803", "00000804", "00000805",
        "00000806", "00000807", "00000808", "00000809", "00000810", "00000811", "00000812", "00000813", "00000814", "00000815",
        "00000816", "00000817", "00000818", "00000819", "00000820", "00000821", "00000822", "00000823", "00000824", "00000825",
        "00000826", "00000827", "00000828", "00000829", "00000830", "00000831", "00000832", "00000833", "00000834", "00000835",
        "00000836", "00000837", "00000838", "00000839",
        "00000859", "00000860", "00000861", "00000862", "00000863", "00000864", "00000865", "00000866", "00000867", "00000868",
        "00000869", "00000870", "00000871", "00000872", "00000873", "00000874", "00000875", "00000876", "00000877", "00000878",
        "00000879", "00000880", "00000881", "00000882", "00000883", "00000884", "00000885", "00000886", "00000887", "00000888",
        "00000889", "00000890", "00000891", "00000892", "00000893", "00000894", "00000895", "00000896", "00000897", "00000898",
        "00000899", "00000900", "00000901", "00000902", "00000903", "00000904", "00000905", "00000906", "00000907", "00000908",
        "00000909", "00000910", "00000911", "00000912",
    ],
    test_data_path="M5/test_data.zip",
    train_data_path="M5/train_data.zip",
    extra_info_path="M5/extra_info.zip",
)
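
# --- Hedged usage sketch ------------------------------------------------------
# The three BenchmarkConfig objects above are consumed by the benchmark scripts
# in this repository. The summary below only assumes that BenchmarkConfig keeps
# its constructor arguments as attributes; it is illustration, not part of the
# original file.
if __name__ == "__main__":
    for cfg in (
        homo_table_benchmark_config,
        hetero_cross_feat_eng_benchmark_config,
        hetero_cross_task_benchmark_config,
    ):
        print(f"{cfg.name}: {cfg.user_num} users, {len(cfg.learnware_ids)} learnware ids")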