@@ -0,0 +1,160 @@ | |||||
import cv2 | |||||
import os | |||||
import numpy as np | |||||
import shutil | |||||
import argparse | |||||
def parse_args():
    """Parse command-line options for the AOI box-extraction script.

    Returns the parsed namespace with AOI_path, coco_path and
    classes_file attributes (all optional strings).
    """
    parser = argparse.ArgumentParser(description='get AOI class')
    parser.add_argument('--AOI_path', help='AOI path')
    parser.add_argument('--coco_path', help='coco path')
    parser.add_argument('--classes_file', help='classes file path')
    return parser.parse_args()
def setDir(filepath):
    """Ensure *filepath* exists as an empty directory.

    Creates the directory if it is missing; if it already exists, its
    contents are removed and the directory is recreated.

    Bug fix: the original removed an existing tree with rmtree but the
    re-create call was commented out, so the path ended up deleted
    instead of emptied and every later write into it failed.

    :param filepath: directory path to create or clear
    :return: None
    """
    if os.path.exists(filepath):
        # Drop any previous contents (best-effort, ignore races).
        shutil.rmtree(filepath, ignore_errors=True)
    # Always finish with an existing, empty directory; makedirs also
    # covers missing parents, which bare os.mkdir did not.
    os.makedirs(filepath, exist_ok=True)
# --- script configuration (runs at import time) ---
args = parse_args()
# Source images: the "ng" (defect) sub-folder of the AOI export.
path = args.AOI_path+"/ng/"
# Output layout: COCO-style root with annotations/, images/ and labels/.
coco_path = args.coco_path
coco_ann_path = args.coco_path+"/annotations/"
coco_img_path = args.coco_path+"/images/"
labels_path = args.coco_path+"/labels/"
classes_file = args.classes_file
# NOTE(review): setDir() removes an existing directory but its re-create
# call is commented out, so pre-existing output dirs end up missing.
setDir(coco_path)
setDir(coco_ann_path)
setDir(coco_img_path)
setDir(labels_path)
def order_points(pts):
    """Order four contour points as top-left, top-right, bottom-right,
    bottom-left.

    The top-left corner has the smallest x+y sum and the bottom-right
    the largest; the top-right has the smallest y-x difference and the
    bottom-left the largest.

    :param pts: (4, 2)-shaped array of corner coordinates
    :return: (4, 2) float32 array in TL, TR, BR, BL order
    """
    ordered = np.zeros((4, 2), dtype="float32")
    sums = pts.sum(axis=1)
    diffs = np.diff(pts, axis=1)
    ordered[0] = pts[np.argmin(sums)]    # top-left: smallest x+y
    ordered[2] = pts[np.argmax(sums)]    # bottom-right: largest x+y
    ordered[1] = pts[np.argmin(diffs)]   # top-right: smallest y-x
    ordered[3] = pts[np.argmax(diffs)]   # bottom-left: largest y-x
    return ordered
def angle_cos(p0, p1, p2):
    """Return |cos| of the angle at vertex p1 formed by segments p1-p0
    and p1-p2 (0.0 for a right angle, 1.0 for collinear points)."""
    v1 = (p0 - p1).astype('float')
    v2 = (p2 - p1).astype('float')
    denom = np.sqrt(np.dot(v1, v1) * np.dot(v2, v2))
    return abs(np.dot(v1, v2) / denom)
def cv_rename(file_path = ""): | |||||
file_path_gbk = file_path.encode('gbk') | |||||
return file_path_gbk.decode() | |||||
# All defect ("ng") image file names to process. File names are
# "@"-separated; field 2 is the AOI machine verdict, field 3 the defect label.
imgs = os.listdir(path)
i = 0
# AOI machine verdict codes that identify machine-side mismatches.
class_AOI_name = {"bu_pi_pei":"1","fang_xiang_fan":"2","err.txt_c_not_f":"3"}
# Defect label -> class id (as string). Pinyin names and truncated
# Chinese/English variants of the same defect map onto the same id.
class_name = {"yi_wei":"1","lou_jian":"2","ce_li":"3","li_bei":"4","shang_xia_fan_t":"5","lian_xi":"6","duo_jian":"7","sun_huai":"8","shao_xi":"9","jia_han":"10","yi_wu":"11",\
"移位_Component_":"1","缺件_Component_":"2","侧立_Stand_Up":"3","立碑_Tombstone":"4","翻贴_Upside_Dow":"5","连锡_Solder_Bri":"6","Solderbridge":"6",\
"损坏_Bad_Compon":"8","少锡_Insufficie":"9","假焊_Pseudo_Sol":"10", "qi_ta": "10"}
# Counters (printed at the end; count_qita tracks "other" classes).
count = 0
count_qita = 0
# Main loop: for each ng image, isolate the white marker rectangle drawn by
# the AOI machine, take its bounding box and write "<img> x1,y1,x2,y2,cls"
# into labels/<img>.txt.
for img in imgs:
    i = i+1
    #print(img)
    img_name = img.split("@")
    #print(img_name[2], img_name[3])
    # imdecode(np.fromfile(...)) instead of cv2.imread so non-ASCII paths work.
    src = cv2.imdecode(np.fromfile(path+img, dtype=np.uint8), cv2.IMREAD_COLOR)  # color copy, used for visualization only
    mask = cv2.imdecode(np.fromfile(path+img, dtype=np.uint8), 0)  # grayscale copy used to find the marker
    # Keep only near-white pixels (>=240): everything darker collapses to 239,
    # then 239..255 is stretched to 0..255 and low responses are zeroed.
    mask = np.where(mask<240, 239, mask)
    mask = (np.divide(mask-239, 16)*255)
    mask = np.where(mask<60, 0, mask)
    mask = np.uint8(mask)
    # Morphological closing to bridge small gaps in the marker outline.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT,(3,3))
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel,iterations=3)
    contours, hierarchy = cv2.findContours(mask,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
    #print(contours)
    squares = []
    result = []
    k = 0  # area of the largest accepted contour so far
    for cnt in contours:
        cnt_len = cv2.arcLength(cnt, True)  # contour perimeter (currently unused)
        #cnt = cv2.approxPolyDP(cnt, 0.02*cnt_len, True)  # polygon approximation
        # Keep contours with at least 4 points and a minimal area.
        #print(len(cnt), cv2.contourArea(cnt), cv2.isContourConvex(cnt))
        if len(cnt) >= 4 and cv2.contourArea(cnt) > 30:
            #print("***********************")
            M = cv2.moments(cnt)  # contour moments
            cx = int(M['m10']/M['m00'])
            cy = int(M['m01']/M['m00'])  # contour centroid
            cnt = cnt.reshape(-1, 2)
            # NOTE(review): only the first 4 contour points are examined here,
            # and the comprehension variable shadows the name `i`.
            max_cos = np.max([angle_cos( cnt[i], cnt[(i+1) % 4], cnt[(i+2) % 4] ) for i in range(4)])
            # Rectangle-only filter (cos90° == 0) left disabled:
            #if max_cos < 0.3:
            # Accept any quadrilateral (no angle restriction).
            if True:
                #index = index + 1
                #cv2.putText(img,("#%d"%index),(cx,cy),font,0.7,(255,0,255),2)
                # Track the largest-area contour as the marker candidate.
                if cv2.contourArea(cnt)>k:
                    k = cv2.contourArea(cnt)
                    result = cnt
                squares.append(cnt)
    mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)
    try:
        # Raises when no contour was accepted (result is an empty list).
        rect = order_points(result)
    except:
        # No marker found: report and drop the image from the dataset.
        print(img)
        os.remove(path+img)
        continue
    #print(rect)  # the 4 ordered corner points of the marker
    # Axis-aligned bounding box of the ordered corners.
    # NOTE(review): x1/y1 start at 1000 — assumes images are <=1000px wide/tall.
    x1 = 1000
    y1 = 1000
    x2 = 0
    y2 = 0
    for xy in rect:
        if x1>xy[0]:
            x1 = xy[0]
        if y1>xy[1]:
            y1 = xy[1]
        if x2<xy[0]:
            x2 = xy[0]
        if y2<xy[1]:
            y2 = xy[1]
    cv2.rectangle(src, (int(x1), int(y1)), (int(x2), int(y2)), (255, 255, 0), 3)
    # Label line: "<file name> x1,y1,x2,y2,<class id>" keyed by name field 3.
    label_result = img+" "+str(int(x1))+","+str(int(y1))+","+str(int(x2))+","+str(int(y2))+","+class_name[img_name[3]]
    #shutil.copy(path+img, coco_path+img)
    #print(img, img[:-4])
    path_file_name = labels_path+img[:-4]+".txt"
    # Write once per image; existing label files are left untouched.
    if not os.path.exists(path_file_name):
        with open(path_file_name, "a") as f:
            f.write(label_result)
    #break
# Summary output, then copy all ok/ng images into the COCO images folder
# and the class list into the COCO root.
print(class_name)
print(count, count_qita)
print("copying img")
ok_path = args.AOI_path+"/ok/"
ng_path = args.AOI_path+"/ng/"
imgs = os.listdir(ok_path)
for img in imgs:
    shutil.copy(ok_path+img, coco_img_path+img)
imgs = os.listdir(ng_path)
for img in imgs:
    shutil.copy(ng_path+img, coco_img_path+img)
shutil.copy(classes_file, coco_path+"classes.txt")
@@ -0,0 +1,58 @@ | |||||
import cv2 | |||||
import os | |||||
import numpy as np | |||||
import shutil | |||||
import argparse | |||||
def parse_args():
    """Parse command-line options for the AOI class-filter script.

    Returns the parsed namespace with a single AOI_path attribute.
    """
    parser = argparse.ArgumentParser(description='get AOI class')
    parser.add_argument('--AOI_path', help='AOI path')
    return parser.parse_args()
# --- script configuration (runs at import time) ---
args = parse_args()
#path = "D:/Work/20211119-dsxw/PCBA_dataset/dsxw_report/SMD12-09-Report/SMD12-Manual/img/ng/"
# Defect ("ng") images to filter.
path = args.AOI_path+"/ng/"
print(path)
imgs = os.listdir(path)
i = 0
# AOI machine verdict codes (field 2 of the "@"-separated file name).
class_AOI_name = {"bu_pi_pei":"1","fang_xiang_fan":"2","err.txt_c_not_f":"3"}
# Defect label -> class id kept for training (field 3 of the file name).
class_name = {"yi_wei":"1","lou_jian":"2","ce_li":"3","li_bei":"4","shang_xia_fan_t":"5","lian_xi":"6","duo_jian":"7","sun_huai":"8","shao_xi":"9","jia_han":"10","yi_wu":"11",\
"移位_Component_":"1","缺件_Component_":"2","侧立_Stand_Up":"3","立碑_Tombstone":"4","翻贴_Upside_Dow":"5","连锡_Solder_Bri":"6","Solderbridge":"6",\
"损坏_Bad_Compon":"8","少锡_Insufficie":"9","假焊_Pseudo_Sol":"10"}
# Labels deliberately excluded from training (removed from disk below).
class_name_other = {"上锡不良_Poor_S":"12","qita":"13","limit_error":"13","极性错_Wrong_Po":"13","其它_Others":"13"}
# Counters: bpp_ok/bpp_ng split machine-verdict images by pseudo-error flag.
count = 0
count_qita = 0
bpp_ok = 0
bpp_ng = 0
count_ng = 0
def cv_imread(file_path):
    """Read an image from *file_path* as grayscale.

    Uses np.fromfile + cv2.imdecode instead of cv2.imread so that paths
    containing non-ASCII characters load correctly.
    """
    raw = np.fromfile(file_path, dtype=np.uint8)
    return cv2.imdecode(raw, 0)
# Main loop: delete images whose file name marks them as machine-verdict
# mismatches or as classes not used for training; count what remains.
for img in imgs:
    i = i+1
    src = cv_imread(path+img)
    #print(img)
    img_name = img.split("@")
    #print(img_name[2], img_name[3])
    if img_name[2] in class_AOI_name.keys():
        #print(img_name[2], img)
        # Machine-verdict image: split pseudo errors from real ones, then drop it.
        if img_name[3]=="Pseudo_Error":
            bpp_ok += 1
        else:
            bpp_ng += 1
        count += 1
        os.remove(path+img)
        continue
    if img_name[3] not in class_name.keys():
        # Unknown / excluded defect class: report and drop it.
        print(img_name[3], img)
        count_qita += 1
        os.remove(path+img)
        continue
    # Image kept for training.
    count_ng += 1
print(count, bpp_ok, bpp_ng)
print(count_qita)
print(count_ng)
@@ -0,0 +1,182 @@ | |||||
""" | |||||
YOLO 格式的数据集转化为 COCO 格式的数据集 | |||||
--root_dir 输入根路径 | |||||
--save_path 保存文件的名字(没有random_split时使用) | |||||
--random_split 有则会随机划分数据集,然后再分别保存为3个文件。 | |||||
--split_by_file 按照 ./train.txt ./val.txt ./test.txt 来对数据集进行划分。 | |||||
""" | |||||
import os | |||||
import cv2 | |||||
import json | |||||
from tqdm import tqdm | |||||
from sklearn.model_selection import train_test_split | |||||
import argparse | |||||
import numpy as np | |||||
# Command-line options for the YOLO -> COCO conversion (parsed at import time).
parser = argparse.ArgumentParser()
# Root folder containing ./images, ./labels and classes.txt.
parser.add_argument('--root_dir', default='/home/shanwei-luo/teamdata/anomaly_detection_active_learning/data0422/smd12_11_12_hard_score_04/train',type=str, help="root path of images and labels, include ./images and ./labels and classes.txt")
parser.add_argument('--save_path', type=str,default='./train.json', help="if not split the dataset, give a path to a json file")
parser.add_argument('--random_split', action='store_true', help="random split the dataset, default ratio is 8:1:1")
parser.add_argument('--split_by_file', action='store_true', help="define how to split the dataset, include ./train.txt ./val.txt ./test.txt ")
arg = parser.parse_args()
def train_test_val_split_random(img_paths,ratio_train=0.8,ratio_test=0.1,ratio_val=0.1):
    """Randomly split *img_paths* into train/val/test lists.

    The ratios must sum to 1. A fixed random_state keeps the split
    reproducible across runs.

    Bug fix: the original sanity check was ``int(sum) == 1``, which
    fails spuriously for valid ratios whose float sum lands just below
    1 (e.g. 0.7 + 0.2 + 0.1 == 0.9999...). A small tolerance is used
    instead.

    :return: (train_img, val_img, test_img) lists of image names
    """
    # Tolerant float comparison instead of int() truncation.
    assert abs(ratio_train + ratio_test + ratio_val - 1.0) < 1e-6
    train_img, middle_img = train_test_split(img_paths,test_size=1-ratio_train, random_state=233)
    # Fraction of the remaining pool that goes to the second split.
    ratio=ratio_val/(1-ratio_train)
    val_img, test_img =train_test_split(middle_img,test_size=ratio, random_state=233)
    print("NUMS of train:val:test = {}:{}:{}".format(len(train_img), len(val_img), len(test_img)))
    return train_img, val_img, test_img
def train_test_val_split_by_files(img_paths, root_dir):
    """Split the dataset according to {root_dir}/train.txt, val.txt and
    test.txt, each listing one image name per line.

    Bug fix: ``readlines()`` keeps the trailing newline on every entry,
    so the names could never match the bare file names they are compared
    against later (``index in train_img`` in yolo2coco); each line is
    now stripped.

    :param img_paths: unused; kept for signature compatibility
    :param root_dir: folder containing the three definition files
    :return: (train, val, test) lists of image names
    """
    phases = ['train', 'val', 'test']
    img_split = []
    for p in phases:
        define_path = os.path.join(root_dir, f'{p}.txt')
        print(f'Read {p} dataset definition from {define_path}')
        assert os.path.exists(define_path)
        with open(define_path, 'r') as f:
            # Strip whitespace/newlines so entries match bare file names.
            img_paths = [line.strip() for line in f.readlines()]
            # img_paths = [os.path.split(img_path)[1] for img_path in img_paths]  # NOTE uncomment to reduce absolute paths to base names.
        img_split.append(img_paths)
    return img_split[0], img_split[1], img_split[2]
def yolo2coco(arg):
    """Convert a box-list dataset into COCO-format json annotations.

    Expects ``arg.root_dir`` to contain ./images, ./labels and
    classes.txt. Each label file holds a single line of the form
    ``<img_name> x1,y1,x2,y2,cls [x1,y1,x2,y2,cls ...]`` (absolute pixel
    corners, 1-based class ids). With ``arg.random_split`` or
    ``arg.split_by_file`` the output is annotations/{train,val,test}.json,
    otherwise a single annotations/<arg.save_path> file.
    """
    root_path = arg.root_dir
    print("Loading data from ",root_path)
    assert os.path.exists(root_path)
    originLabelsDir = os.path.join(root_path, 'labels')
    originImagesDir = os.path.join(root_path, 'images')
    with open(os.path.join(root_path, 'classes.txt')) as f:
        classes = f.read().strip().split()
    # images dir name
    indexes = os.listdir(originImagesDir)
    if arg.random_split or arg.split_by_file:
        # Separate COCO containers for each split.
        train_dataset = {'categories': [], 'annotations': [], 'images': []}
        val_dataset = {'categories': [], 'annotations': [], 'images': []}
        test_dataset = {'categories': [], 'annotations': [], 'images': []}
        # Map class labels to numeric ids; category ids start at 0.
        for i, cls in enumerate(classes, 0):
            train_dataset['categories'].append({'id': i, 'name': cls, 'supercategory': 'mark'})
            val_dataset['categories'].append({'id': i, 'name': cls, 'supercategory': 'mark'})
            test_dataset['categories'].append({'id': i, 'name': cls, 'supercategory': 'mark'})
        if arg.random_split:
            print("spliting mode: random split")
            train_img, val_img, test_img = train_test_val_split_random(indexes,0.8,0.1,0.1)
        elif arg.split_by_file:
            print("spliting mode: split by files")
            train_img, val_img, test_img = train_test_val_split_by_files(indexes, root_path)
    else:
        dataset = {'categories': [], 'annotations': [], 'images': []}
        for i, cls in enumerate(classes, 0):
            dataset['categories'].append({'id': i, 'name': cls, 'supercategory': 'mark'})
    # Running annotation id (unique across all splits).
    ann_id_cnt = 0
    ans = 0
    for k, index in enumerate(tqdm(indexes)):
        # Supports png and jpg images; derive the matching label file name.
        txtFile = index.replace('images','txt').replace('.jpg','.txt').replace('.png','.txt')
        print(txtFile)
        # Read image width and height (imdecode handles non-ASCII paths).
        #im = cv2.imread(os.path.join(root_path, 'images/') + index)
        im = cv2.imdecode(np.fromfile(os.path.join(root_path, 'images/') + index, dtype=np.uint8), cv2.IMREAD_COLOR)
        height, width, _ = im.shape
        if arg.random_split or arg.split_by_file:
            # Point `dataset` at whichever split this image belongs to;
            # all appends below then land in the right container.
            if index in train_img:
                dataset = train_dataset
            elif index in val_img:
                dataset = val_dataset
            elif index in test_img:
                dataset = test_dataset
        # Record the image entry.
        dataset['images'].append({'file_name': index,
                                    'id': k,
                                    'width': width,
                                    'height': height})
        if not os.path.exists(os.path.join(originLabelsDir, txtFile)):
            # No label file: keep only the image entry.
            continue
        with open(os.path.join(originLabelsDir, txtFile), 'r') as fr:
            labelList = fr.readline()
            labelList = labelList.strip().split(" ")
            # Only the image name, no boxes: nothing to convert.
            if len(labelList)==1:
                continue
            # Drop the leading image-name token; the rest are box tuples.
            labelList = labelList[1:]
            for label in labelList:
                label = label.split(",")
                # Labels use 1-based class ids; COCO categories here are 0-based.
                cls_id = int(label[4]) - 1
                x1 = float(label[0])
                y1 = float(label[1])
                x2 = float(label[2])
                y2 = float(label[3])
                # Clamp the corner coordinates to the image bounds.
                H, W, _ = im.shape
                if x1<0:
                    x1 = 0
                elif x1>W:
                    x1 = W-1
                if x2<0:
                    x2 = 0
                elif x2>W:
                    x2 = W-1
                if y1<0:
                    y1 = 0
                elif y1>H:
                    y1 = H-1
                if y2<0:
                    y2 = 0
                elif y2>H:
                    y2 = H-1
                # COCO bbox is [x, y, w, h]; guard against inverted corners.
                # NOTE(review): these assignments shadow the image-level
                # width/height read from im.shape above.
                width = max(0, x2 - x1)
                height = max(0, y2 - y1)
                dataset['annotations'].append({
                    'area': width * height,
                    'bbox': [x1, y1, width, height],
                    'category_id': cls_id,
                    'id': ann_id_cnt,
                    'image_id': k,
                    'iscrowd': 0,
                    # mask: the rectangle's four corners, clockwise from top-left
                    'segmentation': [[x1, y1, x2, y1, x2, y2, x1, y2]]
                })
                ann_id_cnt += 1
    # Persist the result(s) under <root>/annotations/.
    print(ann_id_cnt)
    folder = os.path.join(root_path, 'annotations')
    if not os.path.exists(folder):
        os.makedirs(folder)
    if arg.random_split or arg.split_by_file:
        for phase in ['train','val','test']:
            json_name = os.path.join(root_path, 'annotations/{}.json'.format(phase))
            with open(json_name, 'w') as f:
                if phase == 'train':
                    json.dump(train_dataset, f)
                elif phase == 'val':
                    json.dump(val_dataset, f)
                elif phase == 'test':
                    json.dump(test_dataset, f)
                print('Save annotation to {}'.format(json_name))
    else:
        json_name = os.path.join(root_path, 'annotations/{}'.format(arg.save_path))
        with open(json_name, 'w') as f:
            json.dump(dataset, f)
            print('Save annotation to {}'.format(json_name))
if __name__ == "__main__":
    # Run the conversion with the module-level parsed arguments.
    yolo2coco(arg)
@@ -0,0 +1,8 @@ | |||||
cff-version: 1.2.0 | |||||
message: "If you use this software, please cite it as below." | |||||
authors: | |||||
- name: "MMDetection Contributors" | |||||
title: "OpenMMLab Detection Toolbox and Benchmark" | |||||
date-released: 2018-08-22 | |||||
url: "https://github.com/open-mmlab/mmdetection" | |||||
license: Apache-2.0 |
@@ -0,0 +1,203 @@ | |||||
Copyright 2018-2023 OpenMMLab. All rights reserved. | |||||
Apache License | |||||
Version 2.0, January 2004 | |||||
http://www.apache.org/licenses/ | |||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION | |||||
1. Definitions. | |||||
"License" shall mean the terms and conditions for use, reproduction, | |||||
and distribution as defined by Sections 1 through 9 of this document. | |||||
"Licensor" shall mean the copyright owner or entity authorized by | |||||
the copyright owner that is granting the License. | |||||
"Legal Entity" shall mean the union of the acting entity and all | |||||
other entities that control, are controlled by, or are under common | |||||
control with that entity. For the purposes of this definition, | |||||
"control" means (i) the power, direct or indirect, to cause the | |||||
direction or management of such entity, whether by contract or | |||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the | |||||
outstanding shares, or (iii) beneficial ownership of such entity. | |||||
"You" (or "Your") shall mean an individual or Legal Entity | |||||
exercising permissions granted by this License. | |||||
"Source" form shall mean the preferred form for making modifications, | |||||
including but not limited to software source code, documentation | |||||
source, and configuration files. | |||||
"Object" form shall mean any form resulting from mechanical | |||||
transformation or translation of a Source form, including but | |||||
not limited to compiled object code, generated documentation, | |||||
and conversions to other media types. | |||||
"Work" shall mean the work of authorship, whether in Source or | |||||
Object form, made available under the License, as indicated by a | |||||
copyright notice that is included in or attached to the work | |||||
(an example is provided in the Appendix below). | |||||
"Derivative Works" shall mean any work, whether in Source or Object | |||||
form, that is based on (or derived from) the Work and for which the | |||||
editorial revisions, annotations, elaborations, or other modifications | |||||
represent, as a whole, an original work of authorship. For the purposes | |||||
of this License, Derivative Works shall not include works that remain | |||||
separable from, or merely link (or bind by name) to the interfaces of, | |||||
the Work and Derivative Works thereof. | |||||
"Contribution" shall mean any work of authorship, including | |||||
the original version of the Work and any modifications or additions | |||||
to that Work or Derivative Works thereof, that is intentionally | |||||
submitted to Licensor for inclusion in the Work by the copyright owner | |||||
or by an individual or Legal Entity authorized to submit on behalf of | |||||
the copyright owner. For the purposes of this definition, "submitted" | |||||
means any form of electronic, verbal, or written communication sent | |||||
to the Licensor or its representatives, including but not limited to | |||||
communication on electronic mailing lists, source code control systems, | |||||
and issue tracking systems that are managed by, or on behalf of, the | |||||
Licensor for the purpose of discussing and improving the Work, but | |||||
excluding communication that is conspicuously marked or otherwise | |||||
designated in writing by the copyright owner as "Not a Contribution." | |||||
"Contributor" shall mean Licensor and any individual or Legal Entity | |||||
on behalf of whom a Contribution has been received by Licensor and | |||||
subsequently incorporated within the Work. | |||||
2. Grant of Copyright License. Subject to the terms and conditions of | |||||
this License, each Contributor hereby grants to You a perpetual, | |||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable | |||||
copyright license to reproduce, prepare Derivative Works of, | |||||
publicly display, publicly perform, sublicense, and distribute the | |||||
Work and such Derivative Works in Source or Object form. | |||||
3. Grant of Patent License. Subject to the terms and conditions of | |||||
this License, each Contributor hereby grants to You a perpetual, | |||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable | |||||
(except as stated in this section) patent license to make, have made, | |||||
use, offer to sell, sell, import, and otherwise transfer the Work, | |||||
where such license applies only to those patent claims licensable | |||||
by such Contributor that are necessarily infringed by their | |||||
Contribution(s) alone or by combination of their Contribution(s) | |||||
with the Work to which such Contribution(s) was submitted. If You | |||||
institute patent litigation against any entity (including a | |||||
cross-claim or counterclaim in a lawsuit) alleging that the Work | |||||
or a Contribution incorporated within the Work constitutes direct | |||||
or contributory patent infringement, then any patent licenses | |||||
granted to You under this License for that Work shall terminate | |||||
as of the date such litigation is filed. | |||||
4. Redistribution. You may reproduce and distribute copies of the | |||||
Work or Derivative Works thereof in any medium, with or without | |||||
modifications, and in Source or Object form, provided that You | |||||
meet the following conditions: | |||||
(a) You must give any other recipients of the Work or | |||||
Derivative Works a copy of this License; and | |||||
(b) You must cause any modified files to carry prominent notices | |||||
stating that You changed the files; and | |||||
(c) You must retain, in the Source form of any Derivative Works | |||||
that You distribute, all copyright, patent, trademark, and | |||||
attribution notices from the Source form of the Work, | |||||
excluding those notices that do not pertain to any part of | |||||
the Derivative Works; and | |||||
(d) If the Work includes a "NOTICE" text file as part of its | |||||
distribution, then any Derivative Works that You distribute must | |||||
include a readable copy of the attribution notices contained | |||||
within such NOTICE file, excluding those notices that do not | |||||
pertain to any part of the Derivative Works, in at least one | |||||
of the following places: within a NOTICE text file distributed | |||||
as part of the Derivative Works; within the Source form or | |||||
documentation, if provided along with the Derivative Works; or, | |||||
within a display generated by the Derivative Works, if and | |||||
wherever such third-party notices normally appear. The contents | |||||
of the NOTICE file are for informational purposes only and | |||||
do not modify the License. You may add Your own attribution | |||||
notices within Derivative Works that You distribute, alongside | |||||
or as an addendum to the NOTICE text from the Work, provided | |||||
that such additional attribution notices cannot be construed | |||||
as modifying the License. | |||||
You may add Your own copyright statement to Your modifications and | |||||
may provide additional or different license terms and conditions | |||||
for use, reproduction, or distribution of Your modifications, or | |||||
for any such Derivative Works as a whole, provided Your use, | |||||
reproduction, and distribution of the Work otherwise complies with | |||||
the conditions stated in this License. | |||||
5. Submission of Contributions. Unless You explicitly state otherwise, | |||||
any Contribution intentionally submitted for inclusion in the Work | |||||
by You to the Licensor shall be under the terms and conditions of | |||||
this License, without any additional terms or conditions. | |||||
Notwithstanding the above, nothing herein shall supersede or modify | |||||
the terms of any separate license agreement you may have executed | |||||
with Licensor regarding such Contributions. | |||||
6. Trademarks. This License does not grant permission to use the trade | |||||
names, trademarks, service marks, or product names of the Licensor, | |||||
except as required for reasonable and customary use in describing the | |||||
origin of the Work and reproducing the content of the NOTICE file. | |||||
7. Disclaimer of Warranty. Unless required by applicable law or | |||||
agreed to in writing, Licensor provides the Work (and each | |||||
Contributor provides its Contributions) on an "AS IS" BASIS, | |||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | |||||
implied, including, without limitation, any warranties or conditions | |||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A | |||||
PARTICULAR PURPOSE. You are solely responsible for determining the | |||||
appropriateness of using or redistributing the Work and assume any | |||||
risks associated with Your exercise of permissions under this License. | |||||
8. Limitation of Liability. In no event and under no legal theory, | |||||
whether in tort (including negligence), contract, or otherwise, | |||||
unless required by applicable law (such as deliberate and grossly | |||||
negligent acts) or agreed to in writing, shall any Contributor be | |||||
liable to You for damages, including any direct, indirect, special, | |||||
incidental, or consequential damages of any character arising as a | |||||
result of this License or out of the use or inability to use the | |||||
Work (including but not limited to damages for loss of goodwill, | |||||
work stoppage, computer failure or malfunction, or any and all | |||||
other commercial damages or losses), even if such Contributor | |||||
has been advised of the possibility of such damages. | |||||
9. Accepting Warranty or Additional Liability. While redistributing | |||||
the Work or Derivative Works thereof, You may choose to offer, | |||||
and charge a fee for, acceptance of support, warranty, indemnity, | |||||
or other liability obligations and/or rights consistent with this | |||||
License. However, in accepting such obligations, You may act only | |||||
on Your own behalf and on Your sole responsibility, not on behalf | |||||
of any other Contributor, and only if You agree to indemnify, | |||||
defend, and hold each Contributor harmless for any liability | |||||
incurred by, or claims asserted against, such Contributor by reason | |||||
of your accepting any such warranty or additional liability. | |||||
END OF TERMS AND CONDITIONS | |||||
APPENDIX: How to apply the Apache License to your work. | |||||
To apply the Apache License to your work, attach the following | |||||
boilerplate notice, with the fields enclosed by brackets "[]" | |||||
replaced with your own identifying information. (Don't include | |||||
the brackets!) The text should be enclosed in the appropriate | |||||
comment syntax for the file format. We also recommend that a | |||||
file or class name and description of purpose be included on the | |||||
same "printed page" as the copyright notice for easier | |||||
identification within third-party archives. | |||||
Copyright 2018-2023 OpenMMLab. | |||||
Licensed under the Apache License, Version 2.0 (the "License"); | |||||
you may not use this file except in compliance with the License. | |||||
You may obtain a copy of the License at | |||||
http://www.apache.org/licenses/LICENSE-2.0 | |||||
Unless required by applicable law or agreed to in writing, software | |||||
distributed under the License is distributed on an "AS IS" BASIS, | |||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
See the License for the specific language governing permissions and | |||||
limitations under the License. |
@@ -0,0 +1,6 @@ | |||||
include requirements/*.txt | |||||
include mmdet/VERSION | |||||
include mmdet/.mim/model-index.yml | |||||
include mmdet/.mim/demo/*/* | |||||
recursive-include mmdet/.mim/configs *.py *.yml | |||||
recursive-include mmdet/.mim/tools *.sh *.py |
@@ -0,0 +1,33 @@ | |||||
## 安装环境 | |||||
conda install pytorch==1.8.0 torchvision==0.9.0 torchaudio==0.8.0 cudatoolkit=11.1 -c pytorch -c conda-forge | |||||
pip install opencv-python -i https://pypi.tuna.tsinghua.edu.cn/simple | |||||
pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.8.0/index.html | |||||
pip install terminaltables | |||||
pip install pycocotools | |||||
## 筛选需要检测的NG类别 | |||||
python AOI_select.py --AOI_path AOI数据文件夹 | |||||
## 找到图像中白色目标框并生成对应的box和label | |||||
python AOI_get_box.py --AOI_path AOI数据文件夹 --coco_path COCO格式文件夹 --classes_file 类别文件
## 转换为COCO数据集 | |||||
python AOI_to_coco.py --root_dir COCO格式文件夹 --save_path json标签命名(./train.json) | |||||
## 修改参数文件 | |||||
configs/AD_detection/AD_dsxw_test66.py | |||||
## 单卡训练 | |||||
python tools/train.py configs/AD_detection/AD_dsxw_test66.py --gpus 1 | |||||
## 多卡训练 | |||||
tools/dist_train.sh configs/AD_detection/AD_dsxw_test66.py 8(GPU数量) | |||||
## 模型评估 | |||||
python tools/test.py 参数文件 权重文件 --eval bbox | |||||
## 搜索最优置信度 | |||||
python select_threshold.py --config_file 模型参数文件 --checkpoint_file 模型权重文件 --images_path 测试集路径(包含ok和ng两个文件夹) --test_batch_size 测试阶段的batch size大小 | |||||
## 推理结果(置信度,feature等) | |||||
python get_score_csv.py --config_file 模型参数文件 --checkpoint_file 模型权重文件 --images_path 测试集路径(未标注数据) --test_batch_size 测试阶段的batch size大小 --result_path test.csv(绝对路径) |
@@ -0,0 +1,33 @@ | |||||
## environment | |||||
conda install pytorch==1.8.0 torchvision==0.9.0 torchaudio==0.8.0 cudatoolkit=11.1 -c pytorch -c conda-forge | |||||
pip install opencv-python -i https://pypi.tuna.tsinghua.edu.cn/simple | |||||
pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.8.0/index.html | |||||
pip install terminaltables | |||||
pip install pycocotools | |||||
## Filter ng categories to be detected | |||||
python AOI_select.py --AOI_path AOI-data-path | |||||
## Find the white target box in the image and generate the corresponding box and label | |||||
python AOI_get_box.py --AOI_path AOI-data-path --coco_path COCO_format_path --classes_file class_file
## convert to COCO dataset format | |||||
python AOI_to_coco.py --root_dir COCO_format_path --save_path json_file(./train.json) | |||||
## Modify parameter file | |||||
configs/AD_detection/AD_dsxw_test66.py | |||||
## single gpu train | |||||
python tools/train.py configs/AD_detection/AD_dsxw_test66.py --gpus 1 | |||||
## distribute train | |||||
tools/dist_train.sh configs/AD_detection/AD_dsxw_test66.py 8(GPU_number) | |||||
## model eval | |||||
python tools/test.py config_file ckpt_file --eval bbox | |||||
## search best threshold | |||||
python select_threshold.py --config_file config_file --checkpoint_file ckpt_file --images_path testset_path(Contains two folders, OK and ng) --test_batch_size batch_size | |||||
## infer score result(Confidence,feature, etc.) | |||||
python get_score_csv.py --config_file config_file --checkpoint_file ckpt_file --images_path testset_path(unlabel_data) --test_batch_size batch_size --result_path test.csv(Absolute path) |
@@ -0,0 +1,11 @@ | |||||
yiwei | |||||
loujian | |||||
celi | |||||
libei | |||||
fantie | |||||
lianxi | |||||
duojian | |||||
shunjian | |||||
shaoxi | |||||
jiahan | |||||
yiwu |
@@ -0,0 +1,163 @@ | |||||
# Deformable DETR (two-stage variant with iterative box refinement) for the
# 11-class AOI defect dataset. Dataset and runtime defaults come from _base_;
# everything below overrides them.
_base_ = [
    '../_base_/datasets/coco_detection.py', '../_base_/default_runtime.py'
]
model = dict(
    type='DeformableDETR',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        # C3-C5 outputs feed the neck (C2 is skipped, as usual for DETR-style models).
        out_indices=(1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=False),
        norm_eval=True,
        style='pytorch',
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
    neck=dict(
        type='ChannelMapper',
        in_channels=[512, 1024, 2048],
        kernel_size=1,
        out_channels=256,
        act_cfg=None,
        norm_cfg=dict(type='GN', num_groups=32),
        num_outs=4),
    bbox_head=dict(
        type='DeformableDETRHead',
        with_box_refine=True,
        as_two_stage=True,
        num_query=300,
        num_classes=11,  # must equal len(classes) below
        in_channels=2048,
        sync_cls_avg_factor=True,
        transformer=dict(
            type='DeformableDetrTransformer',
            encoder=dict(
                type='DetrTransformerEncoder',
                num_layers=6,
                transformerlayers=dict(
                    type='BaseTransformerLayer',
                    attn_cfgs=dict(
                        type='MultiScaleDeformableAttention', embed_dims=256),
                    feedforward_channels=1024,
                    ffn_dropout=0.1,
                    operation_order=('self_attn', 'norm', 'ffn', 'norm'))),
            decoder=dict(
                type='DeformableDetrTransformerDecoder',
                num_layers=6,
                return_intermediate=True,
                transformerlayers=dict(
                    type='DetrTransformerDecoderLayer',
                    attn_cfgs=[
                        dict(
                            type='MultiheadAttention',
                            embed_dims=256,
                            num_heads=8,
                            dropout=0.1),
                        dict(
                            type='MultiScaleDeformableAttention',
                            embed_dims=256)
                    ],
                    feedforward_channels=1024,
                    ffn_dropout=0.1,
                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm',
                                     'ffn', 'norm')))),
        positional_encoding=dict(
            type='SinePositionalEncoding',
            num_feats=128,
            normalize=True,
            offset=-0.5),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=2.0),
        loss_bbox=dict(type='L1Loss', loss_weight=5.0),
        loss_iou=dict(type='GIoULoss', loss_weight=2.0)),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(
            type='HungarianAssigner',
            cls_cost=dict(type='FocalLossCost', weight=2.0),
            reg_cost=dict(type='BBoxL1Cost', weight=5.0, box_format='xywh'),
            iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0))),
    test_cfg=dict(max_per_img=100))
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# train_pipeline, NOTE the img_scale and the Pad's size_divisor is different
# from the default setting in mmdet.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 300), (500, 400)],  # randomly picked per image
        multiscale_mode='value',
        keep_ratio=True),
    # 0.2 probability each for horizontal / vertical / diagonal flip.
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
# Single-scale, no-flip inference.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(400, 300),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
dataset_type = 'CocoDataset'
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v4/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v4/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v4/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v4/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v4/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v4/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer: AdamW with reduced LR on the backbone and the deformable
# attention's sampling offsets / reference points (standard Deformable DETR recipe).
optimizer = dict(
    type='AdamW',
    lr=2e-4,
    weight_decay=0.0001,
    paramwise_cfg=dict(
        custom_keys={
            'backbone': dict(lr_mult=0.1),
            'sampling_offsets': dict(lr_mult=0.1),
            'reference_points': dict(lr_mult=0.1)
        }))
optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))
# learning policy: single LR drop at epoch 40 of 60.
lr_config = dict(policy='step', step=[40])
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=5, metric='bbox')
@@ -0,0 +1,132 @@ | |||||
# Cascade R-CNN (ResNeXt-101-64x4d + FPN) for the 3-class solder-defect
# dataset. Only the three cascade bbox heads are overridden so num_classes
# matches the dataset; the rest comes from _base_.
_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py'
model = dict(
    roi_head=dict(
        bbox_head=[
            # Stage 1: loosest regression target stds.
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=3,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            # Stage 2: tighter regression target stds.
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=3,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            # Stage 3: tightest regression target stds.
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=3,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]))
dataset_type = 'CocoDataset'
classes = ('False_welding','Missing_parts','Displacement')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# Training pipeline: multi-scale resize plus flip / photometric / shift / crop aug.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 300), (500, 400)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
# Test-time augmentation: two scales with flipping.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 300), (500, 400)],
        flip=True,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine annealing with linear warmup.
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=2000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=40)
evaluation = dict(interval=5, metric='bbox')
@@ -0,0 +1,132 @@ | |||||
# Cascade R-CNN (ResNeXt-101-64x4d + FPN) for the 3-class solder-defect
# dataset. Variant with a third (600, 500) scale in both the train and the
# TTA pipelines; otherwise identical to the two-scale 40-epoch config.
_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py'
model = dict(
    roi_head=dict(
        bbox_head=[
            # Stage 1: loosest regression target stds.
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=3,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            # Stage 2: tighter regression target stds.
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=3,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            # Stage 3: tightest regression target stds.
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=3,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]))
dataset_type = 'CocoDataset'
classes = ('False_welding','Missing_parts','Displacement')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# Training pipeline: three-scale resize plus flip / photometric / shift / crop aug.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 300), (500, 400), (600, 500)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
# Test-time augmentation: three scales with flipping.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 300), (500, 400), (600, 500)],
        flip=True,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine annealing with linear warmup.
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=2000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=40)
evaluation = dict(interval=5, metric='bbox')
@@ -0,0 +1,132 @@ | |||||
# Cascade R-CNN (ResNeXt-101-64x4d + FPN) for the 3-class solder-defect
# dataset. Two-scale pipelines, 60-epoch schedule; only the cascade bbox
# heads are overridden from _base_ so num_classes matches the dataset.
_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py'
model = dict(
    roi_head=dict(
        bbox_head=[
            # Stage 1: loosest regression target stds.
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=3,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            # Stage 2: tighter regression target stds.
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=3,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            # Stage 3: tightest regression target stds.
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=3,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]))
dataset_type = 'CocoDataset'
classes = ('False_welding','Missing_parts','Displacement')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# Training pipeline: multi-scale resize plus flip / photometric / shift / crop aug.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 300), (500, 400)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
# Test-time augmentation: two scales with flipping.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 300), (500, 400)],
        flip=True,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine annealing with linear warmup.
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=2000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=5, metric='bbox')
@@ -0,0 +1,132 @@ | |||||
# Cascade R-CNN (ResNeXt-101-64x4d + FPN) for the 3-class solder-defect
# dataset. Two-scale pipelines, 60-epoch schedule.
# NOTE(review): this file appears content-identical to the other 60-epoch
# cascade config in this repo — consider deduplicating.
_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py'
model = dict(
    roi_head=dict(
        bbox_head=[
            # Stage 1: loosest regression target stds.
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=3,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            # Stage 2: tighter regression target stds.
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=3,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            # Stage 3: tightest regression target stds.
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=3,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]))
dataset_type = 'CocoDataset'
classes = ('False_welding','Missing_parts','Displacement')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# Training pipeline: multi-scale resize plus flip / photometric / shift / crop aug.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 300), (500, 400)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
# Test-time augmentation: two scales with flipping.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 300), (500, 400)],
        flip=True,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine annealing with linear warmup.
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=2000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=5, metric='bbox')
@@ -0,0 +1,132 @@ | |||||
# Cascade R-CNN (ResNeXt-101-64x4d + FPN) for the 3-class solder-defect
# dataset. Two-scale pipelines, long 100-epoch schedule; only the cascade
# bbox heads are overridden from _base_ so num_classes matches the dataset.
_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py'
model = dict(
    roi_head=dict(
        bbox_head=[
            # Stage 1: loosest regression target stds.
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=3,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            # Stage 2: tighter regression target stds.
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=3,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            # Stage 3: tightest regression target stds.
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=3,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]))
dataset_type = 'CocoDataset'
classes = ('False_welding','Missing_parts','Displacement')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# Training pipeline: multi-scale resize plus flip / photometric / shift / crop aug.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 300), (500, 400)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
# Test-time augmentation: two scales with flipping.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 300), (500, 400)],
        flip=True,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine annealing with linear warmup.
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=2000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=100)
evaluation = dict(interval=5, metric='bbox')
@@ -0,0 +1,159 @@ | |||||
#_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py' | |||||
_base_ = [ | |||||
'../_base_/models/cascade_rcnn_r50_fpn.py', | |||||
'../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' | |||||
] | |||||
model = dict( | |||||
backbone=dict( | |||||
type='DetectoRS_ResNet', | |||||
conv_cfg=dict(type='ConvAWS'), | |||||
sac=dict(type='SAC', use_deform=True), | |||||
stage_with_sac=(False, True, True, True), | |||||
output_img=True), | |||||
neck=dict( | |||||
type='RFP', | |||||
rfp_steps=2, | |||||
aspp_out_channels=64, | |||||
aspp_dilations=(1, 3, 6, 1), | |||||
rfp_backbone=dict( | |||||
rfp_inplanes=256, | |||||
type='DetectoRS_ResNet', | |||||
depth=50, | |||||
num_stages=4, | |||||
out_indices=(0, 1, 2, 3), | |||||
frozen_stages=1, | |||||
norm_cfg=dict(type='BN', requires_grad=True), | |||||
norm_eval=True, | |||||
conv_cfg=dict(type='ConvAWS'), | |||||
pretrained='torchvision://resnet50', | |||||
style='pytorch')), | |||||
roi_head=dict( | |||||
bbox_head=[ | |||||
dict( | |||||
type='Shared2FCBBoxHead', | |||||
in_channels=256, | |||||
fc_out_channels=1024, | |||||
roi_feat_size=7, | |||||
num_classes=3, | |||||
bbox_coder=dict( | |||||
type='DeltaXYWHBBoxCoder', | |||||
target_means=[0., 0., 0., 0.], | |||||
target_stds=[0.1, 0.1, 0.2, 0.2]), | |||||
reg_class_agnostic=True, | |||||
loss_cls=dict( | |||||
type='CrossEntropyLoss', | |||||
use_sigmoid=False, | |||||
loss_weight=1.0), | |||||
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, | |||||
loss_weight=1.0)), | |||||
dict( | |||||
type='Shared2FCBBoxHead', | |||||
in_channels=256, | |||||
fc_out_channels=1024, | |||||
roi_feat_size=7, | |||||
num_classes=3, | |||||
bbox_coder=dict( | |||||
type='DeltaXYWHBBoxCoder', | |||||
target_means=[0., 0., 0., 0.], | |||||
target_stds=[0.05, 0.05, 0.1, 0.1]), | |||||
reg_class_agnostic=True, | |||||
loss_cls=dict( | |||||
type='CrossEntropyLoss', | |||||
use_sigmoid=False, | |||||
loss_weight=1.0), | |||||
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, | |||||
loss_weight=1.0)), | |||||
dict( | |||||
type='Shared2FCBBoxHead', | |||||
in_channels=256, | |||||
fc_out_channels=1024, | |||||
roi_feat_size=7, | |||||
num_classes=3, | |||||
bbox_coder=dict( | |||||
type='DeltaXYWHBBoxCoder', | |||||
target_means=[0., 0., 0., 0.], | |||||
target_stds=[0.033, 0.033, 0.067, 0.067]), | |||||
reg_class_agnostic=True, | |||||
loss_cls=dict( | |||||
type='CrossEntropyLoss', | |||||
use_sigmoid=False, | |||||
loss_weight=1.0), | |||||
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) | |||||
])) | |||||
dataset_type = 'CocoDataset' | |||||
classes = ('False_welding','Missing_parts','Displacement') | |||||
img_norm_cfg = dict( | |||||
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) | |||||
train_pipeline = [ | |||||
dict(type='LoadImageFromFile'), | |||||
dict(type='LoadAnnotations', with_bbox=True), | |||||
dict( | |||||
type='Resize', | |||||
img_scale=[(400, 300), (500, 400)], | |||||
multiscale_mode='value', | |||||
keep_ratio=True), | |||||
dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']), | |||||
dict(type='BrightnessTransform', level=5, prob=0.5), | |||||
dict(type='ContrastTransform', level=5, prob=0.5), | |||||
dict(type='RandomShift', shift_ratio=0.5), | |||||
dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8), | |||||
dict(type='Normalize', **img_norm_cfg), | |||||
dict(type='Pad', size_divisor=32), | |||||
dict(type='DefaultFormatBundle'), | |||||
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), | |||||
] | |||||
test_pipeline = [ | |||||
dict(type='LoadImageFromFile'), | |||||
dict( | |||||
type='MultiScaleFlipAug', | |||||
img_scale=[(400, 300), (500, 400)], | |||||
flip=True, | |||||
transforms=[ | |||||
dict(type='Resize', keep_ratio=True), | |||||
dict(type='RandomFlip'), | |||||
dict(type='Normalize', **img_norm_cfg), | |||||
dict(type='Pad', size_divisor=32), | |||||
dict(type='ImageToTensor', keys=['img']), | |||||
dict(type='Collect', keys=['img']), | |||||
]) | |||||
] | |||||
data = dict( | |||||
samples_per_gpu=16, | |||||
workers_per_gpu=8, | |||||
train=dict( | |||||
type=dataset_type, | |||||
img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_train/images/', | |||||
classes=classes, | |||||
ann_file='/home/shanwei-luo/userdata/datasets/dsxw_train/annotations/train.json', | |||||
pipeline=train_pipeline), | |||||
val=dict( | |||||
type=dataset_type, | |||||
img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_test/images/', | |||||
classes=classes, | |||||
ann_file='/home/shanwei-luo/userdata/datasets/dsxw_test/annotations/test.json', | |||||
pipeline=test_pipeline), | |||||
test=dict( | |||||
type=dataset_type, | |||||
img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_test/images/', | |||||
classes=classes, | |||||
ann_file='/home/shanwei-luo/userdata/datasets/dsxw_test/annotations/test.json', | |||||
pipeline=test_pipeline)) | |||||
# optimizer | |||||
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) | |||||
optimizer_config = dict(grad_clip=None) | |||||
# learning policy | |||||
lr_config = dict( | |||||
policy='CosineAnnealing', | |||||
warmup='linear', | |||||
warmup_iters=2000, | |||||
warmup_ratio=1.0 / 10, | |||||
min_lr_ratio=1e-5) | |||||
runner = dict(type='EpochBasedRunner', max_epochs=60) | |||||
evaluation = dict(interval=5, metric='bbox') |
# ===== file boundary: next config (Cascade R-CNN + DetectoRS ResNeXt-101) =====
# MMDetection config: Cascade R-CNN with a DetectoRS ResNeXt-101 backbone
# (SAC + RFP neck) for a 3-class solder-defect dataset in COCO format.
# Defect fixed: GitHub-diff table residue ("| |||||") stripped from every line.
#_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py'
_base_ = [
    '../_base_/models/cascade_rcnn_r50_fpn.py',
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
model = dict(
    backbone=dict(
        type='DetectoRS_ResNeXt',
        conv_cfg=dict(type='ConvAWS'),
        # Switchable Atrous Convolution on stages 2-4
        sac=dict(type='SAC', use_deform=True),
        stage_with_sac=(False, True, True, True),
        output_img=True),
    neck=dict(
        type='RFP',  # Recursive Feature Pyramid
        rfp_steps=2,
        aspp_out_channels=64,
        aspp_dilations=(1, 3, 6, 1),
        rfp_backbone=dict(
            rfp_inplanes=256,
            type='DetectoRS_ResNeXt',
            depth=101,
            groups=64,
            base_width=4,
            num_stages=4,
            out_indices=(0, 1, 2, 3),
            frozen_stages=1,
            norm_cfg=dict(type='BN', requires_grad=True),
            norm_eval=True,
            conv_cfg=dict(type='ConvAWS'),
            pretrained='open-mmlab://resnext101_64x4d',
            style='pytorch')),
    roi_head=dict(
        # Three cascade stages with progressively tighter regression targets.
        bbox_head=[
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=3,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=3,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=3,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]))
dataset_type = 'CocoDataset'
classes = ('False_welding','Missing_parts','Displacement')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 300), (500, 400)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 300), (500, 400)],
        flip=True,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=8,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=2000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=5, metric='bbox')
# ===== file boundary: next config (Cascade R-CNN + Swin-Small backbone) =====
# MMDetection config: Cascade R-CNN with a Swin-Small Transformer backbone
# for a 3-class solder-defect dataset in COCO format.
# Defect fixed: GitHub-diff table residue ("| |||||") stripped from every line.
_base_ = [
    '../_base_/models/cascade_rcnn_r50_fpn.py',
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
# Official Swin-S ImageNet-1k checkpoint.
pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_small_patch4_window7_224.pth'
model = dict(
    backbone=dict(
        depths=[2, 2, 18, 2],  # Swin-Small stage depths
        _delete_=True,  # drop the ResNet-50 backbone inherited from _base_
        type='SwinTransformer',
        embed_dims=96,
        num_heads=[3, 6, 12, 24],
        window_size=7,
        mlp_ratio=4,
        qkv_bias=True,
        qk_scale=None,
        drop_rate=0.,
        attn_drop_rate=0.,
        drop_path_rate=0.2,
        patch_norm=True,
        out_indices=(0, 1, 2, 3),
        with_cp=False,
        convert_weights=True,
        init_cfg=dict(type='Pretrained', checkpoint=pretrained)),
    neck=dict(
        type='FPN',  # alternative: PAFPN
        in_channels=[96, 192, 384, 768],  # Swin-S stage output channels
        out_channels=256,
        num_outs=5),
    roi_head=dict(
        # Three cascade stages with progressively tighter regression targets.
        bbox_head=[
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=3,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=3,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=3,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]))
dataset_type = 'CocoDataset'
classes = ('False_welding','Missing_parts','Displacement')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 300), (500, 400)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 300), (500, 400)],
        flip=True,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=2000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=5, metric='bbox')
# ===== file boundary: next config (Cascade R-CNN + PVTv2-b5 backbone) =====
# MMDetection config: Cascade R-CNN with a PVTv2-b5 (Pyramid Vision
# Transformer V2) backbone for a 3-class solder-defect dataset in COCO format.
# Defect fixed: GitHub-diff table residue ("| |||||") stripped from every line.
_base_ = [
    '../_base_/models/cascade_rcnn_r50_fpn.py',
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
model = dict(
    backbone=dict(
        _delete_=True,  # drop the ResNet-50 backbone inherited from _base_
        type='PyramidVisionTransformerV2',
        embed_dims=64,
        num_layers=[3, 6, 40, 3],  # PVTv2-b5 stage depths
        mlp_ratios=(4, 4, 4, 4),
        init_cfg=dict(checkpoint='https://github.com/whai362/PVT/'
                      'releases/download/v2/pvt_v2_b5.pth')),
    neck=dict(
        type='FPN',  # alternative: PAFPN
        in_channels=[64, 128, 320, 512],  # PVTv2-b5 stage output channels
        out_channels=256,
        num_outs=5,
        start_level=1,
        add_extra_convs='on_input'),
    roi_head=dict(
        # Three cascade stages with progressively tighter regression targets.
        bbox_head=[
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=3,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=3,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=3,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]))
dataset_type = 'CocoDataset'
classes = ('False_welding','Missing_parts','Displacement')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 300), (500, 400)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 300), (500, 400)],
        flip=True,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=2000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=5, metric='bbox')
# ===== file boundary: next config (Cascade R-CNN X101, dataset v3, 11 classes) =====
# MMDetection config: Cascade R-CNN ResNeXt-101 64x4d for an 11-class
# solder-defect dataset (dsxw_dataset_v3) in COCO format.
# Defect fixed: GitHub-diff table residue ("| |||||") stripped from every line.
_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py'
model = dict(
    neck=dict(
        type='FPN',  # alternative: PAFPN
        in_channels=[256, 512, 1024, 2048],  # ResNeXt-101 stage output channels
        out_channels=256,
        num_outs=5),
    roi_head=dict(
        # Three cascade stages with progressively tighter regression targets.
        bbox_head=[
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]))
dataset_type = 'CocoDataset'
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 300), (500, 400)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 300), (500, 400)],
        flip=True,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v3/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v3/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v3/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v3/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v3/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v3/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=2000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=5, metric='bbox')
# ===== file boundary: next config (Cascade R-CNN X101, dataset v4, 11 classes) =====
# MMDetection config: Cascade R-CNN ResNeXt-101 64x4d for an 11-class
# solder-defect dataset (dsxw_dataset_v4) in COCO format. Identical to the
# v3 config except for the dataset paths.
# Defect fixed: GitHub-diff table residue ("| |||||") stripped from every line.
_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py'
model = dict(
    neck=dict(
        type='FPN',  # alternative: PAFPN
        in_channels=[256, 512, 1024, 2048],  # ResNeXt-101 stage output channels
        out_channels=256,
        num_outs=5),
    roi_head=dict(
        # Three cascade stages with progressively tighter regression targets.
        bbox_head=[
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]))
dataset_type = 'CocoDataset'
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 300), (500, 400)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 300), (500, 400)],
        flip=True,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v4/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v4/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v4/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v4/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v4/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v4/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=2000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=5, metric='bbox')
# ===== file boundary: next config (Cascade R-CNN X101, dataset v4, larger input scales) =====
# MMDetection config: Cascade R-CNN ResNeXt-101 64x4d for an 11-class
# solder-defect dataset (dsxw_dataset_v4). Variant of the v4 config with
# larger input scales (600/700 square), smaller batch (8) and longer warmup.
# Defect fixed: GitHub-diff table residue ("| |||||") stripped from every line.
_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py'
model = dict(
    neck=dict(
        type='FPN',  # alternative: PAFPN
        in_channels=[256, 512, 1024, 2048],  # ResNeXt-101 stage output channels
        out_channels=256,
        num_outs=5),
    roi_head=dict(
        # Three cascade stages with progressively tighter regression targets.
        bbox_head=[
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]))
dataset_type = 'CocoDataset'
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(600, 600), (700, 700)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(600, 600), (700, 700)],
        flip=True,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=8,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v4/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v4/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v4/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v4/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v4/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v4/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=5000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=5, metric='bbox')
# ===== file boundary (original diff hunk header: @@ -0,0 +1,163 @@) =====
# Deformable DETR ("two-stage" variant with iterative box refinement) for an
# 11-class defect dataset (class names below are pinyin defect labels;
# presumably AOI/SMT inspection defects — see the AOI tooling at file top).
_base_ = [
    '../_base_/datasets/coco_detection.py', '../_base_/default_runtime.py'
]
model = dict(
    type='DeformableDETR',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(1, 2, 3),  # export C3-C5 only; C2 is unused
        frozen_stages=1,        # freeze stem + stage 1
        norm_cfg=dict(type='BN', requires_grad=False),  # frozen BN (standard for detection fine-tuning)
        norm_eval=True,
        style='pytorch',
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
    neck=dict(
        type='ChannelMapper',  # 1x1 conv projections of C3-C5 to a shared 256-d space
        in_channels=[512, 1024, 2048],
        kernel_size=1,
        out_channels=256,
        act_cfg=None,
        norm_cfg=dict(type='GN', num_groups=32),
        num_outs=4),  # one extra (downsampled) level beyond the 3 inputs
    bbox_head=dict(
        type='DeformableDETRHead',
        with_box_refine=True,  # decoder layers refine reference boxes iteratively
        as_two_stage=True,     # encoder proposals initialise the object queries
        num_query=300,
        num_classes=11,
        in_channels=2048,
        sync_cls_avg_factor=True,  # sync the cls-loss normaliser across GPUs
        transformer=dict(
            type='DeformableDetrTransformer',
            encoder=dict(
                type='DetrTransformerEncoder',
                num_layers=6,
                transformerlayers=dict(
                    type='BaseTransformerLayer',
                    attn_cfgs=dict(
                        type='MultiScaleDeformableAttention', embed_dims=256),
                    feedforward_channels=1024,
                    ffn_dropout=0.1,
                    operation_order=('self_attn', 'norm', 'ffn', 'norm'))),
            decoder=dict(
                type='DeformableDetrTransformerDecoder',
                num_layers=6,
                return_intermediate=True,  # auxiliary losses on every decoder layer
                transformerlayers=dict(
                    type='DetrTransformerDecoderLayer',
                    attn_cfgs=[
                        dict(
                            type='MultiheadAttention',  # query self-attention
                            embed_dims=256,
                            num_heads=8,
                            dropout=0.1),
                        dict(
                            type='MultiScaleDeformableAttention',  # cross-attn to image features
                            embed_dims=256)
                    ],
                    feedforward_channels=1024,
                    ffn_dropout=0.1,
                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm',
                                     'ffn', 'norm')))),
        positional_encoding=dict(
            type='SinePositionalEncoding',
            num_feats=128,  # 128 per axis -> 256-d position embedding
            normalize=True,
            offset=-0.5),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=2.0),
        loss_bbox=dict(type='L1Loss', loss_weight=5.0),
        loss_iou=dict(type='GIoULoss', loss_weight=2.0)),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(
            # One-to-one bipartite matching; the three costs mirror the three
            # loss weights above (cls 2.0 / L1 5.0 / GIoU 2.0).
            type='HungarianAssigner',
            cls_cost=dict(type='FocalLossCost', weight=2.0),
            reg_cost=dict(type='BBoxL1Cost', weight=5.0, box_format='xywh'),
            iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0))),
    test_cfg=dict(max_per_img=100))
# ImageNet mean/std; convert BGR (cv2 load order) to RGB before normalising.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# train_pipeline, NOTE the img_scale and the Pad's size_divisor is different
# from the default setting in mmdet.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 300), (500, 400)],  # randomly pick one of two fixed scales
        multiscale_mode='value',
        keep_ratio=True),
    # 20% chance each of horizontal / vertical / diagonal flip (40%: no flip)
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
# Inference: single scale, no flip augmentation.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(400, 300),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
dataset_type = 'CocoDataset'
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v4/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v4/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v4/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v4/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v4/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v4/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer: AdamW with a 10x smaller lr for the backbone and for the
# deformable-attention sampling offsets / reference points (Deformable DETR
# paper recipe).
optimizer = dict(
    type='AdamW',
    lr=2e-4,
    weight_decay=0.0001,
    paramwise_cfg=dict(
        custom_keys={
            'backbone': dict(lr_mult=0.1),
            'sampling_offsets': dict(lr_mult=0.1),
            'reference_points': dict(lr_mult=0.1)
        }))
optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))  # L2 clip at 0.1
# learning policy: single 10x step drop at epoch 40
lr_config = dict(policy='step', step=[40])
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=5, metric='bbox')
# ===== file boundary (original diff hunk header: @@ -0,0 +1,140 @@) =====
# YOLOX-L (deepen/widen factor 1.0) for the same 11-class defect dataset.
# Training uses Mosaic + RandomAffine via MultiImageMixDataset; the strong
# augmentation is switched off for the final 15 epochs by YOLOXModeSwitchHook.
_base_ = ['../_base_/schedules/schedule_1x_original.py', '../_base_/default_runtime.py']
# model settings
model = dict(
    type='YOLOX',
    backbone=dict(type='CSPDarknet', deepen_factor=1.0, widen_factor=1.0),
    neck=dict(
        type='YOLOXPAFPN',
        in_channels=[256, 512, 1024],
        out_channels=256,
        num_csp_blocks=3),
    bbox_head=dict(
        type='YOLOXHead', num_classes=11, in_channels=256, feat_channels=256),
    train_cfg=dict(assigner=dict(type='SimOTAAssigner', center_radius=2.5)),
    # In order to align the source code, the threshold of the val phase is
    # 0.01, and the threshold of the test phase is 0.001.
    test_cfg=dict(score_thr=0.01, nms=dict(type='nms', iou_threshold=0.65)))
# dataset settings
dataset_type = 'CocoDataset'
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
# ImageNet mean/std, BGR -> RGB.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
img_scale = (400, 400)
# NOTE: no LoadImageFromFile here — MultiImageMixDataset supplies already
# loaded images (its inner dataset pipeline does the loading below).
train_pipeline = [
    dict(type='Mosaic', img_scale=img_scale, pad_val=0),
    dict(
        type='RandomAffine',
        scaling_ratio_range=(0.1, 2),
        # Crop the mosaic canvas back down around the centre image.
        border=(-img_scale[0] // 2, -img_scale[1] // 2)),
    dict(
        type='PhotoMetricDistortion',
        brightness_delta=32,
        contrast_range=(0.5, 1.5),
        saturation_range=(0.5, 1.5),
        hue_delta=18),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Resize', keep_ratio=True),
    dict(type='Pad', size_divisor=32),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]
train_dataset = dict(
    type='MultiImageMixDataset',  # wrapper that feeds multiple images to Mosaic
    dataset=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v4/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v4/dsxw_train/annotations/train.json',
        pipeline=[
            dict(type='LoadImageFromFile', to_float32=True),
            dict(type='LoadAnnotations', with_bbox=True)
        ],
        filter_empty_gt=False,  # keep defect-free images as negatives
    ),
    pipeline=train_pipeline,
    dynamic_scale=img_scale)  # resized on the fly by SyncRandomSizeHook below
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=img_scale,
        flip=False,  # single scale, no TTA
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Pad', size_divisor=32),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='DefaultFormatBundle'),
            dict(type='Collect', keys=['img'])
        ])
]
data = dict(
    samples_per_gpu=8,
    workers_per_gpu=8,
    train=train_dataset,
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v4/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v4/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v4/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v4/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
# default 8 gpu
optimizer = dict(
    type='SGD',
    lr=0.01,
    momentum=0.9,
    weight_decay=5e-4,
    nesterov=True,
    # No weight decay on norm layers and biases (YOLOX recipe).
    paramwise_cfg=dict(norm_decay_mult=0., bias_decay_mult=0.))
optimizer_config = dict(grad_clip=None)
# learning policy: YOLOX cosine schedule with 5-epoch exp warmup and a flat
# lr over the final `num_last_epochs` epochs.
lr_config = dict(
    _delete_=True,  # discard the lr_config inherited from the _base_ schedule
    policy='YOLOX',
    warmup='exp',
    by_epoch=False,
    warmup_by_epoch=True,
    warmup_ratio=1,
    warmup_iters=5,  # 5 epoch
    num_last_epochs=15,
    min_lr_ratio=0.05)
runner = dict(type='EpochBasedRunner', max_epochs=60)
resume_from = None
interval = 5  # shared eval/checkpoint period (epochs)
custom_hooks = [
    # Drop Mosaic/RandomAffine and add L1 loss for the last 15 epochs.
    dict(type='YOLOXModeSwitchHook', num_last_epochs=15, priority=48),
    # Randomly vary the input size (multiples of 32 in ratio_range) across iterations.
    dict(
        type='SyncRandomSizeHook',
        ratio_range=(14, 26),
        img_scale=img_scale,
        priority=48),
    # Switch SyncBN -> BN for the final epochs.
    dict(
        type='SyncNormHook',
        num_last_epochs=15,
        interval=interval,
        priority=48),
    # Exponential-momentum EMA of the model weights.
    dict(type='ExpMomentumEMAHook', resume_from=resume_from, priority=49)
]
checkpoint_config = dict(interval=interval)
evaluation = dict(interval=interval, metric='bbox')
log_config = dict(interval=50)
# ===== file boundary (original diff hunk header: @@ -0,0 +1,145 @@) =====
# Cascade R-CNN with the X101 backbone replaced by PVTv2-b3, on the 11-class
# defect dataset (v5). The three-stage ROI head is re-declared only to set
# num_classes=11 (the base config targets 80 COCO classes).
_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py'
model = dict(
    backbone=dict(
        _delete_=True,  # fully replace the inherited ResNeXt backbone
        type='PyramidVisionTransformerV2',
        mlp_ratios=(4, 4, 4, 4),
        embed_dims=64,
        num_layers=[3, 4, 18, 3],  # PVTv2-b3 depth configuration
        init_cfg=dict(checkpoint='https://github.com/whai362/PVT/'
                      'releases/download/v2/pvt_v2_b3.pth')),
    neck=dict(
        type='FPN',  # 'PAFPN' was the tried alternative
        in_channels=[64, 128, 320, 512],  # PVTv2-b3 stage output widths
        out_channels=256,
        num_outs=5),
    roi_head=dict(
        # Three cascade stages with progressively tighter regression targets
        # (target_stds shrink stage by stage).
        bbox_head=[
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,  # one box regressor shared by all classes
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]))
dataset_type = 'CocoDataset'
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
# ImageNet mean/std, BGR -> RGB.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 300), (500, 400)],  # randomly pick one of two fixed scales
        multiscale_mode='value',
        keep_ratio=True),
    # 20% chance each of horizontal / vertical / diagonal flip (40%: no flip)
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
# TTA: two scales x horizontal flip = 4 fused passes per image.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 300), (500, 400)],
        flip=True,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine decay after a 3000-iteration linear warmup
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=3000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=5, metric='bbox')
# ===== file boundary (original diff hunk header: @@ -0,0 +1,155 @@) =====
_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py' | |||||
pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_small_patch4_window7_224.pth' | |||||
model = dict( | |||||
backbone=dict( | |||||
_delete_=True, | |||||
type='SwinTransformer', | |||||
embed_dims=96, | |||||
depths=[2, 2, 18, 2], | |||||
num_heads=[3, 6, 12, 24], | |||||
window_size=7, | |||||
mlp_ratio=4, | |||||
qkv_bias=True, | |||||
qk_scale=None, | |||||
drop_rate=0., | |||||
attn_drop_rate=0., | |||||
drop_path_rate=0.2, | |||||
patch_norm=True, | |||||
out_indices=(0, 1, 2, 3), | |||||
with_cp=False, | |||||
convert_weights=True, | |||||
init_cfg=dict(type='Pretrained', checkpoint=pretrained)), | |||||
neck=dict( | |||||
type='FPN',#FPN PAFPN | |||||
in_channels=[96, 192, 384, 768], | |||||
out_channels=256, | |||||
num_outs=5), | |||||
roi_head=dict( | |||||
bbox_head=[ | |||||
dict( | |||||
type='Shared2FCBBoxHead', | |||||
in_channels=256, | |||||
fc_out_channels=1024, | |||||
roi_feat_size=7, | |||||
num_classes=11, | |||||
bbox_coder=dict( | |||||
type='DeltaXYWHBBoxCoder', | |||||
target_means=[0., 0., 0., 0.], | |||||
target_stds=[0.1, 0.1, 0.2, 0.2]), | |||||
reg_class_agnostic=True, | |||||
loss_cls=dict( | |||||
type='CrossEntropyLoss', | |||||
use_sigmoid=False, | |||||
loss_weight=1.0), | |||||
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, | |||||
loss_weight=1.0)), | |||||
dict( | |||||
type='Shared2FCBBoxHead', | |||||
in_channels=256, | |||||
fc_out_channels=1024, | |||||
roi_feat_size=7, | |||||
num_classes=11, | |||||
bbox_coder=dict( | |||||
type='DeltaXYWHBBoxCoder', | |||||
target_means=[0., 0., 0., 0.], | |||||
target_stds=[0.05, 0.05, 0.1, 0.1]), | |||||
reg_class_agnostic=True, | |||||
loss_cls=dict( | |||||
type='CrossEntropyLoss', | |||||
use_sigmoid=False, | |||||
loss_weight=1.0), | |||||
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, | |||||
loss_weight=1.0)), | |||||
dict( | |||||
type='Shared2FCBBoxHead', | |||||
in_channels=256, | |||||
fc_out_channels=1024, | |||||
roi_feat_size=7, | |||||
num_classes=11, | |||||
bbox_coder=dict( | |||||
type='DeltaXYWHBBoxCoder', | |||||
target_means=[0., 0., 0., 0.], | |||||
target_stds=[0.033, 0.033, 0.067, 0.067]), | |||||
reg_class_agnostic=True, | |||||
loss_cls=dict( | |||||
type='CrossEntropyLoss', | |||||
use_sigmoid=False, | |||||
loss_weight=1.0), | |||||
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) | |||||
])) | |||||
dataset_type = 'CocoDataset' | |||||
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu') | |||||
img_norm_cfg = dict( | |||||
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) | |||||
train_pipeline = [ | |||||
dict(type='LoadImageFromFile'), | |||||
dict(type='LoadAnnotations', with_bbox=True), | |||||
dict( | |||||
type='Resize', | |||||
img_scale=[(400, 300), (500, 400)], | |||||
multiscale_mode='value', | |||||
keep_ratio=True), | |||||
dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']), | |||||
dict(type='BrightnessTransform', level=5, prob=0.5), | |||||
dict(type='ContrastTransform', level=5, prob=0.5), | |||||
dict(type='RandomShift', shift_ratio=0.5), | |||||
dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8), | |||||
dict(type='Normalize', **img_norm_cfg), | |||||
dict(type='Pad', size_divisor=32), | |||||
dict(type='DefaultFormatBundle'), | |||||
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), | |||||
] | |||||
test_pipeline = [ | |||||
dict(type='LoadImageFromFile'), | |||||
dict( | |||||
type='MultiScaleFlipAug', | |||||
img_scale=[(400, 300), (500, 400)], | |||||
flip=True, | |||||
transforms=[ | |||||
dict(type='Resize', keep_ratio=True), | |||||
dict(type='RandomFlip'), | |||||
dict(type='Normalize', **img_norm_cfg), | |||||
dict(type='Pad', size_divisor=32), | |||||
dict(type='ImageToTensor', keys=['img']), | |||||
dict(type='Collect', keys=['img']), | |||||
]) | |||||
] | |||||
data = dict( | |||||
samples_per_gpu=16, | |||||
workers_per_gpu=8, | |||||
train=dict( | |||||
type=dataset_type, | |||||
img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_train/images/', | |||||
classes=classes, | |||||
ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_train/annotations/train.json', | |||||
pipeline=train_pipeline), | |||||
val=dict( | |||||
type=dataset_type, | |||||
img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_test/images/', | |||||
classes=classes, | |||||
ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_test/annotations/test.json', | |||||
pipeline=test_pipeline), | |||||
test=dict( | |||||
type=dataset_type, | |||||
img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_test/images/', | |||||
classes=classes, | |||||
ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_test/annotations/test.json', | |||||
pipeline=test_pipeline)) | |||||
# optimizer | |||||
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) | |||||
optimizer_config = dict(grad_clip=None) | |||||
# learning policy | |||||
lr_config = dict( | |||||
policy='CosineAnnealing', | |||||
warmup='linear', | |||||
warmup_iters=3000, | |||||
warmup_ratio=1.0 / 10, | |||||
min_lr_ratio=1e-5) | |||||
runner = dict(type='EpochBasedRunner', max_epochs=60) | |||||
evaluation = dict(interval=5, metric='bbox') |
# ===== file boundary (original diff hunk header: @@ -0,0 +1,137 @@) =====
# Cascade R-CNN keeping the base X101-64x4d backbone, on the 11-class defect
# dataset (v5). The ROI head is re-declared only to set num_classes=11.
# Single-scale (400, 300) inference without flip TTA.
_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py'
model = dict(
    neck=dict(
        type='FPN',  # 'PAFPN' was the tried alternative
        in_channels=[256, 512, 1024, 2048],  # ResNeXt stage output widths
        out_channels=256,
        num_outs=5),
    roi_head=dict(
        # Three cascade stages with progressively tighter regression targets.
        bbox_head=[
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]))
dataset_type = 'CocoDataset'
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
# ImageNet mean/std, BGR -> RGB.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 300), (500, 400)],  # randomly pick one of two fixed scales
        multiscale_mode='value',
        keep_ratio=True),
    # 20% chance each of horizontal / vertical / diagonal flip (40%: no flip)
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
# Inference: one scale, no flip.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 300)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine decay after a 3000-iteration linear warmup
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=3000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=5, metric='bbox')
# ===== file boundary (original diff hunk header: @@ -0,0 +1,137 @@) =====
# Cascade R-CNN keeping the base X101-64x4d backbone, on the 11-class defect
# dataset (v5). Identical to the sibling X101 config except the image scales
# are square: train (400,400)/(500,500), test (400,400).
_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py'
model = dict(
    neck=dict(
        type='FPN',  # 'PAFPN' was the tried alternative
        in_channels=[256, 512, 1024, 2048],  # ResNeXt stage output widths
        out_channels=256,
        num_outs=5),
    roi_head=dict(
        # Three cascade stages with progressively tighter regression targets.
        bbox_head=[
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]))
dataset_type = 'CocoDataset'
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
# ImageNet mean/std, BGR -> RGB.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],  # randomly pick one of two square scales
        multiscale_mode='value',
        keep_ratio=True),
    # 20% chance each of horizontal / vertical / diagonal flip (40%: no flip)
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
# Inference: one scale, no flip.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 400)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine decay after a 3000-iteration linear warmup
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=3000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=5, metric='bbox')
# ---- file boundary (original diff hunk header: @@ -0,0 +1,168 @@) ----
# Cascade R-CNN with an HRNetV2-W32 backbone and HRFPN neck, fine-tuned on an
# 11-class defect dataset (dsxw_dataset_v5).
_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py'
model = dict(
    backbone=dict(
        _delete_=True,  # drop the ResNeXt backbone inherited from _base_
        type='HRNet',
        extra=dict(
            # Standard HRNetV2-W32 stage layout: 1/2/3/4 parallel branches.
            stage1=dict(
                num_modules=1,
                num_branches=1,
                block='BOTTLENECK',
                num_blocks=(4, ),
                num_channels=(64, )),
            stage2=dict(
                num_modules=1,
                num_branches=2,
                block='BASIC',
                num_blocks=(4, 4),
                num_channels=(32, 64)),
            stage3=dict(
                num_modules=4,
                num_branches=3,
                block='BASIC',
                num_blocks=(4, 4, 4),
                num_channels=(32, 64, 128)),
            stage4=dict(
                num_modules=3,
                num_branches=4,
                block='BASIC',
                num_blocks=(4, 4, 4, 4),
                num_channels=(32, 64, 128, 256))),
        init_cfg=dict(
            type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w32')),
    neck=dict(
        _delete_=True,  # replace the inherited FPN with HRFPN
        type='HRFPN',
        in_channels=[32, 64, 128, 256],  # channels of the four HRNet branches
        out_channels=256,
        num_outs=5),
    roi_head=dict(
        # Three cascade stages; target_stds tighten stage by stage as the
        # boxes are progressively refined.
        bbox_head=[
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,  # 11 defect categories (see `classes` below)
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]))
dataset_type = 'CocoDataset'
# Defect class names (pinyin labels for the AOI defect types).
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
# Standard ImageNet mean/std normalization.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# Training pipeline: multi-scale resize plus flip/photometric/shift/crop
# augmentations.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],  # pick one scale per sample
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
# Test pipeline: single scale, no flipping.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 400)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),  # no-op because flip=False
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
# Dataset splits; val and test share the same annotations.
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine annealing with linear warmup
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=3000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=5, metric='bbox')  # COCO bbox mAP every 5 epochs
# ---- file boundary (original diff hunk header: @@ -0,0 +1,152 @@) ----
# Cascade R-CNN with a ResNeSt-101 backbone (SyncBN), fine-tuned on the
# 11-class defect dataset (dsxw_dataset_v5). Smaller batch (8/GPU) and a
# shorter 40-epoch schedule than the sibling configs.
_base_ = '../cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py'
norm_cfg = dict(type='SyncBN', requires_grad=True)  # SyncBN across GPUs
model = dict(
    backbone=dict(
        type='ResNeSt',
        stem_channels=128,
        depth=101,
        radix=2,             # split-attention radix
        reduction_factor=4,
        avg_down_stride=True,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,     # freeze stem + stage 1
        norm_cfg=norm_cfg,
        norm_eval=False,     # keep BN statistics updating during training
        style='pytorch',
        init_cfg=dict(type='Pretrained', checkpoint='open-mmlab://resnest101')),
    roi_head=dict(
        # Three cascade stages with conv+fc heads (norm_cfg applied to convs);
        # target_stds tighten stage by stage.
        bbox_head=[
            dict(
                type='Shared4Conv1FCBBoxHead',
                in_channels=256,
                conv_out_channels=256,
                fc_out_channels=1024,
                norm_cfg=norm_cfg,
                roi_feat_size=7,
                num_classes=11,  # 11 defect categories
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared4Conv1FCBBoxHead',
                in_channels=256,
                conv_out_channels=256,
                fc_out_channels=1024,
                norm_cfg=norm_cfg,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared4Conv1FCBBoxHead',
                in_channels=256,
                conv_out_channels=256,
                fc_out_channels=1024,
                norm_cfg=norm_cfg,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ], ))
# use ResNeSt-specific img_norm values (differ slightly from ImageNet defaults)
img_norm_cfg = dict(
    mean=[123.68, 116.779, 103.939], std=[58.393, 57.12, 57.375], to_rgb=True)
dataset_type = 'CocoDataset'
# Defect class names (pinyin labels).
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
# Training pipeline: multi-scale resize plus flip/photometric/shift/crop augs.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],  # pick one scale per sample
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
# Test pipeline: single scale, no flipping.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 400)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),  # no-op because flip=False
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
# Dataset splits; val and test share the same annotations.
data = dict(
    samples_per_gpu=8,  # smaller batch than sibling configs (heavier backbone)
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine annealing with a longer (5000-iter) warmup
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=5000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=40)
evaluation = dict(interval=5, metric='bbox')  # COCO bbox mAP every 5 epochs
# ---- file boundary (original diff hunk header: @@ -0,0 +1,143 @@) ----
# Cascade R-CNN (ResNeXt-101-64x4d from _base_) with SeesawLoss classification
# heads — intended for the long-tailed class distribution of the 11-class
# defect dataset (dsxw_dataset_v5).
_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py'
model = dict(
    neck=dict(
        type='FPN',  # alternatives tried: FPN / PAFPN
        in_channels=[256, 512, 1024, 2048],  # ResNeXt stage output channels
        out_channels=256,
        num_outs=5),
    roi_head=dict(
        # Three cascade stages; SeesawLoss replaces CrossEntropyLoss for
        # class-imbalance mitigation. target_stds tighten stage by stage.
        bbox_head=[
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,  # 11 defect categories
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='SeesawLoss',
                    p=0.8,   # mitigation factor
                    q=2.0,   # compensation factor
                    num_classes=11,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='SeesawLoss',
                    p=0.8,
                    q=2.0,
                    num_classes=11,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='SeesawLoss',
                    p=0.8,
                    q=2.0,
                    num_classes=11,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]))
dataset_type = 'CocoDataset'
# Defect class names (pinyin labels).
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
# Standard ImageNet mean/std normalization.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# Training pipeline: multi-scale resize plus flip/photometric/shift/crop augs.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],  # pick one scale per sample
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
# Test pipeline: single scale, no flipping.
# NOTE(review): img_scale is a bare tuple here while sibling configs use a
# one-element list — both are accepted by MultiScaleFlipAug.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(400, 400),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),  # no-op because flip=False
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
# Dataset splits; val and test share the same annotations.
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine annealing with linear warmup
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=3000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=5, metric='bbox')  # COCO bbox mAP every 5 epochs
# ---- file boundary (original diff hunk header: @@ -0,0 +1,170 @@) ----
# Cascade R-CNN with a Swin-Small transformer backbone, AdamW optimizer and a
# step LR schedule, fine-tuned on the 11-class defect dataset
# (dsxw_dataset_v5).
_base_ = [
    '../_base_/models/cascade_rcnn_r50_fpn.py',
    '../_base_/schedules/schedule_1x_original.py', '../_base_/default_runtime.py'
]
pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_small_patch4_window7_224.pth'
model = dict(
    backbone=dict(
        _delete_=True,  # drop the ResNet backbone inherited from _base_
        type='SwinTransformer',
        embed_dims=96,
        depths=[2, 2, 18, 2],       # Swin-S stage depths
        num_heads=[3, 6, 12, 24],
        window_size=7,
        mlp_ratio=4,
        qkv_bias=True,
        qk_scale=None,
        drop_rate=0.,
        attn_drop_rate=0.,
        drop_path_rate=0.2,         # stochastic depth
        patch_norm=True,
        out_indices=(0, 1, 2, 3),
        with_cp=False,              # no activation checkpointing
        convert_weights=True,       # convert official Swin weights to mmdet layout
        init_cfg=dict(type='Pretrained', checkpoint=pretrained)),
    neck=dict(
        type='FPN',  # alternatives tried: FPN / PAFPN
        in_channels=[96, 192, 384, 768],  # Swin-S stage output channels
        out_channels=256,
        num_outs=5),
    roi_head=dict(
        # Three cascade stages; target_stds tighten stage by stage.
        bbox_head=[
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,  # 11 defect categories
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]))
dataset_type = 'CocoDataset'
# Defect class names (pinyin labels).
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
# Standard ImageNet mean/std normalization.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# Training pipeline: multi-scale resize plus flip/photometric/shift/crop augs.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (480, 480)],  # pick one scale per sample
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
# Test pipeline: single scale, no flipping.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(400, 400),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),  # no-op because flip=False
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
# Dataset splits; val and test share the same annotations.
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer: AdamW (transformer convention) replaces the _base_ SGD.
optimizer_config = dict(grad_clip=None)
optimizer = dict(
    _delete_=True,
    type='AdamW',
    lr=0.0001,
    betas=(0.9, 0.999),
    weight_decay=0.05,
    paramwise_cfg=dict(
        custom_keys={
            # No weight decay on position embeddings or norm layers.
            'absolute_pos_embed': dict(decay_mult=0.),
            'relative_position_bias_table': dict(decay_mult=0.),
            'norm': dict(decay_mult=0.)
        }))
# learning policy: step decay at epochs 8 and 11 with linear warmup.
# NOTE(review): step=[8, 11] comes from a 12-epoch schedule but max_epochs is
# 60 here, so the LR is flat after epoch 11 — confirm this is intended.
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=3000,
    warmup_ratio=0.001,
    step=[8, 11])
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=5, metric='bbox')  # COCO bbox mAP every 5 epochs
# ---- file boundary (original diff hunk header: @@ -0,0 +1,150 @@) ----
# Cascade R-CNN with a PVTv2-B3 (Pyramid Vision Transformer v2) backbone and
# AdamW optimizer, fine-tuned on the 11-class defect dataset (dsxw_dataset_v5).
_base_ = [
    '../_base_/models/cascade_rcnn_r50_fpn.py',
    '../_base_/schedules/schedule_1x_original.py', '../_base_/default_runtime.py'
]
model = dict(
    backbone=dict(
        _delete_=True,  # drop the ResNet backbone inherited from _base_
        type='PyramidVisionTransformerV2',
        mlp_ratios=(4, 4, 4, 4),
        embed_dims=64,
        num_layers=[3, 4, 18, 3],  # PVTv2-B3 stage depths
        init_cfg=dict(checkpoint='https://github.com/whai362/PVT/'
                      'releases/download/v2/pvt_v2_b3.pth')),
    neck=dict(
        type='FPN',  # alternatives tried: FPN / PAFPN
        in_channels=[64, 128, 320, 512],  # PVTv2-B3 stage output channels
        out_channels=256,
        num_outs=5),
    roi_head=dict(
        # Three cascade stages; target_stds tighten stage by stage.
        bbox_head=[
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,  # 11 defect categories
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]))
dataset_type = 'CocoDataset'
# Defect class names (pinyin labels).
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
# Standard ImageNet mean/std normalization.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# Training pipeline: multi-scale resize plus flip/photometric/shift/crop augs.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (450, 450)],  # pick one scale per sample
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
# Test pipeline: single scale, no flipping.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(400, 400),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),  # no-op because flip=False
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
# Dataset splits; val and test share the same annotations.
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer: AdamW (transformer convention) replaces the _base_ SGD.
optimizer_config = dict(grad_clip=None)
optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.0001)
# learning policy: step decay at epochs 8 and 11 with linear warmup.
# NOTE(review): step=[8, 11] comes from a 12-epoch schedule but max_epochs is
# 60 here, so the LR is flat after epoch 11 — confirm this is intended.
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=3000,
    warmup_ratio=0.001,
    step=[8, 11])
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=5, metric='bbox')  # COCO bbox mAP every 5 epochs
# ---- file boundary (original diff hunk header: @@ -0,0 +1,137 @@) ----
# Cascade R-CNN (ResNeXt-101-64x4d from _base_) with standard cross-entropy
# heads, fine-tuned on version 6 of the defect dataset (dsxw_dataset_v6) —
# the sibling configs above use dsxw_dataset_v5.
_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py'
model = dict(
    neck=dict(
        type='FPN',  # alternatives tried: FPN / PAFPN
        in_channels=[256, 512, 1024, 2048],  # ResNeXt stage output channels
        out_channels=256,
        num_outs=5),
    roi_head=dict(
        # Three cascade stages; target_stds tighten stage by stage.
        bbox_head=[
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,  # 11 defect categories
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]))
dataset_type = 'CocoDataset'
# Defect class names (pinyin labels).
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
# Standard ImageNet mean/std normalization.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# Training pipeline: multi-scale resize plus flip/photometric/shift/crop augs.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],  # pick one scale per sample
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
# Test pipeline: single scale, no flipping.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 400)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),  # no-op because flip=False
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
# Dataset splits (v6 of the dataset); val and test share the same annotations.
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v6/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v6/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v6/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v6/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v6/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v6/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine annealing with linear warmup
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=3000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=5, metric='bbox')  # COCO bbox mAP every 5 epochs
# MMDetection config: Cascade R-CNN (ResNeXt-101 64x4d + FPN) fine-tuned on
# the 11-class "dsxw" AOI/SMT defect dataset (COCO-format annotations).
# Variant: CrossEntropyLoss classification heads, 60 epochs.
# (Diff-paste residue and the "@@" hunk header were stripped so this is
# valid Python again.)
_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py'
model = dict(
    neck=dict(
        type='FPN',  # alternatives tried: FPN, PAFPN
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    roi_head=dict(
        # Three cascade stages; target_stds tighten stage by stage so each
        # refinement head regresses against higher-quality proposals.
        bbox_head=[
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]))
dataset_type = 'CocoDataset'
# 11 defect categories (pinyin names).
classes = ('yiwei', 'loujian', 'celi', 'libei', 'fantie', 'lianxi',
           'duojian', 'shunjian', 'shaoxi', 'jiahan', 'yiwu')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',
        keep_ratio=True),
    # Flip augmentation in all three directions, 20% probability each.
    dict(type='RandomFlip', flip_ratio=[0.2, 0.2, 0.2],
         direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 400)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v6/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v6/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_test_0121_0130/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_test_0121_0130/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_test_0121_0130/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_test_0121_0130/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=3000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=5, metric='bbox')
# MMDetection config: Cascade R-CNN (ResNeXt-101 64x4d + FPN) on the
# 11-class "dsxw" AOI defect dataset — CrossEntropyLoss heads, 60 epochs.
# NOTE(review): this chunk is a byte-level duplicate of the previous config
# in this paste; kept as its own file. Diff-paste residue stripped so the
# module is valid Python.
_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py'
model = dict(
    neck=dict(
        type='FPN',  # alternatives tried: FPN, PAFPN
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    roi_head=dict(
        # Three cascade stages with progressively tighter regression stds.
        bbox_head=[
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]))
dataset_type = 'CocoDataset'
# 11 defect categories (pinyin names).
classes = ('yiwei', 'loujian', 'celi', 'libei', 'fantie', 'lianxi',
           'duojian', 'shunjian', 'shaoxi', 'jiahan', 'yiwu')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2, 0.2, 0.2],
         direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 400)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v6/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v6/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_test_0121_0130/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_test_0121_0130/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_test_0121_0130/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_test_0121_0130/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=3000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=5, metric='bbox')
# MMDetection config: Cascade R-CNN (ResNeXt-101 64x4d + FPN) on the
# 11-class "dsxw" AOI defect dataset.
# Variant: FocalLoss (sigmoid) classification in all three cascade stages,
# 60 epochs. Diff-paste residue stripped so the module is valid Python.
_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py'
model = dict(
    neck=dict(
        type='FPN',  # alternatives tried: FPN, PAFPN
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    roi_head=dict(
        # Three cascade stages with progressively tighter regression stds;
        # FocalLoss here is meant to counter class imbalance in the defects.
        bbox_head=[
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=2.0,
                    alpha=0.25,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=2.0,
                    alpha=0.25,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=2.0,
                    alpha=0.25,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]))
dataset_type = 'CocoDataset'
# 11 defect categories (pinyin names).
classes = ('yiwei', 'loujian', 'celi', 'libei', 'fantie', 'lianxi',
           'duojian', 'shunjian', 'shaoxi', 'jiahan', 'yiwu')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2, 0.2, 0.2],
         direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 400)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v6/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v6/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_test_0121_0130/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_test_0121_0130/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_test_0121_0130/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_test_0121_0130/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=3000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=5, metric='bbox')
# MMDetection config: Cascade R-CNN (ResNeXt-101 64x4d + FPN) on the
# 11-class "dsxw" AOI defect dataset.
# Variant: FocalLoss (sigmoid) classification, shortened schedule
# (40 epochs, vs. 60 in the sibling config). Diff-paste residue stripped
# so the module is valid Python.
_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py'
model = dict(
    neck=dict(
        type='FPN',  # alternatives tried: FPN, PAFPN
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    roi_head=dict(
        # Three cascade stages with progressively tighter regression stds.
        bbox_head=[
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=2.0,
                    alpha=0.25,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=2.0,
                    alpha=0.25,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=2.0,
                    alpha=0.25,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]))
dataset_type = 'CocoDataset'
# 11 defect categories (pinyin names).
classes = ('yiwei', 'loujian', 'celi', 'libei', 'fantie', 'lianxi',
           'duojian', 'shunjian', 'shaoxi', 'jiahan', 'yiwu')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2, 0.2, 0.2],
         direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 400)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v6/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v6/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_test_0121_0130/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_test_0121_0130/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_test_0121_0130/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_test_0121_0130/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=3000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=40)
evaluation = dict(interval=5, metric='bbox')
# MMDetection config: YOLOX-L (CSPDarknet, deepen/widen 1.0) on the
# 11-class "dsxw" AOI defect dataset, with Mosaic/MixUp training and the
# standard YOLOX hook set. Diff-paste residue stripped so the module is
# valid Python.
_base_ = ['../_base_/schedules/schedule_1x_original.py',
          '../_base_/default_runtime.py']
# model settings
model = dict(
    type='YOLOX',
    backbone=dict(type='CSPDarknet', deepen_factor=1.0, widen_factor=1.0),
    neck=dict(
        type='YOLOXPAFPN',
        in_channels=[256, 512, 1024],
        out_channels=256,
        num_csp_blocks=3),
    bbox_head=dict(
        type='YOLOXHead', num_classes=11, in_channels=256, feat_channels=256),
    train_cfg=dict(assigner=dict(type='SimOTAAssigner', center_radius=2.5)),
    # In order to align the source code, the threshold of the val phase is
    # 0.01, and the threshold of the test phase is 0.001.
    test_cfg=dict(score_thr=0.01, nms=dict(type='nms', iou_threshold=0.65)))
# dataset settings
dataset_type = 'CocoDataset'
# 11 defect categories (pinyin names).
classes = ('yiwei', 'loujian', 'celi', 'libei', 'fantie', 'lianxi',
           'duojian', 'shunjian', 'shaoxi', 'jiahan', 'yiwu')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
img_scale = (448, 448)
train_pipeline = [
    # Mosaic/MixUp operate on the mixed dataset wrapper below.
    dict(type='Mosaic', img_scale=img_scale, pad_val=0),
    dict(
        type='MixUp',
        img_scale=img_scale,
        ratio_range=(0.8, 1.6),
        pad_val=114.0),
    dict(
        type='RandomAffine',
        scaling_ratio_range=(0.1, 2),
        border=(-img_scale[0] // 2, -img_scale[1] // 2)),
    dict(
        type='PhotoMetricDistortion',
        brightness_delta=32,
        contrast_range=(0.5, 1.5),
        saturation_range=(0.5, 1.5),
        hue_delta=18),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Resize', keep_ratio=True),
    dict(type='Pad', size_divisor=32),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]
train_dataset = dict(
    type='MultiImageMixDataset',
    dataset=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v6/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v6/dsxw_train/annotations/train.json',
        pipeline=[
            dict(type='LoadImageFromFile', to_float32=True),
            dict(type='LoadAnnotations', with_bbox=True)
        ],
        # Keep images without GT so Mosaic always has candidates.
        filter_empty_gt=False,
    ),
    pipeline=train_pipeline,
    dynamic_scale=img_scale)
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=img_scale,
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Pad', size_divisor=32),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='DefaultFormatBundle'),
            dict(type='Collect', keys=['img'])
        ])
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=train_dataset,
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_test_0121_0130/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_test_0121_0130/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_test_0121_0130/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_test_0121_0130/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
# default 8 gpu
optimizer = dict(
    type='SGD',
    lr=0.01,
    momentum=0.9,
    weight_decay=5e-4,
    nesterov=True,
    # YOLOX convention: no weight decay on norm layers or biases.
    paramwise_cfg=dict(norm_decay_mult=0., bias_decay_mult=0.))
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
    _delete_=True,  # drop the schedule inherited from _base_
    policy='YOLOX',
    warmup='exp',
    by_epoch=False,
    warmup_by_epoch=True,
    warmup_ratio=1,
    warmup_iters=5,  # 5 epoch
    num_last_epochs=15,
    min_lr_ratio=0.05)
runner = dict(type='EpochBasedRunner', max_epochs=60)
resume_from = None
interval = 5
custom_hooks = [
    # Disable Mosaic/MixUp and add L1 loss for the last 15 epochs.
    dict(type='YOLOXModeSwitchHook', num_last_epochs=15, priority=48),
    dict(
        type='SyncRandomSizeHook',
        ratio_range=(11, 17),
        img_scale=img_scale,
        priority=48),
    dict(
        type='SyncNormHook',
        num_last_epochs=15,
        interval=interval,
        priority=48),
    dict(type='ExpMomentumEMAHook', resume_from=resume_from, priority=49)
]
checkpoint_config = dict(interval=interval)
evaluation = dict(interval=interval, metric='bbox')
log_config = dict(interval=50)
# MMDetection config: Cascade R-CNN (ResNeXt-101 64x4d + FPN) on the
# 11-class "dsxw" AOI defect dataset.
# Variant: FocalLoss heads, 40 epochs, trained on dataset revision v5
# (siblings use v6/v7). Diff-paste residue stripped so the module is
# valid Python.
_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py'
model = dict(
    neck=dict(
        type='FPN',  # alternatives tried: FPN, PAFPN
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    roi_head=dict(
        # Three cascade stages with progressively tighter regression stds.
        bbox_head=[
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=2.0,
                    alpha=0.25,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=2.0,
                    alpha=0.25,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=2.0,
                    alpha=0.25,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]))
dataset_type = 'CocoDataset'
# 11 defect categories (pinyin names).
classes = ('yiwei', 'loujian', 'celi', 'libei', 'fantie', 'lianxi',
           'duojian', 'shunjian', 'shaoxi', 'jiahan', 'yiwu')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2, 0.2, 0.2],
         direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 400)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v5/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_test_0121_0130/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_test_0121_0130/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_test_0121_0130/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_test_0121_0130/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=3000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=40)
evaluation = dict(interval=5, metric='bbox')
# MMDetection config: Cascade R-CNN (ResNeXt-101 64x4d + FPN) on the
# 11-class "dsxw" AOI defect dataset.
# Variant: FocalLoss heads, 40 epochs, trained on dataset revision v7
# (siblings use v5/v6). Diff-paste residue stripped so the module is
# valid Python.
_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py'
model = dict(
    neck=dict(
        type='FPN',  # alternatives tried: FPN, PAFPN
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    roi_head=dict(
        # Three cascade stages with progressively tighter regression stds.
        bbox_head=[
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=2.0,
                    alpha=0.25,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=2.0,
                    alpha=0.25,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=2.0,
                    alpha=0.25,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]))
dataset_type = 'CocoDataset'
# 11 defect categories (pinyin names).
classes = ('yiwei', 'loujian', 'celi', 'libei', 'fantie', 'lianxi',
           'duojian', 'shunjian', 'shaoxi', 'jiahan', 'yiwu')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2, 0.2, 0.2],
         direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 400)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v7/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v7/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_test_0121_0130/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_test_0121_0130/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_test_0121_0130/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_test_0121_0130/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=3000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=40)
evaluation = dict(interval=5, metric='bbox')
# ----- new config file (diff hunk: @@ -0,0 +1,155 @@) -----
# Cascade R-CNN (ResNeXt-101-64x4d) with a CARAFE FPN neck and Focal Loss
# classification heads, trained on the 11-class "dsxw" defect dataset (v7).
# NOTE: every line of the original carried " | |||||" diff-viewer residue,
# which made the file invalid Python; that residue is stripped here.
_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py'
model = dict(
    neck=dict(
        type='FPN_CARAFE',  # content-aware upsampling instead of nearest
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5,
        start_level=0,
        end_level=-1,
        norm_cfg=None,
        act_cfg=None,
        order=('conv', 'norm', 'act'),
        upsample_cfg=dict(
            type='carafe',
            up_kernel=5,
            up_group=1,
            encoder_kernel=3,
            encoder_dilation=1,
            compressed_channels=64)),
    roi_head=dict(
        # Three cascade stages; only target_stds tighten stage by stage.
        bbox_head=[
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=2.0,
                    alpha=0.25,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=2.0,
                    alpha=0.25,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=2.0,
                    alpha=0.25,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]))
# Dataset: COCO-format annotations with 11 defect classes.
dataset_type = 'CocoDataset'
classes = ('yiwei', 'loujian', 'celi', 'libei', 'fantie', 'lianxi',
           'duojian', 'shunjian', 'shaoxi', 'jiahan', 'yiwu')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2, 0.2, 0.2],
         direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 400)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v7/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v7/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_test_0121_0130/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_test_0121_0130/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_test_0121_0130/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_test_0121_0130/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine annealing with a 3000-iter linear warmup.
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=3000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=40)
evaluation = dict(interval=5, metric='bbox')
# ----- new config file (diff hunk: @@ -0,0 +1,137 @@) -----
# Cascade R-CNN (ResNeXt-101-64x4d) with a plain FPN neck and softmax
# cross-entropy heads, trained on the 11-class "dsxw" defect dataset (v8).
# FIXES: the test-set ann_file was missing a '/' between "datasets" and
# "dsxw_dataset_v8", so test annotations could never be found.  Also strips
# the " | |||||" diff-viewer residue that made every line invalid Python.
_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py'
model = dict(
    neck=dict(
        type='FPN',  # alternatives tried: FPN, PAFPN
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    roi_head=dict(
        # Three cascade stages; only target_stds tighten stage by stage.
        bbox_head=[
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]))
# Dataset: COCO-format annotations with 11 defect classes.
dataset_type = 'CocoDataset'
classes = ('yiwei', 'loujian', 'celi', 'libei', 'fantie', 'lianxi',
           'duojian', 'shunjian', 'shaoxi', 'jiahan', 'yiwu')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2, 0.2, 0.2],
         direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 400)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_test/images/',
        classes=classes,
        # FIX: was '.../datasetsdsxw_dataset_v8/...' (missing '/').
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine annealing with a 3000-iter linear warmup.
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=3000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=40)
evaluation = dict(interval=5, metric='bbox')
# ----- new config file (diff hunk: @@ -0,0 +1,143 @@) -----
# Cascade R-CNN (ResNeXt-101-64x4d) with a plain FPN neck and Focal Loss
# heads, trained on the 11-class "dsxw" defect dataset (v8).
# FIXES: the test-set ann_file was missing a '/' between "datasets" and
# "dsxw_dataset_v8", so test annotations could never be found.  Also strips
# the " | |||||" diff-viewer residue that made every line invalid Python.
_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py'
model = dict(
    neck=dict(
        type='FPN',  # alternatives tried: FPN, PAFPN
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    roi_head=dict(
        # Three cascade stages; only target_stds tighten stage by stage.
        bbox_head=[
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=2.0,
                    alpha=0.25,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=2.0,
                    alpha=0.25,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=2.0,
                    alpha=0.25,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]))
# Dataset: COCO-format annotations with 11 defect classes.
dataset_type = 'CocoDataset'
classes = ('yiwei', 'loujian', 'celi', 'libei', 'fantie', 'lianxi',
           'duojian', 'shunjian', 'shaoxi', 'jiahan', 'yiwu')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2, 0.2, 0.2],
         direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 400)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_test/images/',
        classes=classes,
        # FIX: was '.../datasetsdsxw_dataset_v8/...' (missing '/').
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine annealing with a 3000-iter linear warmup.
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=3000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=40)
evaluation = dict(interval=5, metric='bbox')
# ----- new config file (diff hunk: @@ -0,0 +1,155 @@) -----
# Cascade R-CNN (ResNeXt-101-64x4d) with a CARAFE FPN neck and Focal Loss
# heads, trained on the 11-class "dsxw" defect dataset (v8); longer schedule
# (60 epochs, 5000-iter warmup) than the v7 run.
# NOTE: every line of the original carried " | |||||" diff-viewer residue,
# which made the file invalid Python; that residue is stripped here.
_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py'
model = dict(
    neck=dict(
        type='FPN_CARAFE',  # content-aware upsampling instead of nearest
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5,
        start_level=0,
        end_level=-1,
        norm_cfg=None,
        act_cfg=None,
        order=('conv', 'norm', 'act'),
        upsample_cfg=dict(
            type='carafe',
            up_kernel=5,
            up_group=1,
            encoder_kernel=3,
            encoder_dilation=1,
            compressed_channels=64)),
    roi_head=dict(
        # Three cascade stages; only target_stds tighten stage by stage.
        bbox_head=[
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=2.0,
                    alpha=0.25,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=2.0,
                    alpha=0.25,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=2.0,
                    alpha=0.25,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]))
# Dataset: COCO-format annotations with 11 defect classes.
dataset_type = 'CocoDataset'
classes = ('yiwei', 'loujian', 'celi', 'libei', 'fantie', 'lianxi',
           'duojian', 'shunjian', 'shaoxi', 'jiahan', 'yiwu')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2, 0.2, 0.2],
         direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 400)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine annealing with a 5000-iter linear warmup.
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=5000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=5, metric='bbox')
# ----- new config file (diff hunk: @@ -0,0 +1,155 @@) -----
# Cascade R-CNN (ResNeXt-101-64x4d) with a CARAFE FPN neck and Seesaw Loss
# (for long-tailed class distribution), trained on the 11-class "dsxw"
# defect dataset (v8).
# FIXES: the test-set ann_file was missing a '/' between "datasets" and
# "dsxw_dataset_v8", so test annotations could never be found.  Also strips
# the " | |||||" diff-viewer residue that made every line invalid Python.
_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py'
model = dict(
    neck=dict(
        type='FPN_CARAFE',  # content-aware upsampling instead of nearest
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5,
        start_level=0,
        end_level=-1,
        norm_cfg=None,
        act_cfg=None,
        order=('conv', 'norm', 'act'),
        upsample_cfg=dict(
            type='carafe',
            up_kernel=5,
            up_group=1,
            encoder_kernel=3,
            encoder_dilation=1,
            compressed_channels=64)),
    roi_head=dict(
        # Three cascade stages; only target_stds tighten stage by stage.
        bbox_head=[
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='SeesawLoss',
                    p=0.8,
                    q=2.0,
                    num_classes=11,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='SeesawLoss',
                    p=0.8,
                    q=2.0,
                    num_classes=11,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='SeesawLoss',
                    p=0.8,
                    q=2.0,
                    num_classes=11,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]))
# Dataset: COCO-format annotations with 11 defect classes.
dataset_type = 'CocoDataset'
classes = ('yiwei', 'loujian', 'celi', 'libei', 'fantie', 'lianxi',
           'duojian', 'shunjian', 'shaoxi', 'jiahan', 'yiwu')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2, 0.2, 0.2],
         direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 400)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_test/images/',
        classes=classes,
        # FIX: was '.../datasetsdsxw_dataset_v8/...' (missing '/').
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine annealing with a 5000-iter linear warmup.
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=5000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=5, metric='bbox')
# ----- new config file (diff hunk: @@ -0,0 +1,169 @@) -----
# Cascade R-CNN with a ResNeSt-101 backbone (replacing the base config's
# ResNeXt), CARAFE FPN neck and Focal Loss heads, on the 11-class "dsxw"
# defect dataset (v8).  Batch size is halved (8/GPU) for the heavier backbone.
# FIXES: the test-set ann_file was missing a '/' between "datasets" and
# "dsxw_dataset_v8", so test annotations could never be found.  Also strips
# the " | |||||" diff-viewer residue that made every line invalid Python.
_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py'
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    backbone=dict(
        type='ResNeSt',
        stem_channels=128,
        depth=101,
        radix=2,
        reduction_factor=4,
        avg_down_stride=True,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        init_cfg=dict(type='Pretrained', checkpoint='open-mmlab://resnest101')),
    neck=dict(
        type='FPN_CARAFE',  # content-aware upsampling instead of nearest
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5,
        start_level=0,
        end_level=-1,
        norm_cfg=None,
        act_cfg=None,
        order=('conv', 'norm', 'act'),
        upsample_cfg=dict(
            type='carafe',
            up_kernel=5,
            up_group=1,
            encoder_kernel=3,
            encoder_dilation=1,
            compressed_channels=64)),
    roi_head=dict(
        # Three cascade stages; only target_stds tighten stage by stage.
        bbox_head=[
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=2.0,
                    alpha=0.25,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=2.0,
                    alpha=0.25,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=2.0,
                    alpha=0.25,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]))
# Dataset: COCO-format annotations with 11 defect classes.
dataset_type = 'CocoDataset'
classes = ('yiwei', 'loujian', 'celi', 'libei', 'fantie', 'lianxi',
           'duojian', 'shunjian', 'shaoxi', 'jiahan', 'yiwu')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2, 0.2, 0.2],
         direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 400)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=8,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_test/images/',
        classes=classes,
        # FIX: was '.../datasetsdsxw_dataset_v8/...' (missing '/').
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine annealing with a 5000-iter linear warmup.
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=5000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=5, metric='bbox')
@@ -0,0 +1,158 @@ | |||||
_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py' | |||||
model = dict( | |||||
backbone=dict( | |||||
dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), | |||||
stage_with_dcn=(False, True, True, True)), | |||||
neck=dict( | |||||
type='FPN_CARAFE', | |||||
in_channels=[256, 512, 1024, 2048], | |||||
out_channels=256, | |||||
num_outs=5, | |||||
start_level=0, | |||||
end_level=-1, | |||||
norm_cfg=None, | |||||
act_cfg=None, | |||||
order=('conv', 'norm', 'act'), | |||||
upsample_cfg=dict( | |||||
type='carafe', | |||||
up_kernel=5, | |||||
up_group=1, | |||||
encoder_kernel=3, | |||||
encoder_dilation=1, | |||||
compressed_channels=64)), | |||||
roi_head=dict( | |||||
bbox_head=[ | |||||
dict( | |||||
type='Shared2FCBBoxHead', | |||||
in_channels=256, | |||||
fc_out_channels=1024, | |||||
roi_feat_size=7, | |||||
num_classes=11, | |||||
bbox_coder=dict( | |||||
type='DeltaXYWHBBoxCoder', | |||||
target_means=[0., 0., 0., 0.], | |||||
target_stds=[0.1, 0.1, 0.2, 0.2]), | |||||
reg_class_agnostic=True, | |||||
loss_cls=dict( | |||||
type='FocalLoss', | |||||
use_sigmoid=True, | |||||
gamma=2.0, | |||||
alpha=0.25, | |||||
loss_weight=1.0), | |||||
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, | |||||
loss_weight=1.0)), | |||||
dict( | |||||
type='Shared2FCBBoxHead', | |||||
in_channels=256, | |||||
fc_out_channels=1024, | |||||
roi_feat_size=7, | |||||
num_classes=11, | |||||
bbox_coder=dict( | |||||
type='DeltaXYWHBBoxCoder', | |||||
target_means=[0., 0., 0., 0.], | |||||
target_stds=[0.05, 0.05, 0.1, 0.1]), | |||||
reg_class_agnostic=True, | |||||
loss_cls=dict( | |||||
type='FocalLoss', | |||||
use_sigmoid=True, | |||||
gamma=2.0, | |||||
alpha=0.25, | |||||
loss_weight=1.0), | |||||
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, | |||||
loss_weight=1.0)), | |||||
dict( | |||||
type='Shared2FCBBoxHead', | |||||
in_channels=256, | |||||
fc_out_channels=1024, | |||||
roi_feat_size=7, | |||||
num_classes=11, | |||||
bbox_coder=dict( | |||||
type='DeltaXYWHBBoxCoder', | |||||
target_means=[0., 0., 0., 0.], | |||||
target_stds=[0.033, 0.033, 0.067, 0.067]), | |||||
reg_class_agnostic=True, | |||||
loss_cls=dict( | |||||
type='FocalLoss', | |||||
use_sigmoid=True, | |||||
gamma=2.0, | |||||
alpha=0.25, | |||||
loss_weight=1.0), | |||||
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) | |||||
])) | |||||
dataset_type = 'CocoDataset' | |||||
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu') | |||||
img_norm_cfg = dict( | |||||
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) | |||||
# Training data pipeline: load -> multi-scale resize -> flip/photometric
# augmentation -> shift/crop -> normalize -> pad -> tensor packing.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        # 'value' mode: pick one of the listed scales per sample.
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',
        keep_ratio=True),
    # 20% chance each of horizontal, vertical and diagonal flip
    # (remaining 40%: no flip).
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    # Pad to a multiple of 32 so FPN downsampling divides evenly.
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [ | |||||
dict(type='LoadImageFromFile'), | |||||
dict( | |||||
type='MultiScaleFlipAug', | |||||
img_scale=[(400, 400)], | |||||
flip=False, | |||||
transforms=[ | |||||
dict(type='Resize', keep_ratio=True), | |||||
dict(type='RandomFlip'), | |||||
dict(type='Normalize', **img_norm_cfg), | |||||
dict(type='Pad', size_divisor=32), | |||||
dict(type='ImageToTensor', keys=['img']), | |||||
dict(type='Collect', keys=['img']), | |||||
]) | |||||
] | |||||
# Dataset config: COCO-format train/val/test splits of dsxw_dataset_v8.
data = dict(
    samples_per_gpu=16,  # batch size per GPU
    workers_per_gpu=8,   # dataloader workers per GPU
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_test/images/',
        classes=classes,
        # Fixed: path was missing the '/' between 'datasets' and
        # 'dsxw_dataset_v8' ('datasetsdsxw_dataset_v8'), so the test
        # annotation file could never be found. Now matches val.ann_file.
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer | |||||
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) | |||||
optimizer_config = dict(grad_clip=None) | |||||
# learning policy | |||||
lr_config = dict( | |||||
policy='CosineAnnealing', | |||||
warmup='linear', | |||||
warmup_iters=5000, | |||||
warmup_ratio=1.0 / 10, | |||||
min_lr_ratio=1e-5) | |||||
runner = dict(type='EpochBasedRunner', max_epochs=60) | |||||
evaluation = dict(interval=5, metric='bbox') |
@@ -0,0 +1,158 @@ | |||||
_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py' | |||||
model = dict( | |||||
backbone=dict( | |||||
dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False), | |||||
stage_with_dcn=(False, True, True, True)), | |||||
neck=dict( | |||||
type='FPN_CARAFE', | |||||
in_channels=[256, 512, 1024, 2048], | |||||
out_channels=256, | |||||
num_outs=5, | |||||
start_level=0, | |||||
end_level=-1, | |||||
norm_cfg=None, | |||||
act_cfg=None, | |||||
order=('conv', 'norm', 'act'), | |||||
upsample_cfg=dict( | |||||
type='carafe', | |||||
up_kernel=5, | |||||
up_group=1, | |||||
encoder_kernel=3, | |||||
encoder_dilation=1, | |||||
compressed_channels=64)), | |||||
roi_head=dict( | |||||
bbox_head=[ | |||||
dict( | |||||
type='Shared2FCBBoxHead', | |||||
in_channels=256, | |||||
fc_out_channels=1024, | |||||
roi_feat_size=7, | |||||
num_classes=11, | |||||
bbox_coder=dict( | |||||
type='DeltaXYWHBBoxCoder', | |||||
target_means=[0., 0., 0., 0.], | |||||
target_stds=[0.1, 0.1, 0.2, 0.2]), | |||||
reg_class_agnostic=True, | |||||
loss_cls=dict( | |||||
type='FocalLoss', | |||||
use_sigmoid=True, | |||||
gamma=2.0, | |||||
alpha=0.25, | |||||
loss_weight=1.0), | |||||
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, | |||||
loss_weight=1.0)), | |||||
dict( | |||||
type='Shared2FCBBoxHead', | |||||
in_channels=256, | |||||
fc_out_channels=1024, | |||||
roi_feat_size=7, | |||||
num_classes=11, | |||||
bbox_coder=dict( | |||||
type='DeltaXYWHBBoxCoder', | |||||
target_means=[0., 0., 0., 0.], | |||||
target_stds=[0.05, 0.05, 0.1, 0.1]), | |||||
reg_class_agnostic=True, | |||||
loss_cls=dict( | |||||
type='FocalLoss', | |||||
use_sigmoid=True, | |||||
gamma=2.0, | |||||
alpha=0.25, | |||||
loss_weight=1.0), | |||||
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, | |||||
loss_weight=1.0)), | |||||
dict( | |||||
type='Shared2FCBBoxHead', | |||||
in_channels=256, | |||||
fc_out_channels=1024, | |||||
roi_feat_size=7, | |||||
num_classes=11, | |||||
bbox_coder=dict( | |||||
type='DeltaXYWHBBoxCoder', | |||||
target_means=[0., 0., 0., 0.], | |||||
target_stds=[0.033, 0.033, 0.067, 0.067]), | |||||
reg_class_agnostic=True, | |||||
loss_cls=dict( | |||||
type='FocalLoss', | |||||
use_sigmoid=True, | |||||
gamma=2.0, | |||||
alpha=0.25, | |||||
loss_weight=1.0), | |||||
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) | |||||
])) | |||||
dataset_type = 'CocoDataset' | |||||
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu') | |||||
img_norm_cfg = dict( | |||||
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) | |||||
train_pipeline = [ | |||||
dict(type='LoadImageFromFile'), | |||||
dict(type='LoadAnnotations', with_bbox=True), | |||||
dict( | |||||
type='Resize', | |||||
img_scale=[(400, 400), (500, 500)], | |||||
multiscale_mode='value', | |||||
keep_ratio=True), | |||||
dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']), | |||||
dict(type='BrightnessTransform', level=5, prob=0.5), | |||||
dict(type='ContrastTransform', level=5, prob=0.5), | |||||
dict(type='RandomShift', shift_ratio=0.5), | |||||
dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8), | |||||
dict(type='Normalize', **img_norm_cfg), | |||||
dict(type='Pad', size_divisor=32), | |||||
dict(type='DefaultFormatBundle'), | |||||
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), | |||||
] | |||||
test_pipeline = [ | |||||
dict(type='LoadImageFromFile'), | |||||
dict( | |||||
type='MultiScaleFlipAug', | |||||
img_scale=[(400, 400)], | |||||
flip=False, | |||||
transforms=[ | |||||
dict(type='Resize', keep_ratio=True), | |||||
dict(type='RandomFlip'), | |||||
dict(type='Normalize', **img_norm_cfg), | |||||
dict(type='Pad', size_divisor=32), | |||||
dict(type='ImageToTensor', keys=['img']), | |||||
dict(type='Collect', keys=['img']), | |||||
]) | |||||
] | |||||
# Dataset config: COCO-format train/val/test splits of dsxw_dataset_v8.
data = dict(
    samples_per_gpu=16,  # batch size per GPU
    workers_per_gpu=8,   # dataloader workers per GPU
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_test/images/',
        classes=classes,
        # Fixed: path was missing the '/' between 'datasets' and
        # 'dsxw_dataset_v8' ('datasetsdsxw_dataset_v8'), so the test
        # annotation file could never be found. Now matches val.ann_file.
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer | |||||
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) | |||||
optimizer_config = dict(grad_clip=None) | |||||
# learning policy | |||||
lr_config = dict( | |||||
policy='CosineAnnealing', | |||||
warmup='linear', | |||||
warmup_iters=5000, | |||||
warmup_ratio=1.0 / 10, | |||||
min_lr_ratio=1e-5) | |||||
runner = dict(type='EpochBasedRunner', max_epochs=60) | |||||
evaluation = dict(interval=5, metric='bbox') |
@@ -0,0 +1,155 @@ | |||||
_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py' | |||||
model = dict( | |||||
neck=dict( | |||||
type='FPN_CARAFE', | |||||
in_channels=[256, 512, 1024, 2048], | |||||
out_channels=256, | |||||
num_outs=5, | |||||
start_level=0, | |||||
end_level=-1, | |||||
norm_cfg=None, | |||||
act_cfg=None, | |||||
order=('conv', 'norm', 'act'), | |||||
upsample_cfg=dict( | |||||
type='carafe', | |||||
up_kernel=5, | |||||
up_group=1, | |||||
encoder_kernel=3, | |||||
encoder_dilation=1, | |||||
compressed_channels=64)), | |||||
roi_head=dict( | |||||
bbox_head=[ | |||||
dict( | |||||
type='Shared2FCBBoxHead', | |||||
in_channels=256, | |||||
fc_out_channels=1024, | |||||
roi_feat_size=7, | |||||
num_classes=1, | |||||
bbox_coder=dict( | |||||
type='DeltaXYWHBBoxCoder', | |||||
target_means=[0., 0., 0., 0.], | |||||
target_stds=[0.1, 0.1, 0.2, 0.2]), | |||||
reg_class_agnostic=True, | |||||
loss_cls=dict( | |||||
type='FocalLoss', | |||||
use_sigmoid=True, | |||||
gamma=2.0, | |||||
alpha=0.25, | |||||
loss_weight=1.0), | |||||
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, | |||||
loss_weight=1.0)), | |||||
dict( | |||||
type='Shared2FCBBoxHead', | |||||
in_channels=256, | |||||
fc_out_channels=1024, | |||||
roi_feat_size=7, | |||||
num_classes=1, | |||||
bbox_coder=dict( | |||||
type='DeltaXYWHBBoxCoder', | |||||
target_means=[0., 0., 0., 0.], | |||||
target_stds=[0.05, 0.05, 0.1, 0.1]), | |||||
reg_class_agnostic=True, | |||||
loss_cls=dict( | |||||
type='FocalLoss', | |||||
use_sigmoid=True, | |||||
gamma=2.0, | |||||
alpha=0.25, | |||||
loss_weight=1.0), | |||||
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, | |||||
loss_weight=1.0)), | |||||
dict( | |||||
type='Shared2FCBBoxHead', | |||||
in_channels=256, | |||||
fc_out_channels=1024, | |||||
roi_feat_size=7, | |||||
num_classes=1, | |||||
bbox_coder=dict( | |||||
type='DeltaXYWHBBoxCoder', | |||||
target_means=[0., 0., 0., 0.], | |||||
target_stds=[0.033, 0.033, 0.067, 0.067]), | |||||
reg_class_agnostic=True, | |||||
loss_cls=dict( | |||||
type='FocalLoss', | |||||
use_sigmoid=True, | |||||
gamma=2.0, | |||||
alpha=0.25, | |||||
loss_weight=1.0), | |||||
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) | |||||
])) | |||||
dataset_type = 'CocoDataset' | |||||
classes = ('yiwei') | |||||
img_norm_cfg = dict( | |||||
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) | |||||
train_pipeline = [ | |||||
dict(type='LoadImageFromFile'), | |||||
dict(type='LoadAnnotations', with_bbox=True), | |||||
dict( | |||||
type='Resize', | |||||
img_scale=[(400, 400), (500, 500)], | |||||
multiscale_mode='value', | |||||
keep_ratio=True), | |||||
dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']), | |||||
dict(type='BrightnessTransform', level=5, prob=0.5), | |||||
dict(type='ContrastTransform', level=5, prob=0.5), | |||||
dict(type='RandomShift', shift_ratio=0.5), | |||||
dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8), | |||||
dict(type='Normalize', **img_norm_cfg), | |||||
dict(type='Pad', size_divisor=32), | |||||
dict(type='DefaultFormatBundle'), | |||||
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), | |||||
] | |||||
test_pipeline = [ | |||||
dict(type='LoadImageFromFile'), | |||||
dict( | |||||
type='MultiScaleFlipAug', | |||||
img_scale=[(400, 400)], | |||||
flip=False, | |||||
transforms=[ | |||||
dict(type='Resize', keep_ratio=True), | |||||
dict(type='RandomFlip'), | |||||
dict(type='Normalize', **img_norm_cfg), | |||||
dict(type='Pad', size_divisor=32), | |||||
dict(type='ImageToTensor', keys=['img']), | |||||
dict(type='Collect', keys=['img']), | |||||
]) | |||||
] | |||||
data = dict( | |||||
samples_per_gpu=16, | |||||
workers_per_gpu=8, | |||||
train=dict( | |||||
type=dataset_type, | |||||
img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v9/dsxw_train/images/', | |||||
classes=classes, | |||||
ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v9/dsxw_train/annotations/train.json', | |||||
pipeline=train_pipeline), | |||||
val=dict( | |||||
type=dataset_type, | |||||
img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v9/dsxw_test/images/', | |||||
classes=classes, | |||||
ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v9/dsxw_test/annotations/test.json', | |||||
pipeline=test_pipeline), | |||||
test=dict( | |||||
type=dataset_type, | |||||
img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v9/dsxw_test/images/', | |||||
classes=classes, | |||||
ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v9/dsxw_test/annotations/test.json', | |||||
pipeline=test_pipeline)) | |||||
# optimizer | |||||
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) | |||||
optimizer_config = dict(grad_clip=None) | |||||
# learning policy | |||||
lr_config = dict( | |||||
policy='CosineAnnealing', | |||||
warmup='linear', | |||||
warmup_iters=1000, | |||||
warmup_ratio=1.0 / 10, | |||||
min_lr_ratio=1e-5) | |||||
runner = dict(type='EpochBasedRunner', max_epochs=60) | |||||
evaluation = dict(interval=5, metric='bbox') |
@@ -0,0 +1,155 @@ | |||||
_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py' | |||||
model = dict( | |||||
neck=dict( | |||||
type='FPN_CARAFE', | |||||
in_channels=[256, 512, 1024, 2048], | |||||
out_channels=256, | |||||
num_outs=5, | |||||
start_level=0, | |||||
end_level=-1, | |||||
norm_cfg=None, | |||||
act_cfg=None, | |||||
order=('conv', 'norm', 'act'), | |||||
upsample_cfg=dict( | |||||
type='carafe', | |||||
up_kernel=5, | |||||
up_group=1, | |||||
encoder_kernel=3, | |||||
encoder_dilation=1, | |||||
compressed_channels=64)), | |||||
roi_head=dict( | |||||
bbox_head=[ | |||||
dict( | |||||
type='Shared2FCBBoxHead', | |||||
in_channels=256, | |||||
fc_out_channels=1024, | |||||
roi_feat_size=7, | |||||
num_classes=1, | |||||
bbox_coder=dict( | |||||
type='DeltaXYWHBBoxCoder', | |||||
target_means=[0., 0., 0., 0.], | |||||
target_stds=[0.1, 0.1, 0.2, 0.2]), | |||||
reg_class_agnostic=True, | |||||
loss_cls=dict( | |||||
type='FocalLoss', | |||||
use_sigmoid=True, | |||||
gamma=2.0, | |||||
alpha=0.25, | |||||
loss_weight=1.0), | |||||
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, | |||||
loss_weight=1.0)), | |||||
dict( | |||||
type='Shared2FCBBoxHead', | |||||
in_channels=256, | |||||
fc_out_channels=1024, | |||||
roi_feat_size=7, | |||||
num_classes=1, | |||||
bbox_coder=dict( | |||||
type='DeltaXYWHBBoxCoder', | |||||
target_means=[0., 0., 0., 0.], | |||||
target_stds=[0.05, 0.05, 0.1, 0.1]), | |||||
reg_class_agnostic=True, | |||||
loss_cls=dict( | |||||
type='FocalLoss', | |||||
use_sigmoid=True, | |||||
gamma=2.0, | |||||
alpha=0.25, | |||||
loss_weight=1.0), | |||||
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, | |||||
loss_weight=1.0)), | |||||
dict( | |||||
type='Shared2FCBBoxHead', | |||||
in_channels=256, | |||||
fc_out_channels=1024, | |||||
roi_feat_size=7, | |||||
num_classes=1, | |||||
bbox_coder=dict( | |||||
type='DeltaXYWHBBoxCoder', | |||||
target_means=[0., 0., 0., 0.], | |||||
target_stds=[0.033, 0.033, 0.067, 0.067]), | |||||
reg_class_agnostic=True, | |||||
loss_cls=dict( | |||||
type='FocalLoss', | |||||
use_sigmoid=True, | |||||
gamma=2.0, | |||||
alpha=0.25, | |||||
loss_weight=1.0), | |||||
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) | |||||
])) | |||||
dataset_type = 'CocoDataset' | |||||
classes = ('yiwei') | |||||
img_norm_cfg = dict( | |||||
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) | |||||
train_pipeline = [ | |||||
dict(type='LoadImageFromFile'), | |||||
dict(type='LoadAnnotations', with_bbox=True), | |||||
dict( | |||||
type='Resize', | |||||
img_scale=[(400, 400), (500, 500)], | |||||
multiscale_mode='value', | |||||
keep_ratio=True), | |||||
dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']), | |||||
dict(type='BrightnessTransform', level=5, prob=0.5), | |||||
dict(type='ContrastTransform', level=5, prob=0.5), | |||||
dict(type='RandomShift', shift_ratio=0.5), | |||||
dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8), | |||||
dict(type='Normalize', **img_norm_cfg), | |||||
dict(type='Pad', size_divisor=32), | |||||
dict(type='DefaultFormatBundle'), | |||||
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), | |||||
] | |||||
test_pipeline = [ | |||||
dict(type='LoadImageFromFile'), | |||||
dict( | |||||
type='MultiScaleFlipAug', | |||||
img_scale=[(400, 400)], | |||||
flip=False, | |||||
transforms=[ | |||||
dict(type='Resize', keep_ratio=True), | |||||
dict(type='RandomFlip'), | |||||
dict(type='Normalize', **img_norm_cfg), | |||||
dict(type='Pad', size_divisor=32), | |||||
dict(type='ImageToTensor', keys=['img']), | |||||
dict(type='Collect', keys=['img']), | |||||
]) | |||||
] | |||||
data = dict( | |||||
samples_per_gpu=16, | |||||
workers_per_gpu=8, | |||||
train=dict( | |||||
type=dataset_type, | |||||
img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v9_1/dsxw_train/images/', | |||||
classes=classes, | |||||
ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v9_1/dsxw_train/annotations/train.json', | |||||
pipeline=train_pipeline), | |||||
val=dict( | |||||
type=dataset_type, | |||||
img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v9_1/dsxw_test/images/', | |||||
classes=classes, | |||||
ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v9_1/dsxw_test/annotations/test.json', | |||||
pipeline=test_pipeline), | |||||
test=dict( | |||||
type=dataset_type, | |||||
img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v9_1/dsxw_test/images/', | |||||
classes=classes, | |||||
ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v9_1/dsxw_test/annotations/test.json', | |||||
pipeline=test_pipeline)) | |||||
# optimizer | |||||
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) | |||||
optimizer_config = dict(grad_clip=None) | |||||
# learning policy | |||||
lr_config = dict( | |||||
policy='CosineAnnealing', | |||||
warmup='linear', | |||||
warmup_iters=1000, | |||||
warmup_ratio=1.0 / 10, | |||||
min_lr_ratio=1e-5) | |||||
runner = dict(type='EpochBasedRunner', max_epochs=60) | |||||
evaluation = dict(interval=5, metric='bbox') |
@@ -0,0 +1,159 @@ | |||||
_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py' | |||||
norm_cfg = dict(type='SyncBN', requires_grad=True) | |||||
model = dict( | |||||
backbone=dict( | |||||
norm_cfg=norm_cfg, | |||||
dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False), | |||||
stage_with_dcn=(False, True, True, True)), | |||||
neck=dict( | |||||
type='FPN_CARAFE', | |||||
in_channels=[256, 512, 1024, 2048], | |||||
out_channels=256, | |||||
num_outs=5, | |||||
start_level=0, | |||||
end_level=-1, | |||||
norm_cfg=None, | |||||
act_cfg=None, | |||||
order=('conv', 'norm', 'act'), | |||||
upsample_cfg=dict( | |||||
type='carafe', | |||||
up_kernel=5, | |||||
up_group=1, | |||||
encoder_kernel=3, | |||||
encoder_dilation=1, | |||||
compressed_channels=64)), | |||||
roi_head=dict( | |||||
bbox_head=[ | |||||
dict( | |||||
type='Shared2FCBBoxHead', | |||||
in_channels=256, | |||||
fc_out_channels=1024, | |||||
roi_feat_size=7, | |||||
num_classes=11, | |||||
bbox_coder=dict( | |||||
type='DeltaXYWHBBoxCoder', | |||||
target_means=[0., 0., 0., 0.], | |||||
target_stds=[0.1, 0.1, 0.2, 0.2]), | |||||
reg_class_agnostic=True, | |||||
loss_cls=dict( | |||||
type='FocalLoss', | |||||
use_sigmoid=True, | |||||
gamma=2.0, | |||||
alpha=0.25, | |||||
loss_weight=1.0), | |||||
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, | |||||
loss_weight=1.0)), | |||||
dict( | |||||
type='Shared2FCBBoxHead', | |||||
in_channels=256, | |||||
fc_out_channels=1024, | |||||
roi_feat_size=7, | |||||
num_classes=11, | |||||
bbox_coder=dict( | |||||
type='DeltaXYWHBBoxCoder', | |||||
target_means=[0., 0., 0., 0.], | |||||
target_stds=[0.05, 0.05, 0.1, 0.1]), | |||||
reg_class_agnostic=True, | |||||
loss_cls=dict( | |||||
type='FocalLoss', | |||||
use_sigmoid=True, | |||||
gamma=2.0, | |||||
alpha=0.25, | |||||
loss_weight=1.0), | |||||
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, | |||||
loss_weight=1.0)), | |||||
dict( | |||||
type='Shared2FCBBoxHead', | |||||
in_channels=256, | |||||
fc_out_channels=1024, | |||||
roi_feat_size=7, | |||||
num_classes=11, | |||||
bbox_coder=dict( | |||||
type='DeltaXYWHBBoxCoder', | |||||
target_means=[0., 0., 0., 0.], | |||||
target_stds=[0.033, 0.033, 0.067, 0.067]), | |||||
reg_class_agnostic=True, | |||||
loss_cls=dict( | |||||
type='FocalLoss', | |||||
use_sigmoid=True, | |||||
gamma=2.0, | |||||
alpha=0.25, | |||||
loss_weight=1.0), | |||||
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) | |||||
])) | |||||
dataset_type = 'CocoDataset' | |||||
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu') | |||||
img_norm_cfg = dict( | |||||
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) | |||||
train_pipeline = [ | |||||
dict(type='LoadImageFromFile'), | |||||
dict(type='LoadAnnotations', with_bbox=True), | |||||
dict( | |||||
type='Resize', | |||||
img_scale=[(400, 400), (500, 500)], | |||||
multiscale_mode='value', | |||||
keep_ratio=True), | |||||
dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']), | |||||
dict(type='BrightnessTransform', level=5, prob=0.5), | |||||
dict(type='ContrastTransform', level=5, prob=0.5), | |||||
dict(type='RandomShift', shift_ratio=0.5), | |||||
dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8), | |||||
dict(type='Normalize', **img_norm_cfg), | |||||
dict(type='Pad', size_divisor=32), | |||||
dict(type='DefaultFormatBundle'), | |||||
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), | |||||
] | |||||
test_pipeline = [ | |||||
dict(type='LoadImageFromFile'), | |||||
dict( | |||||
type='MultiScaleFlipAug', | |||||
img_scale=[(400, 400)], | |||||
flip=False, | |||||
transforms=[ | |||||
dict(type='Resize', keep_ratio=True), | |||||
dict(type='RandomFlip'), | |||||
dict(type='Normalize', **img_norm_cfg), | |||||
dict(type='Pad', size_divisor=32), | |||||
dict(type='ImageToTensor', keys=['img']), | |||||
dict(type='Collect', keys=['img']), | |||||
]) | |||||
] | |||||
# Dataset config: COCO-format train/val/test splits of dsxw_dataset_v8.
data = dict(
    samples_per_gpu=16,  # batch size per GPU
    workers_per_gpu=8,   # dataloader workers per GPU
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_test/images/',
        classes=classes,
        # Fixed: path was missing the '/' between 'datasets' and
        # 'dsxw_dataset_v8' ('datasetsdsxw_dataset_v8'), so the test
        # annotation file could never be found. Now matches val.ann_file.
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
# NOTE: this variant uses a 3x base LR (0.03 vs 0.01 in the sibling
# configs) — presumably scaled for SyncBN / multi-GPU training; confirm
# against the launch setup.
optimizer = dict(type='SGD', lr=0.03, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy
# Cosine-annealing schedule with a linear warmup: LR ramps from
# lr/10 to lr over the first 2000 iterations, then decays to
# lr * 1e-5 by the end of training.
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=2000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=60)
# Run COCO bbox mAP evaluation every 5 epochs.
evaluation = dict(interval=5, metric='bbox')
@@ -0,0 +1,145 @@ | |||||
_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py' | |||||
norm_cfg = dict(type='SyncBN', requires_grad=True) | |||||
model = dict( | |||||
backbone=dict( | |||||
norm_cfg=norm_cfg), | |||||
neck=dict( | |||||
type='FPN',#FPN PAFPN | |||||
in_channels=[256, 512, 1024, 2048], | |||||
out_channels=256, | |||||
num_outs=5), | |||||
roi_head=dict( | |||||
bbox_head=[ | |||||
dict( | |||||
type='Shared2FCBBoxHead', | |||||
in_channels=256, | |||||
fc_out_channels=1024, | |||||
roi_feat_size=7, | |||||
num_classes=11, | |||||
bbox_coder=dict( | |||||
type='DeltaXYWHBBoxCoder', | |||||
target_means=[0., 0., 0., 0.], | |||||
target_stds=[0.1, 0.1, 0.2, 0.2]), | |||||
reg_class_agnostic=True, | |||||
loss_cls=dict( | |||||
type='FocalLoss', | |||||
use_sigmoid=True, | |||||
gamma=2.0, | |||||
alpha=0.25, | |||||
loss_weight=1.0), | |||||
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, | |||||
loss_weight=1.0)), | |||||
dict( | |||||
type='Shared2FCBBoxHead', | |||||
in_channels=256, | |||||
fc_out_channels=1024, | |||||
roi_feat_size=7, | |||||
num_classes=11, | |||||
bbox_coder=dict( | |||||
type='DeltaXYWHBBoxCoder', | |||||
target_means=[0., 0., 0., 0.], | |||||
target_stds=[0.05, 0.05, 0.1, 0.1]), | |||||
reg_class_agnostic=True, | |||||
loss_cls=dict( | |||||
type='FocalLoss', | |||||
use_sigmoid=True, | |||||
gamma=2.0, | |||||
alpha=0.25, | |||||
loss_weight=1.0), | |||||
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, | |||||
loss_weight=1.0)), | |||||
dict( | |||||
type='Shared2FCBBoxHead', | |||||
in_channels=256, | |||||
fc_out_channels=1024, | |||||
roi_feat_size=7, | |||||
num_classes=11, | |||||
bbox_coder=dict( | |||||
type='DeltaXYWHBBoxCoder', | |||||
target_means=[0., 0., 0., 0.], | |||||
target_stds=[0.033, 0.033, 0.067, 0.067]), | |||||
reg_class_agnostic=True, | |||||
loss_cls=dict( | |||||
type='FocalLoss', | |||||
use_sigmoid=True, | |||||
gamma=2.0, | |||||
alpha=0.25, | |||||
loss_weight=1.0), | |||||
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) | |||||
])) | |||||
dataset_type = 'CocoDataset' | |||||
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu') | |||||
img_norm_cfg = dict( | |||||
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) | |||||
train_pipeline = [ | |||||
dict(type='LoadImageFromFile'), | |||||
dict(type='LoadAnnotations', with_bbox=True), | |||||
dict( | |||||
type='Resize', | |||||
img_scale=[(400, 400), (500, 500)], | |||||
multiscale_mode='value', | |||||
keep_ratio=True), | |||||
dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']), | |||||
dict(type='BrightnessTransform', level=5, prob=0.5), | |||||
dict(type='ContrastTransform', level=5, prob=0.5), | |||||
dict(type='RandomShift', shift_ratio=0.5), | |||||
dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8), | |||||
dict(type='Normalize', **img_norm_cfg), | |||||
dict(type='Pad', size_divisor=32), | |||||
dict(type='DefaultFormatBundle'), | |||||
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), | |||||
] | |||||
test_pipeline = [ | |||||
dict(type='LoadImageFromFile'), | |||||
dict( | |||||
type='MultiScaleFlipAug', | |||||
img_scale=[(400, 400)], | |||||
flip=False, | |||||
transforms=[ | |||||
dict(type='Resize', keep_ratio=True), | |||||
dict(type='RandomFlip'), | |||||
dict(type='Normalize', **img_norm_cfg), | |||||
dict(type='Pad', size_divisor=32), | |||||
dict(type='ImageToTensor', keys=['img']), | |||||
dict(type='Collect', keys=['img']), | |||||
]) | |||||
] | |||||
# Dataset config: COCO-format train/val/test splits of dsxw_dataset_v8.
data = dict(
    samples_per_gpu=16,  # batch size per GPU
    workers_per_gpu=8,   # dataloader workers per GPU
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_test/images/',
        classes=classes,
        # Fixed: path was missing the '/' between 'datasets' and
        # 'dsxw_dataset_v8' ('datasetsdsxw_dataset_v8'), so the test
        # annotation file could never be found. Now matches val.ann_file.
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer | |||||
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) | |||||
optimizer_config = dict(grad_clip=None) | |||||
# learning policy | |||||
lr_config = dict( | |||||
policy='CosineAnnealing', | |||||
warmup='linear', | |||||
warmup_iters=5000, | |||||
warmup_ratio=1.0 / 10, | |||||
min_lr_ratio=1e-5) | |||||
runner = dict(type='EpochBasedRunner', max_epochs=120) | |||||
evaluation = dict(interval=5, metric='bbox') |
@@ -0,0 +1,138 @@ | |||||
# mmdetection config: ATSS detector with a ResNeXt-101-64x4d backbone,
# fine-tuned on the dsxw defect dataset v8 (11 classes) for 60 epochs.
_base_ = [
    '../_base_/datasets/coco_detection.py',
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
model = dict(
    type='ATSS',
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=64,
        base_width=4,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        style='pytorch',
        init_cfg=dict(
            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_output',
        num_outs=5),
    bbox_head=dict(
        type='ATSSHead',
        num_classes=11,  # must equal len(classes) below
        in_channels=256,
        stacked_convs=4,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            ratios=[1.0],
            octave_base_scale=8,
            scales_per_octave=1,
            strides=[8, 16, 32, 64, 128]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[0.1, 0.1, 0.2, 0.2]),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
        loss_centerness=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(type='ATSSAssigner', topk=9),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        nms=dict(type='nms', iou_threshold=0.6),
        max_per_img=100))
dataset_type = 'CocoDataset'
# 11 defect class names (pinyin labels); tuple order defines the label ids.
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# Training pipeline: multi-scale resize (400/500), 3-way random flip,
# photometric jitter, random shift and IoU-constrained random crop.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
# Test pipeline: single scale, no flip.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 400)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_test/images/',
        classes=classes,
        # Fixed: path was missing the '/' between 'datasets' and
        # 'dsxw_dataset_v8' ('...datasetsdsxw_dataset_v8...'), so test-set
        # evaluation could not locate the annotation file.
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v8/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine annealing with a linear warmup of 5000 iterations
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=5000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=5, metric='bbox')
@@ -0,0 +1,143 @@ | |||||
# mmdetection config: Cascade R-CNN (ResNeXt-101-64x4d + FPN) fine-tuned on
# the dsxw defect dataset v10 (11 classes), with FocalLoss replacing the
# default cross-entropy classification loss in every cascade stage.
_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py'
model = dict(
    neck=dict(
        type='FPN',  # FPN (PAFPN is the alternative that was considered)
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    roi_head=dict(
        # Three cascade stages; regression target stds shrink per stage
        # (0.1 -> 0.05 -> 0.033) so later stages refine tighter boxes.
        bbox_head=[
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,  # must equal len(classes) below
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=2.0,
                    alpha=0.25,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=2.0,
                    alpha=0.25,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=2.0,
                    alpha=0.25,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]))
dataset_type = 'CocoDataset'
# 11 defect class names (pinyin labels); tuple order defines the label ids.
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# Training pipeline: multi-scale resize (400/500), 3-way random flip,
# photometric jitter, random shift and IoU-constrained random crop.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
# Test pipeline: single scale (400x400), no flip.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 400)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
# Dataloaders for dsxw v10; val and test use the same split.
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v10/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v10/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v10/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v10/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v10/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v10/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine annealing with a linear warmup of 5000 iterations
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=5000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=5, metric='bbox')
@@ -0,0 +1,132 @@ | |||||
# mmdetection config: GFL (Generalized Focal Loss) detector with a
# ResNeXt-101-64x4d backbone, fine-tuned on the dsxw defect dataset v10
# (11 classes) for 60 epochs.
_base_ = [
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
model = dict(
    type='GFL',
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=64,
        base_width=4,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        style='pytorch',
        init_cfg=dict(
            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_output',
        num_outs=5),
    bbox_head=dict(
        type='GFLHead',
        num_classes=11,  # must equal len(classes) below
        in_channels=256,
        stacked_convs=4,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            ratios=[1.0],
            octave_base_scale=8,
            scales_per_octave=1,
            strides=[8, 16, 32, 64, 128]),
        loss_cls=dict(
            type='QualityFocalLoss',
            use_sigmoid=True,
            beta=2.0,
            loss_weight=1.0),
        loss_dfl=dict(type='DistributionFocalLoss', loss_weight=0.25),
        reg_max=16,
        loss_bbox=dict(type='GIoULoss', loss_weight=2.0)),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(type='ATSSAssigner', topk=9),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        nms=dict(type='nms', iou_threshold=0.6),
        max_per_img=100))
dataset_type = 'CocoDataset'
# 11 defect class names (pinyin labels); tuple order defines the label ids.
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# Training pipeline: multi-scale resize (400/500), 3-way random flip,
# photometric jitter, random shift and IoU-constrained random crop.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
# Test pipeline: single scale, no flip.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 400)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v10/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v10/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v10/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v10/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v10/dsxw_test/images/',
        classes=classes,
        # Fixed: path was missing the '/' between 'datasets' and
        # 'dsxw_dataset_v10' ('...datasetsdsxw_dataset_v10...'), so test-set
        # evaluation could not locate the annotation file.
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v10/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine annealing with a linear warmup of 5000 iterations
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=5000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=5, metric='bbox')
@@ -0,0 +1,137 @@ | |||||
# mmdetection config: ATSS detector with a ResNeXt-101-64x4d backbone,
# fine-tuned on the dsxw defect dataset v10 (11 classes) for 60 epochs.
# NOTE(review): unlike the v8 ATSS config, '_base_' omits the
# coco_detection dataset base; all dataset settings are defined inline.
_base_ = [
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
model = dict(
    type='ATSS',
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=64,
        base_width=4,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        style='pytorch',
        init_cfg=dict(
            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_output',
        num_outs=5),
    bbox_head=dict(
        type='ATSSHead',
        num_classes=11,  # must equal len(classes) below
        in_channels=256,
        stacked_convs=4,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            ratios=[1.0],
            octave_base_scale=8,
            scales_per_octave=1,
            strides=[8, 16, 32, 64, 128]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[0.1, 0.1, 0.2, 0.2]),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
        loss_centerness=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(type='ATSSAssigner', topk=9),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        nms=dict(type='nms', iou_threshold=0.6),
        max_per_img=100))
dataset_type = 'CocoDataset'
# 11 defect class names (pinyin labels); tuple order defines the label ids.
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# Training pipeline: multi-scale resize (400/500), 3-way random flip,
# photometric jitter, random shift and IoU-constrained random crop.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
# Test pipeline: single scale (400x400), no flip.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 400)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
# Dataloaders for dsxw v10; val and test use the same split.
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v10/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v10/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v10/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v10/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v10/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v10/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine annealing with a linear warmup of 5000 iterations
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=5000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=5, metric='bbox')
@@ -0,0 +1,147 @@ | |||||
# mmdetection config: Cascade R-CNN (X-101-64x4d + FPN) on dsxw v10 with
# deformable convolutions (DCN) in backbone stages 2-4, SyncBN, and
# FocalLoss classification in every cascade stage.
# NOTE(review): SyncBN requires distributed (multi-GPU) training — confirm
# the launch script uses dist_train, otherwise switch back to 'BN'.
_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py'
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    backbone=dict(
        norm_cfg=norm_cfg,
        dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False),
        # DCN enabled in stages 2-4, disabled in stage 1.
        stage_with_dcn=(False, True, True, True)),
    neck=dict(
        type='FPN',  # FPN (PAFPN is the alternative that was considered)
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    roi_head=dict(
        # Three cascade stages; regression target stds shrink per stage
        # (0.1 -> 0.05 -> 0.033) so later stages refine tighter boxes.
        bbox_head=[
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,  # must equal len(classes) below
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=2.0,
                    alpha=0.25,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=2.0,
                    alpha=0.25,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=2.0,
                    alpha=0.25,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]))
dataset_type = 'CocoDataset'
# 11 defect class names (pinyin labels); tuple order defines the label ids.
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# Training pipeline: multi-scale resize (400/500), 3-way random flip,
# photometric jitter, random shift and IoU-constrained random crop.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
# Test pipeline: single scale (400x400), no flip.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 400)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
# Dataloaders for dsxw v10; val and test use the same split.
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v10/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v10/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v10/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v10/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v10/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v10/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine annealing with a linear warmup of 5000 iterations
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=5000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=5, metric='bbox')
@@ -0,0 +1,140 @@ | |||||
# mmdetection config: ATSS with ResNeXt-101-64x4d backbone on dsxw v10,
# using deformable convolutions (DCN) in backbone stages 2-4 and SyncBN.
# NOTE(review): SyncBN requires distributed (multi-GPU) training — confirm
# the launch script uses dist_train, otherwise switch back to 'BN'.
_base_ = [
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='ATSS',
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=64,
        base_width=4,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=norm_cfg,
        dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False),
        # DCN enabled in stages 2-4, disabled in stage 1.
        stage_with_dcn=(False, True, True, True),
        style='pytorch',
        init_cfg=dict(
            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_output',
        num_outs=5),
    bbox_head=dict(
        type='ATSSHead',
        num_classes=11,  # must equal len(classes) below
        in_channels=256,
        stacked_convs=4,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            ratios=[1.0],
            octave_base_scale=8,
            scales_per_octave=1,
            strides=[8, 16, 32, 64, 128]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[0.1, 0.1, 0.2, 0.2]),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
        loss_centerness=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(type='ATSSAssigner', topk=9),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        nms=dict(type='nms', iou_threshold=0.6),
        max_per_img=100))
dataset_type = 'CocoDataset'
# 11 defect class names (pinyin labels); tuple order defines the label ids.
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# Training pipeline: multi-scale resize (400/500), 3-way random flip,
# photometric jitter, random shift and IoU-constrained random crop.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
# Test pipeline: single scale (400x400), no flip.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 400)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
# Dataloaders for dsxw v10; val and test use the same split.
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v10/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v10/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v10/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v10/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v10/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v10/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine annealing with a linear warmup of 5000 iterations
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=5000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=5, metric='bbox')
@@ -0,0 +1,143 @@ | |||||
# mmdetection config: Cascade R-CNN (ResNeXt-101-64x4d + FPN) fine-tuned on
# the dsxw defect dataset v11 (11 classes), with FocalLoss replacing the
# default cross-entropy classification loss in every cascade stage.
_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py'
model = dict(
    neck=dict(
        type='FPN',  # FPN (PAFPN is the alternative that was considered)
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    roi_head=dict(
        # Three cascade stages; regression target stds shrink per stage
        # (0.1 -> 0.05 -> 0.033) so later stages refine tighter boxes.
        bbox_head=[
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,  # must equal len(classes) below
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=2.0,
                    alpha=0.25,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=2.0,
                    alpha=0.25,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=2.0,
                    alpha=0.25,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]))
dataset_type = 'CocoDataset'
# 11 defect class names (pinyin labels); tuple order defines the label ids.
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# Training pipeline: multi-scale resize (400/500), 3-way random flip,
# photometric jitter, random shift and IoU-constrained random crop.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
# Test pipeline: single scale (400x400), no flip.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 400)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
# Dataloaders for dsxw v11; val and test use the same split.
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v11/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v11/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v11/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v11/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v11/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v11/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine annealing with a linear warmup of 5000 iterations
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=5000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=5, metric='bbox')
@@ -0,0 +1,137 @@ | |||||
# MMDetection config: ATSS detector with a ResNeXt-101-64x4d backbone and FPN,
# trained on a custom 11-class defect dataset (dsxw_dataset_v11), 60 epochs.
_base_ = [
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
model = dict(
    type='ATSS',
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=64,
        base_width=4,
        num_stages=4,
        out_indices=(0, 1, 2, 3),  # feed all four stages into the FPN neck
        frozen_stages=1,  # freeze stem + first stage, fine-tune the rest
        norm_cfg=dict(type='BN', requires_grad=True),
        style='pytorch',
        init_cfg=dict(
            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_output',
        num_outs=5),
    bbox_head=dict(
        type='ATSSHead',
        num_classes=11,  # must match len(classes) below
        in_channels=256,
        stacked_convs=4,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            ratios=[1.0],  # single square anchor per location
            octave_base_scale=8,
            scales_per_octave=1,
            strides=[8, 16, 32, 64, 128]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[0.1, 0.1, 0.2, 0.2]),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
        loss_centerness=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(type='ATSSAssigner', topk=9),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        nms=dict(type='nms', iou_threshold=0.6),
        max_per_img=100))
dataset_type = 'CocoDataset'
# Pinyin defect-class names; presumably SMT/PCB assembly defects -- confirm with dataset owner.
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
# Mean/std match the standard ImageNet statistics; to_rgb converts BGR -> RGB.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    # Multi-scale training: one of the listed scales is picked per image.
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',
        keep_ratio=True),
    # 20% chance each for horizontal / vertical / diagonal flips.
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 400)],  # single test scale, flip TTA disabled
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v11/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v11/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    # NOTE(review): val and test share the same annotation file -- the reported
    # test metric is not an independent hold-out.
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v11/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v11/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v11/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v11/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine annealing with linear warmup over the first 5000 iters
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=5000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=5, metric='bbox')  # COCO bbox mAP every 5 epochs
@@ -0,0 +1,143 @@ | |||||
# MMDetection config: Cascade R-CNN (ResNeXt-101-64x4d + FPN, inherited from
# the base config) with FocalLoss classification heads (gamma=2.0, alpha=0.1),
# trained on the custom 11-class defect dataset (dsxw_dataset_v11), 60 epochs.
# One variant of a focal-loss hyperparameter sweep (siblings vary gamma/alpha).
_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py'
model = dict(
    neck=dict(
        type='FPN',  # NOTE(review): original comment "FPN PAFPN" -- PAFPN was presumably the alternative tried
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    roi_head=dict(
        # Three cascade stages; regression target_stds tighten per stage
        # (0.1 -> 0.05 -> 0.033) as box proposals get more accurate.
        bbox_head=[
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,  # must match len(classes) below
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=2.0,
                    alpha=0.1,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=2.0,
                    alpha=0.1,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=2.0,
                    alpha=0.1,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]))
dataset_type = 'CocoDataset'
# Pinyin defect-class names; presumably SMT/PCB assembly defects -- confirm with dataset owner.
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
# Mean/std match the standard ImageNet statistics; to_rgb converts BGR -> RGB.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    # Multi-scale training: one of the listed scales is picked per image.
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',
        keep_ratio=True),
    # 20% chance each for horizontal / vertical / diagonal flips.
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 400)],  # single test scale, flip TTA disabled
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v11/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v11/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    # NOTE(review): val and test share the same annotation file.
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v11/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v11/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v11/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v11/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine annealing with linear warmup over the first 5000 iters
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=5000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=5, metric='bbox')  # COCO bbox mAP every 5 epochs
@@ -0,0 +1,143 @@ | |||||
# MMDetection config: Cascade R-CNN (ResNeXt-101-64x4d + FPN, inherited from
# the base config) with FocalLoss classification heads (gamma=3.0, alpha=0.25),
# trained on the custom 11-class defect dataset (dsxw_dataset_v11), 60 epochs.
# One variant of a focal-loss hyperparameter sweep (siblings vary gamma/alpha).
_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py'
model = dict(
    neck=dict(
        type='FPN',  # NOTE(review): original comment "FPN PAFPN" -- PAFPN was presumably the alternative tried
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    roi_head=dict(
        # Three cascade stages; regression target_stds tighten per stage
        # (0.1 -> 0.05 -> 0.033) as box proposals get more accurate.
        bbox_head=[
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,  # must match len(classes) below
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=3.0,
                    alpha=0.25,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=3.0,
                    alpha=0.25,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=3.0,
                    alpha=0.25,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]))
dataset_type = 'CocoDataset'
# Pinyin defect-class names; presumably SMT/PCB assembly defects -- confirm with dataset owner.
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
# Mean/std match the standard ImageNet statistics; to_rgb converts BGR -> RGB.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    # Multi-scale training: one of the listed scales is picked per image.
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',
        keep_ratio=True),
    # 20% chance each for horizontal / vertical / diagonal flips.
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 400)],  # single test scale, flip TTA disabled
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v11/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v11/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    # NOTE(review): val and test share the same annotation file.
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v11/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v11/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v11/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v11/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine annealing with linear warmup over the first 5000 iters
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=5000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=5, metric='bbox')  # COCO bbox mAP every 5 epochs
@@ -0,0 +1,143 @@ | |||||
# MMDetection config: Cascade R-CNN (ResNeXt-101-64x4d + FPN, inherited from
# the base config) with FocalLoss classification heads (gamma=2.5, alpha=0.25),
# trained on the custom 11-class defect dataset (dsxw_dataset_v11), 60 epochs.
# One variant of a focal-loss hyperparameter sweep (siblings vary gamma/alpha).
_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py'
model = dict(
    neck=dict(
        type='FPN',  # NOTE(review): original comment "FPN PAFPN" -- PAFPN was presumably the alternative tried
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    roi_head=dict(
        # Three cascade stages; regression target_stds tighten per stage
        # (0.1 -> 0.05 -> 0.033) as box proposals get more accurate.
        bbox_head=[
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,  # must match len(classes) below
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=2.5,
                    alpha=0.25,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=2.5,
                    alpha=0.25,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=2.5,
                    alpha=0.25,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]))
dataset_type = 'CocoDataset'
# Pinyin defect-class names; presumably SMT/PCB assembly defects -- confirm with dataset owner.
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
# Mean/std match the standard ImageNet statistics; to_rgb converts BGR -> RGB.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    # Multi-scale training: one of the listed scales is picked per image.
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',
        keep_ratio=True),
    # 20% chance each for horizontal / vertical / diagonal flips.
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 400)],  # single test scale, flip TTA disabled
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v11/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v11/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    # NOTE(review): val and test share the same annotation file.
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v11/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v11/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v11/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v11/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine annealing with linear warmup over the first 5000 iters
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=5000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=5, metric='bbox')  # COCO bbox mAP every 5 epochs
@@ -0,0 +1,144 @@ | |||||
# MMDetection config: Cascade R-CNN (ResNeXt-101-64x4d + FPN, inherited from
# the base config) with FocalLoss classification heads (gamma=2.0, alpha=0.25),
# trained on dataset version dsxw_dataset_v12 for 40 epochs
# (shorter schedule and denser eval/checkpointing than the v11 siblings).
_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py'
model = dict(
    neck=dict(
        type='FPN',  # NOTE(review): original comment "FPN PAFPN" -- PAFPN was presumably the alternative tried
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    roi_head=dict(
        # Three cascade stages; regression target_stds tighten per stage
        # (0.1 -> 0.05 -> 0.033) as box proposals get more accurate.
        bbox_head=[
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,  # must match len(classes) below
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=2.0,
                    alpha=0.25,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=2.0,
                    alpha=0.25,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=2.0,
                    alpha=0.25,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]))
dataset_type = 'CocoDataset'
# Pinyin defect-class names; presumably SMT/PCB assembly defects -- confirm with dataset owner.
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
# Mean/std match the standard ImageNet statistics; to_rgb converts BGR -> RGB.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    # Multi-scale training: one of the listed scales is picked per image.
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',
        keep_ratio=True),
    # 20% chance each for horizontal / vertical / diagonal flips.
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 400)],  # single test scale, flip TTA disabled
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v12/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v12/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    # NOTE(review): val and test share the same annotation file.
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v12/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v12/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v12/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v12/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine annealing with linear warmup over the first 5000 iters
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=5000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=40)
evaluation = dict(interval=2, metric='bbox')  # COCO bbox mAP every 2 epochs
checkpoint_config = dict(interval=2)  # keep a checkpoint every 2 epochs
@@ -0,0 +1,138 @@ | |||||
# MMDetection config: ATSS detector with a ResNeXt-101-64x4d backbone and FPN,
# trained on dataset version dsxw_dataset_v12 for 40 epochs
# (shorter schedule and denser eval/checkpointing than the v11 variant).
_base_ = [
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
model = dict(
    type='ATSS',
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=64,
        base_width=4,
        num_stages=4,
        out_indices=(0, 1, 2, 3),  # feed all four stages into the FPN neck
        frozen_stages=1,  # freeze stem + first stage, fine-tune the rest
        norm_cfg=dict(type='BN', requires_grad=True),
        style='pytorch',
        init_cfg=dict(
            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_output',
        num_outs=5),
    bbox_head=dict(
        type='ATSSHead',
        num_classes=11,  # must match len(classes) below
        in_channels=256,
        stacked_convs=4,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            ratios=[1.0],  # single square anchor per location
            octave_base_scale=8,
            scales_per_octave=1,
            strides=[8, 16, 32, 64, 128]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[0.1, 0.1, 0.2, 0.2]),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
        loss_centerness=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(type='ATSSAssigner', topk=9),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        nms=dict(type='nms', iou_threshold=0.6),
        max_per_img=100))
dataset_type = 'CocoDataset'
# Pinyin defect-class names; presumably SMT/PCB assembly defects -- confirm with dataset owner.
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
# Mean/std match the standard ImageNet statistics; to_rgb converts BGR -> RGB.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    # Multi-scale training: one of the listed scales is picked per image.
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',
        keep_ratio=True),
    # 20% chance each for horizontal / vertical / diagonal flips.
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 400)],  # single test scale, flip TTA disabled
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v12/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v12/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    # NOTE(review): val and test share the same annotation file.
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v12/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v12/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v12/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v12/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine annealing with linear warmup over the first 5000 iters
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=5000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=40)
evaluation = dict(interval=2, metric='bbox')  # COCO bbox mAP every 2 epochs
checkpoint_config = dict(interval=2)  # keep a checkpoint every 2 epochs
@@ -0,0 +1,138 @@ | |||||
# MMDetection config: ATSS detector with a ResNeXt-101-64x4d backbone and FPN,
# trained on dataset version dsxw_dataset_v13 for 60 epochs,
# evaluating and checkpointing every 2 epochs.
_base_ = [
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
model = dict(
    type='ATSS',
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=64,
        base_width=4,
        num_stages=4,
        out_indices=(0, 1, 2, 3),  # feed all four stages into the FPN neck
        frozen_stages=1,  # freeze stem + first stage, fine-tune the rest
        norm_cfg=dict(type='BN', requires_grad=True),
        style='pytorch',
        init_cfg=dict(
            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_output',
        num_outs=5),
    bbox_head=dict(
        type='ATSSHead',
        num_classes=11,  # must match len(classes) below
        in_channels=256,
        stacked_convs=4,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            ratios=[1.0],  # single square anchor per location
            octave_base_scale=8,
            scales_per_octave=1,
            strides=[8, 16, 32, 64, 128]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[0.1, 0.1, 0.2, 0.2]),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
        loss_centerness=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(type='ATSSAssigner', topk=9),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        nms=dict(type='nms', iou_threshold=0.6),
        max_per_img=100))
dataset_type = 'CocoDataset'
# Pinyin defect-class names; presumably SMT/PCB assembly defects -- confirm with dataset owner.
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
# Mean/std match the standard ImageNet statistics; to_rgb converts BGR -> RGB.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    # Multi-scale training: one of the listed scales is picked per image.
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',
        keep_ratio=True),
    # 20% chance each for horizontal / vertical / diagonal flips.
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 400)],  # single test scale, flip TTA disabled
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v13/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v13/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    # NOTE(review): val and test share the same annotation file.
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v13/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v13/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v13/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v13/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine annealing with linear warmup over the first 5000 iters
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=5000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=2, metric='bbox')  # COCO bbox mAP every 2 epochs
checkpoint_config = dict(interval=2)  # keep a checkpoint every 2 epochs
# ===== new config file boundary (diff header: @@ -0,0 +1,144 @@) =====
# Cascade R-CNN (ResNeXt-101 64x4d + FPN) fine-tuned on an 11-class custom
# COCO-format defect dataset (dsxw_dataset_v12).  Model/schedule defaults come
# from the 20e COCO base config; the overrides below swap the classifier loss
# to sigmoid FocalLoss and set num_classes=11 in all three cascade stages.
_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py'
model = dict(
    neck=dict(
        type='FPN',  # neck variants experimented with: FPN / PAFPN
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    roi_head=dict(
        # Three cascade stages; each later stage uses tighter regression
        # target_stds (standard Cascade R-CNN schedule 0.1 -> 0.05 -> 0.033).
        bbox_head=[
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=2.0,
                    alpha=0.25,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=2.0,
                    alpha=0.25,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=11,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='FocalLoss',
                    use_sigmoid=True,
                    gamma=2.0,
                    alpha=0.25,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]))
dataset_type = 'CocoDataset'
# Pinyin defect-type labels of the custom dataset (11 classes).
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# Training pipeline: 2-scale resize + flip/brightness/contrast/shift/crop
# augmentation before normalization and padding.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',  # single test scale, no flip TTA
        img_scale=[(400, 400)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
# val and test point at the same annotation file (test.json).
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v12/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v12/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v12/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v12/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v12/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/dsxw_dataset_v12/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine decay with linear warmup
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=5000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=2, metric='bbox')
checkpoint_config = dict(interval=2)
# ===== new config file boundary (diff header: @@ -0,0 +1,139 @@) =====
# ATSS detector with ResNeXt-101 64x4d backbone (SyncBN) on the 11-class
# PCBA defect dataset v14.  Longer schedule than the sibling configs:
# 80 epochs with 10000 warmup iterations.
_base_ = [
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
norm_cfg = dict(type='SyncBN', requires_grad=True)  # synced BN for multi-GPU
model = dict(
    type='ATSS',
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=64,
        base_width=4,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,  # freeze stem + stage 1 of the pretrained backbone
        norm_cfg=norm_cfg,
        style='pytorch',
        init_cfg=dict(
            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_output',
        num_outs=5),
    bbox_head=dict(
        type='ATSSHead',
        num_classes=11,
        in_channels=256,
        stacked_convs=4,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            ratios=[1.0],  # single square anchor per location (ATSS default)
            octave_base_scale=8,
            scales_per_octave=1,
            strides=[8, 16, 32, 64, 128]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[0.1, 0.1, 0.2, 0.2]),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
        loss_centerness=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(type='ATSSAssigner', topk=9),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        nms=dict(type='nms', iou_threshold=0.6),
        max_per_img=100))
dataset_type = 'CocoDataset'
# Pinyin defect-type labels of the custom dataset (11 classes).
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',  # single test scale, no flip TTA
        img_scale=[(400, 400)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
# val and test point at the same annotation file (test.json).
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v14/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v14/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v14/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v14/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v14/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v14/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine decay; note the longer 10000-iter warmup here
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=10000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=80)
evaluation = dict(interval=2, metric='bbox')
checkpoint_config = dict(interval=2)
# ===== new config file boundary (diff header: @@ -0,0 +1,138 @@) =====
# ATSS + ResNeXt-101 64x4d on PCBA defect dataset v14.  Variant of the SyncBN
# config: uses plain (per-GPU) BN in the backbone and a shorter 40-epoch
# schedule with 5000 warmup iterations.
_base_ = [
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
model = dict(
    type='ATSS',
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=64,
        base_width=4,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),  # plain BN (not synced)
        style='pytorch',
        init_cfg=dict(
            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_output',
        num_outs=5),
    bbox_head=dict(
        type='ATSSHead',
        num_classes=11,
        in_channels=256,
        stacked_convs=4,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            ratios=[1.0],
            octave_base_scale=8,
            scales_per_octave=1,
            strides=[8, 16, 32, 64, 128]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[0.1, 0.1, 0.2, 0.2]),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
        loss_centerness=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(type='ATSSAssigner', topk=9),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        nms=dict(type='nms', iou_threshold=0.6),
        max_per_img=100))
dataset_type = 'CocoDataset'
# Pinyin defect-type labels of the custom dataset (11 classes).
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',  # single test scale, no flip TTA
        img_scale=[(400, 400)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
# val and test point at the same annotation file (test.json).
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v14/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v14/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v14/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v14/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v14/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v14/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=5000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=40)
evaluation = dict(interval=2, metric='bbox')
checkpoint_config = dict(interval=2)
# ===== new config file boundary (diff header: @@ -0,0 +1,139 @@) =====
# ATSS + ResNeXt-101 64x4d (SyncBN) on PCBA defect dataset v15.
# Same model as the v14 SyncBN config; 60-epoch schedule, 5000-iter warmup.
_base_ = [
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='ATSS',
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=64,
        base_width=4,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=norm_cfg,
        style='pytorch',
        init_cfg=dict(
            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_output',
        num_outs=5),
    bbox_head=dict(
        type='ATSSHead',
        num_classes=11,
        in_channels=256,
        stacked_convs=4,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            ratios=[1.0],
            octave_base_scale=8,
            scales_per_octave=1,
            strides=[8, 16, 32, 64, 128]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[0.1, 0.1, 0.2, 0.2]),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
        loss_centerness=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(type='ATSSAssigner', topk=9),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        nms=dict(type='nms', iou_threshold=0.6),
        max_per_img=100))
dataset_type = 'CocoDataset'
# Pinyin defect-type labels of the custom dataset (11 classes).
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',  # single test scale, no flip TTA
        img_scale=[(400, 400)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
# val and test point at the same annotation file (test.json).
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=5000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=2, metric='bbox')
checkpoint_config = dict(interval=2)
# ===== new config file boundary (diff header: @@ -0,0 +1,141 @@) =====
# ATSS with a ResNeSt-101 backbone (SyncBN, BN layers not frozen in eval)
# on PCBA defect dataset v15.  40-epoch schedule, 5000-iter warmup.
_base_ = [
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='ATSS',
    backbone=dict(
        type='ResNeSt',
        stem_channels=128,
        depth=101,
        radix=2,
        reduction_factor=4,
        avg_down_stride=True,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=norm_cfg,
        norm_eval=False,  # keep BN statistics updating during training
        style='pytorch',
        init_cfg=dict(type='Pretrained', checkpoint='open-mmlab://resnest101')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_output',
        num_outs=5),
    bbox_head=dict(
        type='ATSSHead',
        num_classes=11,
        in_channels=256,
        stacked_convs=4,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            ratios=[1.0],
            octave_base_scale=8,
            scales_per_octave=1,
            strides=[8, 16, 32, 64, 128]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[0.1, 0.1, 0.2, 0.2]),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
        loss_centerness=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(type='ATSSAssigner', topk=9),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        nms=dict(type='nms', iou_threshold=0.6),
        max_per_img=100))
dataset_type = 'CocoDataset'
# Pinyin defect-type labels of the custom dataset (11 classes).
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',  # single test scale, no flip TTA
        img_scale=[(400, 400)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
# val and test point at the same annotation file (test.json).
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=5000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=40)
evaluation = dict(interval=2, metric='bbox')
checkpoint_config = dict(interval=2)
# ===== new config file boundary (diff header: @@ -0,0 +1,134 @@) =====
# ATSS with a PVTv2-b3 transformer backbone on PCBA defect dataset v15.
# Batch is reduced to 6 per GPU (transformer backbone needs more memory).
# NOTE(review): norm_cfg is defined but not referenced in this segment —
# presumably a leftover from the ResNeXt variants.
_base_ = [
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='ATSS',
    backbone=dict(
        type='PyramidVisionTransformerV2',
        mlp_ratios=(4, 4, 4, 4),
        embed_dims=64,
        num_layers=[3, 4, 18, 3],  # PVTv2-b3 depth configuration
        init_cfg=dict(checkpoint='https://github.com/whai362/PVT/'
                      'releases/download/v2/pvt_v2_b3.pth')),
    neck=dict(
        type='FPN',
        in_channels=[64, 128, 320, 512],  # PVTv2 stage widths (not ResNet's)
        out_channels=256,
        start_level=1,
        add_extra_convs='on_output',
        num_outs=5),
    bbox_head=dict(
        type='ATSSHead',
        num_classes=11,
        in_channels=256,
        stacked_convs=4,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            ratios=[1.0],
            octave_base_scale=8,
            scales_per_octave=1,
            strides=[8, 16, 32, 64, 128]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[0.1, 0.1, 0.2, 0.2]),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
        loss_centerness=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(type='ATSSAssigner', topk=9),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        nms=dict(type='nms', iou_threshold=0.6),
        max_per_img=100))
dataset_type = 'CocoDataset'
# Pinyin defect-type labels of the custom dataset (11 classes).
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',  # single test scale, no flip TTA
        img_scale=[(400, 400)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
# val and test point at the same annotation file (test.json).
data = dict(
    samples_per_gpu=6,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=5000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=40)
evaluation = dict(interval=2, metric='bbox')
checkpoint_config = dict(interval=2)
# ===== new config file boundary (diff header: @@ -0,0 +1,139 @@) =====
# ATSS + ResNeXt-101 64x4d (SyncBN) on PCBA defect dataset v15.
# Variant with a short 2000-iter warmup and a 40-epoch schedule.
_base_ = [
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='ATSS',
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=64,
        base_width=4,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=norm_cfg,
        style='pytorch',
        init_cfg=dict(
            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_output',
        num_outs=5),
    bbox_head=dict(
        type='ATSSHead',
        num_classes=11,
        in_channels=256,
        stacked_convs=4,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            ratios=[1.0],
            octave_base_scale=8,
            scales_per_octave=1,
            strides=[8, 16, 32, 64, 128]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[0.1, 0.1, 0.2, 0.2]),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
        loss_centerness=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(type='ATSSAssigner', topk=9),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        nms=dict(type='nms', iou_threshold=0.6),
        max_per_img=100))
dataset_type = 'CocoDataset'
# Pinyin defect-type labels of the custom dataset (11 classes).
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',  # single test scale, no flip TTA
        img_scale=[(400, 400)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
# val and test point at the same annotation file (test.json).
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine decay; shorter 2000-iter warmup in this variant
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=2000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=40)
evaluation = dict(interval=2, metric='bbox')
checkpoint_config = dict(interval=2)
# MMDetection config: ATSS detector (ResNeXt-101 64x4d + FPN) for PCBA defect
# detection, dataset version v0 (11 defect classes).
# Fixed: removed the diff-scrape "@@" hunk header and "| |||||" column markers
# that made this file invalid Python.
_base_ = [
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
norm_cfg = dict(type='SyncBN', requires_grad=True)  # sync BN across GPUs
model = dict(
    type='ATSS',
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=64,
        base_width=4,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,  # freeze stem + first stage of the pretrained backbone
        norm_cfg=norm_cfg,
        style='pytorch',
        init_cfg=dict(
            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_output',
        num_outs=5),
    bbox_head=dict(
        type='ATSSHead',
        num_classes=11,  # must equal len(classes) below
        in_channels=256,
        stacked_convs=4,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            ratios=[1.0],
            octave_base_scale=8,
            scales_per_octave=1,  # one square anchor per location (ATSS style)
            strides=[8, 16, 32, 64, 128]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[0.1, 0.1, 0.2, 0.2]),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
        loss_centerness=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(type='ATSSAssigner', topk=9),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        nms=dict(type='nms', iou_threshold=0.6),
        max_per_img=100))
dataset_type = 'CocoDataset'
# 11 PCBA defect categories (pinyin names)
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',  # pick one of the listed scales per image
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 400)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15_MLOPS/v0/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15_MLOPS/v0/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15_MLOPS/v0/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15_MLOPS/v0/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    # test split reuses the val data
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15_MLOPS/v0/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15_MLOPS/v0/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine decay with a 2000-iter linear warmup
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=2000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=40)
evaluation = dict(interval=2, metric='bbox')
checkpoint_config = dict(interval=2)
# MMDetection config: ATSS detector (ResNeXt-101 64x4d + FPN) for PCBA defect
# detection, dataset version v1 (11 defect classes).
# Fixed: removed the diff-scrape "@@" hunk header and "| |||||" column markers
# that made this file invalid Python.
_base_ = [
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
norm_cfg = dict(type='SyncBN', requires_grad=True)  # sync BN across GPUs
model = dict(
    type='ATSS',
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=64,
        base_width=4,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,  # freeze stem + first stage of the pretrained backbone
        norm_cfg=norm_cfg,
        style='pytorch',
        init_cfg=dict(
            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_output',
        num_outs=5),
    bbox_head=dict(
        type='ATSSHead',
        num_classes=11,  # must equal len(classes) below
        in_channels=256,
        stacked_convs=4,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            ratios=[1.0],
            octave_base_scale=8,
            scales_per_octave=1,  # one square anchor per location (ATSS style)
            strides=[8, 16, 32, 64, 128]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[0.1, 0.1, 0.2, 0.2]),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
        loss_centerness=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(type='ATSSAssigner', topk=9),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        nms=dict(type='nms', iou_threshold=0.6),
        max_per_img=100))
dataset_type = 'CocoDataset'
# 11 PCBA defect categories (pinyin names)
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',  # pick one of the listed scales per image
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 400)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15_MLOPS/v1/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15_MLOPS/v1/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15_MLOPS/v1/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15_MLOPS/v1/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    # test split reuses the val data
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15_MLOPS/v1/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15_MLOPS/v1/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine decay with a 2000-iter linear warmup
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=2000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=40)
evaluation = dict(interval=2, metric='bbox')
checkpoint_config = dict(interval=2)
# MMDetection config: ATSS detector (ResNeXt-101 64x4d + FPN) — FINE-TUNE run
# on dataset v1_finetune, starting from an earlier checkpoint (load_from).
# Fine-tune schedule: lr 0.001 (10x lower), no warmup, 10 epochs, eval/ckpt
# every epoch.  Evaluated against the v1 test split.
# Fixed: removed the diff-scrape "@@" hunk header and "| |||||" column markers
# that made this file invalid Python.
_base_ = ['../_base_/default_runtime.py']
norm_cfg = dict(type='SyncBN', requires_grad=True)  # sync BN across GPUs
model = dict(
    type='ATSS',
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=64,
        base_width=4,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=norm_cfg,
        style='pytorch',
        init_cfg=dict(
            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_output',
        num_outs=5),
    bbox_head=dict(
        type='ATSSHead',
        num_classes=11,  # must equal len(classes) below
        in_channels=256,
        stacked_convs=4,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            ratios=[1.0],
            octave_base_scale=8,
            scales_per_octave=1,
            strides=[8, 16, 32, 64, 128]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[0.1, 0.1, 0.2, 0.2]),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
        loss_centerness=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(type='ATSSAssigner', topk=9),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        nms=dict(type='nms', iou_threshold=0.6),
        max_per_img=100))
dataset_type = 'CocoDataset'
# 11 PCBA defect categories (pinyin names)
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 400)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15_MLOPS/v1_finetune/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15_MLOPS/v1_finetune/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    # val/test stay on the v1 split so metrics are comparable to the base run
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15_MLOPS/v1/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15_MLOPS/v1/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15_MLOPS/v1/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15_MLOPS/v1/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer (reduced lr for fine-tuning)
optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine decay, no warmup (weights already trained)
lr_config = dict(
    policy='CosineAnnealing',
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=10)
evaluation = dict(interval=1, metric='bbox')
checkpoint_config = dict(interval=1)
load_from = '/home/shanwei-luo/userdata/mmdetection/work_dirs/AD_dsxw_test70/epoch_38.pth'
# MMDetection config: ATSS detector (ResNeXt-101 64x4d + FPN) for PCBA defect
# detection, dataset version v2 (11 defect classes).
# Fixed: removed the diff-scrape "@@" hunk header and "| |||||" column markers
# that made this file invalid Python.
_base_ = [
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
norm_cfg = dict(type='SyncBN', requires_grad=True)  # sync BN across GPUs
model = dict(
    type='ATSS',
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=64,
        base_width=4,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,  # freeze stem + first stage of the pretrained backbone
        norm_cfg=norm_cfg,
        style='pytorch',
        init_cfg=dict(
            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_output',
        num_outs=5),
    bbox_head=dict(
        type='ATSSHead',
        num_classes=11,  # must equal len(classes) below
        in_channels=256,
        stacked_convs=4,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            ratios=[1.0],
            octave_base_scale=8,
            scales_per_octave=1,
            strides=[8, 16, 32, 64, 128]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[0.1, 0.1, 0.2, 0.2]),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
        loss_centerness=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(type='ATSSAssigner', topk=9),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        nms=dict(type='nms', iou_threshold=0.6),
        max_per_img=100))
dataset_type = 'CocoDataset'
# 11 PCBA defect categories (pinyin names)
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',  # pick one of the listed scales per image
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 400)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15_MLOPS/v2/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15_MLOPS/v2/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15_MLOPS/v2/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15_MLOPS/v2/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    # test split reuses the val data
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15_MLOPS/v2/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15_MLOPS/v2/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine decay with a 2000-iter linear warmup
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=2000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=40)
evaluation = dict(interval=2, metric='bbox')
checkpoint_config = dict(interval=2)
# MMDetection config: ATSS detector (ResNeXt-101 64x4d + FPN) — FINE-TUNE run
# on dataset v2_finetune, starting from an earlier checkpoint (load_from).
# Fine-tune schedule: lr 0.001, no warmup, 10 epochs, eval/ckpt every epoch.
# Evaluated against the v1 test split.
# Fixed: removed the diff-scrape "@@" hunk header and "| |||||" column markers
# that made this file invalid Python.
_base_ = ['../_base_/default_runtime.py']
norm_cfg = dict(type='SyncBN', requires_grad=True)  # sync BN across GPUs
model = dict(
    type='ATSS',
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=64,
        base_width=4,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=norm_cfg,
        style='pytorch',
        init_cfg=dict(
            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_output',
        num_outs=5),
    bbox_head=dict(
        type='ATSSHead',
        num_classes=11,  # must equal len(classes) below
        in_channels=256,
        stacked_convs=4,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            ratios=[1.0],
            octave_base_scale=8,
            scales_per_octave=1,
            strides=[8, 16, 32, 64, 128]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[0.1, 0.1, 0.2, 0.2]),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
        loss_centerness=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(type='ATSSAssigner', topk=9),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        nms=dict(type='nms', iou_threshold=0.6),
        max_per_img=100))
dataset_type = 'CocoDataset'
# 11 PCBA defect categories (pinyin names)
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 400)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15_MLOPS/v2_finetune/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15_MLOPS/v2_finetune/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    # val/test stay on the v1 split so metrics are comparable to the base run
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15_MLOPS/v1/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15_MLOPS/v1/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15_MLOPS/v1/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15_MLOPS/v1/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer (reduced lr for fine-tuning)
optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine decay, no warmup (weights already trained)
lr_config = dict(
    policy='CosineAnnealing',
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=10)
evaluation = dict(interval=1, metric='bbox')
checkpoint_config = dict(interval=1)
load_from = '/home/shanwei-luo/userdata/mmdetection/work_dirs/AD_dsxw_test70/epoch_38.pth'
# MMDetection config: ATSS detector (ResNeXt-101 64x4d + FPN) — FINE-TUNE run
# on dataset v3_finetune, starting from an earlier checkpoint (load_from).
# Fine-tune schedule: lr 0.001, no warmup, 10 epochs, eval/ckpt every epoch.
# Evaluated against the v1 test split.
# Fixed: removed the diff-scrape "@@" hunk header and "| |||||" column markers
# that made this file invalid Python.
_base_ = ['../_base_/default_runtime.py']
norm_cfg = dict(type='SyncBN', requires_grad=True)  # sync BN across GPUs
model = dict(
    type='ATSS',
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=64,
        base_width=4,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=norm_cfg,
        style='pytorch',
        init_cfg=dict(
            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_output',
        num_outs=5),
    bbox_head=dict(
        type='ATSSHead',
        num_classes=11,  # must equal len(classes) below
        in_channels=256,
        stacked_convs=4,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            ratios=[1.0],
            octave_base_scale=8,
            scales_per_octave=1,
            strides=[8, 16, 32, 64, 128]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[0.1, 0.1, 0.2, 0.2]),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
        loss_centerness=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(type='ATSSAssigner', topk=9),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        nms=dict(type='nms', iou_threshold=0.6),
        max_per_img=100))
dataset_type = 'CocoDataset'
# 11 PCBA defect categories (pinyin names)
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 400)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15_MLOPS/v3_finetune/dsxw_train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15_MLOPS/v3_finetune/dsxw_train/annotations/train.json',
        pipeline=train_pipeline),
    # val/test stay on the v1 split so metrics are comparable to the base run
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15_MLOPS/v1/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15_MLOPS/v1/dsxw_test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15_MLOPS/v1/dsxw_test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/PCBA_dataset_v15_MLOPS/v1/dsxw_test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer (reduced lr for fine-tuning)
optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine decay, no warmup (weights already trained)
lr_config = dict(
    policy='CosineAnnealing',
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=10)
evaluation = dict(interval=1, metric='bbox')
checkpoint_config = dict(interval=1)
load_from = '/home/shanwei-luo/userdata/mmdetection/work_dirs/AD_dsxw_test70/epoch_38.pth'
# ---- concatenation boundary (original diff hunk header: "@@ -0,0 +1,146 @@") — next config file ----
_base_ = [
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
# SyncBN keeps batch-norm statistics synchronised across GPUs.
norm_cfg = dict(type='SyncBN', requires_grad=True)
# ATSS single-stage detector: ResNeXt-101 64x4d backbone + FPN neck.
model = dict(
    type='ATSS',
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=64,
        base_width=4,
        num_stages=4,
        out_indices=(0, 1, 2, 3),  # expose all four stages to the neck
        frozen_stages=1,  # freeze the earliest backbone stage during training
        norm_cfg=norm_cfg,
        style='pytorch',
        init_cfg=dict(
            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_output',
        num_outs=5),
    bbox_head=dict(
        type='ATSSHead',
        num_classes=15,  # must equal len(classes) defined later in this file
        in_channels=256,
        stacked_convs=4,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            ratios=[1.0],  # a single square anchor per location
            octave_base_scale=8,
            scales_per_octave=1,
            strides=[8, 16, 32, 64, 128]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[0.1, 0.1, 0.2, 0.2]),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
        loss_centerness=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(type='ATSSAssigner', topk=9),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        nms=dict(type='nms', iou_threshold=0.6),
        max_per_img=100))
dataset_type = 'CocoDataset'
# 15 PCBA structure-defect category names, taken verbatim from the
# annotation files; the order must match bbox_head.num_classes above.
classes = ('quebiaoqian', 'loutiejian', 'loutongjian', 'louxibo_1',
           'louxibo_2', 'louxibo_3', 'louxiangjiaodian', 'kagouduanlie',
           'xiangjiaodianwaixie', 'xiaopaomianwaixie', 'louxiaopaomian',
           'loudapaomian', 'xiboqiaoqi', 'dapaomianwaixie', 'loudingweixibo')
# ImageNet mean/std; to_rgb converts the loaded (BGR) image before normalising.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    # Multi-scale training: one of the two scales is picked per sample.
    dict(
        type='Resize',
        img_scale=[(1024, 1024), (1280, 1280)],
        multiscale_mode='value',
        keep_ratio=True),
    # 20% chance each of horizontal / vertical / diagonal flip.
    dict(type='RandomFlip', flip_ratio=[0.2, 0.2, 0.2],
         direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
# Single-scale, no-flip inference.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(1280, 1280)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/yuhai_dataset/yuhai_dataset0406/structure/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/yuhai_dataset/yuhai_dataset0406/structure/annotations/train_cat_mode_addok.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/yuhai_dataset/yuhai_dataset0406/structure/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/yuhai_dataset/yuhai_dataset0406/structure/annotations/val_cat_mode_addok.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/yuhai_dataset/yuhai_dataset0406/structure/images/',
        classes=classes,
        # FIX: previously '.../structure/val_cat_mode_addok.json' — the
        # 'annotations/' directory was missing, unlike the val split; the
        # test split now points at the same existing annotation file.
        ann_file='/home/shanwei-luo/userdata/datasets/yuhai_dataset/yuhai_dataset0406/structure/annotations/val_cat_mode_addok.json',
        pipeline=test_pipeline))
# optimizer
# Plain SGD; gradients are not clipped.
optimizer = {'type': 'SGD', 'lr': 0.001, 'momentum': 0.9, 'weight_decay': 0.0001}
optimizer_config = {'grad_clip': None}
# learning policy
# Cosine annealing preceded by a short (100-iteration) linear warmup.
lr_config = {
    'policy': 'CosineAnnealing',
    'warmup': 'linear',
    'warmup_iters': 100,
    'warmup_ratio': 1.0 / 10,
    'min_lr_ratio': 1e-5,
}
runner = {'type': 'EpochBasedRunner', 'max_epochs': 60}
# Evaluate (COCO bbox) and checkpoint every other epoch.
evaluation = {'interval': 2, 'metric': 'bbox'}
checkpoint_config = {'interval': 2}
log_config = {
    'interval': 1,
    'hooks': [
        {'type': 'TextLoggerHook'},
        # {'type': 'TensorboardLoggerHook'},
    ],
}
# ---- concatenation boundary (original diff hunk header: "@@ -0,0 +1,146 @@") — next config file ----
_base_ = [
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
# SyncBN keeps batch-norm statistics synchronised across GPUs.
norm_cfg = dict(type='SyncBN', requires_grad=True)
# ATSS single-stage detector: ResNeXt-101 64x4d backbone + FPN neck.
model = dict(
    type='ATSS',
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=64,
        base_width=4,
        num_stages=4,
        out_indices=(0, 1, 2, 3),  # expose all four stages to the neck
        frozen_stages=1,  # freeze the earliest backbone stage during training
        norm_cfg=norm_cfg,
        style='pytorch',
        init_cfg=dict(
            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_output',
        num_outs=5),
    bbox_head=dict(
        type='ATSSHead',
        num_classes=4,  # must equal len(classes) defined later in this file
        in_channels=256,
        stacked_convs=4,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            ratios=[1.0],  # a single square anchor per location
            octave_base_scale=8,
            scales_per_octave=1,
            strides=[8, 16, 32, 64, 128]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[0.1, 0.1, 0.2, 0.2]),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
        loss_centerness=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(type='ATSSAssigner', topk=9),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        nms=dict(type='nms', iou_threshold=0.6),
        max_per_img=100))
dataset_type = 'CocoDataset'
# 4 side-view defect categories, names taken verbatim from the annotations;
# the order must match bbox_head.num_classes above.
classes = ('posun', 'huahen', 'yiwu', 'cashang')
# ImageNet mean/std; to_rgb converts the loaded (BGR) image before normalising.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    # Multi-scale training at large resolutions: one scale picked per sample.
    dict(
        type='Resize',
        img_scale=[(2048, 2048), (2560, 2560)],
        multiscale_mode='value',
        keep_ratio=True),
    # 20% chance each of horizontal / vertical / diagonal flip.
    dict(type='RandomFlip', flip_ratio=[0.2, 0.2, 0.2],
         direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
# Single-scale, no-flip inference.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(2560, 2560)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/yuhai_dataset/yuhai_dataset0406/side/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/yuhai_dataset/yuhai_dataset0406/side/annotations/train_cat_mode_addok.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/yuhai_dataset/yuhai_dataset0406/side/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/yuhai_dataset/yuhai_dataset0406/side/annotations/val_cat_mode_addok.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/yuhai_dataset/yuhai_dataset0406/side/images/',
        classes=classes,
        # FIX: previously '.../side/val_cat_mode_addok.json' — the
        # 'annotations/' directory was missing, unlike the val split; the
        # test split now points at the same existing annotation file.
        ann_file='/home/shanwei-luo/userdata/datasets/yuhai_dataset/yuhai_dataset0406/side/annotations/val_cat_mode_addok.json',
        pipeline=test_pipeline))
# optimizer
# Plain SGD; gradients are not clipped.
optimizer = {'type': 'SGD', 'lr': 0.001, 'momentum': 0.9, 'weight_decay': 0.0001}
optimizer_config = {'grad_clip': None}
# learning policy
# Cosine annealing preceded by a 1000-iteration linear warmup.
lr_config = {
    'policy': 'CosineAnnealing',
    'warmup': 'linear',
    'warmup_iters': 1000,
    'warmup_ratio': 1.0 / 10,
    'min_lr_ratio': 1e-5,
}
runner = {'type': 'EpochBasedRunner', 'max_epochs': 60}
# Evaluate (COCO bbox) and checkpoint every other epoch.
evaluation = {'interval': 2, 'metric': 'bbox'}
checkpoint_config = {'interval': 2}
log_config = {
    'interval': 10,
    'hooks': [
        {'type': 'TextLoggerHook'},
        # {'type': 'TensorboardLoggerHook'},
    ],
}
# ---- concatenation boundary (original diff hunk header: "@@ -0,0 +1,146 @@") — next config file ----
_base_ = [
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
# SyncBN keeps batch-norm statistics synchronised across GPUs.
norm_cfg = dict(type='SyncBN', requires_grad=True)
# ATSS single-stage detector: ResNeXt-101 64x4d backbone + FPN neck.
model = dict(
    type='ATSS',
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=64,
        base_width=4,
        num_stages=4,
        out_indices=(0, 1, 2, 3),  # expose all four stages to the neck
        frozen_stages=1,  # freeze the earliest backbone stage during training
        norm_cfg=norm_cfg,
        style='pytorch',
        init_cfg=dict(
            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_output',
        num_outs=5),
    bbox_head=dict(
        type='ATSSHead',
        num_classes=2,  # must equal len(classes) defined later in this file
        in_channels=256,
        stacked_convs=4,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            ratios=[1.0],  # a single square anchor per location
            octave_base_scale=8,
            scales_per_octave=1,
            strides=[8, 16, 32, 64, 128]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[0.1, 0.1, 0.2, 0.2]),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
        loss_centerness=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(type='ATSSAssigner', topk=9),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        nms=dict(type='nms', iou_threshold=0.6),
        max_per_img=100))
dataset_type = 'CocoDataset'
# 2 surface defect categories, names taken verbatim from the annotations;
# the order must match bbox_head.num_classes above.
classes = ('zangwuyise', 'guashang')
# ImageNet mean/std; to_rgb converts the loaded (BGR) image before normalising.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    # Multi-scale training: one of the two scales is picked per sample.
    dict(
        type='Resize',
        img_scale=[(1024, 1024), (1280, 1280)],
        multiscale_mode='value',
        keep_ratio=True),
    # 20% chance each of horizontal / vertical / diagonal flip.
    dict(type='RandomFlip', flip_ratio=[0.2, 0.2, 0.2],
         direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
# Single-scale, no-flip inference.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(1280, 1280)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/yuhai_dataset/yuhai_dataset0406/surf/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/yuhai_dataset/yuhai_dataset0406/surf/annotations/train_cat_mode_addok.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/yuhai_dataset/yuhai_dataset0406/surf/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/yuhai_dataset/yuhai_dataset0406/surf/annotations/val_cat_mode_addok.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/yuhai_dataset/yuhai_dataset0406/surf/images/',
        classes=classes,
        # FIX: previously '.../surf/val_cat_mode_addok.json' — the
        # 'annotations/' directory was missing, unlike the val split; the
        # test split now points at the same existing annotation file.
        ann_file='/home/shanwei-luo/userdata/datasets/yuhai_dataset/yuhai_dataset0406/surf/annotations/val_cat_mode_addok.json',
        pipeline=test_pipeline))
# optimizer
# Plain SGD; gradients are not clipped.
optimizer = {'type': 'SGD', 'lr': 0.001, 'momentum': 0.9, 'weight_decay': 0.0001}
optimizer_config = {'grad_clip': None}
# learning policy
# Cosine annealing preceded by a short (100-iteration) linear warmup.
lr_config = {
    'policy': 'CosineAnnealing',
    'warmup': 'linear',
    'warmup_iters': 100,
    'warmup_ratio': 1.0 / 10,
    'min_lr_ratio': 1e-5,
}
runner = {'type': 'EpochBasedRunner', 'max_epochs': 60}
# Evaluate (COCO bbox) and checkpoint every other epoch.
evaluation = {'interval': 2, 'metric': 'bbox'}
checkpoint_config = {'interval': 2}
log_config = {
    'interval': 1,
    'hooks': [
        {'type': 'TextLoggerHook'},
        # {'type': 'TensorboardLoggerHook'},
    ],
}
# ---- concatenation boundary (original diff hunk header: "@@ -0,0 +1,137 @@") — next config file ----
_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py'
# Cascade R-CNN (ResNeXt-101 64x4d, inherited from _base_) with the three
# cascade bbox heads overridden for a 4-class dataset. Regression target
# stds tighten stage by stage (0.1 -> 0.05 -> 0.033).
model = dict(
    neck=dict(
        type='FPN',  # FPN (alternative noted in original comment: PAFPN)
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    roi_head=dict(
        bbox_head=[
            # Cascade stage 1: loosest regression targets.
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=4,  # must equal len(classes) defined below
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            # Cascade stage 2: tighter regression targets.
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=4,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            # Cascade stage 3: tightest regression targets.
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=4,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]))
dataset_type = 'CocoDataset'
# Four numeric category labels, exactly as stored in the COCO annotations.
classes = ('0', '1', '2', '3')
# ImageNet statistics; images are converted BGR -> RGB before normalising.
img_norm_cfg = {
    'mean': [123.675, 116.28, 103.53],
    'std': [58.395, 57.12, 57.375],
    'to_rgb': True,
}
# Training-time augmentation chain.
train_pipeline = [
    {'type': 'LoadImageFromFile'},
    {'type': 'LoadAnnotations', 'with_bbox': True},
    # Multi-scale training: one of the two square scales per image.
    {
        'type': 'Resize',
        'img_scale': [(768, 768), (1024, 1024)],
        'multiscale_mode': 'value',
        'keep_ratio': True,
    },
    # 20% each for horizontal / vertical / diagonal flips.
    {
        'type': 'RandomFlip',
        'flip_ratio': [0.2, 0.2, 0.2],
        'direction': ['horizontal', 'vertical', 'diagonal'],
    },
    {'type': 'BrightnessTransform', 'level': 5, 'prob': 0.5},
    {'type': 'ContrastTransform', 'level': 5, 'prob': 0.5},
    {'type': 'RandomShift', 'shift_ratio': 0.5},
    {'type': 'MinIoURandomCrop', 'min_ious': (0.5, 0.7, 0.9), 'min_crop_size': 0.8},
    {'type': 'Normalize', **img_norm_cfg},
    {'type': 'Pad', 'size_divisor': 32},
    {'type': 'DefaultFormatBundle'},
    {'type': 'Collect', 'keys': ['img', 'gt_bboxes', 'gt_labels']},
]
# Test-time augmentation: both scales plus flipped copies.
test_pipeline = [
    {'type': 'LoadImageFromFile'},
    {
        'type': 'MultiScaleFlipAug',
        'img_scale': [(768, 768), (1024, 1024)],
        'flip': True,
        'transforms': [
            {'type': 'Resize', 'keep_ratio': True},
            {'type': 'RandomFlip'},
            {'type': 'Normalize', **img_norm_cfg},
            {'type': 'Pad', 'size_divisor': 32},
            {'type': 'ImageToTensor', 'keys': ['img']},
            {'type': 'Collect', 'keys': ['img']},
        ],
    },
]
# val and test both read the val split.
data = {
    'samples_per_gpu': 4,
    'workers_per_gpu': 8,
    'train': {
        'type': dataset_type,
        'img_prefix': '/home/shanwei-luo/userdata/datasets/gs_dataset/coco/train/',
        'classes': classes,
        'ann_file': '/home/shanwei-luo/userdata/datasets/gs_dataset/coco/annotations/instances_train.json',
        'pipeline': train_pipeline,
    },
    'val': {
        'type': dataset_type,
        'img_prefix': '/home/shanwei-luo/userdata/datasets/gs_dataset/coco/val/',
        'classes': classes,
        'ann_file': '/home/shanwei-luo/userdata/datasets/gs_dataset/coco/annotations/instances_val.json',
        'pipeline': test_pipeline,
    },
    'test': {
        'type': dataset_type,
        'img_prefix': '/home/shanwei-luo/userdata/datasets/gs_dataset/coco/val/',
        'classes': classes,
        'ann_file': '/home/shanwei-luo/userdata/datasets/gs_dataset/coco/annotations/instances_val.json',
        'pipeline': test_pipeline,
    },
}
# optimizer
# Plain SGD; gradients are not clipped.
optimizer = {'type': 'SGD', 'lr': 0.01, 'momentum': 0.9, 'weight_decay': 0.0001}
optimizer_config = {'grad_clip': None}
# learning policy
# Cosine annealing preceded by a 2000-iteration linear warmup.
lr_config = {
    'policy': 'CosineAnnealing',
    'warmup': 'linear',
    'warmup_iters': 2000,
    'warmup_ratio': 1.0 / 10,
    'min_lr_ratio': 1e-5,
}
runner = {'type': 'EpochBasedRunner', 'max_epochs': 60}
# COCO bbox evaluation every 5 epochs.
evaluation = {'interval': 5, 'metric': 'bbox'}
# ---- concatenation boundary (original diff hunk header: "@@ -0,0 +1,137 @@") — next config file ----
_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py'
# Cascade R-CNN (ResNeXt-101 64x4d, inherited from _base_) with the three
# cascade bbox heads overridden for a 4-class dataset. Regression target
# stds tighten stage by stage (0.1 -> 0.05 -> 0.033).
model = dict(
    neck=dict(
        type='FPN',  # FPN (alternative noted in original comment: PAFPN)
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    roi_head=dict(
        bbox_head=[
            # Cascade stage 1: loosest regression targets.
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=4,  # must equal len(classes) defined below
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            # Cascade stage 2: tighter regression targets.
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=4,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            # Cascade stage 3: tightest regression targets.
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=4,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]))
dataset_type = 'CocoDataset'
# Four numeric category labels, exactly as stored in the COCO annotations.
classes = ('0', '1', '2', '3')
# ImageNet statistics; images are converted BGR -> RGB before normalising.
img_norm_cfg = {
    'mean': [123.675, 116.28, 103.53],
    'std': [58.395, 57.12, 57.375],
    'to_rgb': True,
}
# Training-time augmentation chain.
train_pipeline = [
    {'type': 'LoadImageFromFile'},
    {'type': 'LoadAnnotations', 'with_bbox': True},
    # Multi-scale training: one of the two square scales per image.
    {
        'type': 'Resize',
        'img_scale': [(768, 768), (1024, 1024)],
        'multiscale_mode': 'value',
        'keep_ratio': True,
    },
    # 20% each for horizontal / vertical / diagonal flips.
    {
        'type': 'RandomFlip',
        'flip_ratio': [0.2, 0.2, 0.2],
        'direction': ['horizontal', 'vertical', 'diagonal'],
    },
    {'type': 'BrightnessTransform', 'level': 5, 'prob': 0.5},
    {'type': 'ContrastTransform', 'level': 5, 'prob': 0.5},
    {'type': 'RandomShift', 'shift_ratio': 0.5},
    {'type': 'MinIoURandomCrop', 'min_ious': (0.5, 0.7, 0.9), 'min_crop_size': 0.8},
    {'type': 'Normalize', **img_norm_cfg},
    {'type': 'Pad', 'size_divisor': 32},
    {'type': 'DefaultFormatBundle'},
    {'type': 'Collect', 'keys': ['img', 'gt_bboxes', 'gt_labels']},
]
# Test-time augmentation: both scales plus flipped copies.
test_pipeline = [
    {'type': 'LoadImageFromFile'},
    {
        'type': 'MultiScaleFlipAug',
        'img_scale': [(768, 768), (1024, 1024)],
        'flip': True,
        'transforms': [
            {'type': 'Resize', 'keep_ratio': True},
            {'type': 'RandomFlip'},
            {'type': 'Normalize', **img_norm_cfg},
            {'type': 'Pad', 'size_divisor': 32},
            {'type': 'ImageToTensor', 'keys': ['img']},
            {'type': 'Collect', 'keys': ['img']},
        ],
    },
]
# val and test both read the val split.
data = {
    'samples_per_gpu': 4,
    'workers_per_gpu': 8,
    'train': {
        'type': dataset_type,
        'img_prefix': '/home/shanwei-luo/userdata/datasets/gs_dataset/coco/train/',
        'classes': classes,
        'ann_file': '/home/shanwei-luo/userdata/datasets/gs_dataset/coco/annotations/instances_train.json',
        'pipeline': train_pipeline,
    },
    'val': {
        'type': dataset_type,
        'img_prefix': '/home/shanwei-luo/userdata/datasets/gs_dataset/coco/val/',
        'classes': classes,
        'ann_file': '/home/shanwei-luo/userdata/datasets/gs_dataset/coco/annotations/instances_val.json',
        'pipeline': test_pipeline,
    },
    'test': {
        'type': dataset_type,
        'img_prefix': '/home/shanwei-luo/userdata/datasets/gs_dataset/coco/val/',
        'classes': classes,
        'ann_file': '/home/shanwei-luo/userdata/datasets/gs_dataset/coco/annotations/instances_val.json',
        'pipeline': test_pipeline,
    },
}
# optimizer
# Plain SGD; gradients are not clipped.
optimizer = {'type': 'SGD', 'lr': 0.01, 'momentum': 0.9, 'weight_decay': 0.0001}
optimizer_config = {'grad_clip': None}
# learning policy
# Cosine annealing preceded by a 1000-iteration linear warmup.
lr_config = {
    'policy': 'CosineAnnealing',
    'warmup': 'linear',
    'warmup_iters': 1000,
    'warmup_ratio': 1.0 / 10,
    'min_lr_ratio': 1e-5,
}
runner = {'type': 'EpochBasedRunner', 'max_epochs': 60}
# COCO bbox evaluation every 5 epochs.
evaluation = {'interval': 5, 'metric': 'bbox'}
# ---- concatenation boundary (original diff hunk header: "@@ -0,0 +1,137 @@") — next config file ----
_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py'
# Cascade R-CNN (ResNeXt-101 64x4d, inherited from _base_) with the three
# cascade bbox heads overridden for a 4-class dataset. Regression target
# stds tighten stage by stage (0.1 -> 0.05 -> 0.033).
model = dict(
    neck=dict(
        type='FPN',  # FPN (alternative noted in original comment: PAFPN)
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    roi_head=dict(
        bbox_head=[
            # Cascade stage 1: loosest regression targets.
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=4,  # must equal len(classes) defined below
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            # Cascade stage 2: tighter regression targets.
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=4,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            # Cascade stage 3: tightest regression targets.
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=4,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]))
dataset_type = 'CocoDataset'
# Four numeric category labels, exactly as stored in the COCO annotations.
classes = ('0', '1', '2', '3')
# ImageNet statistics; images are converted BGR -> RGB before normalising.
img_norm_cfg = {
    'mean': [123.675, 116.28, 103.53],
    'std': [58.395, 57.12, 57.375],
    'to_rgb': True,
}
# Training-time augmentation chain.
train_pipeline = [
    {'type': 'LoadImageFromFile'},
    {'type': 'LoadAnnotations', 'with_bbox': True},
    # Multi-scale training: one of the two square scales per image.
    {
        'type': 'Resize',
        'img_scale': [(768, 768), (1024, 1024)],
        'multiscale_mode': 'value',
        'keep_ratio': True,
    },
    # 20% each for horizontal / vertical / diagonal flips.
    {
        'type': 'RandomFlip',
        'flip_ratio': [0.2, 0.2, 0.2],
        'direction': ['horizontal', 'vertical', 'diagonal'],
    },
    {'type': 'BrightnessTransform', 'level': 5, 'prob': 0.5},
    {'type': 'ContrastTransform', 'level': 5, 'prob': 0.5},
    {'type': 'RandomShift', 'shift_ratio': 0.5},
    {'type': 'MinIoURandomCrop', 'min_ious': (0.5, 0.7, 0.9), 'min_crop_size': 0.8},
    {'type': 'Normalize', **img_norm_cfg},
    {'type': 'Pad', 'size_divisor': 32},
    {'type': 'DefaultFormatBundle'},
    {'type': 'Collect', 'keys': ['img', 'gt_bboxes', 'gt_labels']},
]
# Test-time augmentation: both scales plus flipped copies.
test_pipeline = [
    {'type': 'LoadImageFromFile'},
    {
        'type': 'MultiScaleFlipAug',
        'img_scale': [(768, 768), (1024, 1024)],
        'flip': True,
        'transforms': [
            {'type': 'Resize', 'keep_ratio': True},
            {'type': 'RandomFlip'},
            {'type': 'Normalize', **img_norm_cfg},
            {'type': 'Pad', 'size_divisor': 32},
            {'type': 'ImageToTensor', 'keys': ['img']},
            {'type': 'Collect', 'keys': ['img']},
        ],
    },
]
# val and test both read the val split.
data = {
    'samples_per_gpu': 4,
    'workers_per_gpu': 8,
    'train': {
        'type': dataset_type,
        'img_prefix': '/home/shanwei-luo/userdata/datasets/gs_dataset/coco/train/',
        'classes': classes,
        'ann_file': '/home/shanwei-luo/userdata/datasets/gs_dataset/coco/annotations/instances_train.json',
        'pipeline': train_pipeline,
    },
    'val': {
        'type': dataset_type,
        'img_prefix': '/home/shanwei-luo/userdata/datasets/gs_dataset/coco/val/',
        'classes': classes,
        'ann_file': '/home/shanwei-luo/userdata/datasets/gs_dataset/coco/annotations/instances_val.json',
        'pipeline': test_pipeline,
    },
    'test': {
        'type': dataset_type,
        'img_prefix': '/home/shanwei-luo/userdata/datasets/gs_dataset/coco/val/',
        'classes': classes,
        'ann_file': '/home/shanwei-luo/userdata/datasets/gs_dataset/coco/annotations/instances_val.json',
        'pipeline': test_pipeline,
    },
}
# optimizer
# Plain SGD; gradients are not clipped.
optimizer = {'type': 'SGD', 'lr': 0.01, 'momentum': 0.9, 'weight_decay': 0.0001}
optimizer_config = {'grad_clip': None}
# learning policy
# Cosine annealing preceded by a 500-iteration linear warmup.
lr_config = {
    'policy': 'CosineAnnealing',
    'warmup': 'linear',
    'warmup_iters': 500,
    'warmup_ratio': 1.0 / 10,
    'min_lr_ratio': 1e-5,
}
runner = {'type': 'EpochBasedRunner', 'max_epochs': 60}
# COCO bbox evaluation every 5 epochs.
evaluation = {'interval': 5, 'metric': 'bbox'}
@@ -0,0 +1,137 @@ | |||||
# Cascade R-CNN (ResNeXt-101-64x4d + FPN, 20e base) adapted to a 4-class
# ground / off-ground safety-belt detection dataset.
_base_ = '../cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py'
model = dict(
    neck=dict(
        type='FPN',  # alternative neck tried here: PAFPN
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    roi_head=dict(
        # Three cascade stages: identical heads except for progressively
        # tighter bbox regression target_stds (0.1 -> 0.05 -> 0.033).
        bbox_head=[
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=4,  # matches len(classes) below
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=4,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=4,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ]))
dataset_type = 'CocoDataset'
classes = ('ground', 'offground', 'ground_safebelt', 'offground_safebelt')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(768, 768), (1024, 1024)],
        multiscale_mode='value',  # pick one of the listed scales per image
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        # Test-time augmentation: runs every (scale x flip) combination.
        type='MultiScaleFlipAug',
        img_scale=[(768, 768), (1024, 1024)],
        flip=True,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        # NOTE(review): training reads the *val* split and instances_val.json
        # (the sibling config trains on train/). Looks like either a deliberate
        # finetune-on-val round or a copy-paste slip -- confirm before use.
        img_prefix='/home/shanwei-luo/userdata/datasets/gs_dataset/coco/val/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/gs_dataset/coco/annotations/instances_val.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/gs_dataset/coco/val/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/gs_dataset/coco/annotations/instances_val.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/userdata/datasets/gs_dataset/coco/val/',
        classes=classes,
        ann_file='/home/shanwei-luo/userdata/datasets/gs_dataset/coco/annotations/instances_val.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: linear warmup for 2000 iters, then cosine decay
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=2000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=5, metric='bbox')
@@ -0,0 +1,143 @@ | |||||
# ATSS detector (ResNeXt-101-64x4d + FPN) for the 11-class SMD defect dataset,
# trained through a class-balanced dataset wrapper (active-learning scoring run).
_base_ = [
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
norm_cfg = dict(type='BN', requires_grad=True)
model = dict(
    type='ATSS',
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=64,
        base_width=4,
        num_stages=4,
        out_indices=(0, 1, 2, 3),  # feed C2-C5 to the neck
        frozen_stages=1,           # freeze stem + first stage
        norm_cfg=norm_cfg,
        style='pytorch',
        init_cfg=dict(
            type='Pretrained', checkpoint='/home/kaijie-tang/userdata/qizhi/code_test/resnext101_64x4d-ee2c6f71.pth')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_output',
        num_outs=5),
    bbox_head=dict(
        type='ATSSHead',
        # BUG FIX: was 80 (COCO default), but this config trains on the
        # 11-entry `classes` tuple below, and every sibling ATSS config in
        # this series uses num_classes=11. With `classes` set, MMDetection
        # remaps annotation labels to 11 categories, so the head must match.
        num_classes=11,
        in_channels=256,
        stacked_convs=4,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            ratios=[1.0],
            octave_base_scale=8,
            scales_per_octave=1,  # ATSS uses a single square anchor per level
            strides=[8, 16, 32, 64, 128]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[0.1, 0.1, 0.2, 0.2]),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
        loss_centerness=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(type='ATSSAssigner', topk=9),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        # Extremely permissive score threshold -- presumably to keep nearly
        # all detections for downstream active-learning scoring; confirm.
        score_thr=0.0001,
        nms=dict(type='nms', iou_threshold=0.6),
        max_per_img=100))
dataset_type = 'CocoDataset'
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(512, 512), (640, 640)],
        multiscale_mode='value',  # pick one of the listed scales per image
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',  # single scale, no flip: plain inference
        img_scale=[(640, 640)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=2,
    train=dict(
        # Project-local wrapper (presumably a ClassBalancedDataset variant)
        # that oversamples rare categories -- confirm its semantics.
        type='AD_ClassBalancedDataset',
        dataset=dict(
            type=dataset_type,
            img_prefix='data/coco/train2017/',
            classes=classes,
            ann_file='data/coco/annotations/instances_train2017.json',
            pipeline=train_pipeline,
        ),
        oversample_thr=1.0),
    val=dict(
        type=dataset_type,
        # NOTE(review): val/test evaluate on the *training* annotations, so
        # reported mAP measures fit, not generalization -- confirm intent.
        img_prefix='data/coco/train2017/',
        classes=classes,
        ann_file='data/coco/annotations/instances_train2017.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='data/coco/train2017/',
        classes=classes,
        ann_file='data/coco/annotations/instances_train2017.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: linear warmup for 1000 iters, then cosine decay
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=1000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=20)
evaluation = dict(interval=1, metric='bbox')
checkpoint_config = dict(interval=1)
@@ -0,0 +1,139 @@ | |||||
# ATSS (ResNeXt-101-64x4d + FPN), 11-class SMD defect dataset,
# active-learning round trained on the "hard_score_01" selection.
_base_ = [
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
# NOTE(review): SyncBN normally requires a distributed (multi-GPU) launch -- confirm.
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='ATSS',
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=64,
        base_width=4,
        num_stages=4,
        out_indices=(0, 1, 2, 3),  # feed C2-C5 to the neck
        frozen_stages=1,           # freeze stem + first stage
        norm_cfg=norm_cfg,
        style='pytorch',
        init_cfg=dict(
            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_output',
        num_outs=5),
    bbox_head=dict(
        type='ATSSHead',
        num_classes=11,  # matches len(classes) below
        in_channels=256,
        stacked_convs=4,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            ratios=[1.0],
            octave_base_scale=8,
            scales_per_octave=1,  # ATSS uses a single square anchor per level
            strides=[8, 16, 32, 64, 128]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[0.1, 0.1, 0.2, 0.2]),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
        loss_centerness=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(type='ATSSAssigner', topk=9),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        nms=dict(type='nms', iou_threshold=0.6),
        max_per_img=100))
dataset_type = 'CocoDataset'
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',  # pick one of the listed scales per image
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',  # single scale, no flip: plain inference
        img_scale=[(400, 400)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/teamdata/anomaly_detection_active_learning/data0422/smd12_11_12_hard_score_01/train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/teamdata/anomaly_detection_active_learning/data0422/smd12_11_12_hard_score_01/train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(  # val and test share the fixed smd12_2112 hold-out set
        type=dataset_type,
        img_prefix='/home/shanwei-luo/teamdata/anomaly_detection_active_learning/data0422/smd12_2112_coco/test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/teamdata/anomaly_detection_active_learning/data0422/smd12_2112_coco/test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/teamdata/anomaly_detection_active_learning/data0422/smd12_2112_coco/test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/teamdata/anomaly_detection_active_learning/data0422/smd12_2112_coco/test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: linear warmup for 1000 iters, then cosine decay
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=1000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=2, metric='bbox')
checkpoint_config = dict(interval=2)
@@ -0,0 +1,139 @@ | |||||
# ATSS (ResNeXt-101-64x4d + FPN), 11-class SMD defect dataset.
# Same as the hard_score_01 config except: trains on "hard_score_02"
# and uses samples_per_gpu=8 (vs 16).
_base_ = [
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
# NOTE(review): SyncBN normally requires a distributed (multi-GPU) launch -- confirm.
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='ATSS',
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=64,
        base_width=4,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=norm_cfg,
        style='pytorch',
        init_cfg=dict(
            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_output',
        num_outs=5),
    bbox_head=dict(
        type='ATSSHead',
        num_classes=11,  # matches len(classes) below
        in_channels=256,
        stacked_convs=4,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            ratios=[1.0],
            octave_base_scale=8,
            scales_per_octave=1,
            strides=[8, 16, 32, 64, 128]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[0.1, 0.1, 0.2, 0.2]),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
        loss_centerness=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(type='ATSSAssigner', topk=9),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        nms=dict(type='nms', iou_threshold=0.6),
        max_per_img=100))
dataset_type = 'CocoDataset'
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',  # pick one of the listed scales per image
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',  # single scale, no flip: plain inference
        img_scale=[(400, 400)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=8,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/teamdata/anomaly_detection_active_learning/data0422/smd12_11_12_hard_score_02/train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/teamdata/anomaly_detection_active_learning/data0422/smd12_11_12_hard_score_02/train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(  # val and test share the fixed smd12_2112 hold-out set
        type=dataset_type,
        img_prefix='/home/shanwei-luo/teamdata/anomaly_detection_active_learning/data0422/smd12_2112_coco/test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/teamdata/anomaly_detection_active_learning/data0422/smd12_2112_coco/test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/teamdata/anomaly_detection_active_learning/data0422/smd12_2112_coco/test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/teamdata/anomaly_detection_active_learning/data0422/smd12_2112_coco/test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: linear warmup for 1000 iters, then cosine decay
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=1000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=2, metric='bbox')
checkpoint_config = dict(interval=2)
@@ -0,0 +1,139 @@ | |||||
# ATSS (ResNeXt-101-64x4d + FPN), 11-class SMD defect dataset.
# Active-learning round trained on the "hard_score_03" selection
# (samples_per_gpu=8).
_base_ = [
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
# NOTE(review): SyncBN normally requires a distributed (multi-GPU) launch -- confirm.
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='ATSS',
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=64,
        base_width=4,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=norm_cfg,
        style='pytorch',
        init_cfg=dict(
            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_output',
        num_outs=5),
    bbox_head=dict(
        type='ATSSHead',
        num_classes=11,  # matches len(classes) below
        in_channels=256,
        stacked_convs=4,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            ratios=[1.0],
            octave_base_scale=8,
            scales_per_octave=1,
            strides=[8, 16, 32, 64, 128]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[0.1, 0.1, 0.2, 0.2]),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
        loss_centerness=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(type='ATSSAssigner', topk=9),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        nms=dict(type='nms', iou_threshold=0.6),
        max_per_img=100))
dataset_type = 'CocoDataset'
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',  # pick one of the listed scales per image
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',  # single scale, no flip: plain inference
        img_scale=[(400, 400)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=8,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/teamdata/anomaly_detection_active_learning/data0422/smd12_11_12_hard_score_03/train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/teamdata/anomaly_detection_active_learning/data0422/smd12_11_12_hard_score_03/train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(  # val and test share the fixed smd12_2112 hold-out set
        type=dataset_type,
        img_prefix='/home/shanwei-luo/teamdata/anomaly_detection_active_learning/data0422/smd12_2112_coco/test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/teamdata/anomaly_detection_active_learning/data0422/smd12_2112_coco/test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/teamdata/anomaly_detection_active_learning/data0422/smd12_2112_coco/test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/teamdata/anomaly_detection_active_learning/data0422/smd12_2112_coco/test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: linear warmup for 1000 iters, then cosine decay
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=1000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=2, metric='bbox')
checkpoint_config = dict(interval=2)
@@ -0,0 +1,139 @@ | |||||
# ATSS (ResNeXt-101-64x4d + FPN), 11-class SMD defect dataset.
# Active-learning round trained on the "hard_score_04" selection
# (samples_per_gpu=16).
_base_ = [
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
# NOTE(review): SyncBN normally requires a distributed (multi-GPU) launch -- confirm.
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='ATSS',
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=64,
        base_width=4,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=norm_cfg,
        style='pytorch',
        init_cfg=dict(
            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_output',
        num_outs=5),
    bbox_head=dict(
        type='ATSSHead',
        num_classes=11,  # matches len(classes) below
        in_channels=256,
        stacked_convs=4,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            ratios=[1.0],
            octave_base_scale=8,
            scales_per_octave=1,
            strides=[8, 16, 32, 64, 128]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[0.1, 0.1, 0.2, 0.2]),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
        loss_centerness=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(type='ATSSAssigner', topk=9),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        nms=dict(type='nms', iou_threshold=0.6),
        max_per_img=100))
dataset_type = 'CocoDataset'
classes = ('yiwei','loujian','celi','libei','fantie','lianxi','duojian','shunjian','shaoxi','jiahan','yiwu')
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',  # pick one of the listed scales per image
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2,0.2,0.2], direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',  # single scale, no flip: plain inference
        img_scale=[(400, 400)],
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/teamdata/anomaly_detection_active_learning/data0422/smd12_11_12_hard_score_04/train/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/teamdata/anomaly_detection_active_learning/data0422/smd12_11_12_hard_score_04/train/annotations/train.json',
        pipeline=train_pipeline),
    val=dict(  # val and test share the fixed smd12_2112 hold-out set
        type=dataset_type,
        img_prefix='/home/shanwei-luo/teamdata/anomaly_detection_active_learning/data0422/smd12_2112_coco/test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/teamdata/anomaly_detection_active_learning/data0422/smd12_2112_coco/test/annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix='/home/shanwei-luo/teamdata/anomaly_detection_active_learning/data0422/smd12_2112_coco/test/images/',
        classes=classes,
        ann_file='/home/shanwei-luo/teamdata/anomaly_detection_active_learning/data0422/smd12_2112_coco/test/annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: linear warmup for 1000 iters, then cosine decay
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=1000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=2, metric='bbox')
checkpoint_config = dict(interval=2)
# ========== next config file (original paste carried a diff hunk header here) ==========
# mmdetection config: ATSS detector, ResNeXt-101-64x4d + FPN, for 11-class
# SMD solder-joint defect detection (active-learning split "hard_score_04").
_base_ = [
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
norm_cfg = dict(type='SyncBN', requires_grad=True)  # BN synced across GPUs
model = dict(
    type='ATSS',
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=64,
        base_width=4,
        num_stages=4,
        out_indices=(0, 1, 2, 3),  # feed all four stages to the FPN
        frozen_stages=1,           # keep stem + first stage frozen
        norm_cfg=norm_cfg,
        style='pytorch',
        init_cfg=dict(
            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_output',
        num_outs=5),
    bbox_head=dict(
        type='ATSSHead',
        num_classes=11,  # must equal len(classes) below
        in_channels=256,
        stacked_convs=4,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            ratios=[1.0],
            octave_base_scale=8,
            scales_per_octave=1,
            strides=[8, 16, 32, 64, 128]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[0.1, 0.1, 0.2, 0.2]),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
        loss_centerness=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(type='ATSSAssigner', topk=9),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        nms=dict(type='nms', iou_threshold=0.6),
        max_per_img=100))
# COCO-format dataset with 11 SMD defect classes.
dataset_type = 'CocoDataset'
classes = ('yiwei', 'loujian', 'celi', 'libei', 'fantie', 'lianxi',
           'duojian', 'shunjian', 'shaoxi', 'jiahan', 'yiwu')
# Shared path prefixes — deduplicated from the per-split dicts below.
data_root = '/home/shanwei-luo/teamdata/anomaly_detection_active_learning/data0422/'
train_root = data_root + 'smd12_11_12_hard_score_04/train/'
test_root = data_root + 'smd12_2112_coco/test/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',  # pick one of the listed scales per sample
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2, 0.2, 0.2],
         direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 400)],
        flip=False,  # single scale, no test-time flipping
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=8,
    workers_per_gpu=6,
    # Class-balanced oversampling wrapper around the training split.
    train=dict(
        type='AD_ClassBalancedDataset',
        dataset=dict(
            type=dataset_type,
            img_prefix=train_root + 'images/',
            classes=classes,
            ann_file=train_root + 'annotations/train.json',
            pipeline=train_pipeline,
        ),
        oversample_thr=1.0),
    val=dict(
        type=dataset_type,
        img_prefix=test_root + 'images/',
        classes=classes,
        ann_file=test_root + 'annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix=test_root + 'images/',
        classes=classes,
        ann_file=test_root + 'annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine annealing with a linear warmup
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=1000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=2, metric='bbox')  # COCO bbox mAP every 2 epochs
checkpoint_config = dict(interval=2)
# ========== next config file (original paste carried a diff hunk header here) ==========
# mmdetection config: ATSS detector, ResNeXt-101-64x4d + FPN, for 11-class
# SMD solder-joint defect detection (active-learning split "hard_score_03").
_base_ = [
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
norm_cfg = dict(type='SyncBN', requires_grad=True)  # BN synced across GPUs
model = dict(
    type='ATSS',
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=64,
        base_width=4,
        num_stages=4,
        out_indices=(0, 1, 2, 3),  # feed all four stages to the FPN
        frozen_stages=1,           # keep stem + first stage frozen
        norm_cfg=norm_cfg,
        style='pytorch',
        init_cfg=dict(
            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_output',
        num_outs=5),
    bbox_head=dict(
        type='ATSSHead',
        num_classes=11,  # must equal len(classes) below
        in_channels=256,
        stacked_convs=4,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            ratios=[1.0],
            octave_base_scale=8,
            scales_per_octave=1,
            strides=[8, 16, 32, 64, 128]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[0.1, 0.1, 0.2, 0.2]),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
        loss_centerness=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(type='ATSSAssigner', topk=9),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        nms=dict(type='nms', iou_threshold=0.6),
        max_per_img=100))
# COCO-format dataset with 11 SMD defect classes.
dataset_type = 'CocoDataset'
classes = ('yiwei', 'loujian', 'celi', 'libei', 'fantie', 'lianxi',
           'duojian', 'shunjian', 'shaoxi', 'jiahan', 'yiwu')
# Shared path prefixes — deduplicated from the per-split dicts below.
data_root = '/home/shanwei-luo/teamdata/anomaly_detection_active_learning/data0422/'
train_root = data_root + 'smd12_11_12_hard_score_03/train/'
test_root = data_root + 'smd12_2112_coco/test/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',  # pick one of the listed scales per sample
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2, 0.2, 0.2],
         direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 400)],
        flip=False,  # single scale, no test-time flipping
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=8,
    workers_per_gpu=8,
    # Class-balanced oversampling wrapper around the training split.
    train=dict(
        type='AD_ClassBalancedDataset',
        dataset=dict(
            type=dataset_type,
            img_prefix=train_root + 'images/',
            classes=classes,
            ann_file=train_root + 'annotations/train.json',
            pipeline=train_pipeline,
        ),
        oversample_thr=1.0),
    val=dict(
        type=dataset_type,
        img_prefix=test_root + 'images/',
        classes=classes,
        ann_file=test_root + 'annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix=test_root + 'images/',
        classes=classes,
        ann_file=test_root + 'annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine annealing with a linear warmup
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=1000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=60)
evaluation = dict(interval=2, metric='bbox')  # COCO bbox mAP every 2 epochs
checkpoint_config = dict(interval=2)
# ========== next config file (original paste carried a diff hunk header here) ==========
# mmdetection config: ATSS detector, ResNeXt-101-64x4d + FPN, for 11-class
# SMD solder-joint defect detection (active-learning split "hard_score_02").
# Shorter schedule than the hard_score_04/03 variants: 20 epochs, eval every 1.
_base_ = [
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
norm_cfg = dict(type='SyncBN', requires_grad=True)  # BN synced across GPUs
model = dict(
    type='ATSS',
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=64,
        base_width=4,
        num_stages=4,
        out_indices=(0, 1, 2, 3),  # feed all four stages to the FPN
        frozen_stages=1,           # keep stem + first stage frozen
        norm_cfg=norm_cfg,
        style='pytorch',
        init_cfg=dict(
            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_output',
        num_outs=5),
    bbox_head=dict(
        type='ATSSHead',
        num_classes=11,  # must equal len(classes) below
        in_channels=256,
        stacked_convs=4,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            ratios=[1.0],
            octave_base_scale=8,
            scales_per_octave=1,
            strides=[8, 16, 32, 64, 128]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[0.1, 0.1, 0.2, 0.2]),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
        loss_centerness=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(type='ATSSAssigner', topk=9),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        nms=dict(type='nms', iou_threshold=0.6),
        max_per_img=100))
# COCO-format dataset with 11 SMD defect classes.
dataset_type = 'CocoDataset'
classes = ('yiwei', 'loujian', 'celi', 'libei', 'fantie', 'lianxi',
           'duojian', 'shunjian', 'shaoxi', 'jiahan', 'yiwu')
# Shared path prefixes — deduplicated from the per-split dicts below.
data_root = '/home/shanwei-luo/teamdata/anomaly_detection_active_learning/data0422/'
train_root = data_root + 'smd12_11_12_hard_score_02/train/'
test_root = data_root + 'smd12_2112_coco/test/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',  # pick one of the listed scales per sample
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2, 0.2, 0.2],
         direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 400)],
        flip=False,  # single scale, no test-time flipping
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=8,
    workers_per_gpu=6,
    # Class-balanced oversampling wrapper around the training split.
    train=dict(
        type='AD_ClassBalancedDataset',
        dataset=dict(
            type=dataset_type,
            img_prefix=train_root + 'images/',
            classes=classes,
            ann_file=train_root + 'annotations/train.json',
            pipeline=train_pipeline,
        ),
        oversample_thr=1.0),
    val=dict(
        type=dataset_type,
        img_prefix=test_root + 'images/',
        classes=classes,
        ann_file=test_root + 'annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix=test_root + 'images/',
        classes=classes,
        ann_file=test_root + 'annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine annealing with a linear warmup
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=1000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=20)
evaluation = dict(interval=1, metric='bbox')  # COCO bbox mAP every epoch
checkpoint_config = dict(interval=1)
# ========== next config file (original paste carried a diff hunk header here) ==========
# mmdetection config: ATSS detector, ResNeXt-101-64x4d + FPN, for 11-class
# SMD solder-joint defect detection (active-learning split "hard_score_01").
# Short schedule: 20 epochs, evaluation and checkpoints every epoch.
_base_ = [
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
norm_cfg = dict(type='SyncBN', requires_grad=True)  # BN synced across GPUs
model = dict(
    type='ATSS',
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=64,
        base_width=4,
        num_stages=4,
        out_indices=(0, 1, 2, 3),  # feed all four stages to the FPN
        frozen_stages=1,           # keep stem + first stage frozen
        norm_cfg=norm_cfg,
        style='pytorch',
        init_cfg=dict(
            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_output',
        num_outs=5),
    bbox_head=dict(
        type='ATSSHead',
        num_classes=11,  # must equal len(classes) below
        in_channels=256,
        stacked_convs=4,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            ratios=[1.0],
            octave_base_scale=8,
            scales_per_octave=1,
            strides=[8, 16, 32, 64, 128]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[0.1, 0.1, 0.2, 0.2]),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
        loss_centerness=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(type='ATSSAssigner', topk=9),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        nms=dict(type='nms', iou_threshold=0.6),
        max_per_img=100))
# COCO-format dataset with 11 SMD defect classes.
dataset_type = 'CocoDataset'
classes = ('yiwei', 'loujian', 'celi', 'libei', 'fantie', 'lianxi',
           'duojian', 'shunjian', 'shaoxi', 'jiahan', 'yiwu')
# Shared path prefixes — deduplicated from the per-split dicts below.
data_root = '/home/shanwei-luo/teamdata/anomaly_detection_active_learning/data0422/'
train_root = data_root + 'smd12_11_12_hard_score_01/train/'
test_root = data_root + 'smd12_2112_coco/test/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',  # pick one of the listed scales per sample
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2, 0.2, 0.2],
         direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 400)],
        flip=False,  # single scale, no test-time flipping
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=8,
    workers_per_gpu=8,
    # Class-balanced oversampling wrapper around the training split.
    train=dict(
        type='AD_ClassBalancedDataset',
        dataset=dict(
            type=dataset_type,
            img_prefix=train_root + 'images/',
            classes=classes,
            ann_file=train_root + 'annotations/train.json',
            pipeline=train_pipeline,
        ),
        oversample_thr=1.0),
    val=dict(
        type=dataset_type,
        img_prefix=test_root + 'images/',
        classes=classes,
        ann_file=test_root + 'annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix=test_root + 'images/',
        classes=classes,
        ann_file=test_root + 'annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine annealing with a linear warmup
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=1000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=20)
evaluation = dict(interval=1, metric='bbox')  # COCO bbox mAP every epoch
checkpoint_config = dict(interval=1)
# ========== next config file (original paste carried a diff hunk header here) ==========
# mmdetection config: ATSS detector, ResNeXt-101-64x4d + FPN, for 11-class
# SMD solder-joint defect detection (sampled "smd12_2106_10_sample_11_12" split).
# Short schedule: 20 epochs, evaluation and checkpoints every epoch.
_base_ = [
    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
norm_cfg = dict(type='SyncBN', requires_grad=True)  # BN synced across GPUs
model = dict(
    type='ATSS',
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=64,
        base_width=4,
        num_stages=4,
        out_indices=(0, 1, 2, 3),  # feed all four stages to the FPN
        frozen_stages=1,           # keep stem + first stage frozen
        norm_cfg=norm_cfg,
        style='pytorch',
        init_cfg=dict(
            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_output',
        num_outs=5),
    bbox_head=dict(
        type='ATSSHead',
        num_classes=11,  # must equal len(classes) below
        in_channels=256,
        stacked_convs=4,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            ratios=[1.0],
            octave_base_scale=8,
            scales_per_octave=1,
            strides=[8, 16, 32, 64, 128]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[0.1, 0.1, 0.2, 0.2]),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),
        loss_centerness=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(type='ATSSAssigner', topk=9),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        nms=dict(type='nms', iou_threshold=0.6),
        max_per_img=100))
# COCO-format dataset with 11 SMD defect classes.
dataset_type = 'CocoDataset'
classes = ('yiwei', 'loujian', 'celi', 'libei', 'fantie', 'lianxi',
           'duojian', 'shunjian', 'shaoxi', 'jiahan', 'yiwu')
# Shared path prefixes — deduplicated from the per-split dicts below.
data_root = '/home/shanwei-luo/teamdata/anomaly_detection_active_learning/data0422/'
train_root = data_root + 'smd12_2106_10_sample_11_12_coco/train/'
test_root = data_root + 'smd12_2112_coco/test/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='Resize',
        img_scale=[(400, 400), (500, 500)],
        multiscale_mode='value',  # pick one of the listed scales per sample
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=[0.2, 0.2, 0.2],
         direction=['horizontal', 'vertical', 'diagonal']),
    dict(type='BrightnessTransform', level=5, prob=0.5),
    dict(type='ContrastTransform', level=5, prob=0.5),
    dict(type='RandomShift', shift_ratio=0.5),
    dict(type='MinIoURandomCrop', min_ious=(0.5, 0.7, 0.9), min_crop_size=0.8),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=[(400, 400)],
        flip=False,  # single scale, no test-time flipping
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=8,
    workers_per_gpu=6,
    # Class-balanced oversampling wrapper around the training split.
    train=dict(
        type='AD_ClassBalancedDataset',
        dataset=dict(
            type=dataset_type,
            img_prefix=train_root + 'images/',
            classes=classes,
            ann_file=train_root + 'annotations/train.json',
            pipeline=train_pipeline,
        ),
        oversample_thr=1.0),
    val=dict(
        type=dataset_type,
        img_prefix=test_root + 'images/',
        classes=classes,
        ann_file=test_root + 'annotations/test.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        img_prefix=test_root + 'images/',
        classes=classes,
        ann_file=test_root + 'annotations/test.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy: cosine annealing with a linear warmup
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=1000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
runner = dict(type='EpochBasedRunner', max_epochs=20)
evaluation = dict(interval=1, metric='bbox')  # COCO bbox mAP every epoch
checkpoint_config = dict(interval=1)