From 7b7edcc66615f0b2e12a68cbced5885179d3a450 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=A8=80=E6=9E=AB?= Date: Wed, 29 Jun 2022 15:17:35 +0800 Subject: [PATCH] change processor --- .../preprocessors/space/dst_processors.py | 616 +++++++++--------- 1 file changed, 313 insertions(+), 303 deletions(-) diff --git a/modelscope/preprocessors/space/dst_processors.py b/modelscope/preprocessors/space/dst_processors.py index c5ce14af..fae53995 100644 --- a/modelscope/preprocessors/space/dst_processors.py +++ b/modelscope/preprocessors/space/dst_processors.py @@ -33,318 +33,324 @@ DIALOG_ACT = 'Dialog_Act' utter1 = { 'User-1': - "I'd really like to take my client out to a nice restaurant that serves indian food." + "I need train reservations from norwich to cambridge" } history_states1 = [ {}, ] utter2 = { 'User-1': - "I'd really like to take my client out to a nice restaurant that serves indian food.", + "I need train reservations from norwich to cambridge", 'System-1': - 'I show many restaurants that serve Indian food in that price range. What area would you like to travel to?', + 'I have 133 trains matching your request. Is there a specific day and time you would like to travel?', 'Dialog_Act-1': { - 'Restaurant-Inform': [['choice', 'many'], ['food', 'Indian'], - ['pricerange', 'that price range']] + "Train-Inform": [ + [ + "Choice", + "133" + ] + ], + "Train-Request": [ + [ + "Leave", + "?" + ], + [ + "Day", + "?" + ] + ] }, 'User-2': - 'I am looking for an expensive indian restaurant in the area of centre.', + 'I\'d like to leave on Monday and arrive by 18:00.', } history_states2 = [{}, { - 'attraction': { - 'book': { - 'booked': [] - }, - 'semi': { - 'area': '', - 'name': '', - 'type': '' - } - }, - 'hospital': { - 'book': { - 'booked': [] - }, - 'semi': { - 'department': '' - } - }, - 'hotel': { - 'book': { - 'booked': [{ - 'name': 'alexander bed and breakfast', - 'reference': 'JXVKZ7KV' - }], - 'day': - 'sunday', - 'people': - '6', - 'stay': - '4' - }, - 'semi': { - 'area': '', - 'internet': 'yes', - 'name': 'alexander bed and breakfast', - 'parking': 'yes', - 'pricerange': 'cheap', - 'stars': '', - 'type': 'guesthouse' - } - }, - 'police': { - 'book': { - 'booked': [] - }, - 'semi': {} - }, - 'restaurant': { - 'book': { - 'booked': [{ - 'name': 'ask', - 'reference': 'Y2Y8QYBY' - }], - 'day': 'sunday', - 'people': '6', - 'time': '18:45' - }, - 'semi': { - 'area': 'centre', - 'food': 'italian', - 'name': 'ask', - 'pricerange': 'cheap' - } - }, - 'taxi': { - 'book': { - 'booked': [] - }, - 'semi': { - 'arriveBy': '', - 'departure': '', - 'destination': '', - 'leaveAt': '' - } - }, - 'train': { - 'book': { - 'booked': [], - 'people': '' - }, - 'semi': { - 'arriveBy': '', - 'day': '', - 'departure': '', - 'destination': '', - 'leaveAt': '' - } - } -}, {}] + "taxi": { + "book": { + "booked": [] + }, + "semi": { + "leaveAt": "", + "destination": "", + "departure": "", + "arriveBy": "" + } + }, + "police": { + "book": { + "booked": [] + }, + "semi": {} + }, + "restaurant": { + "book": { + "booked": [], + "time": "", + "day": "", + "people": "" + }, + "semi": { + "food": "", + "pricerange": "", + "name": "", + "area": "" + } + }, + "hospital": { + "book": { + "booked": [] + }, + "semi": { + "department": "" + } + }, + "hotel": { + "book": { + "booked": [], + "stay": "", + "day": "", + "people": "" + }, + "semi": { + "name": "", + "area": "", + "parking": "", + "pricerange": "", + "stars": "", + "internet": "", + "type": "" + } + }, + "attraction": { + "book": { + "booked": [] + }, + "semi": { + "type": "", + "name": "", + "area": "" + } + }, + "train": { + "book": { + "booked": [], + "people": "" + }, + "semi": { + "leaveAt": "not mentioned", + "destination": "cambridge", + "day": "not mentioned", + "arriveBy": "not mentioned", + "departure": "norwich" + } + } + }, {}] utter3 = { 'User-1': - "I'd really like to take my client out to a nice restaurant that serves indian food.", + "I need train reservations from norwich to cambridge", 'System-1': - 'I show many restaurants that serve Indian food in that price range. What area would you like to travel to?', + 'I have 133 trains matching your request. Is there a specific day and time you would like to travel?', 'Dialog_Act-1': { - 'Restaurant-Inform': [['choice', 'many'], ['food', 'Indian'], - ['pricerange', 'that price range']] + "Train-Inform": [ + [ + "Choice", + "133" + ] + ], + "Train-Request": [ + [ + "Leave", + "?" + ], + [ + "Day", + "?" + ] + ] }, 'User-2': - 'I am looking for an expensive indian restaurant in the area of centre.', + 'I\'d like to leave on Monday and arrive by 18:00.', 'System-2': - 'Might I recommend Saffron Brasserie? That is an expensive Indian restaurant ' - 'in the center of town. I can book a table for you, if you like.', + 'There are 12 trains for the day and time you request. Would you like to book it now?', 'Dialog_Act-2': { - 'Restaurant-Recommend': [['area', 'center of town'], - ['food', 'Indian'], - ['name', 'Saffron Brasserie'], - ['pricerange', 'expensive']] + "Train-Inform": [ + [ + "Choice", + "12" + ] + ], + "Train-OfferBook": [ + [ + "none", + "none" + ] + ] }, 'User-3': - 'Sure thing, please book for 6 people at 19:30 on Saturday.' + 'Before booking, I would also like to know the travel time, price, and departure time please.' } history_states3 = [{}, { - 'attraction': { - 'book': { - 'booked': [] - }, - 'semi': { - 'area': '', - 'name': '', - 'type': '' - } - }, - 'hospital': { - 'book': { - 'booked': [] - }, - 'semi': { - 'department': '' - } - }, - 'hotel': { - 'book': { - 'booked': [{ - 'name': 'alexander bed and breakfast', - 'reference': 'JXVKZ7KV' - }], - 'day': - 'sunday', - 'people': - '6', - 'stay': - '4' - }, - 'semi': { - 'area': '', - 'internet': 'yes', - 'name': 'alexander bed and breakfast', - 'parking': 'yes', - 'pricerange': 'cheap', - 'stars': '', - 'type': 'guesthouse' - } - }, - 'police': { - 'book': { - 'booked': [] - }, - 'semi': {} - }, - 'restaurant': { - 'book': { - 'booked': [{ - 'name': 'ask', - 'reference': 'Y2Y8QYBY' - }], - 'day': 'sunday', - 'people': '6', - 'time': '18:45' - }, - 'semi': { - 'area': 'centre', - 'food': 'italian', - 'name': 'ask', - 'pricerange': 'cheap' - } - }, - 'taxi': { - 'book': { - 'booked': [] - }, - 'semi': { - 'arriveBy': '', - 'departure': '', - 'destination': '', - 'leaveAt': '' - } - }, - 'train': { - 'book': { - 'booked': [], - 'people': '' - }, - 'semi': { - 'arriveBy': '', - 'day': '', - 'departure': '', - 'destination': '', - 'leaveAt': '' - } - } -}, {}, { - 'attraction': { - 'book': { - 'booked': [] - }, - 'semi': { - 'area': '', - 'name': '', - 'type': '' - } - }, - 'hospital': { - 'book': { - 'booked': [] - }, - 'semi': { - 'department': '' - } - }, - 'hotel': { - 'book': { - 'booked': [{ - 'name': 'alexander bed and breakfast', - 'reference': 'JXVKZ7KV' - }], - 'day': - 'sunday', - 'people': - '6', - 'stay': - '4' - }, - 'semi': { - 'area': '', - 'internet': 'yes', - 'name': 'alexander bed and breakfast', - 'parking': 'yes', - 'pricerange': 'cheap', - 'stars': '', - 'type': 'guesthouse' - } - }, - 'police': { - 'book': { - 'booked': [] - }, - 'semi': {} - }, - 'restaurant': { - 'book': { - 'booked': [{ - 'name': 'ask', - 'reference': 'Y2Y8QYBY' - }], - 'day': 'sunday', - 'people': '6', - 'time': '18:45' - }, - 'semi': { - 'area': 'centre', - 'food': 'italian', - 'name': 'ask', - 'pricerange': 'cheap' - } - }, - 'taxi': { - 'book': { - 'booked': [] - }, - 'semi': { - 'arriveBy': '', - 'departure': '', - 'destination': '', - 'leaveAt': '' - } - }, - 'train': { - 'book': { - 'booked': [], - 'people': '' - }, - 'semi': { - 'arriveBy': '', - 'day': '', - 'departure': '', - 'destination': '', - 'leaveAt': '' - } - } -}, {}] + "taxi": { + "book": { + "booked": [] + }, + "semi": { + "leaveAt": "", + "destination": "", + "departure": "", + "arriveBy": "" + } + }, + "police": { + "book": { + "booked": [] + }, + "semi": {} + }, + "restaurant": { + "book": { + "booked": [], + "time": "", + "day": "", + "people": "" + }, + "semi": { + "food": "", + "pricerange": "", + "name": "", + "area": "" + } + }, + "hospital": { + "book": { + "booked": [] + }, + "semi": { + "department": "" + } + }, + "hotel": { + "book": { + "booked": [], + "stay": "", + "day": "", + "people": "" + }, + "semi": { + "name": "", + "area": "", + "parking": "", + "pricerange": "", + "stars": "", + "internet": "", + "type": "" + } + }, + "attraction": { + "book": { + "booked": [] + }, + "semi": { + "type": "", + "name": "", + "area": "" + } + }, + "train": { + "book": { + "booked": [], + "people": "" + }, + "semi": { + "leaveAt": "not mentioned", + "destination": "cambridge", + "day": "not mentioned", + "arriveBy": "not mentioned", + "departure": "norwich" + } + } + }, {}, {"taxi": { + "book": { + "booked": [] + }, + "semi": { + "leaveAt": "", + "destination": "", + "departure": "", + "arriveBy": "" + } + }, + "police": { + "book": { + "booked": [] + }, + "semi": {} + }, + "restaurant": { + "book": { + "booked": [], + "time": "", + "day": "", + "people": "" + }, + "semi": { + "food": "", + "pricerange": "", + "name": "", + "area": "" + } + }, + "hospital": { + "book": { + "booked": [] + }, + "semi": { + "department": "" + } + }, + "hotel": { + "book": { + "booked": [], + "stay": "", + "day": "", + "people": "" + }, + "semi": { + "name": "", + "area": "", + "parking": "", + "pricerange": "", + "stars": "", + "internet": "", + "type": "" + } + }, + "attraction": { + "book": { + "booked": [] + }, + "semi": { + "type": "", + "name": "", + "area": "" + } + }, + "train": { + "book": { + "booked": [], + "people": "" + }, + "semi": { + "leaveAt": "not mentioned", + "destination": "cambridge", + "day": "monday", + "arriveBy": "18:00", + "departure": "norwich" + } + }}, {}] class DSTProcessor(object): @@ -821,6 +827,10 @@ class multiwoz22Processor(DSTProcessor): new_hst_utt_tok_label_dict = hst_utt_tok_label_dict.copy() new_diag_state = diag_state.copy() + ###### + mod_slots_list = [] + ##### + for i in range(0, len(utt_tok_list) - 1, 2): sys_utt_tok_label_dict = {} usr_utt_tok_label_dict = {} @@ -977,23 +987,23 @@ class multiwoz22Processor(DSTProcessor): example = DSTExample( guid=guid, - text_a=txt_a, - text_b=txt_b, - history=hst_utt_tok, - text_a_label=txt_a_lbl, - text_b_label=txt_b_lbl, - history_label=hst_utt_tok_label_dict, - values=diag_seen_slots_value_dict.copy(), - inform_label=inform_dict, - inform_slot_label=inform_slot_dict, - refer_label=referral_dict, - diag_state=diag_state, - class_label=class_type_dict) - # Update some variables. - hst_utt_tok_label_dict = new_hst_utt_tok_label_dict.copy() - diag_state = new_diag_state.copy() - - turn_itr += 1 + text_a=txt_a, # 必要 input, 对话文本 + text_b=txt_b, # 必要 input, 对话文本 + history=hst_utt_tok, # 必要 input, 对话文本 + text_a_label=txt_a_lbl, # 输出label,不管, 最后变成 start/end pos + text_b_label=txt_b_lbl, # 输出label,不管, 最后变成 start/end pos + history_label=hst_utt_tok_label_dict, # 输出label,不管, 最后变成 start/end pos + values=diag_seen_slots_value_dict.copy(), # 后面没用上,不管 + inform_label=inform_dict, # 后面没用上,不管 + inform_slot_label=inform_slot_dict, # 必要 input, 代表 system dialog action + refer_label=referral_dict, # 输出label,不管, 最后变成 refer_id + diag_state=diag_state, # input, 代表 history dialog state + class_label=class_type_dict) # 输出label,不管, 最后变成 class_label_id + # Update some variables. + hst_utt_tok_label_dict = new_hst_utt_tok_label_dict.copy() + diag_state = new_diag_state.copy() + + turn_itr += 1 #### 缩进不正确 return example def create_example(self, @@ -1517,7 +1527,7 @@ if __name__ == '__main__': unk_token = '[UNK]' analyze = False - example = processor.create_example(utter1, history_states1, set_type, + example = processor.create_example(utter3, history_states3, set_type, slot_list, {}, append_history, use_history_labels, swap_utterances, label_value_repetitions,