Browse Source

add test case

master
ly119399 3 years ago
parent
commit
f63c37fc1f
2 changed files with 68 additions and 96 deletions
  1. +2
    -72
      modelscope/preprocessors/space/dst_processors.py
  2. +66
    -24
      tests/pipelines/nlp/test_dialog_state_tracking.py

+ 2
- 72
modelscope/preprocessors/space/dst_processors.py View File

@@ -462,39 +462,6 @@ class multiwoz22Processor(DSTProcessor):
utt_tok_list.append(self.tokenize(
utt['text'])) # normalize utterances

# modified_slots = {}

# If sys utt, extract metadata (identify and collect modified slots)
# if is_sys_utt:
# for d in utt['metadata']:
# booked = utt['metadata'][d]['book']['booked']
# booked_slots = {}
# # Check the booked section
# if booked != []:
# for s in booked[0]:
# booked_slots[s] = self.normalize_label(
# '%s-%s' % (d, s),
# booked[0][s]) # normalize labels
# # Check the semi and the inform slots
# for category in ['book', 'semi']:
# for s in utt['metadata'][d][category]:
# cs = '%s-book_%s' % (
# d, s) if category == 'book' else '%s-%s' % (d,
# s)
# value_label = self.normalize_label(
# cs, utt['metadata'][d][category]
# [s]) # normalize labels
# # Prefer the slot value as stored in the booked section
# if s in booked_slots:
# value_label = booked_slots[s]
# # Remember modified slots and entire dialog state
# if cs in slot_list and cumulative_labels[
# cs] != value_label:
# modified_slots[cs] = value_label
# cumulative_labels[cs] = value_label
#
# mod_slots_list.append(modified_slots.copy())

# Form proper (usr, sys) turns
turn_itr = 0
diag_seen_slots_dict = {}
@@ -938,8 +905,8 @@ def convert_examples_to_features(examples,
# Account for [CLS], [SEP], [SEP], [SEP] with "- 4" (BERT)
if len(tokens_a) + len(tokens_b) + len(
history) > max_seq_length - model_specs['TOKEN_CORRECTION']:
logger.info('Truncate Example %s. Total len=%d.' %
(guid, len(tokens_a) + len(tokens_b) + len(history)))
# logger.info('Truncate Example %s. Total len=%d.' %
# (guid, len(tokens_a) + len(tokens_b) + len(history)))
input_text_too_long = True
else:
input_text_too_long = False
@@ -968,7 +935,6 @@ def convert_examples_to_features(examples,

def _get_start_end_pos(class_type, token_label_ids, max_seq_length):
if class_type == 'copy_value' and 1 not in token_label_ids:
# logger.warn("copy_value label, but token_label not detected. Setting label to 'none'.")
class_type = 'none'
start_pos = 0
end_pos = 0
@@ -1045,9 +1011,6 @@ def convert_examples_to_features(examples,
features = []
# Convert single example
for (example_index, example) in enumerate(examples):
if example_index % 1000 == 0:
logger.info('Writing example %d of %d' %
(example_index, len(examples)))

total_cnt += 1

@@ -1075,17 +1038,6 @@ def convert_examples_to_features(examples,
model_specs, example.guid)

if input_text_too_long:
if example_index < 10:
if len(token_labels_a) > len(tokens_a):
logger.info(' tokens_a truncated labels: %s'
% str(token_labels_a[len(tokens_a):]))
if len(token_labels_b) > len(tokens_b):
logger.info(' tokens_b truncated labels: %s'
% str(token_labels_b[len(tokens_b):]))
if len(token_labels_history) > len(tokens_history):
logger.info(
' tokens_history truncated labels: %s'
% str(token_labels_history[len(tokens_history):]))

token_labels_a = token_labels_a[:len(tokens_a)]
token_labels_b = token_labels_b[:len(tokens_b)]
@@ -1136,25 +1088,6 @@ def convert_examples_to_features(examples,

assert (len(input_ids) == len(input_ids_unmasked))

# if example_index < 10:
# logger.info('*** Example ***')
# logger.info('guid: %s' % (example.guid))
# logger.info('tokens: %s' % ' '.join(tokens))
# logger.info('input_ids: %s' % ' '.join([str(x)
# for x in input_ids]))
# logger.info('input_mask: %s'
# % ' '.join([str(x) for x in input_mask]))
# logger.info('segment_ids: %s'
# % ' '.join([str(x) for x in segment_ids]))
# logger.info('start_pos: %s' % str(start_pos_dict))
# logger.info('end_pos: %s' % str(end_pos_dict))
# logger.info('values: %s' % str(value_dict))
# logger.info('inform: %s' % str(inform_dict))
# logger.info('inform_slot: %s' % str(inform_slot_dict))
# logger.info('refer_id: %s' % str(refer_id_dict))
# logger.info('diag_state: %s' % str(diag_state_dict))
# logger.info('class_label_id: %s' % str(class_label_id_dict))

features.append(
InputFeatures(
guid=example.guid,
@@ -1171,9 +1104,6 @@ def convert_examples_to_features(examples,
diag_state=diag_state_dict,
class_label_id=class_label_id_dict))

logger.info('========== %d out of %d examples have text too long' %
(too_long_cnt, total_cnt))

return features




+ 66
- 24
tests/pipelines/nlp/test_dialog_state_tracking.py View File

@@ -15,25 +15,66 @@ class DialogStateTrackingTest(unittest.TestCase):
model_id = 'damo/nlp_space_dialog-state-tracking'
test_case = [{
'User-1':
'am looking for a place to to stay that has cheap price range it should be in a type of hotel'
'Hi, I\'m looking for a train that is going to cambridge and arriving there by 20:45, '
'is there anything like that?'
}, {
'System-1':
'Okay, do you have a specific area you want to stay in?',
'There are over 1,000 trains like that. Where will you be departing from?',
'Dialog_Act-1': {
'Hotel-Request': [['Area', '?']]
'Train-Inform': [['Choice', 'over 1'], ['Choice', '000']],
'Train-Request': [['Depart', '?']]
},
'User-2':
"no, i just need to make sure it's cheap. oh, and i need parking"
'User-2': 'I am departing from birmingham new street.'
}, {
'System-2':
'I found 1 cheap hotel for you that includes parking. Do you like me to book it?',
'System-2': 'Can you confirm your desired travel day?',
'Dialog_Act-2': {
'Booking-Inform': [['none', 'none']],
'Hotel-Inform': [['Price', 'cheap'], ['Choice', '1'],
['Parking', 'none']]
'Train-Request': [['Day', '?']]
},
'User-3':
'Yes, please. 6 people 3 nights starting on tuesday.'
'User-3': 'I would like to leave on wednesday'
}, {
'System-3':
'I show a train leaving birmingham new street at 17:40 and arriving at 20:23 on Wednesday. '
'Will this work for you?',
'Dialog_Act-3': {
'Train-Inform': [['Arrive', '20:23'], ['Leave', '17:40'],
['Day', 'Wednesday'],
['Depart', 'birmingham new street']]
},
'User-4':
'That will, yes. Please make a booking for 5 people please.',
}, {
'System-4':
'I\'ve booked your train tickets, and your reference number is A9NHSO9Y.',
'Dialog_Act-4': {
'Train-OfferBooked': [['Ref', 'A9NHSO9Y']]
},
'User-5':
'Thanks so much. I would also need a place to say. '
'I am looking for something with 4 stars and has free wifi.'
}, {
'System-5':
'How about the cambridge belfry? '
'It has all the attributes you requested and a great name! '
'Maybe even a real belfry?',
'Dialog_Act-5': {
'Hotel-Recommend': [['Name', 'the cambridge belfry']]
},
'User-6':
'That sounds great, could you make a booking for me please?',
}, {
'System-6':
'What day would you like your booking for?',
'Dialog_Act-6': {
'Booking-Request': [['Day', '?']]
},
'User-7':
'Please book it for Wednesday for 5 people and 5 nights, please.',
}, {
'System-7': 'Booking was successful. Reference number is : 5NAWGJDC.',
'Dialog_Act-7': {
'Booking-Book': [['Ref', '5NAWGJDC']]
},
'User-8': 'Thank you, goodbye',
}]

def test_run(self):
@@ -51,21 +92,22 @@ class DialogStateTrackingTest(unittest.TestCase):
preprocessor=preprocessor)
]

history_states = [{}]
utter = {}
pipelines_len = len(pipelines)
import json
for step, item in enumerate(self.test_case):
utter.update(item)
result = pipelines[step % pipelines_len]({
'utter':
utter,
'history_states':
history_states
})
print(json.dumps(result))
for _test_case in self.test_case:
history_states = [{}]
utter = {}
for step, item in enumerate(_test_case):
utter.update(item)
result = pipelines[step % pipelines_len]({
'utter':
utter,
'history_states':
history_states
})
print(json.dumps(result))

history_states.extend([result['dialog_states'], {}])
history_states.extend([result['dialog_states'], {}])

@unittest.skip('test with snapshot_download')
def test_run_with_model_from_modelhub(self):


Loading…
Cancel
Save