@inproceedings{webb2010evaluating,
  title = {Evaluating human-machine conversation for appropriateness},
  abstract = {Evaluation of complex, collaborative dialogue systems is a difficult task. Traditionally, developers have relied upon subjective feedback from the user, and parametrisation over observable metrics. However, both models place some reliance on the notion of a task; that is, the system is helping the user achieve some clearly defined goal, such as booking a flight or completing a banking transaction. It is not clear that such metrics are as useful when dealing with a system that has a more complex task, or even no definable task at all, beyond maintaining and performing a collaborative dialogue. Working within the EU-funded COMPANIONS program, we investigate the use of appropriateness as a measure of conversation quality, the hypothesis being that good companions need to be good conversational partners. We report initial work in the direction of annotating dialogue for indicators of good conversation, including the annotation and comparison of the output of two generations of the same dialogue system.},
  conference = {LREC 2010, Seventh International Conference on Language Resources and Evaluation},
  isbn = {2951740867},
  organization = {Valletta, Malta},
  pages = {84--91},
  publicationstatus = {Published},
  publisher = {European Language Resources Association (ELRA)},
  url = {http://researchrepository.napier.ac.uk/id/eprint/3767},
  keyword = {004 Data processing \& computer science, QA76 Computer software, Interaction design, Centre for Interaction Design, AI and Technologies, Dialogue, Evaluation methodologies, Usability, User satisfaction},
  year = {2010},
  author = {Webb, Nick and Benyon, David and Hansen, Preben and Mival, Oli}
}