by Atkinson, MP, van Hemert, J, Han, L, Hume, A and Liew, CS
Abstract:
This paper presents the rationale for a new architecture to support a significant increase in the scale of data integration and data mining. It proposes the composition into one framework of (1) data mining and (2) data access and integration. We name the combined activity DMI. It supports enactment of DMI processes across heterogeneous and distributed data resources and data mining services. It posits that a useful division can be made between the facilities established to support the definition of DMI processes and the computational infrastructure provided to enact DMI processes. Communication between those two divisions is restricted to requests submitted to gateway services in a canonical DMI language. Larger-scale processes are enabled by incremental refinement of DMI-process definitions often by recomposition of lower-level definitions. Autonomous evolution of data resources and services is supported by types and descriptions which will support detection of inconsistencies and semi-automatic insertion of adaptations. These architectural ideas are being evaluated in a feasibility study that involves an application scenario and representatives of the community.
Reference:
A distributed architecture for data mining and integration (Atkinson, MP, van Hemert, J, Han, L, Hume, A and Liew, CS), In Data-Aware Distributed Computing, ACM, volume 2, 2009.
Bibtex Entry:
@inproceedings{Atkinson2009a,
  author        = {Atkinson, M. P. and van Hemert, J. and Han, L. and Hume, A. and Liew, C. S.},
  title         = {A distributed architecture for data mining and integration},
  booktitle     = {Data-Aware Distributed Computing},
  volume        = {2},
  pages         = {11--20},
  publisher     = {ACM},
  address       = {New York, NY, USA},
  location      = {Garching, Germany},
  year          = {2009},
  _day          = {07},
  isbn          = {978-1-60558-589-5},
  doi           = {10.1145/1552280.1552282},
  keywords      = {data mining; data integration; distributed systems},
  abstract      = {This paper presents the rationale for a new architecture to support a significant increase in the scale of data integration and data mining. It proposes the composition into one framework of (1) data mining and (2) data access and integration. We name the combined activity DMI. It supports enactment of DMI processes across heterogeneous and distributed data resources and data mining services. It posits that a useful division can be made between the facilities established to support the definition of DMI processes and the computational infrastructure provided to enact DMI processes. Communication between those two divisions is restricted to requests submitted to gateway services in a canonical DMI language. Larger-scale processes are enabled by incremental refinement of DMI-process definitions often by recomposition of lower-level definitions. Autonomous evolution of data resources and services is supported by types and descriptions which will support detection of inconsistencies and semi-automatic insertion of adaptations. These architectural ideas are being evaluated in a feasibility study that involves an application scenario and representatives of the community.},
  date-added    = {2009-06-19 18:45:47 +0100},
  date-modified = {2009-09-30 13:59:11 +0100},
  bdsk-url-1    = {http://doi.acm.org/10.1145/1552280.1552282},
  internal-note = {Retyped from article to inproceedings (venue moved from journal to booktitle) because isbn, publisher and venue location indicate a proceedings paper. doi stored bare, without resolver prefix. volume kept as exported; presumably the ordinal of the workshop edition rather than a bound volume -- TODO confirm against the publisher record.},
}