% Data Mining Bibliographies Copyright Information
% 
% The author reserves the 
% 
%     Copyright (C) 1997 Andy Pryke. All rights reserved.
% 
% for the compilation of this KDD bibliography collection. 
% 
% If you find the bibliography collection useful for your work, I would
% be happy if you acknowledge it and me. You could also send me a
% postcard if you wish (address below).
% 
% I usually give my consent that the collection may be copied and
% distributed with the following conditions:
% 
% 1) It may be used only for research or educational purposes
% 
% and 
% 
% 2) Any copy must be accompanied by a reference to the original
% collection and its author.
% 
% and
% 
% 3) This information must always accompany every copy of a bibliography.
% 
% I reserve the right to revoke the above permission at any time. 
% 
% Any other use must be negotiated in advance. 
% 
% Any commercial use of the bibliographies is strictly prohibited. In
% particular, the whole or derived bibliographies may not be sold for
% profit or included in commercial documents (e.g., published on CD-ROM,
% floppy disks, books, magazines, or other print form) without the prior
% written permission of the copyright holder.
% 
% Please contact the author if the intended usage is not covered by the
% above statement.
% 
% Abstracts of publications published by the ACM and the IEEE are also
% subject to the respective "interim" or "provisional" copyright
% policies:
% 
%     ACM copyright policy (http://www.acm.org/pubs/copyright_policy/)
%     IEEE copyright policy (http://www.ieee.org/copyright/policies.htm)
% 
% This copyright notice is derived from one by Alf-Christian Achilles
% for his (massive) Computer Science Bibliography Collection at 
% (http://liinwww.ira.uka.de/bibliography/index.html).
% 
% --------------------------------------------------------------------
% 
% My address:
% 
% My postal address is:
% 
% Andy Pryke,
% Department of Computer Science,
% The University of Birmingham,
% Edgbaston,
% Birmingham.
% B15 2TT
% 
% Fax  : 0121 414 4281
% Phone: 0121 414 3736
% Email: A.N.Pryke(at)cs.bham.ac.uk
% Web: http://www.cs.bham.ac.uk/~anp/
% 
,
@article{machine_learning_journal_special:93,
  key     = {Machine_Learning_Journal_Special:93},
  journal = {Machine Learning Journal},
  volume  = {5},
  number  = {6},
  month   = dec,
  year    = {1993},
  note    = {Special issue on Learning and Discovery in Databases},
}

Improved Methods for Finding Association Rules,
Available as
compressed postscript.
% NOTE(review): this entry has no author, institution or year, all of which
% TechReport entries require -- TODO fill in from the report itself.
@TechReport{no_author:improved-methods:,
  URL =          "ftp://ftp.cs.helsinki.fi/pub/Reports/by_Project/PMDM/Improved_Methods_for_Finding_Association_Rules.ps.gz",
  title =        "Improved Methods for Finding Association Rules",
  abstract =     "Association rules are statements of the form for 90\%
                 of the rows of the relation, if the row has value 1 in
                 the columns in set W , then it has 1 also in column B .
                 Agrawal, Imielinski, and Swami introduced the problem
                 of mining association rules from large collections of
                 data, and gave a method based on successive passes over
                 the database. We give an improved algorithm for the
                 problem. The method is based on careful combinatorial
                 analysis of the information obtained in previous
                 passes; this makes it possible to eliminate unnecessary
                 candidate rules. Experiments on a university course
                 enrollment database indicate that the method
                 outperforms the previous one by a factor of 5. We also
                 give simple information-theoretic lower bounds for the
                 problem of finding association rules, and show that
                 sampling is in general a very efficient way of finding
                 such rules. Computing Reviews Categories and Subject
                 Descriptors: H.3.3 [Information Systems]: Information
                 Storage and Retrieval - Information Search and
                 Retrieval I.2.6 [Computing Methodologies]: Artificial
                 Intelligence - Learning I.2.8 [Computing
                 Methodologies]: Artificial Intelligence - Problem
                 Solving, Control Methods, and Search General Terms:
                 Databases, machine learning, artificial intelligence.
                 Additional Key Words and Phrases: Database mining,
                 knowledge discovery in databases, association rules,
                 covering sets.",
}

Learning Decision Trees for Mapping the Local Environment in Mobile Robot Navigation,
Available as
compressed postscript.
@techreport{no_author:learning-decision:,
  title    = {Learning Decision Trees for Mapping the Local
              Environment in Mobile Robot Navigation},
  url      = {ftp://ftp.cs.helsinki.fi/pub/Reports/by_Project/PMDM/Learning_Decision_Trees_for_Mapping_the_Local_Environment_in_Mobile_Robot_Navigation.ps.gz},
  abstract = {This paper describes the use of the C4.5 decision tree
              learning algorithm in the design of a classifier for a
              new approach to the mapping of a mobile robot's local
              environment. The decision tree uses the features from
              the echoes of an ultrasonic array mounted on the robot
              to classify the contours of its local environment. The
              contours are classified into a finite number of two
              dimensional shapes to form a primitive map which is to
              be used for navigation. The nature of the problem,
              noise and the practical timing constraints,
              distinguishes it from those typically used in machine
              learning applications and highlights some of the
              advantages of decision tree learning in robotic
              applications.},
}

Overheads for the AI'94 Tutorial on Intelligent Learning Database Systems,
Available as
postscript.
@misc{no_author:overheads-ai94:,
  title    = {Overheads for the {AI}'94 Tutorial on Intelligent
              Learning Database Systems},
  url      = {ftp://coral.cs.jcu.edu.au/pub/research/HCV/KDD.ps},
  abstract = {This full-day tutorial presents and discusses
              techniques for the following 3 interconnected phases in
              constructing intelligent learning database systems: (1)
              Translation of standard database information into a
              form suitable for use by a rule-based system; (2) Using
              machine learning techniques to produce rule bases from
              databases; and (3) Interpreting the rules produced to
              solve users' problems and/or reduce data spaces. It
              suits a wide audience including postgraduate students
              and industrial people from databases, expert systems,
              and machine learning.},
  annote   = {Comments and suggestions for improvements are
              solicited! Comments to Xindong Wu
              (xindong(at)INSECT.SD.MONASH.EDU.AU),},
}

State Of The Art,
Available as
sec8.htm.
@article{no_author:state-art:95,
  title   = {State Of The Art},
  journal = {Byte},
  month   = oct,
  year    = {1995},
  url     = {http://www.byte.com/art/9510/sec8/sec8.htm},
  annote  = {A number of articles, good introduction to data
             mining},
}

Discovery of Actionable Patterns in Databases: The Action Hierarchy Approach, Gediminas Adomavicius and Alexander Tuzhilin
@inproceedings{adomavicius.ea:actionable-patterns:97,
  author   = {Gediminas Adomavicius and Alexander Tuzhilin},
  title    = {Discovery of Actionable Patterns in Databases: The
              Action Hierarchy Approach},
  pages    = {111},
  crossref = {heckerman.ea:proceedings-third:97},
}

Mining Association Rules between Sets of Items in Large Databases, Rakesh Agrawal and Tomasz Imielinski and Arun N. Swami
Available as
postscript.
@inproceedings{agrawal.ea:association-rules:93a,
  key_modifier = {a},
  author       = {Rakesh Agrawal and Tomasz Imielinski and Arun N.
                  Swami},
  title        = {Mining Association Rules between Sets of Items in
                  Large Databases},
  editor       = {Peter Buneman and Sushil Jajodia},
  booktitle    = {Proceedings of the 1993 {ACM} {SIGMOD} International
                  Conference on Management of Data},
  address      = {Washington, D.C.},
  pages        = {207--216},
  month        = {26--28~} # may,
  year         = {1993},
  url          = {http://www.almaden.ibm.com/cs/people/ragrawal/papers/sigmod93.ps},
  abstract     = {We are given a large database of customer
                  transactions. Each transaction consists of items
                  purchased by a customer in a visit. We present an
                  efficient algorithm that generates all significant
                  association rules between items in the database. The
                  algorithm incorporates buffer management and novel
                  estimation and pruning techniques. We also present
                  results of applying this algorithm to sales data
                  obtained from a large retailing company, which shows
                  the effectiveness of the algorithm.},
}

Mining association rules between sets of items in large databases, Rakesh Agrawal and Tomasz Imielinski and Arun Swami
@article{agrawal.ea:association-rules:93b,
  key_modifier       = {b},
  author             = {Rakesh Agrawal and Tomasz Imielinski and Arun Swami},
  title              = {Mining association rules between sets of items in
                        large databases},
  journal            = {SIGMOD Record (ACM Special Interest Group on
                        Management of Data)},
  volume             = {22},
  number             = {2},
  pages              = {207--216},
  month              = jun,
  year               = {1993},
  isbn               = {0-89791-592-5},
  issn               = {0163-5808},
  abstract           = {We are given a large database of customer
                        transactions. Each transaction consists of items
                        purchased by a customer in a visit. We present an
                        efficient algorithm that generates all significant
                        association rules between items in the database. The
                        algorithm incorporates buffer management and novel
                        estimation and pruning techniques. We also present
                        results of applying this algorithm to sales data
                        obtained from a large retailing company, which shows
                        the effectiveness of the algorithm.},
  affiliation        = {IBM Almaden Research Cent},
  affiliationaddress = {San Jose, CA, USA},
  classification     = {723.3; 921.6; 911.4; 723.2; 722.1; 922.1; C6160Z
                        (Other DBMS); C6130 (Data handling techniques); C6170
                        (Expert systems); C6120 (File organisation); C7170
                        (Marketing)},
  conference         = {Proceedings of the 1993 ACM SIGMOD International
                        Conference on Management of Data},
  conferenceyear     = {1993},
  keywords           = {Database systems; Algorithms; Marketing; Data
                        handling; Data storage equipment; Probability;
                        Estimation; Query languages; Large scale systems;
                        Associative processing; Administrative data processing;
                        Large databases; Mining association rules; Pruning
                        technique; Basket data, Large database; Customer
                        transactions; Efficient algorithm; Association rules;
                        Buffer management; Novel estimation; Pruning
                        techniques; Sales data; Large retailing company},
  meetingaddress     = {Washington, DC, USA},
  meetingdate        = {May 26--28 1993},
  meetingdate2       = {05/26--28/93},
  publisherinfo      = {Fort Collins Computer Center},
  sponsor            = {ACM, SIGMOD; Minerals, Metals \& Materials Society},
  thesaurus          = {Knowledge based systems; Marketing data processing;
                        Storage management; Transaction processing; Very large
                        databases},
  xxcrossref         = {Anonymous:1993:SAS},
}

Database mining - a performance perspective, R. Agrawal and T. Imielinski and A. Swami
@Article{agrawal.ea:database-performance:93,
  author =       "R. Agrawal and T. Imielinski and A. Swami",
  address =      "IBM Corp, Almaden Res Ctr, 650 Harry Rd, San Jose, CA,
                 95120",
  title =        "Database mining - a performance perspective",
  journal =      "{IEEE} Transactions on Knowledge and Data Engineering",
  year =         "1993",
  volume =       "5",
  number =       "6",
  pages =        "914--925",
  abstract =     "We present our perspective of database mining as the
                 confluence of machine learning techniques and the
                 performance emphasis of database technology. We
                 describe three classes of database mining problems
                 involving classification, associations, and sequences,
                 and argue that these problems can be uniformly viewed
                 as requiring discovery of rules embedded in massive
                 data. We describe a model and some basic operations for
                 the process of rule discovery. We show how the database
                 mining problems we consider map to this model and how
                 they can be solved by using the basic operations we
                 propose. We give an example of an algorithm for
                 classification obtained by combining the basic rule
                 discovery operations. This algorithm not only is
                 efficient in discovering classification rules but also
                 has accuracy comparable to ID3, one of the current best
                 classifiers.",
  annote =       "Identification and unification of 3 classes of data
                 mining problem, Classification, Association and
                 Sequences. They then go on to propose a unifying
                 framework for these three problems, and five basic
                 operators for rule discovery. These are then used to
                 construct an algorithm CDP (Classifier with Dynamic
                 Pruning) which out performs ID3 in classifier accuracy
                 and efficiency on a test problem.",
  keywords =     "ASSOCIATIONS, CLASSIFICATION, DATABASE MINING,
                 DECISION TREES, KNOWLEDGE DISCOVERY, SEQUENCES",
}

Developing Tightly-Coupled Data Mining Applications on a Relational Database System, Rakesh Agrawal and Kyuseok Shim
@inproceedings{agrawal.ea:developing-tightly-coupled:96,
  author   = {Rakesh Agrawal and Kyuseok Shim},
  title    = {Developing Tightly-Coupled Data Mining Applications on
              a Relational Database System},
  pages    = {287},
  crossref = {simoudis.ea:proceedings-second:96},
}

Fast Algorithms for Mining Association Rules in Large Databases, R. Agrawal and R. Srikant
@InProceedings{agrawal.ea:fast-algorithms:94,
  author =       "R. Agrawal and R. Srikant",
  title =        "Fast Algorithms for Mining Association Rules in Large
                 Databases",
  editor =       "Jorge B. Bocca and Matthias Jarke and Carlo Zaniolo",
  booktitle =    "20th International Conference on Very Large Data
                 Bases, September 12--15, 1994, Santiago, Chile
                 proceedings",
  publisher =    "Morgan Kaufmann Publishers",
  address =      "Los Altos, CA 94022, USA",
  pages =        "487--499",
  year =         "1994",
  annote =       "Also known as VLDB'94",
  keywords =     "very large data bases; VLDB",
}

Parallel mining of association rules, R. Agrawal and J. C. Shafer
@Article{agrawal.ea:parallel-association:96,
  author =       "R. Agrawal and J. C. Shafer",
  address =      "IBM Corp, Almaden Res Ctr, 650 Harry Rd, San Jose, CA,
                 95120",
  title =        "Parallel mining of association rules",
  journal =      "{IEEE} Transactions on Knowledge and Data Engineering",
  year =         "1996",
  volume =       "8",
  number =       "6",
  pages =        "962--969",
  abstract =     "We consider the problem of mining association rules on
                 a shared-nothing multiprocessor. We present three
                 algorithms that explore a spectrum of trade-offs
                 between computation, communication, memory usage,
                 synchronization, and the use of problem-specific
                 information. The best algorithm exhibits near perfect
                 scaleup behavior, yet requires only minimal overhead
                 compared to the current best serial algorithm.",
  keywords =     "data mining, association rules, parallel algorithms",
}

Quest: A Project on Database Mining, R. Agrawal and M. Carey and C. Faloutson and S. Ghosh and A. Houtsma and T. Imielinski and B. Iyer and A. Mahboob and H. Miranda and R. Srikant and A. Swami
@Article{agrawal.ea:quest-project:94a,
  key_modifier = "a",
  author =       "R. Agrawal and M. Carey and C. Faloutsos and S. Ghosh
                 and A. Houtsma and T. Imielinski and B. Iyer and A.
                 Mahboob and H. Miranda and R. Srikant and A. Swami",
  title =        "{Quest}: {A} Project on Database Mining",
  journal =      "SIGMOD Record (ACM Special Interest Group on
                 Management of Data)",
  volume =       "23",
  number =       "2",
  pages =        "514",
  month =        jun,
  year =         "1994",
  ISSN =         "0163-5808",
  affiliation =  "IBM Almaden Res. Center, San Jose, CA, USA",
  classification = "C6160 (Database management systems (DBMS))",
  keywords =     "Quest project; Database mining; Tertiary storage; Data
                 model construction; Data model verification",
  thesaurus =    "Very large databases",
  xxcrossref =   "Anonymous:1994:ASI",
}

Quest: A Project on Database Mining, Rakesh Agrawal and Michael J. Carey and Christos Faloutsos and Sakti P. Ghosh and Maurice A. W. Houtsma and Tomasz Imielinski and Balakrishna R. Iyer and A. Mahboob and H. Miranda and Ramakrishnan Srikant and Arun N. Swami
@inproceedings{agrawal.ea:quest-project:94b,
  key_modifier = {b},
  author       = {Rakesh Agrawal and Michael J. Carey and Christos
                  Faloutsos and Sakti P. Ghosh and Maurice A. W. Houtsma
                  and Tomasz Imielinski and Balakrishna R. Iyer and A.
                  Mahboob and H. Miranda and Ramakrishnan Srikant and
                  Arun N. Swami},
  title        = {Quest: {A} Project on Database Mining},
  editor       = {Richard T. Snodgrass and Marianne Winslett},
  booktitle    = {Proceedings of the 1994 {ACM} {SIGMOD} International
                  Conference on Management of Data},
  address      = {Minneapolis, Minnesota},
  pages        = {514},
  month        = {24--27~} # may,
  year         = {1994},
}

The Quest Data Mining System, Rakesh Agrawal and Manish Mehta and John Shafer and Ramakrishnan Srikant and Andreas Arning and Toni Bollinger
@inproceedings{agrawal.ea:quest-system:96,
  author   = {Rakesh Agrawal and Manish Mehta and John Shafer and
              Ramakrishnan Srikant and Andreas Arning and Toni
              Bollinger},
  title    = {The Quest Data Mining System},
  pages    = {244},
  crossref = {simoudis.ea:proceedings-second:96},
}

Mining Sequential Patterns, R. Agrawal and R. Srikant
@InProceedings{agrawal.ea:sequential-patterns:95,
  author =       "R. Agrawal and R. Srikant",
  title =        "Mining Sequential Patterns",
  booktitle =    "International Conference on Data Engineering",
  organization = "IEEE",
  year =         "1995",
  pages =        "3--14",
  abstract =     "We are given a large database of customer
                 transactions, where each transaction consists of
                 customer-id, transaction time, and the items bought in
                 the transaction. We introduce the problem of mining
                 sequential patterns over such databases. We present
                 three algorithms to solve this problem, and empirically
                 evaluate their performance using synthetic data. Two of
                 the proposed algorithms, AprioriSome and AprioriAll,
                 have comparable performance, albeit AprioriSome
                 performs a little better when the minimum number of
                 customers that must support a sequential pattern is
                 low. Scale-up experiments show that both AprioriSome
                 and AprioriAll scale linearly with the number of
                 customer transactions. They also have excellent
                 scale-up properties with respect to the number of
                 transactions per customer and the number of items in a
                 transaction.",
}

Data Mining, Rakesh Agrawal
@inproceedings{agrawal:data-mining:94,
  author    = {Rakesh Agrawal},
  title     = {Data Mining},
  booktitle = {Proceedings of the 13th Symposium on Principles of
               Database Systems},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
  pages     = {75--76},
  month     = may,
  year      = {1994},
}

Tutorial: Data Mining, R. Agrawal
% NOTE(review): this appears to describe the same paper as
% agrawal:data-mining:94 (same author, year, pages and symposium) --
% consider merging the two once confirmed.
@InProceedings{agrawal:tutorial:94,
  author =       "R. Agrawal",
  title =        "Tutorial: Data Mining",
  organization = "{ACM}",
  booktitle =    "Proceedings of the 13th Symposium on Principles of
                 Database Systems",
  venue =        "Minneapolis, MN",
  volume =       "13",
  publisher =    "ACM Press",
  address =      "New York, NY 10036, USA",
  series =       "Proceedings of the {ACM} {SIGACT} {SIGMOD} {SIGART}
                 Symposium on Principles of Database Systems 1994",
  pages =        "75--76",
  month =        may,
  year =         "1994",
  keywords =     "database systems; ACM; SIGACT; SIGMOD; SIGART;
                 computability; theory",
}

Machine Learning tutorial (Slides and Annotated Bibliography), David Aha
Available as
hypertext.
@Misc{aha:machine-learning:,
  URL =          "http://www.aic.nrl.navy.mil/~aha/slides.html",
  title =        "Machine Learning tutorial (Slides and Annotated
                 Bibliography)",
  author =       "David Aha",
  annote =       "David Aha presented the Machine Learning tutorial at
                 AI \& Stats 1995. He's kindly put his slides online",
}

Temporal aspects in data mining, Salem Al-naemi
% NOTE(review): the source gave the year as 1992/3; 1992 is used here to
% match the citation key, and the original value is kept in the note field
% -- confirm the actual date against the report.
@TechReport{al-naemi:temporal-aspects:92,
  author =       "Salem Al-naemi",
  title =        "Temporal aspects in data mining",
  institution =  "Computer Science Department, University of
                 Birmingham",
  year =         "1992",
  note =         "Dated 1992/3",
  annote =       "Sections on RdB's, other temporal models and time
                 series",
}

Mine for Gold with Parallel Systems, Michael Alexander
@article{alexander:mine-gold:94,
  author   = {Michael Alexander},
  title    = {Mine for Gold with Parallel Systems},
  journal  = {Datamation},
  volume   = {40},
  number   = {22},
  pages    = {65--??},
  day      = {15},
  month    = nov,
  year     = {1994},
  issn     = {0011-6963},
  abstract = {Parallel computing technology has become more
              accessible to IS shops with the release of parallelized
              versions of popular RDBMSs. With such off-the-shelf
              tools, your company can gain competitive advantage
              through techniques like data mining that allow you to
              more finely analyze and project demand for your
              products. But if you're going to need the power of
              massively parallel systems, off-the-shelf solutions are
              still a few years away.},
}

Partial Classification Using Association Rules, Kamal Ali and Stefanos Manganaris and Ramakrishnan Srikant
@inproceedings{ali.ea:partial-classification:97,
  author   = {Kamal Ali and Stefanos Manganaris and Ramakrishnan
              Srikant},
  title    = {Partial Classification Using Association Rules},
  pages    = {115},
  crossref = {heckerman.ea:proceedings-third:97},
}

Charter, Robert B. Allen
@article{allen:charter:95,
  author    = {Robert B. Allen},
  title     = {Charter},
  journal   = {ACM Transactions on Information Systems},
  volume    = {13},
  number    = {3},
  pages     = {235},
  year      = {1995},
  copyright = {(c) Copyright 1995 Association for Computing
               Machinery},
  abstract  = {The ACM Transactions on Information Systems (TOIS)
               considers the design, performance, and evaluation of
               computer systems that facilitate the presentation of
               information in a variety of media, as well as
               underlying technologies that support these systems. The
               major themes of TOIS and those topics which distinguish
               it from other ACM Transactions include: - Information
               Retrieval and Information Filtering: Algorithms and
               inference mechanisms for search, retrieval, and
               presentation of information and models of user
               information preferences. - Information Interfaces:
               Hypertext and hypermedia interfaces, information
               visualization, multimedia presentation, and task and
               user models for information systems. - Natural Language
               Processing: Computational linguistics and models of
               natural language (including content, syntax, semantics,
               and dialogue) relevant to information systems. -
               Knowledge and Information Representation:
               Representation issues for supporting information
               systems including semantic and object-oriented
               databases, knowledge bases, and hypertext/hypermedia
               document models. - Multimedia Information Systems:
               Semantics, search, and presentation of media including
               audio, image, video, and virtual reality. - Networked
               Information Systems: Interfaces and indexing, resource
               discovery, and visualization. - Organizational
               Interfaces and Social Impact of Information Systems:
               Electronic mail; decision and negotiation support
               systems; the effects of information system use on
               groups, organizations, and communities; social
               constraints imposed on information systems such as
               legal and privacy concerns. - Design and Evaluation of
               Information Systems: Design principles for information
               systems, methodologies for evaluating information
               systems, and programming languages relevant to
               information systems. - Information System Applications:
               Electronic books, documents, journals, movies, and
               libraries; authoring systems; office information
               systems; geographic information systems; and
               intelligent tutoring systems.},
}

Knowledge discovery in biomedical databases - a machine induction approach, H. Alnahi and S. Alshawi
@Article{alnahi.ea:biomedical-machine:93,
  author =       "H. Alnahi and S. Alshawi",
  address =      "Brunel Univ, Dept Comp Sci, Uxbridge UB8 3PH, Middx,
                 England",
  title =        "Knowledge discovery in biomedical databases - a
                 machine induction approach",
  journal =      "Computer Methods and Programs in Biomedicine",
  year =         "1993",
  volume =       "39",
  number =       "3--4",
  pages =        "343--349",
  abstract =     "The increase in the number and size of available
                 databases by far exceeds the growth of the
                 corresponding knowledge. Furthermore, many databases
                 contain information which is not possessed by an
                 existing human expert. This creates both a need and an
                 opportunity for extracting knowledge from databases. An
                 unsolved problem in molecular biology is the problem of
                 predicting a protein's secondary structure from its
                 primary structure. Inductive machine learning is a
                 search for a plausible general description which can
                 explain the given input data, and is useful for
                 predicting new data. In this paper we present a
                 statistical inductive algorithm which can be used to
                 produce new rules for predicting multiple protein
                 secondary structures from protein primary structure
                 databases.",
  keywords =     "SECONDARY STRUCTURE, PREDICTION, SEQUENCE, MACHINE
                 LEARNING, INDUCTION, DATABASES, KNOWLEDGE, RULES,
                 PROTEIN PRIMARY SECONDARY STRUCTURES, AMINO ACID
                 RESIDUES",
}

Discovering rules for water demand prediction: an enhanced rough-set approach (reprinted from proceedings of the international joint conference on artificial intelligence), A. J. An and N. Shan and C. Chan and N. Cercone and W. Ziarko
@Article{an.ea:discovering-rules:96,
  author =       "A. J. An and N. Shan and C. Chan and N. Cercone and W.
                 Ziarko",
  address =      "Univ Regina, Dept Comp Sci, Regina, Sk S4S 0A2,
                 Canada",
  title =        "Discovering rules for water demand prediction: an
                 enhanced rough-set approach (reprinted from proceedings
                 of the international joint conference on artificial
                 intelligence)",
  journal =      "Engineering Applications of Artificial Intelligence",
  year =         "1996",
  volume =       "9",
  number =       "6",
  pages =        "645--653",
  abstract =     "Prediction of consumer demands is a pre-requisite for
                 optimal control of water distribution systems because
                 minimum-cost pumping schedules can be computed if water
                 demands are accurately estimated. This paper presents an
                 enhanced rough-sets method for generating prediction
                 rules from a set of observed data. The proposed method
                 extends upon the standard rough set model by making use
                 of the statistical information inherent in the data to
                 handle incomplete and ambiguous training samples. It
                 also discusses some experimental results from using
                 this method for discovering knowledge on water demand
                 prediction. Copyright (C) 1996 IJCAI Inc.",
  keywords =     "water demand prediction, knowledge discovery, rough
                 sets",
}

Edm - a general framework for data mining based on evidence theory, S. S. Anand and D. A. Bell and J. G. Hughes
@Article{anand.ea:edm-general:96,
  author =       "S. S. Anand and D. A. Bell and J. G. Hughes",
  affiliation =  "Univ Ulster, Fac Informat, Sch Informat \& Software
                 Engn, Jordanstown, North Ireland",
  title =        "{EDM} - a general framework for data mining based on
                 evidence theory",
  journal =      "Data \& Knowledge Engineering",
  year =         "1996",
  volume =       "18",
  number =       "3",
  pages =        "189--223",
  abstract =     "Data Mining or Knowledge Discovery in Databases
                 [1,15,23] is currently one of the most exciting and
                 challenging areas where database techniques are coupled
                 with techniques from Artificial Intelligence and
                 mathematical sub-disciplines to great potential
                 advantage. It has been defined as the non-trivial
                 extraction of implicit, previously unknown and
                 potentially useful information from data. A lot of
                 research effort is being directed towards building
                 tools for discovering interesting patterns which are
                 hidden below the surface in databases. However, most of
                 the work being done in this field has been
                 problem-specific and no general framework has yet been
                 proposed for Data Mining. In this paper we seek to
                 remedy this by proposing, EDM - Evidence-based Data
                 Mining - a general framework for Data Mining based on
                 Evidence Theory. Having a general framework for Data
                 Mining offers a number of advantages. It provides a
                 common method for representing knowledge which allows
                 prior knowledge from the user or knowledge discovered
                 by another discovery process to be incorporated into
                 the discovery process. A common knowledge
                 representation also supports the discovery of
                 meta-knowledge from knowledge discovered by different
                 Data Mining techniques. Furthermore, a general
                 framework can provide facilities that are common to
                 most discovery processes, e.g. incorporating domain
                 knowledge and dealing with missing values. The
                 framework presented in this paper has the following
                 additional advantages. The framework is inherently
                 parallel. Thus, algorithms developed within this
                 framework will also be parallel and will therefore be
                 expected to be efficient for large data sets - a
                 necessity as most commercial data sets, relational or
                 otherwise, are very large. This is compounded by the
                 fact that the algorithms are complex. Also, the
                 parallelism within the framework allows its use in
                 parallel, distributed and heterogeneous databases. The
                 framework is easily updated and new discovery methods
                 can be readily incorporated within the framework,
                 making it 'general' in the functional sense in addition
                 to the representational sense considered above. The
                 framework provides an intuitive way of dealing with
                 missing data during the discovery process using the
                 concept of Ignorance borrowed from Evidence Theory. The
                 framework consists of a method for representing data
                 and knowledge, and methods for data manipulation or
                 knowledge discovery(1). We suggest an extension of the
                 conventional definition of mass functions in Evidence
                 Theory for use in Data Mining, as a means to represent
                 evidence of the existence of rules in the database. The
                 discovery process within EDM consists of a series of
                 operations on the mass functions. Each operation is
                 carried out by an EDM operator. We provide a
                 classification for the EDM operators based on the
                 discovery functions performed by them and discuss
                 aspects of the induction, domain and combination
                 operator classes. The application of EDM to two
                 separate Data Mining tasks is also addressed,
                 highlighting the advantages of using a general
                 framework for Data Mining in general and, in
                 particular, using one that is based on Evidence
                 Theory.",
  keywords =     "DATA MINING, KNOWLEDGE DISCOVERY IN DATABASES,
                 UNCERTAINTY HANDLING, EVIDENCE THEORY, PARALLEL
                 DISCOVERY",
}

A High-Performance Data Mining Server, S. S. Anand and D. A. Bell and J. G. Hughes and C. M. Shapcott
@article{anand.ea:high-performance-server:96,
  author  = {S. S. Anand and D. A. Bell and J. G. Hughes and C. M. Shapcott},
  title   = {A High-Performance Data Mining Server},
  journal = {Lecture Notes in Computer Science},
  year    = {1996},
  volume  = {1067},
  pages   = {907--??},
  issn    = {0302-9743},
}

Data mining in parallel, S. S. Anand and C. Shapcott and D. Bell and J. Hughes
@InProceedings{anand.ea:parallel:95,
  author =       "S. S. Anand and C. Shapcott and D. Bell and J.
                 Hughes",
  title =        "Data mining in parallel",
  volume =       "44",
  series =       "Transputer and Occam Engineering",
  pages =        "113--124",
  booktitle =    "Proceedings of WoTUG-18: Transputer and occam
                 Developments",
  year =         "1995",
  publisher =    "IOS Press",
  address =      "Amsterdam",
  month =        apr,
  ISBN =         "90-5199-222-X",
}

Getting to grips with arrears: `data mining' systems at the Leeds, anonymous
@article{anonymous:getting-to:94,
  author   = {anonymous},
  title    = {Getting to grips with arrears: `data mining' systems at the {L}eeds},
  journal  = {Expert Systems},
  year     = {1994},
  month    = may,
  volume   = {11},
  number   = {2},
  pages    = {122--124},
  keywords = {Applications, Data mining, kdd, Attar Software, Xpert Rule Analyser},
}

Data Mining: Intelligent Technology Gets down to Business, anonymous
@Article{anonymous:intelligent-technology:93,
  author =       "anonymous",
  title =        "Data Mining: Intelligent Technology Gets down to
                 Business",
  journal =      "PC AI",
  year =         "1993",
  month =        nov # "--" # dec,
}

Lessons in Data Mining, Anonymous
@article{anonymous:lessons:97,
  author  = {Anonymous},
  title   = {Lessons in Data Mining},
  journal = {Byte Magazine},
  year    = {1997},
  month   = feb,
  volume  = {22},
  number  = {2},
  pages   = {40--??},
  issn    = {0360-5280},
}

SIGMOD '93. 1993 ACM SIGMOD. International Conference on Management of Data, Anonymous (Ed)
@proceedings{anonymous:sigmod-93:93,
  editor         = {Anonymous},
  title          = {{SIGMOD} '93. 1993 {ACM} {SIGMOD}. International Conference on
                    Management of Data},
  booktitle      = {SIGMOD '93. 1993 ACM SIGMOD. International Conference on
                    Management of Data},
  series         = {SIGMOD Record (ACM Special Interest Group on Management of
                    Data)},
  volume         = {22(2)},
  publisher      = {ACM Press},
  address        = {New York, NY 10036, USA},
  year           = {1993},
  month          = jun,
  issn           = {0163-5808},
  confdate       = {26--28 May 1993},
  conflocation   = {Washington, DC, USA},
  confsponsor    = {ACM},
  classification = {C6160 (Database management systems (DBMS)); C4250 (Database
                    theory); C7250 (Information storage and retrieval); C6170
                    (Expert systems); C6120 (File organisation); C6140D (High
                    level languages); C6130 (Data handling techniques); C6150G
                    (Diagnostic, testing, debugging and evaluating systems)},
  keywords       = {Benchmark programs; Database rules; Integrity; Join
                    processing; Object-oriented databases; Memory-based
                    implementations; DBMS implementation issues; Recovery;
                    Knowledge discovery; Temporal reasoning; Data compression;
                    Query optimisation; Secondary storage techniques; Search
                    structures; Query languages; Interfaces;
                    Intelligent/deductive DBMSs; Relational/parallel DBMS
                    processing; Transaction management; Object/scientific DBMSs;
                    Interoperability},
  thesaurus      = {Data compression; Database management systems; Database
                    theory; Inference mechanisms; Knowledge based systems;
                    Program testing; Query languages; Query processing; Storage
                    management; System recovery; Transaction processing},
}

Supercomputers Knock At IS Doors, Anonymous
@Article{anonymous:supercomputers-knock-at-is-doors:92,
  author =       "Anonymous",
  title =        "Supercomputers Knock At {IS} Doors",
  journal =      "Datamation",
  volume =       "38",
  number =       "24",
  pages =        "79--??",
  day =          "01",
  month =        dec,
  year =         "1992",
  ISSN =         "0011-6963",
  abstract =     "Cost-effective massively parallel designs gain
                 converts for data mining and OLTP applications among
                 leading edge users and traditional systems suppliers.",
}

Computational learning theory: an introduction, Martin Anthony and Norman Biggs
@book{anthony.ea:computational-learning:92,
  author    = {Martin Anthony and Norman Biggs},
  title     = {Computational learning theory: an introduction},
  series    = {Cambridge Tracts in Theoretical Computer Science},
  volume    = {30},
  publisher = {Cambridge University Press},
  year      = {1992},
}

Knowledge Mining by Imprecise Querying: A Classification-based System, T. M. Anwar and H. W. Beck and S. B. Navathe
@inproceedings{anwar.ea:by-imprecise:92,
  author    = {T. M. Anwar and H. W. Beck and S. B. Navathe},
  title     = {Knowledge Mining by Imprecise Querying: {A} Classification-based
               System},
  booktitle = {Proceedings of the International Conference on Data Engineering},
  pages     = {622--630},
  address   = {Tempe, AZ},
  year      = {1992},
  month     = feb,
  abstract  = {Knowledge mining is the process of discovering new knowledge that
               is hitherto unknown. Users with a lack of knowledge of database
               schemas engage in the process of knowledge mining by posing
               imprecise queries. An approach to knowledge mining by imprecise
               querying is presented that utilizes conceptual clustering
               techniques. In contrast to numeric or fuzzy set approaches which
               ultimately rely on some distance metric and threshold to
               processing such queries, conceptual clustering retrieves
               instances which are structurally, semantically, and pragmatically
               similar to the query even though they may not match the
               requirements exactly. The query processor has both a deductive
               and inductive component. The deductive component finds precise
               matches in the traditional sense, and the inductive component
               identifies ways in which imprecise matches may be considered
               similar. Ranking on similarity is done using the database
               taxonomy, by which similar instances become members of the same
               class. Relative similarity is determined by depth in the
               taxonomy. The conceptual clustering algorithm, its use in query
               processing and an example are presented.},
}

Sales surge as mainframes find a role in client\slash server, E. L. Appleton
@article{appleton:sales-surge:95,
  author         = {E. L. Appleton},
  title          = {Sales surge as mainframes find a role in client\slash server},
  journal        = {Datamation},
  year           = {1995},
  month          = jun,
  volume         = {41},
  number         = {10},
  pages          = {48},
  issn           = {0011-6963},
  language       = {English},
  pubcountry     = {USA},
  classification = {D5010 (Computers and work stations); D5020 (Computer networks
                    and intercomputer communications)},
  keywords       = {Mainframes; Client/server; Demand; Economy; Large-system
                    market; Vendors; IBM Parallel Sysplex; UNIX server; NT
                    server; Pyramid; HP T-500; Data mining; Parallelism; IBM
                    Power Parallel; Amdahl ECL mainframe},
  thesaurus      = {Client-server systems; DP industry; Mainframes},
}

Predicting defects in Disk Drive Manufacturing: a case study in High-Dimensional Classification, Chidanand Apt\'e and Sholom Weiss and Gordon Grout
@InProceedings{apte.ea:predicting-defects:93,
  author =       "Chidanand Apt{\'e} and Sholom Weiss and Gordon Grout",
  title =        "Predicting defects in Disk Drive Manufacturing: a case
                 study in High-Dimensional Classification",
  booktitle =    "Proceedings of the 9th Conference on Artificial
                 Intelligence for Applications",
  pages =        "212--218",
  address =      "Orlando, Florida",
  year =         "1993",
}

A Linear Method for Deviation Detection in Large Databases, Andreas Arning and Rakesh Agrawal and Prabhakar Raghavan
@inproceedings{arning.ea:linear-method:96,
  author   = {Andreas Arning and Rakesh Agrawal and Prabhakar Raghavan},
  title    = {A Linear Method for Deviation Detection in Large Databases},
  pages    = {164},
  crossref = {simoudis.ea:proceedings-second:96},
}

Exploiting Background Knowledge in Automated Discovery, John M. Aronis and Foster J. Provost and Bruce G. Buchanan
@inproceedings{aronis.ea:exploiting-background:96,
  author   = {John M. Aronis and Foster J. Provost and Bruce G. Buchanan},
  title    = {Exploiting Background Knowledge in Automated Discovery},
  pages    = {355},
  crossref = {simoudis.ea:proceedings-second:96},
}

Increasing the Efficiency of Data Mining Algorithms with Breadth-First Marker Propagation, John M. Aronis and Foster J. Provost
@inproceedings{aronis.ea:increasing-efficiency:97,
  author   = {John M. Aronis and Foster J. Provost},
  title    = {Increasing the Efficiency of Data Mining Algorithms with
              Breadth-First Marker Propagation},
  pages    = {119},
  crossref = {heckerman.ea:proceedings-third:97},
}

Data mining for lead identification and explosion, S. Ash and S. Gothe
@Article{ash.ea:lead-identification:97,
  author =       "S. Ash and S. Gothe",
  affiliation =  "Tripos Inc, St Louis, Mo, 63144",
  title =        "Data mining for lead identification and explosion",
  journal =      "Abstracts Of Papers Of The American Chemical Soc.",
  year =         "1997",
  volume =       "213",
  number =       "Pt1",
  pages =        "57--CINF",
}

Managing Complexity in Large Data Bases Using Self-Organizing Maps, Barbro Back and Mikko Irjala and Kaisa Sere and Hannu Vanharanta
Available as
hypertext.
@TechReport{back.ea:managing-complexity:96,
  author =       "Barbro Back and Mikko Irjala and Kaisa Sere and Hannu
                 Vanharanta",
  title =        "Managing Complexity in Large Data Bases Using
                 Self-Organizing Maps",
  institution =  "TUCS - Turku Centre for Computer Science",
  number =       "TUCS-TR-48",
  month =        oct # " 23",
  year =         "1996",
  keywords =     "neural networks, self-organizing maps, data bases,
                 benchmarking",
  URL =          "http://www.tucs.abo.fi/publications/techreports/TR48.html",
  abstract =     "The amount of financial information in today's
                 sophisticated large data bases is huge and makes
                 comparisons between company performance - especially
                 over time - difficult or at least very time consuming.
                 The aim of this paper is to investigate whether neural
                 networks in the form of self-organizing maps can be
                 used to manage the complexity in large data bases. We
                 structure and analyze accounting numbers in a large
                 data base over several time periods. By using self
                 organizing maps, we overcome the problems associated
                 with finding the appropriate underlying distribution
                 and the functional form of the underlying data in the
                 structuring task that is often encountered, for
                 example, when using cluster analysis. The method chosen
                 also offers a way of visualizing the results. The data
                 base in this study consists of annual reports of more
                 than 120 world wide forest companies with data from a
                 five year time period. This paper is an extended
                 version of our paper Data Mining Accounting Numbers
                 Using Self Organising Maps presented at Finnish
                 Artificial Intelligence Conference in Vasa 20-23 August
                 1996.",
}

ReDuce: Automatic Structuring and Compression in Relational Databases, B. Bain and C. Sammut and A. Sharma and J. Shepherd
@InProceedings{bain.ea:reduce-automatic:96,
  author =       "B. Bain and C. Sammut and A. Sharma and J. Shepherd",
  title =        "{ReDuce}: {A}utomatic Structuring and Compression in
                 Relational Databases",
  booktitle =    "Proceedings of the MLnet Familiarization Workshop on
                 Data Mining with Inductive Logic Programming",
  pages =        "41--52",
  year =         "1996",
}

Knowledge from data using fuzzy methods, J. F. Baldwin
@Article{baldwin:using-fuzzy:96,
  author =       "J. F. Baldwin",
  affiliation =  "Univ Bristol, Dept Engn Math, Bristol, Avon, England",
  title =        "Knowledge from data using fuzzy methods",
  journal =      "Pattern Recognition Letters",
  year =         "1996",
  volume =       "17",
  number =       "6",
  pages =        "593--600",
  abstract =     "The basic concept of a data browser is explained and
                 some methods are described which are suitable for
                 extracting knowledge from data as an induction process.
                 The data browser gives data mining capabilities but
                 also provides a stage for computers and users to act
                 out their parts in this knowledge discovery process.",
}

From molecules to models to data mining, N. Basta
@Article{basta:molecules-to:96,
  author =       "N. Basta",
  affiliation =  "Us Dept Def, Off Infosec Comp Sci, Ft George G Meade,
                 Md, 20755",
  title =        "From molecules to models to data mining",
  journal =      "Chemical Engineering",
  year =         "1996",
  volume =       "103",
  number =       "2",
  pages =        "5",
}

Brute-Force Mining of High-Confidence Classification Rules, Roberto J. Bayardo Jr.
@InProceedings{bayardo:brute-force-high-confidence:97,
  title =        "Brute-Force Mining of High-Confidence Classification
                 Rules",
  author =       "Bayardo, Jr., Roberto J.",
  pages =        "123",
  crossref =     "heckerman.ea:proceedings-third:97",
}

Discovery and Maintenance of Functional Dependencies by Independencies, S. Bell
@inproceedings{bell:maintenance-functional:95,
  author    = {S. Bell},
  title     = {Discovery and Maintenance of Functional Dependencies by
               Independencies},
  booktitle = {Proceedings of the Workshop on Knowledge Discovery in Databases},
  pages     = {27--32},
  publisher = {AAAI Press},
  year      = {1995},
}

From data properties to evidence, D. A. Bell
@Article{bell:properties-to:93,
  author =       "D. A. Bell",
  affiliation =  "Univ Ulster, Dept Informat Sci, Jordanstown Bt37 0Qb,
                 Antrim, North Ireland",
  title =        "From data properties to evidence",
  journal =      "IEEE Transactions on Knowledge and Data Engineering",
  year =         "1993",
  volume =       "5",
  number =       "6",
  pages =        "965--969",
  abstract =     "Information and knowledge in computerized information
                 systems are often characterized by uncertainty. The
                 facts needed for some realistic applications are
                 unavailable or are crudely estimated or judged. This
                 problem manifests itself frequently in information
                 systems centered on databases. We describe here an
                 exploration of an aspect of the problem of handling
                 uncertain evidence on which reasoning is to be based.
                 We focus upon the problem of making decisions among
                 propositions based on both uncertain data items (in
                 contrast to data in conventional databases) and
                 arguments which are not certain. The primary knowledge
                 discovery issue we address is a classification problem
                 - which classification does the available evidence
                 support? The method investigated here seeks to exploit
                 information available from conventional database
                 systems - namely, the integrity assertions or data
                 dependency information contained in the database. This
                 information, e.g., from functional dependencies and a
                 form of multivalued dependencies, allows us to rank
                 arguments in terms of their strengths. Hence, as a step
                 in the process of discovering classification knowledge,
                 using a database as a secondary knowledge discovery
                 exercise, we explicate latent knowledge pertinent to
                 arguments of relevance to the purpose at hand. This is
                 called evidence. Information is requested via user
                 prompts from an evidential reasoner. It is fed as
                 evidence to the reasoner. An object-oriented structure
                 for managing evidence is used to model the conclusion
                 space and to reflect the evidence structure. The
                 implementation of the evidence structure and an example
                 of its use are outlined.",
  keywords =     "CLASSIFICATION, DATA DEPENDENCIES, DATABASE, EVIDENCE
                 BASE, EVIDENTIAL REASONING, INTEGRITY CONSTRAINTS",
}

Value-added databases: knowledge discovery and evidential reasoning., D. Bell
@InProceedings{bell:value-added-evidential:94,
  title =        "Value-added databases: knowledge discovery and
                 evidential reasoning",
  author =       "D. Bell",
  booktitle =    "Proceedings of the International Workshop on Advances
                 in Databases and Information Systems - {ADBIS'94}",
  address =      "Moscow",
  year =         "1994",
  month =        may # " 23--26",
  pages =        "2--9",
  abstract =     "Results of research into methods of managing evidence
                 can be coupled with the power and capacity of data
                 management systems to give a potent approach to
                 discovering interesting but hidden patterns in large
                 collections of data. We present some pertinent results
                 from evidence theory and its applications, and suggest
                 an approach to the exploitation of these results in the
                 discovery of knowledge which is held in databases. In
                 this sense we {\em add value} to databases, which
                 presumably already justify their existence, and hence
                 further increase the attractiveness of very large
                 database systems.",
}

An Examination of Inductive Learning Algorithms for the Classification of Sleep Signals, John A. Bentrup and Sylvian R. Ray
Available as
compressed postscript.
@TechReport{bentrup.ea:examination-inductive:93,
  author =       "John A. Bentrup and Sylvian R. Ray",
  title =        "An Examination of Inductive Learning Algorithms for
                 the Classification of Sleep Signals",
  institution =  "Department of Computer Science, University of Illinois
                 at Urbana-Champaign",
  type =         "Report",
  number =       "UIUCDCS-R-93-1792",
  address =      "1304 Springfield Avenue, Urbana, IL 61801",
  month =        feb,
  year =         "1993",
  URL =          "ftp://a.cs.uiuc.edu/pub/TechReports/UIUCDCS-R-93-1792.ps.Z",
  note =         "Modified version to appear in Proceedings of the 30th
                 Annual Rocky Mountain Bioengineering Symposium (April
                 1993).",
  annote =       "Nine inductive learning algorithms are tested on sleep
                 signals of 161 subjects. Algorithms are ID3, C4, CART,
                 MDL, AIMS, Bayes, PLS(K), PRG, Nearest Neighbour and
                 COBWEB. Nice table summarising algorithms.",
}

Integrated Learning in a Real Domain, F. Bergadano and A. Giordana and L. Saitta
@InCollection{bergadano.ea:integrated-learning:91,
  editor =       "Gregory Piatetsky-Shapiro and William J. Frawley",
  booktitle =    "Knowledge Discovery in Databases",
  publisher =    "AAAI Press / The MIT Press",
  address =      "Menlo Park, California",
  edition =      "First",
  year =         "1991",
  author =       "F. Bergadano and A. Giordana and L. Saitta",
  title =        "Integrated Learning in a Real Domain",
  pages =        "277--288",
}

Applying Data Mining and Machine Learning Techniques to Submarine Intelligence Analysis, Ulla Bergsten and Johan Schubert and Per Svensson
@inproceedings{bergsten.ea:applying-machine:97,
  author   = {Ulla Bergsten and Johan Schubert and Per Svensson},
  title    = {Applying Data Mining and Machine Learning Techniques to Submarine
              Intelligence Analysis},
  pages    = {127},
  crossref = {heckerman.ea:proceedings-third:97},
}

Hot Topics: Customizing information. 2. How successful are we so far?, D. Berleant and H. Berghel
@article{berleant.ea:hot-topics:94,
  author         = {D. Berleant and H. Berghel},
  title          = {Hot Topics: Customizing information. 2. {How} successful are
                    we so far?},
  journal        = {Computer},
  year           = {1994},
  month          = oct,
  volume         = {27},
  number         = {10},
  pages          = {76--78},
  issn           = {0018-9162},
  affiliation    = {Dept. of Comput. Syst. Eng., Arkansas Univ., Fayetteville,
                    AR, USA},
  classification = {C6130D (Document processing techniques); C7210 (Information
                    services and centres); C7250N (Front end systems for online
                    searching)},
  keywords       = {Advanced information customization; Browsing; Data
                    interchange; Digital library; Document customization;
                    Filtering; Hypermedia; Hypertext; Information analysis;
                    Information extraction; Information retrieval; Information
                    science; Information-customizing interfaces; Interactivity;
                    Knowledge discovery; Nonprescriptive structuring},
  thesaurus      = {Document handling; Full-text databases; Hypermedia;
                    Information retrieval; Online front-ends},
}

Enactment in Information Farming, Mark Bernstein
@inproceedings{bernstein:enactment-information:93,
  author    = {Mark Bernstein},
  title     = {Enactment in Information Farming},
  booktitle = {Proceedings of ACM Hypertext'93},
  series    = {Technical Briefings},
  pages     = {242--249},
  year      = {1993},
  copyright = {(c) Copyright 1993 Association for Computing Machinery},
  keywords  = {Design, Rhetoric, Enactment, Collaboration, Information farming},
  abstract  = {Information farming views the cultivation of information as a
               continuing, collaborative activity performed by groups of people
               working together to achieve changing individual and common goals.
               Failure to differentiate information farming from related but
               distinct activities like information mining and data factories
               has been a fruitful source of misunderstanding and discord in the
               hypertext literature and in the design of hypertext environments.
               Dramatic enactment and visual salience -- not recall, precision,
               or usability -- assume primary roles in design for information
               gardening. In this technical briefing, we examine how enactment
               contribute to the success and failure of a variety of Hypergate
               and Storyspace features.},
}

Computational Methods for Intelligent Information Access, Michael W. Berry and Susan T. Dumais and Todd A. Letsche
@InProceedings{berry.ea:computational-methods:95,
  author =       "Michael W. Berry and Susan T. Dumais and Todd A.
                 Letsche",
  title =        "Computational Methods for Intelligent Information
                 Access",
  booktitle =    "Proceedings of Supercomputing'95",
  publisher =    "ACM/IEEE",
  address =      "San Diego, CA",
  month =        dec,
  year =         "1995",
  keywords =     "data mining, indexing, information, latent, matrices,
                 retrieval, semantic, singular value decomposition
                 (SVD), sparse, updating",
  abstract =     "ps/PDF on the CD with MPEG.",
}

Testing Complex Temporal Relationships Involving Multiple Granularities and Its Application to Data Mining, C. Bettini and X. Sean Wang and S. Jajodia
@InProceedings{bettini.ea:testing-complex:96,
  author =       "C. Bettini and X. Sean Wang and S. Jajodia",
  title =        "Testing Complex Temporal Relationships Involving
                 Multiple Granularities and Its Application to Data
                 Mining",
  editor =       "{ACM}",
  booktitle =    "Proceedings of the Fifteenth {ACM}
                 {SIGACT}-{SIGMOD}-{SIGART} Symposium on Principles of
                 Database Systems, {PODS} 1996, Montr{\'e}al, Canada,
                 June 3--5, 1996",
  volume =       "15",
  publisher =    "ACM Press",
  address =      "New York, NY 10036, USA",
  year =         "1996",
  series =       "Proceedings of the ACM SIGACT SIGMOD SIGART Symposium
                 on Principles of Database Systems",
  pages =        "68--78",
  annote =       "Held in conjunction with the 1996 ACM SIGMOD
                 international conference on management of data. Also
                 known as PODS 1996",
  keywords =     "database systems; PODS; ACM; SIGMOD; SIGART; SIGACT",
}

Time-dependent concepts: representation and reasoning using temporal description logics, C. Bettini
@Article{bettini:time-dependent-concepts:97,
  author =       "C. Bettini",
  address =      "Univ Milan, Dipartimento Sci Informaz, I-20122 Milan,
                 Italy",
  title =        "Time-dependent concepts: representation and reasoning
                 using temporal description logics",
  journal =      "Data \& Knowledge Engineering",
  year =         "1997",
  volume =       "22",
  number =       "1",
  pages =        "1--38",
  abstract =     "A time-dependent concept is a conceptual entity that
                 is defined in terms of temporal relationships with
                 other entities. For example, the concept of an action
                 is defined in terms of a set of temporal relationships
                 among states of a system. The concept of ``widow'', in
                 natural language, is defined in terms of events that
                 have occurred in the past. Time-dependent concepts
                 appear in several application areas, from natural
                 language to diagnosis, from planning to data mining. An
                 interesting issue in knowledge representation is how to
                 formally represent and reason with these concepts. In
                 this paper, we represent a family of formal
                 representation languages obtained as an interval-based
                 temporal extension of description logics. We illustrate
                 the expressiveness of these formalisms in representing
                 time-dependent concepts with respect to standard
                 description logics and other extensions. We give some
                 complexity results for reasoning problems and we
                 propose approximate algorithms to compute subsumption
                 among time-dependent concepts.",
  keywords =     "INTERVALS, temporal knowledge, temporal reasoning,
                 description logics, taxonomies, subsumption algorithms,
                 temporal objects",
}

Advanced Scout: Data Mining and Knowledge Discovery in NBA data, Inderpal Bhandari and Ed Colet and Jennifer Parker and Zachary Pines and Rajiv Pratap and Krishnakumar Ramanujam
@article{bhandari.ea:advanced-scout:97,
  author  = {Inderpal Bhandari and Ed Colet and Jennifer Parker and
             Zachary Pines and Rajiv Pratap and Krishnakumar
             Ramanujam},
  title   = {Advanced Scout: Data Mining and Knowledge Discovery in
             {NBA} data},
  journal = {Data Mining and Knowledge Discovery},
  volume  = {1},
  number  = {1},
  year    = {1997},
  annote  = {Advanced Scout is a PC-based data mining application
             used by National Basketball Association (NBA) coaching
             staffs to discover interesting patterns in basketball
             game data. We describe Advanced Scout software from the
             perspective of data mining and knowledge discovery.
             This paper highlights the pre-processing of raw data
             that the program performs, describes the data mining
             aspects of the software and how the interpretation of
             patterns supports the process of knowledge discovery.
             The underlying technique of attribute focusing as the
             basis of the algorithm is also described. The process
             of pattern interpretation is facilitated by allowing
             the user to relate patterns to video tape.},
}

A case-study of software process improvement during development, I. Bhandari and M. Halliday and E. Tarver and D. Brown and J. Chaar and R. Chillarege
@Article{bhandari.ea:case-study-software:93,
  author =       "I. Bhandari and M. Halliday and E. Tarver and D. Brown
                 and J. Chaar and R. Chillarege",
  address =      "Ibm Corp, Thomas J Watson Res Ctr, Yorktown Hts, Ny,
                 10598 Ibm Corp, Mid Hudson Valley Programming Lab,
                 Wappingers Falls, Ny, 12590",
  title =        "A case-study of software process improvement during
                 development",
  journal =      "IEEE Transactions on Software Engineering",
  year =         "1993",
  volume =       "19",
  number =       "12",
  pages =        "1157--1170",
  abstract =     "We present a case study of the use of a software
                 process improvement method which is based on the
                 analysis of defect data. The first step of the method
                 is the classification of software defects using
                 attributes which relate defects to specific process
                 activities. Such classification captures the semantics
                 of the defects in a fashion which is useful for process
                 correction. The second step utilizes a machine-assisted
                 approach to data exploration which allows a
                 project team to discover such knowledge from defect
                 data as is useful for process correction. We show that
                 such analysis of defect data can readily lead a project
                 team to improve their process during development.",
  keywords =     "CYCLE, DATA EXPLORATION, DEFECT-BASED PROCESS
                 IMPROVEMENT, IN-PROCESS METRICS, KNOWLEDGE DISCOVERY",
}

Attribute focusing - machine-assisted knowledge discovery applied to software production process-control, I. Bhandari
@Article{bhandari:attribute-focusing:94,
  author =       "I. Bhandari",
  address =      "Ibm Corp, Thomas J Watson Res Ctr, Yorktown Hts, Ny,
                 10598",
  title =        "Attribute focusing - machine-assisted knowledge
                 discovery applied to software production
                 process-control",
  journal =      "Knowledge Acquisition",
  year =         "1994",
  volume =       "6",
  number =       "3",
  pages =        "271--294",
  abstract =     "How can people who are not trained in data analysis
                 discover knowledge from a database of attribute-valued
                 data? I address this question by presenting a
                 man-machine approach to knowledge discovery called
                 Attribute Focusing and its application to software
                 production process control. Attribute Focusing utilizes
                 an automatic filter to focus attention on that small
                 part of a large amount of data which is interesting. A
                 person studies that part in a manner which leads him to
                 discover knowledge about the physical situation to
                 which the data pertain. Specifically, the paper
                 describes: 1. A model of interestingness of data based
                 on the magnitude of data values, the association of
                 data values and basic knowledge of the limits of human
                 processing. 2. The use of that model of interestingness
                 by people to discover knowledge. 3. The application of
                 the Attribute Focusing approach to diagnose and correct
                 the software production process. Based on the results
                 that have been observed, the paper concludes that
                 man-machine approaches to knowledge discovery should be
                 emphasized much more than has been in the past, and
                 that Attribute Focusing is a powerful, practical
                 approach to such discovery.",
}

Data mining, N. Bissantz and J. Hagedorn
@Article{bissantz.ea:data-mining:93,
  author =       "N. Bissantz and J. Hagedorn",
  title =        "Data mining",
  journal =      "Wirtschaftsinformatik",
  year =         "1993",
  volume =       "35",
  number =       "5",
  pages =        "481--487",
}

Relational knowledge discovery in databases, H. Blockeel and L. De Raedt
@InProceedings{blockeel.ea:relational:96,
  author =       "H. Blockeel and De Raedt, L.",
  title =        "Relational knowledge discovery in databases",
  booktitle =    "Proceedings of the 6th International Workshop on
                 Inductive Logic Programming",
  editor =       "S. Muggleton",
  publisher =    "Stockholm University, Royal Institute of Technology",
  pages =        "1--13",
  year =         "1996",
}

Discovery, Confirmation and Incorporation of Causal Relationships from a Large Time-Oriented Clinical Database: The RX Project, Robert L. Blum
@article{blum:confirmation-incorporation:82,
  author  = {Robert L. Blum},
  title   = {Discovery, Confirmation and Incorporation of Causal
             Relationships from a Large Time-Oriented Clinical
             Database: The {RX} Project},
  journal = {Computers and Biomedical Research},
  year    = {1982},
  volume  = {15},
  pages   = {164--187},
}

Discovery and Representation of Causal Relationships from a Large Time-Oriented Clinical Database: The RX Project, Robert L. Blum
@Book{blum:representation-causal:82,
  author =       "Robert L. Blum",
  title =        "Discovery and Representation of Causal Relationships
                 from a Large Time-Oriented Clinical Database: The {RX}
                 Project",
  year =         "1982",
  publisher =    "Springer-Verlag",
  series =       "Lecture Notes in Medical Informatics",
  volume =       "19",
}

Occam's Razor, Anselm Blumer and Andrzej Ehrenfeucht and David Haussler and Manfred K. Warmuth
@Article{blumer.ea:occams-razor:87,
  author =       "Anselm Blumer and Andrzej Ehrenfeucht and David
                 Haussler and Manfred K. Warmuth",
  title =        "{Occam's} Razor",
  journal =      "Information Processing Letters",
  volume =       "24",
  pages =        "377--380",
  year =         "1987",
}

Process-Based Database Support for the Early Indicator Method, Christoph Breitner and J\"org Schl\"osser and R\"udiger Wirth
@inproceedings{breitner.ea:process-based-database:97,
  author   = {Christoph Breitner and J{\"{o}}rg Schl{\"{o}}sser and
              R{\"{u}}diger Wirth},
  title    = {Process-Based Database Support for the Early Indicator
              Method},
  pages    = {131},
  crossref = {heckerman.ea:proceedings-third:97},
}

SAMIA: a bottom-up learning method using a simulated annealing algorithm, Pierre Br\'ezellec and Henri Soldano
@InProceedings{brezellec.ea:samia-bottom-up:93,
  author =       "Pierre Br{\'e}zellec and Henri Soldano",
  title =        "{SAMIA}: a bottom-up learning method using a simulated
                 annealing algorithm",
  booktitle =    "Proceedings of the European Conference on Machine
                 Learning",
  series =       "Lecture Notes in Artificial Intelligence",
  pages =        "297--309",
  publisher =    "Springer-Verlag",
  year =         "1993",
}

Direct Access of an ILP Algorithm to a Database Management System, P. Brockhausen and K. Morik
@InProceedings{brockhausen.ea:direct-access:96,
  author =       "P. Brockhausen and K. Morik",
  title =        "Direct Access of an {ILP} Algorithm to a Database
                 Management System",
  booktitle =    "Proceedings of the MLnet Familiarization Workshop on
                 Data Mining with Inductive Logic Programming",
  pages =        "95--110",
  year =         "1996",
}

Applying classification algorithms in practice (preprint), C. E. Brodley and P. Smyth
Available as
hypertext.
@Article{brodley.ea:applying-classification:,
  author =       "C. E. Brodley and P. Smyth",
  title =        "Applying classification algorithms in practice
                 (preprint)",
  journal =      "Statistics and Computing",
  note =         "To appear",
  URL =          "http://yake.ecn.purdue.edu/~brodley/my-papers/publications.html",
}

Distributed Information Management in the National HPCC Software Exchange, Shirley Browne and Jack Dongarra and Geoffrey C. Fox and Ken Hawick and Ken Kennedy and Rick Stevens and Robert Olson and Tom Rowan
@InProceedings{browne.ea:distributed-information:95,
  author =       "Shirley Browne and Jack Dongarra and Geoffrey C. Fox
                 and Ken Hawick and Ken Kennedy and Rick Stevens and
                 Robert Olson and Tom Rowan",
  title =        "Distributed Information Management in the National
                 {HPCC} Software Exchange",
  booktitle =    "Proceedings of Supercomputing'95",
  publisher =    "ACM/IEEE",
  address =      "San Diego, CA",
  month =        dec,
  year =         "1995",
  keywords =     "data mining, information management, information
                 retrieval, HPCC, high performance computing, software
                 repository",
  abstract =     "Simple html document on CD.",
}

MineSet: An Integrated System for Data Mining, Cliff Brunk and James Kelly and Ron Kohavi
@InProceedings{brunk.ea:mineset-integrated:97,
  title =        "{MineSet}: An Integrated System for Data Mining",
  author =       "Cliff Brunk and James Kelly and Ron Kohavi",
  pages =        "135",
  crossref =     "heckerman.ea:proceedings-third:97",
}

A guide to the literature on learning probabilistic networks from data, W. Buntine
@Article{buntine:guide-to:96,
  author =       "W. Buntine",
  address =      "Thinkbank, 1678 Shattuck Ave, Suite 320, Berkeley, Ca,
                 94709",
  title =        "A guide to the literature on learning probabilistic
                 networks from data",
  journal =      "IEEE Transactions on Knowledge and Data Engineering",
  year =         "1996",
  volume =       "8",
  number =       "2",
  pages =        "195--210",
  abstract =     "This literature review discusses different methods
                 under the general rubric of learning Bayesian networks
                 from data, and includes some overlapping work on more
                 general probabilistic networks. Connections are drawn
                 between the statistical, neural network, and
                 uncertainty communities, and between the different
                 methodological communities, such as Bayesian,
                 description length, and classical statistics. Basic
                 concepts for learning and Bayesian networks are
                 introduced and methods are then reviewed. Methods are
                 discussed for learning parameters of a probabilistic
                 network, for learning the structure, and for learning
                 hidden variables. The presentation avoids formal
                 definitions and theorems, as these are plentiful in the
                 literature, and instead illustrates key concepts with
                 simplified examples.",
  keywords =     "EXPERT-SYSTEMS, BAYESIAN NETWORKS, GRAPHICAL MODELS,
                 INDEPENDENCE, COMPLEXITY, HIDDEN VARIABLES, LEARNING,
                 LEARNING STRUCTURE, PROBABILISTIC NETWORKS, KNOWLEDGE
                 DISCOVERY",
}

Attribute-Oriented Induction in Relational Databases, Yandong Cai and Nick Cercone and Jiawei Han
@InCollection{cai.ea:attribute-oriented-induction:91,
  editor =       "Gregory Piatetsky-Shapiro and William J. Frawley",
  booktitle =    "Knowledge Discovery in Databases",
  publisher =    "AAAI Press / The MIT Press",
  address =      "Menlo Park, California",
  edition =      "First",
  year =         "1991",
  author =       "Yandong Cai and Nick Cercone and Jiawei Han",
  title =        "Attribute-Oriented Induction in Relational Databases",
  pages =        "213--228",
}

An overview of machine learning, Jaime G. Carbonell and Ryszard S. Michalski and Tom M. Mitchell
@incollection{carbonell.ea:overview-machine:83,
  author   = {Jaime G. Carbonell and Ryszard S. Michalski and Tom M.
              Mitchell},
  title    = {An overview of machine learning},
  crossref = {michalski.ea:machine-learning:83},
  pages    = {3--24},
}

Assessing Credit Card Applications Using Machine Learning, Chris Carter and Jason Catlett
@Article{carter.ea:assessing-credit:87,
  author =       "Chris Carter and Jason Catlett",
  title =        "Assessing Credit Card Applications Using Machine
                 Learning",
  journal =      "IEEE Expert",
  pages =        "71--79",
  volume =       "2",
  number =       "3",
  year =         "1987",
  note =         "Fall 1987 issue",
}

A fast, online generalization algorithm for knowledge discovery, C. L. Carter and H. J. Hamilton
@Article{carter.ea:fast-online:95,
  author =       "C. L. Carter and H. J. Hamilton",
  address =      "Univ Regina, Dept Comp Sci, Regina, Sk S4S 0A2,
                 Canada",
  title =        "A fast, online generalization algorithm for knowledge
                 discovery",
  journal =      "Applied Mathematics Letters",
  year =         "1995",
  volume =       "8",
  number =       "2",
  pages =        "5--11",
  abstract =     "We present an O(n) algorithm for generalizing a
                 database relation using concept hierarchies, where n is
                 the number of tuples in the input relation. The
                 algorithm is based on a variant of Han et al.'s
                 attribute-oriented O(n log n) algorithm. Our algorithm
                 is an on-line algorithm; fast performance is achieved
                 because after encountering a tuple and generalizing it,
                 the location of the appropriate counter to increment is
                 calculated instead of searched for.",
  keywords =     "KNOWLEDGE DISCOVERY, DATA MINING, DATABASES, CONCEPT
                 HIERARCHIES, GENERALIZATION",
}

Megainduction: machine learning on very large databases, Jason Catlett
Available as
hypertext.
@phdthesis{catlett:megainduction-machine:91,
  author = {Jason Catlett},
  title  = {Megainduction: machine learning on very large
            databases},
  year   = {1991},
  url    = {http://www.research.att.com/orgs/ssr/people/catlett/phd.html},
}

IEEE Transactions on Knowledge and Data Engineering Special issue on Learning and Discovery in Databases, N. Cercone and M. Tsuchiya (guest editors) (Eds)
@Article{cercone.ea:ieee-transactions:93,
  key =          "cercone.ea:ieee-transactions:93",
  title =        "{IEEE} Transactions on Knowledge and Data Engineering
                 Special issue on Learning and Discovery in Databases",
  journal =      "IEEE Transactions on Knowledge and Data Engineering",
  year =         "1993",
  volume =       "5",
  number =       "6",
  month =        dec,
  editor =       "N. Cercone and M. Tsuchiya",
  note =         "Special issue on Learning and Discovery in Databases;
                 N. Cercone and M. Tsuchiya, guest editors",
}

Proposal and Empirical Comparison of a Parallelizable Distance-Based Discretization Method, Jes\'us Cerquides and Ramon L\'opez de M\`antaras
@InProceedings{cerquides.ea:proposal-empirical:97,
  title =        "Proposal and Empirical Comparison of a Parallelizable
                 Distance-Based Discretization Method",
  author =       "Jes{\'u}s Cerquides and L{\'o}pez de M{\`a}ntaras,
                 Ramon",
  pages =        "139",
  crossref =     "heckerman.ea:proceedings-third:97",
}

Experiments in Multistrategy Learning by Meta-Learning, Philip K. Chan and Salvatore J. Stolfo
@InProceedings{chan.ea:experiments-multistrategy:93,
  author =       "Philip K. Chan and Salvatore J. Stolfo",
  title =        "Experiments in Multistrategy Learning by
                 Meta-Learning",
  booktitle =    "Proceedings of the Second International Conference on
                 Information and Knowledge Management",
  pages =        "314--323",
  address =      "Washington, DC",
  year =         "1993",
}

Sharing Learned Models among Remote Database Partitions by Local Meta-Learning, Philip K. Chan and Salvatore J. Stolfo
@inproceedings{chan.ea:sharing-learned:96,
  author   = {Philip K. Chan and Salvatore J. Stolfo},
  title    = {Sharing Learned Models among Remote Database
              Partitions by Local Meta-Learning},
  crossref = {simoudis.ea:proceedings-second:96},
  pages    = {2},
}

Model uncertainty, data mining and statistical-inference, C. Chatfield
@Article{chatfield:model-uncertainty:95,
  author =       "C. Chatfield",
  address =      "Univ Bath, Sch Math Sci, Bath Ba2 7Ay, Avon, England",
  title =        "Model uncertainty, data mining and
                 statistical-inference",
  journal =      "Journal of the Royal Statistical Society, Series A
                 (Statistics in Society)",
  year =         "1995",
  volume =       "158",
  number =       "3",
  pages =        "419--466",
  abstract =     "This paper takes a broad, pragmatic view of statistical
                 inference to include all aspects of model formulation.
                 The estimation of model parameters traditionally
                 assumes that a model has a prespecified known form and
                 takes no account of possible uncertainty regarding the
                 model structure. This implicitly assumes the existence
                 of a 'true' model, which many would regard as a
                 fiction. In practice model uncertainty is a fact of
                 life and likely to be more serious than other sources
                 of uncertainty which have received far more attention
                 from statisticians. This is true whether the model is
                 specified on subject-matter grounds or, as is
                 increasingly the case, when a model is formulated,
                 fitted and checked on the same data set in an
                 iterative, interactive way. Modern computing power
                 allows a large number of models to be considered and
                 data-dependent specification searches have become the
                 norm in many areas of statistics. The term data mining
                 may be used in this context when the analyst goes to
                 great lengths to obtain a good fit. This paper reviews
                 the effects of model uncertainty, such as too narrow
                 prediction intervals, and the non-trivial biases in
                 parameter estimates which can follow data-based
                 modelling. Ways of assessing and overcoming the effects
                 of model uncertainty are discussed, including the use
                 of simulation and resampling methods, a Bayesian model
                 averaging approach and collecting additional data
                 wherever possible. Perhaps the main aim of the paper is
                 to ensure that statisticians are aware of the problems
                 and start addressing the issues even if there is no
                 simple, general theoretical fix.",
  keywords =     "MOVING AVERAGE MODELS, BOOTSTRAP, VALIDATION,
                 PREDICTION, COMPLEXITY, SELECTION, CHOICE,
                 AUTOREGRESSIVE MODEL, BAYESIAN MODEL AVERAGING, DATA
                 MINING, FORECASTING, MODEL BUILDING, RESAMPLING,
                 STATISTICAL INFERENCE, SUBSET SELECTION",
}

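Large Scale Data Mining: Challenges and Responses, Jaturon Chattratichat and John Darlington and Moustafa Ghanem and Harald H\"uning and Yike Guo and Martin K\"ohler and Janjao Sutiwaraphun and Hing Wing To and Dan Yang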
@InProceedings{chattratichat.ea:large-scale:97,
  title =        "Large Scale Data Mining: Challenges and Responses",
  author =       "Jaturon Chattratichat and John Darlington and Moustafa
                 Ghanem and Harald H{\"{u}}ning and Yike Guo and Martin
                 K{\"{o}}hler and Janjao Sutiwaraphun and Hing Wing To
                 and Dan Yang",
  pages =        "143",
  crossref =     "heckerman.ea:proceedings-third:97",
}

Bayesian Classification (AUTOCLASS): Theory and Results, P. Cheeseman and J. Stutz
@InCollection{cheeseman.ea:bayesian-classification:95,
  author =       "P. Cheeseman and J. Stutz",
  title =        "Bayesian Classification ({AUTOCLASS}): Theory and
                 Results",
  booktitle =    "Advances in Knowledge Discovery and Data Mining",
  editor =       "U. M. Fayyad and G. Piatetsky-Shapiro and P. Smyth and
                 R. Uthurusamy",
  publisher =    "AAAI Press / The MIT Press",
  year =         "1995",
}

Efficient Data Mining for Path Traversal Patterns in Distributed Systems, M. S. Chen and J. S. Park and P. S. Yu
@InProceedings{chen.ea:efficient-path:96,
  author =       "M. S. Chen and J. S. Park and P. S. Yu",
  title =        "Efficient Data Mining for Path Traversal Patterns in
                 Distributed Systems",
  booktitle =    "16th International Conference on Distributed Computing
                 Systems (16th ICDCS'96)",
  pages =        "385--393?",
  publisher =    "IEEE",
  address =      "Hong Kong",
  month =        may,
  year =         "1996",
  keywords =     "Distributed Objects",
  note =         "IBM T. J. Watson Research Center, USA",
}

Data mining: an overview from a database perspective, Ming-Syan Chen and Jiawei Han and Philip S. Yu
@Article{chen.ea:overview-database:96,
  author =       "Ming-Syan Chen and Jiawei Han and Philip S. Yu",
  address =      "Natl Taiwan Univ, Dept Elect Engn, Taipei 10764,
                 Taiwan Simon Fraser Univ, Sch Comp Sci, Burnaby, Bc V5A
                 1S6, Canada Ibm Corp, Thomas J Watson Res Ctr, Yorktown
                 Hts, Ny, 10598",
  title =        "Data mining: an overview from a database perspective",
  journal =      "IEEE Transactions on Knowledge and Data Engineering",
  year =         "1996",
  month =        dec,
  volume =       "8",
  number =       "6",
  pages =        "866--883",
  abstract =     "Mining information and knowledge from large databases
                 has been recognized by many researchers as a key
                 research topic in database systems and machine
                 learning, and by many industrial companies as an
                 important area with an opportunity of major revenues.
                 Researchers in many different fields have shown great
                 interest in data mining. Several emerging applications
                 in information providing services, such as data
                 warehousing and on-line services over the Internet,
                 also call for various data mining techniques to better
                 understand user behavior, to improve the service
                 provided, and to increase the business opportunities.
                 In response to such a demand, this article is to
                 provide a survey, from a database researcher's point of
                 view, on the data mining techniques developed recently.
                 A classification of the available data mining
                 techniques is provided, and a comparative study of such
                 techniques is presented.",
  keywords =     "data mining, knowledge discovery, association rules,
                 classification, data clustering, pattern matching
                 algorithms, data generalization and characterization,
                 data cubes, multiple-dimensional databases",
}

A parallel computing approach to creating engineering concept spaces for semantic retrieval - the illinois digital library initiative project, H. C. Chen and B. Schatz and T. Ng and J. Martinez and A. Kirchhoff and C. T. Lin
@Article{chen.ea:parallel-computing:96,
  author =       "H. C. Chen and B. Schatz and T. Ng and J. Martinez and
                 A. Kirchhoff and C. T. Lin",
  address =      "Univ Arizona, Karl Eller Grad Sch Management, Mis
                 Dept, Mcclelland Hall, Tucson, Az, 85721 Univ Illinois,
                 Natl Ctr Supercomp Applicat, Beckman Inst, Urbana, Il,
                 61801 Univ Arizona, Sci \& Engn Lib, Tucson, Az, 85712
                 Univ Arizona, Dept Lib \& Informat Studies, Tucson, Az,
                 85712",
  title =        "A parallel computing approach to creating engineering
                 concept spaces for semantic retrieval - the {Illinois}
                 digital library initiative project",
  journal =      "IEEE Transactions on Pattern Analysis and Machine
                 Intelligence",
  year =         "1996",
  volume =       "18",
  number =       "8",
  pages =        "771--782",
  abstract =     "This research presents preliminary results generated
                 from the semantic retrieval research component of the
                 Illinois Digital Library Initiative (DLI) project.
                 Using a variation of the automatic thesaurus generation
                 techniques, to which we refer as the concept space
                 approach, we aimed to create graphs of domain-specific
                 concepts (terms) and their weighted co-occurrence
                 relationships for all major engineering domains.
                 Merging these concept spaces and providing traversal
                 paths across different concept spaces could potentially
                 help alleviate the vocabulary (difference) problem
                 evident in large-scale information retrieval. We have
                 experimented previously with such a technique for a
                 smaller molecular biology domain (Worm Community
                 System, with 10+ MBs of document collection) with
                 encouraging results. In order to address the
                 scalability issue related to large-scale information
                 retrieval and analysis for the current Illinois DLI
                 project, we recently conducted experiments using the
                 concept space approach on parallel supercomputers. Our
                 test collection included 2+ GBs of computer science and
                 electrical engineering abstracts extracted from the
                 INSPEC database. The concept space approach called for
                 extensive textual and statistical analysis (a form of
                 knowledge discovery) based on automatic indexing and
                 cooccurrence analysis algorithms, both previously
                 tested in the biology domain. Initial testing results
                 using a 512-node CM-5 and a 16-processor SGI Power
                 Challenge were promising. Power Challenge was later
                 selected to create a comprehensive computer engineering
                 concept space of about 270,000 terms and 4,000,000+
                 links using 24.5 hours of CPU time. Our system
                 evaluation involving 12 knowledgeable subjects revealed
                 that the automatically-created computer engineering
                 concept space generated significantly higher concept
                 recall than the human-generated INSPEC computer
                 engineering thesaurus. However, the INSPEC was more
                 precise than the automatic concept space. Our current
                 work mainly involves creating concept spaces for other
                 major engineering domains and developing robust graph
                 matching and traversal algorithms for cross-domain,
                 concept-based retrieval. Future work also will include
                 generating individualized concept spaces for assisting
                 user-specific concept-based information retrieval.",
  keywords =     "INFORMATION-RETRIEVAL, DOCUMENT-RETRIEVAL, CONNECTION
                 MACHINE, NEURAL NETWORKS, SYSTEMS, SEARCH, PERFORMANCE,
                 DATABASES, DESIGN, MODEL, SEMANTIC RETRIEVAL, CONCEPT
                 SPACE, CONCEPT ASSOCIATION, PARALLEL COMPUTING, DIGITAL
                 LIBRARY",
}

Semantics-Based Information Management and Retrieval: A Knowledge Discovery Approach, H. Chen and K. Lynch
@Article{chen.ea:semantics-based-information:92,
  author =       "H. Chen and K. Lynch",
  title =        "Semantics-Based Information Management and Retrieval:
                 {A} Knowledge Discovery Approach",
  journal =      "IEEE Transactions on Systems, Man, and Cybernetics",
  publisher =    "IEEE",
  year =         "1992",
  note =         "Forthcoming",
  abstract =     "We report results of a study that involved the
                 creation of knowledge bases from large, operational
                 textual databases. Two East-bloc computing knowledge
                 bases, both based on semantic network structure, were
                 created automatically using two statistical algorithms.
                 With the help of four East-bloc computing experts, we
                 evaluated the two knowledge bases in detail in a
                 concept-association experiment based on recall and
                 recognition tests. In our experiment, one of the
                 knowledge bases that exhibited the asymmetric link
                 property out-performed all four experts in recalling
                 relevant concepts in East-bloc computing. The knowledge
                 base, which contained about 20,000 concepts (nodes) and
                 280,000 weighted relationships (links), was
                 incorporated as a thesaurus-like component into an
                 intelligent retrieval system. The system allowed users
                 to perform semantics-based information management and
                 information retrieval via interactive, conceptual
                 relevance feedback. Current research efforts include
                 development of a meta knowledge base and design of
                 semantic network and neural network based inferencing
                 algorithms.",
}

Growing Simpler Decision Trees to Facilitate Knowledge Discovery, Kevin J. Cherkauer and Jude W. Shavlik
@inproceedings{cherkauer.ea:growing-simpler:96,
  author   = {Kevin J. Cherkauer and Jude W. Shavlik},
  title    = {Growing Simpler Decision Trees to Facilitate Knowledge Discovery},
  pages    = {315},
  crossref = {simoudis.ea:proceedings-second:96},
}

Efficient mining of association rules in distributed databases, D. W. Cheung and V. T. Ng and A. W. Fu and Y. J. Fu
@Article{cheung.ea:efficient-association:96,
  author =       "D. W. Cheung and V. T. Ng and A. W. Fu and Y. J. Fu",
  address =      "Univ Hong Kong, Dept Comp Sci, Hong Kong, Hong Kong
                 Hong Kong Polytech Univ, Dept Comp, Hong Kong, Hong
                 Kong Chinese Univ Hong Kong, Dept Comp Sci \& Engn,
                 Hong Kong, Hong Kong Simon Fraser Univ, Sch Comp Sci,
                 Burnaby, Bc V5A 1S6, Canada",
  title =        "Efficient mining of association rules in distributed
                 databases",
  journal =      "IEEE Transactions on Knowledge and Data Engineering",
  year =         "1996",
  month =        dec,
  volume =       "8",
  number =       "6",
  pages =        "911--922",
  abstract =     "Many sequential algorithms have been proposed for
                 mining of association rules. However, very little work
                 has been done in mining association rules in
                 distributed databases. A direct application of
                 sequential algorithms to distributed databases is not
                 effective, because it requires a large amount of
                 communication overhead. In this study, an efficient
                 algorithm, DMA, is proposed. It generates a small
                 number of candidate sets and requires only O(n)
                 messages for support count exchange for each candidate
                 set, where n is the number of sites in a distributed
                 database. The algorithm has been implemented on an
                 experimental test bed and its performance is studied.
                 The results show that DMA has superior performance when
                 comparing with the direct application of a popular
                 sequential algorithm in distributed databases.",
  keywords =     "data mining, knowledge discovery, distributed data
                 mining, association rule, distributed database,
                 distributed algorithm, partitioned database",
}

Maintenance of Discovered Knowledge: A Case in Multi-Level Association Rules, David W. Cheung and Vincent T. Ng and Benjamin W. Tam
@inproceedings{cheung.ea:maintenance-discovered:96,
  author   = {David W. Cheung and Vincent T. Ng and Benjamin W. Tam},
  title    = {Maintenance of Discovered Knowledge: {A} Case in Multi-Level
              Association Rules},
  pages    = {307},
  crossref = {simoudis.ea:proceedings-second:96},
}

Knowledge discovery in databases: a rule-based attribute-oriented approach, D. W.-l. Cheung and A. W.-C. Fu and J. Han
@InProceedings{cheung.ea:rule-based-attribute-oriented:94a,
  key_modifier = "a",
  author =       "D. W.-L. Cheung and A. W.-C. Fu and J. Han",
  title =        "Knowledge discovery in databases: {A} rule-based
                 attribute-oriented approach",
  pages =        "164--173",
  editor =       "Zbigniew W. Ra{\'s} and Maria Zemankova",
  booktitle =    "Proceedings of the 8th International Symposium on
                 Methodologies for Intelligent Systems",
  month =        oct,
  series =       "LNAI",
  volume =       "869",
  publisher =    "Springer",
  address =      "Berlin",
  year =         "1994",
}

Knowledge discovery in databases: a rule-based attribute-oriented approach, D. W.-L. Cheung and A. W.-C. Fu and J. Han
@Article{cheung.ea:rule-based-attribute-oriented:94b,
  key_modifier = "b",
  author =       "D. W.-L. Cheung and A. W.-C. Fu and J. Han",
  title =        "Knowledge discovery in databases: {A} rule-based
                 attribute-oriented approach",
  journal =      "Lecture Notes in Computer Science",
  volume =       "869",
  pages =        "164--173",
  year =         "1994",
  ISSN =         "0302-9743",
}

Using Artificial Intelligence Planning to Automate Science Data Analysis for Large Image Databases, Steve Chien and Forest Fisher and Helen Mortensen and Edisanter Lo and Ronald Greeley
@InProceedings{chien.ea:using-artificial:97,
  title =        "Using Artificial Intelligence Planning to Automate
                 Science Data Analysis for Large Image Databases",
  author =       "Steve Chien and Forest Fisher and Helen Mortensen
                 and Edisanter Lo and Ronald Greeley",
  pages =        "147",
  crossref =     "heckerman.ea:proceedings-third:97",
}

A framework for query optimization to support data mining, R. Sunil Choenni and Arno P. J. M. Siebes
Available as
compressed postscript.
@TechReport{choenni.ea:framework-query:96,
  author =       "R. Sunil Choenni and Arno P. J. M. Siebes",
  title =        "A framework for query optimization to support data
                 mining",
  institution =  "Centrum voor Wiskunde en Informatica (CWI)",
  number =       "CS-R9637",
  ISSN =         "0169-118X",
  month =        oct # " 31",
  year =         "1996",
  keywords =     "data mining systems, search strategies, query
                 optimization, physical database design.",
  URL =          "ftp://ftp.cwi.nl/pub/CWIreports/AA/CS-R9637.ps.Z",
  abstract =     "In order to extract knowledge from databases, data
                 mining algorithms heavily query the databases.
                 Inefficient processing of these queries will inevitably
                 have its impact on the performance of these algorithms,
                 making them less valuable. In this paper, we describe
                 an optimization framework for an efficient processing
                 of queries generated by different data mining
                 algorithms. In this framework, we show how to take
                 advantage of the physical organization of the database,
                 the operators and the control structures used in an
                 algorithm. Finally, we discuss how our framework fits
                 into conventional query optimization frameworks.",
  note =         "AA (Department of Algorithmics and Architecture)",
  annote =       "originally contained the following fields and values -
                 booktitle, 105 note, CS-R9637",
}

On multi-query optimization, R. (Sunil) Choenni and Martin L. Kersten and Johan F. P. van den Akker and Amani Saad
Available as
compressed postscript.
@TechReport{choenni.ea:on-multi-query:96,
  author =       "R. (Sunil) Choenni and Martin L. Kersten and Johan F.
                 P. van den Akker and Amani Saad",
  title =        "On multi-query optimization",
  pages =        "19",
  institution =  "Centrum voor Wiskunde en Informatica (CWI)",
  number =       "CS-R9638",
  ISSN =         "0169-118X",
  month =        oct # " 31",
  year =         "1996",
  keywords =     "multi-query optimization, architectures, exploiting
                 interdependencies between queries.",
  URL =          "ftp://ftp.cwi.nl/pub/CWIreports/AA/CS-R9638.ps.Z",
  abstract =     "In some key database applications, such as data
                 mining, a sequence of interdependent queries may be
                 posed simultaneously to the DBMS. The optimization of
                 such sequences is called multi-query optimization, and
                 it attempts to exploit these dependencies in the
                 derivation of a query evaluation plan (qep). Although
                 it has been observed and demonstrated by several
                 researchers that exploitation of dependencies speed up
                 the query processing, limited research has been
                 reported how to benefit from multi-query optimization,
                 taking the capabilities of existing query optimizers
                 into account. This is exactly the topic of this paper.
                 Since existing optimizers are able to optimize queries
                 in which a restricted number of basic operations
                 appears, e.g., number of joins is limited to 10, and
                 the optimization of a query is relatively expensive, we
                 attempt to profit from multi query optimization under
                 the condition that queries are passed only once and
                 separately to the optimizer. We propose a two-step
                 optimization procedure. In the first step, we
                 determine, on the basis of the dependencies between
                 queries, in which order they should be specified and
                 what results should be stored. In the second step, each
                 query is passed separately to an optimizer.",
  note =         "AA (Department of Algorithmics and Architecture)",
  annote =       "originally contained the following fields and values -
                 note, CS-R9638, booktitle, 143",
}

Using a Hybrid Neural/Expert System for Data Base Mining in Market Survey Data, Victor Ciesielski and Gregory Palstra
@inproceedings{ciesielski.ea:using-hybrid:96,
  author   = {Victor Ciesielski and Gregory Palstra},
  title    = {Using a Hybrid Neural/Expert System for Data Base Mining in
              Market Survey Data},
  pages    = {38},
  crossref = {simoudis.ea:proceedings-second:96},
}

Classification Problem Solving, W. J. Clancey
@inproceedings{clancey:classification-problem:84,
  author    = {W. J. Clancey},
  title     = {Classification Problem Solving},
  editor    = {R. J. Brachman},
  booktitle = {Proceedings of the National Conference on Artificial
               Intelligence},
  pages     = {49--55},
  publisher = {William Kaufmann},
  address   = {Austin, Texas},
  month     = aug,
  year      = {1984},
}

The CN2 Induction Algorithm, Peter Clark and Tim Niblett
@article{clark.ea:cn2-induction:89,
  author  = {Peter Clark and Tim Niblett},
  title   = {The {CN2} Induction Algorithm},
  journal = {Machine Learning},
  volume  = {3},
  pages   = {261--283},
  year    = {1989},
}

Knowledge Representation in Machine Learning, Peter Clark
@incollection{clark:representation-machine:89,
  author    = {Peter Clark},
  title     = {Knowledge Representation in Machine Learning},
  editor    = {Yves Kodratoff and Alan Hutchinson},
  booktitle = {Machine and Human Learning, advances in European Research},
  pages     = {35--49},
  publisher = {Michael Horwood},
  address   = {London},
  year      = {1989},
}

Security and Privacy Implications of Data Mining, Chris Clifton and Don Marks
Available as
postscript.
@inproceedings{clifton.ea:security-privacy:96,
  author        = {Chris Clifton and Don Marks},
  title         = {Security and Privacy Implications of Data Mining},
  booktitle     = {Workshop on Data Mining and Knowledge Discovery},
  organization  = {ACM SIGMOD},
  publisher     = {University of British Columbia Department of Computer
                   Science},
  address       = {Montreal, Canada},
  number        = {96-08},
  pages         = {15--19},
  month         = jun # " 2",
  year          = {1996},
  url           = {ftp://ftp.fas.sfu.ca/pub/cs/han/dmkd96/p15.ps},
  contributedby = {clifton(at)mitre.org},
}

Overfitting Explained, P. R. Cohen and D. Jensen
Available as
postscript.
@InProceedings{cohen.ea:overfitting-explained:97,
  author =       "P. R. Cohen and D. Jensen",
  title =        "Overfitting Explained",
  booktitle =    "Preliminary Papers of the Sixth International Workshop
                 on Artificial Intelligence and Statistics",
  year =         "1997",
  month =        jan,
  pages =        "115--122",
  abstract =     "Overfitting arises when model components are evaluated
                 against the wrong reference distribution. Most modeling
                 algorithms iteratively find the best of several
                 components and then test whether this component is good
                 enough to add to the model. We show that for
                 independently distributed random variables, the
                 reference distribution for any one variable
                 underestimates the reference distribution for the
                 highest-valued variable; thus variate values will
                 appear significant when they are not, and model
                 components will be added when they should not be added.
                 We relate this problem to the well-known statistical
                 theory of multiple comparisons or simultaneous
                 inference.",
  abstract_url = "http://eksl-www.cs.umass.edu/~jensen/papers/ais97b.html",
  URL =          "http://www-eksl.cs.umass.edu/papers/cohen-ais96b.ps",
}

The Role of Knowledge Mining in the Development and Evolution of New Applications, David Cohen and L. Berke and P. Bloom and D. Cohen and D. Tsur
@inproceedings{cohen.ea:role-development:94,
  author    = {David Cohen and L. Berke and P. Bloom and D. Cohen and D. Tsur},
  title     = {The Role of Knowledge Mining in the Development and Evolution
               of New Applications},
  editor    = {Ahmed K. Elmagarmid and Erich Neuhold},
  booktitle = {Proceedings of the 10th International Conference on Data
               Engineering},
  pages     = {166--167},
  publisher = {IEEE Computer Society Press},
  address   = {Houston, TX},
  month     = feb,
  year      = {1994},
}

Knowledge in context: a strategy for expert system maintenance, P. Compton and R. Jansen
@inproceedings{compton.ea:context-strategy:88,
  author    = {P. Compton and R. Jansen},
  title     = {Knowledge in context: a strategy for expert system
               maintenance},
  booktitle = {Proceedings of the 2nd {A}ustralian Joint Artificial
               Intelligence conference},
  series    = {Lecture Notes in Artificial Intelligence},
  volume    = {406},
  pages     = {292--306},
  publisher = {Springer},
  address   = {Adelaide},
  year      = {1988},
}

Knowledge discovery in molecular databases, D. Conklin and S. Fortier and J. Glasgow
@Article{conklin.ea:molecular:93,
  author =       "D. Conklin and S. Fortier and J. Glasgow",
  address =      "Queens Univ, Dept Comp \& Informat Sci, Kingston K7L
                 3N6, On, Canada Queens Univ, Dept Chem, Kingston K7L
                 3N6, On, Canada",
  title =        "Knowledge discovery in molecular databases",
  journal =      "IEEE Transactions on Knowledge and Data Engineering",
  year =         "1993",
  volume =       "5",
  number =       "6",
  pages =        "985--987",
  abstract =     "This paper describes an approach to knowledge
                 discovery in complex molecular databases. The machine
                 learning paradigm used is structured concept formation,
                 in which objects described in terms of components and
                 their interrelationships are clustered and organized in
                 a knowledge base. Symbolic images are used to represent
                 classes of structured objects. A discovered molecular
                 knowledge base is successfully used in the
                 interpretation of a high resolution electron density
                 map.",
  keywords =     "PROTEIN, CASE-BASED REASONING, CHEMICAL INFORMATION
                 RETRIEVAL, CONCEPTUAL CLUSTERING, DESCRIPTION LOGICS,
                 INDEXING, RELATIONAL MODELS, SCENE ANALYSIS, SPATIAL
                 CONCEPTS, SPATIAL REASONING, STRUCTURED CONCEPT
                 FORMATION",
}

Machine discovery of protein motifs, D. Conklin
@Article{conklin:machine-protein:95,
  author =       "D. Conklin",
  address =      "Zymogenet Inc, 1201 Eastlake Ave E, Seattle, Wa,
                 98102",
  title =        "Machine discovery of protein motifs",
  journal =      "Machine Learning",
  year =         "1995",
  volume =       "21",
  number =       "1--2",
  pages =        "125--150",
  abstract =     "The investigation of relations between protein
                 tertiary structure and amino acid sequence is a topic
                 of tremendous importance in molecular biology. The
                 automated discovery of recurrent patterns of structure
                 and sequence is an essential part of this
                 investigation. These patterns, known as protein motifs,
                 are abstractions of fragments drawn from proteins of
                 known sequence and tertiary structure. This paper has
                 two objectives. The first is to introduce and define
                 protein motifs, and provide a survey of previous
                 research on protein motif discovery. The second is to
                 present and apply a novel approach to protein motif
                 representation and discovery, which is based on a
                 spatial description logic and the symbolic machine
                 learning paradigm of structured concept formation. A
                 large database of protein fragments is processed using
                 this approach, and several interesting and significant
                 protein motifs are discovered.",
  keywords =     "SECONDARY STRUCTURE, SEQUENCE PATTERNS, PREDICTIVE
                 POWER, IDENTIFICATION, RECOGNITION, GENERATION,
                 DEFINITION, TEMPLATES, SETS, PROTEIN TERTIARY
                 STRUCTURE, MACHINE DISCOVERY, RELATIONAL LEARNING,
                 KNOWLEDGE REPRESENTATION, DESCRIPTION LOGICS,
                 INFORMATION RETRIEVAL, KNOWLEDGE DISCOVERY IN
                 DATABASES",
}

Scalable discovery of informative structural concepts using domain knowledge, D. J. Cook and L. B. Holder and S. Djoko
@Article{cook.ea:scalable-informative:96,
  author =       "D. J. Cook and L. B. Holder and S. Djoko",
  address =      "Univ Texas, Dept Comp Sci \& Engn, Arlington, Tx,
                 76019 Bell No Res, Sci Staff, Richardson, Tx",
  title =        "Scalable discovery of informative structural concepts
                 using domain knowledge",
  journal =      "IEEE Expert: Intelligent Systems and Their
                 Applications",
  year =         "1996",
  volume =       "11",
  number =       "5",
  pages =        "59--68",
}

Substructure Discovery Using Minimum Description Length and Background Knowledge, D. J. Cook and L. B. Holder
Available as
postscript.
@Article{cook.ea:substructure-using:94,
  author =       "D. J. Cook and L. B. Holder",
  title =        "Substructure Discovery Using Minimum Description
                 Length and Background Knowledge",
  journal =      "Journal of Artificial Intelligence Research",
  year =         "1994",
  volume =       "1",
  pages =        "231--255",
  abstract =     "The ability to identify interesting and repetitive
                 substructures is an essential component to discovering
                 knowledge in structural data. We describe a new version
                 of our SUBDUE substructure discovery system based on
                 the minimum description length principle. The SUBDUE
                 system discovers substructures that compress the
                 original data and represent structural concepts in the
                 data. By replacing previously-discovered substructures
                 in the data, multiple passes of SUBDUE produce a
                 hierarchical description of the structural regularities
                 in the data. SUBDUE uses a computationally-bounded
                 inexact graph match that identifies similar, but not
                 identical, instances of a substructure and finds an
                 approximate measure of closeness of two substructures
                 when under computational constraints. In addition to
                 the minimum description length principle, other
                 background knowledge can be used by SUBDUE to guide the
                 search towards more appropriate substructures.
                 Experiments in a variety of domains demonstrate
                 SUBDUE's ability to find substructures capable of
                 compressing the original data and to discover
                 structural concepts important to the domain.",
  annote =       "The SUBDUE system discovers substructures that
                 compress the original data and represent structural
                 concepts in the data. By replacing
                 previously-discovered substructures in the data,
                 multiple passes of SUBDUE produce a hierarchical
                 description of the structural regularities in the
                 data.",
  URL =          "gopher://P.GP.CS.CMU.EDU:70/00/volume1/cook94a.ps",
}

What has Mill to Say About Data Mining?, Tremaine A. O. Cornish and Anthony D. Elliman
@InProceedings{cornish.ea:what-has:95,
  author =       "Tremaine A. O. Cornish and Anthony D. Elliman",
  title =        "What has {Mill} to Say About Data Mining?",
  pages =        "347--353",
  booktitle =    "Proceedings of the Eleventh Conference on Artificial
                 Intelligence for Applications",
  month =        "20--23~" # feb,
  publisher =    "IEEE Computer Society Press",
  address =      "Los Alamitos",
  year =         "1995",
}

Historical perspectives on information-science, T. A. O. Cornish
@Article{cornish:historical-perspectives:96,
  author =       "T. A. O. Cornish",
  address =      "Brunel Univ, Dept Comp Sci \& Informat Syst, Uxbridge
                 Ub8 3Ph, Middx, England",
  title =        "Historical perspectives on information-science",
  journal =      "Systems Research and Information Science",
  year =         "1996",
  volume =       "7",
  number =       "2",
  pages =        "105--116",
  abstract =     "There is a general attitude in science and
                 particularly computer science, that if something is
                 more than five years old, then we have nothing to learn
                 from it. This paper seeks first to destroy the basis of
                 this myth with reference to areas of current research
                 which are still striving to live up to visions set many
                 years ago. Secondly to look at an area of research,
                 Knowledge Discovery in Databases and demonstrate that
                 it too has a great deal to learn from the distant past,
                 which has been all but overlooked.",
  keywords =     "KNOWLEDGE DISCOVERY, SYSTEMATIC, SCIENTIFIC, DATA
                 MINING, HISTORICAL, INFORMATION, SYSTEMS",
}

Data Mining of Multi-dimensional Remotely Sensed Images, Robert F. Cromp and William J. Campbell
@inproceedings{cromp.ea:multi-dimensional-remotely:93,
  author    = {Robert F. Cromp and William J. Campbell},
  title     = {Data Mining of Multi-dimensional Remotely Sensed Images},
  editor    = {Bharat Bhargava and Timothy Finin and Yelena Yesha},
  booktitle = {Proceedings of the 2nd International Conference on
               Information and Knowledge Management},
  pages     = {471--480},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
  month     = nov,
  year      = {1993},
}

Knowledge Discovery in Databases: Exploiting Knowledge-Level Redescription, J. Cupit and N. Shadbolt
@Article{cupit.ea:exploiting-knowledge-level:96a,
  key_modifier = "a",
  author =       "J. Cupit and N. Shadbolt",
  title =        "Knowledge Discovery in Databases: Exploiting
                 Knowledge-Level Redescription",
  journal =      "Lecture Notes in Computer Science",
  volume =       "1076",
  pages =        "245--261",
  year =         "1996",
  ISSN =         "0302-9743",
}

Knowledge Discovery in Databases: Exploiting Knowledge-Level Redescription, James Cupit and Nigel Shadbolt
@InProceedings{cupit.ea:exploiting-knowledge-level:96b,
  key_modifier = "b",
  author =       "James Cupit and Nigel Shadbolt",
  title =        "Knowledge Discovery in Databases: Exploiting
                 Knowledge-Level Redescription",
  pages =        "245--261",
  editor =       "Nigel Shadbolt and Kieron O'Hara and Guus Schreiber",
  booktitle =    "Proceedings of the Ninth European Knowledge
                 Acquisition Workshop ({EKAW}-96)",
  month =        "14--17~" # may,
  series =       "LNAI",
  volume =       "1076",
  publisher =    "Springer",
  address =      "Berlin",
  year =         "1996",
}

Mining Knowledge in Noisy Audio Data, Andrzej Czyzewski
@inproceedings{czyzewski:noisy-audio:96,
  author   = {Andrzej Czyzewski},
  title    = {Mining Knowledge in Noisy Audio Data},
  pages    = {220},
  crossref = {simoudis.ea:proceedings-second:96},
}

Distributed learning: An agent-based approach to data-mining, Winton Davies and Peter Edwards
@InProceedings{davies.ea:distributed-learning:95,
  title =        "Distributed learning: {A}n agent-based approach to
                 data-mining",
  author =       "Winton Davies and Peter Edwards",
  booktitle =    "Working