% Data Mining Bibliographies Copyright Information
% 
% The author reserves the 
% 
%     Copyright (C) 1997 Andy Pryke. All rights reserved.
% 
% for the compilation of this KDD bibliography collection. 
% 
% If you find the bibliography collection useful for your work, I would
% be happy if you acknowledge it and me. You could also send me a
% postcard if you wish (address below).
% 
% I usually give my consent that the collection may be copied and
% distributed with the following conditions:
% 
% 1) It may be used only for research or educational purposes
% 
% and 
% 
% 2) Any copy must be accompanied by a reference to the original
% collection and its author.
% 
% and
% 
% 3) This information must always accompany every copy of a bibliography.
% 
% I reserve the right to revoke the above permission at any time. 
% 
% Any other use must be negotiated in advance. 
% 
% Any commercial use of the bibliographies is strictly prohibited. In
% particular, the whole or derived bibliographies may not be sold for
% profit or included in commercial documents (e.g., published on CD-ROM,
% floppy disks, books, magazines, or other print form) without the prior
% written permission of the copyright holder.
% 
% Please contact the author if the intended usage is not covered by the
% above statement.
% 
% Abstracts of publications published by the ACM and the IEEE are also
% subject to the respective "interim" or "provisional" copyright
% policies:
% 
%     ACM copyright policy (http://www.acm.org/pubs/copyright_policy/)
%     IEEE copyright policy (http://www.ieee.org/copyright/policies.htm)
% 
% This copyright notice is derived from one by Alf-Christian Achilles
% for his (massive) Computer Science Bibliography Collection at 
% (http://liinwww.ira.uka.de/bibliography/index.html).
% 
% --------------------------------------------------------------------
% 
% My address:
% 
% My postal address is:
% 
% Andy Pryke,
% Department of Computer Science,
% The University of Birmingham,
% Edgbaston,
% Birmingham.
% B15 2TT
% 
% Fax  : 0121 414 4281
% Phone: 0121 414 3736
% Email: A.N.Pryke(at)cs.bham.ac.uk
% Web: http://www.cs.bham.ac.uk/~anp/
% 
,
@article{machine_learning_journal_special:93,
  key     = {Machine_Learning_Journal_Special:93},
  journal = {Machine Learning Journal},
  volume  = {5},
  number  = {6},
  month   = dec,
  year    = {1993},
  note    = {Special issue on Learning and Discovery in Databases},
}

Improved Methods for Finding Association Rules,
Available as
compressed postscript.
% The percent sign in the abstract is escaped (\%): a bare one would start a
% TeX comment in any style that typesets the abstract.
@techreport{no_author:improved-methods:,
  title    = {Improved Methods for Finding Association Rules},
  url      = {ftp://ftp.cs.helsinki.fi/pub/Reports/by_Project/PMDM/Improved_Methods_for_Finding_Association_Rules.ps.gz},
  abstract = {Association rules are statements of the form for 90\%
    of the rows of the relation, if the row has value 1 in
    the columns in set W, then it has 1 also in column B.
    Agrawal, Imielinski, and Swami introduced the problem
    of mining association rules from large collections of
    data, and gave a method based on successive passes over
    the database. We give an improved algorithm for the
    problem. The method is based on careful combinatorial
    analysis of the information obtained in previous
    passes; this makes it possible to eliminate unnecessary
    candidate rules. Experiments on a university course
    enrollment database indicate that the method
    outperforms the previous one by a factor of 5. We also
    give simple information-theoretic lower bounds for the
    problem of finding association rules, and show that
    sampling is in general a very efficient way of finding
    such rules. Computing Reviews Categories and Subject
    Descriptors: H.3.3 [Information Systems]: Information
    Storage and Retrieval - Information Search and
    Retrieval I.2.6 [Computing Methodologies]: Artificial
    Intelligence - Learning I.2.8 [Computing
    Methodologies]: Artificial Intelligence - Problem
    Solving, Control Methods, and Search General Terms:
    Databases, machine learning, artificial intelligence.
    Additional Key Words and Phrases: Database mining,
    knowledge discovery in databases, association rules,
    covering sets.},
}

Learning Decision Trees for Mapping the Local Environment in Mobile Robot Navigation,
Available as
compressed postscript.
@techreport{no_author:learning-decision:,
  title    = {Learning Decision Trees for Mapping the Local
    Environment in Mobile Robot Navigation},
  url      = {ftp://ftp.cs.helsinki.fi/pub/Reports/by_Project/PMDM/Learning_Decision_Trees_for_Mapping_the_Local_Environment_in_Mobile_Robot_Navigation.ps.gz},
  abstract = {This paper describes the use of the C4.5 decision tree
    learning algorithm in the design of a classifier for a
    new approach to the mapping of a mobile robot's local
    environment. The decision tree uses the features from
    the echoes of an ultrasonic array mounted on the robot
    to classify the contours of its local environment. The
    contours are classified into a finite number of two
    dimensional shapes to form a primitive map which is to
    be used for navigation. The nature of the problem,
    noise and the practical timing constraints,
    distinguishes it from those typically used in machine
    learning applications and highlights some of the
    advantages of decision tree learning in robotic
    applications.},
}

Overheads for the AI'94 Tutorial on Intelligent Learning Database Systems,
Available as
postscript.
@misc{no_author:overheads-ai94:,
  title    = {Overheads for the {AI}'94 Tutorial on Intelligent
    Learning Database Systems},
  url      = {ftp://coral.cs.jcu.edu.au/pub/research/HCV/KDD.ps},
  abstract = {This full-day tutorial presents and discusses
    techniques for the following 3 interconnected phases in
    constructing intelligent learning database systems: (1)
    Translation of standard database information into a
    form suitable for use by a rule-based system; (2) Using
    machine learning techniques to produce rule bases from
    databases; and (3) Interpreting the rules produced to
    solve users' problems and/or reduce data spaces. It
    suits a wide audience including postgraduate students
    and industrial people from databases, expert systems,
    and machine learning.},
  annote   = {Comments and suggestions for improvements are
    solicited! Comments to Xindong Wu
    (xindong(at)INSECT.SD.MONASH.EDU.AU).},
}

State Of The Art,
Available as
sec8.htm.
@article{no_author:state-art:95,
  title   = {State Of The Art},
  journal = {Byte},
  month   = oct,
  year    = {1995},
  url     = {http://www.byte.com/art/9510/sec8/sec8.htm},
  annote  = {A number of articles, good introduction to data
    mining},
}

Discovery of Actionable Patterns in Databases: The Action Hierarchy Approach, Gediminas Adomavicius and Alexander Tuzhilin
@inproceedings{adomavicius.ea:actionable-patterns:97,
  author   = {Gediminas Adomavicius and Alexander Tuzhilin},
  title    = {Discovery of Actionable Patterns in Databases: The
    Action Hierarchy Approach},
  pages    = {111},
  crossref = {heckerman.ea:proceedings-third:97},
}

Mining Association Rules between Sets of Items in Large Databases, Rakesh Agrawal and Tomasz Imielinski and Arun N. Swami
Available as
postscript.
@inproceedings{agrawal.ea:association-rules:93a,
  key_modifier = {a},
  author       = {Rakesh Agrawal and Tomasz Imielinski and Arun N.
    Swami},
  title        = {Mining Association Rules between Sets of Items in
    Large Databases},
  editor       = {Peter Buneman and Sushil Jajodia},
  booktitle    = {Proceedings of the 1993 {ACM} {SIGMOD} International
    Conference on Management of Data},
  address      = {Washington, D.C.},
  month        = "26--28~" # may,
  year         = {1993},
  pages        = {207--216},
  url          = {http://www.almaden.ibm.com/cs/people/ragrawal/papers/sigmod93.ps},
  abstract     = {We are given a large database of customer
    transactions. Each transaction consists of items
    purchased by a customer in a visit. We present an
    efficient algorithm that generates all significant
    association rules between items in the database. The
    algorithm incorporates buffer management and novel
    estimation and pruning techniques. We also present
    results of applying this algorithm to sales data
    obtained from a large retailing company, which shows
    the effectiveness of the algorithm.},
}

Mining association rules between sets of items in large databases, Rakesh Agrawal and Tomasz Imielinski and Arun Swami
@article{agrawal.ea:association-rules:93b,
  key_modifier       = {b},
  author             = {Rakesh Agrawal and Tomasz Imielinski and Arun Swami},
  title              = {Mining association rules between sets of items in
    large databases},
  journal            = {SIGMOD Record (ACM Special Interest Group on
    Management of Data)},
  volume             = {22},
  number             = {2},
  pages              = {207--216},
  month              = jun,
  year               = {1993},
  isbn               = {0-89791-592-5},
  issn               = {0163-5808},
  abstract           = {We are given a large database of customer
    transactions. Each transaction consists of items
    purchased by a customer in a visit. We present an
    efficient algorithm that generates all significant
    association rules between items in the database. The
    algorithm incorporates buffer management and novel
    estimation and pruning techniques. We also present
    results of applying this algorithm to sales data
    obtained from a large retailing company, which shows
    the effectiveness of the algorithm.},
  affiliation        = {IBM Almaden Research Cent},
  affiliationaddress = {San Jose, CA, USA},
  classification     = {723.3; 921.6; 911.4; 723.2; 722.1; 922.1; C6160Z
    (Other DBMS); C6130 (Data handling techniques); C6170
    (Expert systems); C6120 (File organisation); C7170
    (Marketing)},
  conference         = {Proceedings of the 1993 ACM SIGMOD International
    Conference on Management of Data},
  conferenceyear     = {1993},
  keywords           = {Database systems; Algorithms; Marketing; Data
    handling; Data storage equipment; Probability;
    Estimation; Query languages; Large scale systems;
    Associative processing; Administrative data processing;
    Large databases; Mining association rules; Pruning
    technique; Basket data, Large database; Customer
    transactions; Efficient algorithm; Association rules;
    Buffer management; Novel estimation; Pruning
    techniques; Sales data; Large retailing company},
  meetingaddress     = {Washington, DC, USA},
  meetingdate        = {May 26--28 1993},
  meetingdate2       = {05/26--28/93},
  publisherinfo      = {Fort Collins Computer Center},
  sponsor            = {ACM, SIGMOD; Minerals, Metals \& Materials Society},
  thesaurus          = {Knowledge based systems; Marketing data processing;
    Storage management; Transaction processing; Very large
    databases},
  xxcrossref         = {Anonymous:1993:SAS},
}

Database mining - a performance perspective, R. Agrawal and T. Imielinski and A. Swami
% The issue goes in the standard number field (unknown fields such as "issue"
% are ignored by the standard styles); the authors' postal address is stored as
% affiliation rather than address, which denotes a publisher location.
@article{agrawal.ea:database-performance:93,
  author      = {R. Agrawal and T. Imielinski and A. Swami},
  title       = {Database mining - a performance perspective},
  journal     = {{IEEE} Transactions on Knowledge and Data Engineering},
  volume      = {5},
  number      = {6},
  pages       = {914--925},
  year        = {1993},
  affiliation = {Ibm Corp, Almaden Res Ctr, 650 Harry Rd, San Jose, Ca,
    95120},
  abstract    = {We present our perspective of database mining as the
    confluence of machine learning techniques and the
    performance emphasis of database technology. We
    describe three classes of database mining problems
    involving classification, associations, and sequences,
    and argue that these problems can be uniformly viewed
    as requiring discovery of rules embedded in massive
    data. We describe a model and some basic operations for
    the process of rule discovery. We show how the database
    mining problems we consider map to this model and how
    they can be solved by using the basic operations we
    propose. We give an example of an algorithm for
    classification obtained by combining the basic rule
    discovery operations. This algorithm not only is
    efficient in discovering classification rules but also
    has accuracy comparable to ID3, one of the current best
    classifiers.},
  annote      = {Identification and unification of 3 classes of data
    mining problem, Classification, Association and
    Sequences. They then go on to propose a unifying
    framework for these three problems, and five basic
    operators for rule discovery. These are then used to
    construct an algorithm CDP (Classifier with Dynamic
    Pruning) which out performs ID3 in classifier accuracy
    and efficiency on a test problem.},
  keywords    = {ASSOCIATIONS, CLASSIFICATION, DATABASE MINING,
    DECISION TREES, KNOWLEDGE DISCOVERY, SEQUENCES},
}

Developing Tightly-Coupled Data Mining Applications on a Relational Database System, Rakesh Agrawal and Kyuseok Shim
@inproceedings{agrawal.ea:developing-tightly-coupled:96,
  author   = {Rakesh Agrawal and Kyuseok Shim},
  title    = {Developing Tightly-Coupled Data Mining Applications on
    a Relational Database System},
  pages    = {287},
  crossref = {simoudis.ea:proceedings-second:96},
}

Fast Algorithms for Mining Association Rules in Large Databases, R. Agrawal and R. Srikant
% Editor name corrected from the garbled "Jorgeesh Bocca"; month taken from the
% conference dates given in booktitle.
@inproceedings{agrawal.ea:fast-algorithms:94,
  author    = {R. Agrawal and R. Srikant},
  title     = {Fast Algorithms for Mining Association Rules in Large
    Databases},
  editor    = {Jorge B. Bocca and Matthias Jarke and Carlo Zaniolo},
  booktitle = {20th International Conference on Very Large Data
    Bases, September 12--15, 1994, Santiago, Chile
    proceedings},
  publisher = {Morgan Kaufmann Publishers},
  address   = {Los Altos, CA 94022, USA},
  pages     = {487--499},
  month     = sep,
  year      = {1994},
  annote    = {Also known as VLDB'94},
  keywords  = {very large data bases; VLDB},
}

Parallel mining of association rules, R. Agrawal and J. C. Shafer
% The issue goes in the standard number field (unknown fields such as "issue"
% are ignored by the standard styles); the authors' postal address is stored as
% affiliation rather than address, which denotes a publisher location.
@article{agrawal.ea:parallel-association:96,
  author      = {R. Agrawal and J. C. Shafer},
  title       = {Parallel mining of association rules},
  journal     = {{IEEE} Transactions on Knowledge and Data Engineering},
  volume      = {8},
  number      = {6},
  pages       = {962--969},
  year        = {1996},
  affiliation = {Ibm Corp, Almaden Res Ctr, 650 Harry Rd, San Jose, Ca,
    95120},
  abstract    = {We consider the problem of mining association rules on
    a shared-nothing multiprocessor. We present three
    algorithms that explore a spectrum of trade-offs
    between computation, communication, memory usage,
    synchronization, and the use of problem-specific
    information. The best algorithm exhibits near perfect
    scaleup behavior, yet requires only minimal overhead
    compared to the current best serial algorithm.},
  keywords    = {data mining, association rules, parallel algorithms},
}

Quest: A Project on Database Mining, R. Agrawal and M. Carey and C. Faloutsos and S. Ghosh and A. Houtsma and T. Imielinski and B. Iyer and A. Mahboob and H. Miranda and R. Srikant and A. Swami
% Surname "Faloutsos" corrected (was "Faloutson"); the one-page item is given
% as a single page instead of the degenerate range 514--514.
@article{agrawal.ea:quest-project:94a,
  key_modifier   = {a},
  author         = {R. Agrawal and M. Carey and C. Faloutsos and S. Ghosh
    and A. Houtsma and T. Imielinski and B. Iyer and A.
    Mahboob and H. Miranda and R. Srikant and A. Swami},
  title          = {{Quest}: {A} Project on Database Mining},
  journal        = {SIGMOD Record (ACM Special Interest Group on
    Management of Data)},
  volume         = {23},
  number         = {2},
  pages          = {514},
  month          = jun,
  year           = {1994},
  issn           = {0163-5808},
  affiliation    = {IBM Almaden Res. Center, San Jose, CA, USA},
  classification = {C6160 (Database management systems (DBMS))},
  keywords       = {Quest project; Database mining; Tertiary storage; Data
    model construction; Data model verification},
  thesaurus      = {Very large databases},
  xxcrossref     = {Anonymous:1994:ASI},
}

Quest: A Project on Database Mining, Rakesh Agrawal and Michael J. Carey and Christos Faloutsos and Sakti P. Ghosh and Maurice A. W. Houtsma and Tomasz Imielinski and Balakrishna R. Iyer and A. Mahboob and H. Miranda and Ramakrishnan Srikant and Arun N. Swami
@inproceedings{agrawal.ea:quest-project:94b,
  key_modifier = {b},
  author       = {Rakesh Agrawal and Michael J. Carey and Christos
    Faloutsos and Sakti P. Ghosh and Maurice A. W. Houtsma
    and Tomasz Imielinski and Balakrishna R. Iyer and A.
    Mahboob and H. Miranda and Ramakrishnan Srikant and
    Arun N. Swami},
  title        = {Quest: {A} Project on Database Mining},
  editor       = {Richard T. Snodgrass and Marianne Winslett},
  booktitle    = {Proceedings of the 1994 {ACM} {SIGMOD} International
    Conference on Management of Data},
  address      = {Minneapolis, Minnesota},
  month        = "24--27~" # may,
  year         = {1994},
  pages        = {514},
}

The Quest Data Mining System, Rakesh Agrawal and Manish Mehta and John Shafer and Ramakrishnan Srikant and Andreas Arning and Toni Bollinger
% "Quest" is a system name; it is braced so sentence-casing styles do not
% lower-case it.
@inproceedings{agrawal.ea:quest-system:96,
  author   = {Rakesh Agrawal and Manish Mehta and John Shafer and
    Ramakrishnan Srikant and Andreas Arning and Toni
    Bollinger},
  title    = {The {Quest} Data Mining System},
  pages    = {244},
  crossref = {simoudis.ea:proceedings-second:96},
}

Mining Sequential Patterns, R. Agrawal and R. Srikant
% booktitle corrected to the conference's actual name (ICDE is the
% International Conference on Data Engineering).
@inproceedings{agrawal.ea:sequential-patterns:95,
  author       = {R. Agrawal and R. Srikant},
  title        = {Mining Sequential Patterns},
  booktitle    = {International Conference on Data Engineering},
  organization = {IEEE},
  year         = {1995},
  pages        = {3--14},
  abstract     = {We are given a large database of customer
    transactions, where each transaction consists of
    customer-id, transaction time, and the items bought in
    the transaction. We introduce the problem of mining
    sequential patterns over such databases. We present
    three algorithms to solve this problem, and empirically
    evaluate their performance using synthetic data. Two of
    the proposed algorithms, AprioriSome and AprioriAll,
    have comparable performance, albeit AprioriSome
    performs a little better when the minimum number of
    customers that must support a sequential pattern is
    low. Scale-up experiments show that both AprioriSome
    and AprioriAll scale linearly with the number of
    customer transactions. They also have excellent
    scale-up properties with respect to the number of
    transactions per customer and the number of items in a
    transaction.},
}

Data Mining, Rakesh Agrawal
@inproceedings{agrawal:data-mining:94,
  author    = {Rakesh Agrawal},
  title     = {Data Mining},
  booktitle = {Proceedings of the 13th Symposium on Principles of
    Database Systems},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
  month     = may,
  year      = {1994},
  pages     = {75--76},
}

Tutorial: Data Mining, R. Agrawal
% The series title is stored in normal title case with its acronyms braced
% (it was hard-coded in capitals); month is taken from the booktitle.
@inproceedings{agrawal:tutorial:94,
  author    = {R. Agrawal},
  title     = {Tutorial: Data Mining},
  editor    = {{ACM}},
  booktitle = {13th Symposium --- 1994 May: Minneapolis; {MN}},
  series    = {Proceedings of the {ACM} {SIGACT} {SIGMOD} {SIGART} Symposium
    on Principles of Database Systems 1994},
  volume    = {13},
  publisher = {ACM Press},
  address   = {New York, NY 10036, USA},
  pages     = {75--76},
  month     = may,
  year      = {1994},
  keywords  = {database systems; ACM; SIGACT; SIGMOD; SIGART;
    computability; theory},
}

Machine Learning tutorial (Slides and Annotated Bibliography), David Aha
Available as
hypertext.
@misc{aha:machine-learning:,
  author = {David Aha},
  title  = {Machine Learning tutorial (Slides and Annotated
    Bibliography)},
  url    = {http://www.aic.nrl.navy.mil/~aha/slides.html},
  annote = {David Aha presented the Machine Learning tutorial at
    AI \& Stats 1995. He's kindly put his slides online},
}

Temporal aspects in data mining, Salem Al-naemi
% The report is dated 1992/3. The year field holds the first year so styles can
% sort and label the entry; the original dating is preserved in note.
@techreport{al-naemi:temporal-aspects:92,
  author      = {Salem Al-naemi},
  title       = {Temporal aspects in data mining},
  institution = {Computer Science Department, University of
    Birmingham},
  year        = {1992},
  note        = {Dated 1992/3},
  annote      = {Sections on RdB's, other temporal models and time
    series},
}

Mine for Gold with Parallel Systems, Michael Alexander
@article{alexander:mine-gold:94,
  author   = {Michael Alexander},
  title    = {Mine for Gold with Parallel Systems},
  journal  = {Datamation},
  volume   = {40},
  number   = {22},
  pages    = {65--??},
  day      = {15},
  month    = nov,
  year     = {1994},
  issn     = {0011-6963},
  abstract = {Parallel computing technology has become more
    accessible to IS shops with the release of parallelized
    versions of popular RDBMSs. With such off-the-shelf
    tools, your company can gain competitive advantage
    through techniques like data mining that allow you to
    more finely analyze and project demand for your
    products. But if you're going to need the power of
    massively parallel systems, off-the-shelf solutions are
    still a few years away.},
}

Partial Classification Using Association Rules, Kamal Ali and Stefanos Manganaris and Ramakrishnan Srikant
@inproceedings{ali.ea:partial-classification:97,
  author   = {Kamal Ali and Stefanos Manganaris and Ramakrishnan
    Srikant},
  title    = {Partial Classification Using Association Rules},
  pages    = {115},
  crossref = {heckerman.ea:proceedings-third:97},
}

Charter, Robert B. Allen
@article{allen:charter:95,
  author    = {Robert B. Allen},
  title     = {Charter},
  journal   = {ACM Transactions on Information Systems},
  volume    = {13},
  number    = {3},
  pages     = {235},
  year      = {1995},
  copyright = {(c) Copyright 1995 Association for Computing
    Machinery},
  abstract  = {The ACM Transactions on Information Systems (TOIS)
    considers the design, performance, and evaluation of
    computer systems that facilitate the presentation of
    information in a variety of media, as well as
    underlying technologies that support these systems. The
    major themes of TOIS and those topics which distinguish
    it from other ACM Transactions include: - Information
    Retrieval and Information Filtering: Algorithms and
    inference mechanisms for search, retrieval, and
    presentation of information and models of user
    information preferences. - Information Interfaces:
    Hypertext and hypermedia interfaces, information
    visualization, multimedia presentation, and task and
    user models for information systems. - Natural Language
    Processing: Computational linguistics and models of
    natural language (including content, syntax, semantics,
    and dialogue) relevant to information systems. -
    Knowledge and Information Representation:
    Representation issues for supporting information
    systems including semantic and object-oriented
    databases, knowledge bases, and hypertext/hypermedia
    document models. - Multimedia Information Systems:
    Semantics, search, and presentation of media including
    audio, image, video, and virtual reality. - Networked
    Information Systems: Interfaces and indexing, resource
    discovery, and visualization. - Organizational
    Interfaces and Social Impact of Information Systems:
    Electronic mail; decision and negotiation support
    systems; the effects of information system use on
    groups, organizations, and communities; social
    constraints imposed on information systems such as
    legal and privacy concerns. - Design and Evaluation of
    Information Systems: Design principles for information
    systems, methodologies for evaluating information
    systems, and programming languages relevant to
    information systems. - Information System Applications:
    Electronic books, documents, journals, movies, and
    libraries; authoring systems; office information
    systems; geographic information systems; and
    intelligent tutoring systems.},
}

Knowledge discovery in biomedical databases - a machine induction approach, H. Alnahi and S. Alshawi
% The issue goes in the standard number field (unknown fields such as "issue"
% are ignored by the standard styles), written as an en-dash range; the
% authors' postal address is stored as affiliation rather than address, which
% denotes a publisher location.
@article{alnahi.ea:biomedical-machine:93,
  author      = {H. Alnahi and S. Alshawi},
  title       = {Knowledge discovery in biomedical databases - a
    machine induction approach},
  journal     = {Computer Methods and Programs in Biomedicine},
  volume      = {39},
  number      = {3--4},
  pages       = {343--349},
  year        = {1993},
  affiliation = {Brunel Univ, Dept Comp Sci, Uxbridge Ub8 3Ph, Middx,
    England},
  abstract    = {The increase in the number and size of available
    databases by far exceeds the growth of the
    corresponding knowledge. Furthermore, many databases
    contain information which is not possessed by an
    existing human expert. This creates both a need and an
    opportunity for extracting knowledge from databases. An
    unsolved problem in molecular biology is the problem of
    predicting a protein's secondary structure from its
    primary structure. Inductive machine learning is a
    search for a plausible general description which can
    explain the given input data, and is useful for
    predicting new data. In this paper we present a
    statistical inductive algorithm which can be used to
    produce new rules for predicting multiple protein
    secondary structures from protein primary structure
    databases.},
  keywords    = {SECONDARY STRUCTURE, PREDICTION, SEQUENCE, MACHINE
    LEARNING, INDUCTION, DATABASES, KNOWLEDGE, RULES,
    PROTEIN PRIMARY SECONDARY STRUCTURES, AMINO ACID
    RESIDUES},
}

Discovering rules for water demand prediction: an enhanced rough-set approach (reprinted from proceedings of the international joint conference on artificial intelligence), A. J. An and N. Shan and C. Chan and N. Cercone and W. Ziarko
% The issue goes in the standard number field (unknown fields such as "issue"
% are ignored by the standard styles); the authors' postal address is stored as
% affiliation rather than address, which denotes a publisher location.
@article{an.ea:discovering-rules:96,
  author      = {A. J. An and N. Shan and C. Chan and N. Cercone and W.
    Ziarko},
  title       = {Discovering rules for water demand prediction: an
    enhanced rough-set approach (reprinted from proceedings
    of the international joint conference on artificial
    intelligence)},
  journal     = {Engineering Applications of Artificial Intelligence},
  volume      = {9},
  number      = {6},
  pages       = {645--653},
  year        = {1996},
  affiliation = {Univ Regina, Dept Comp Sci, Regina, Sk S4S 0A2,
    Canada},
  abstract    = {Prediction of consumer demands is a pre-requisite for
    optimal control of water distribution systems because
    minimum-cost pumping schedules can be computed if water
    demands are accurately estimated. This paper presents an
    enhanced rough-sets method for generating prediction
    rules from a set of observed data. The proposed method
    extends upon the standard rough set model by making use
    of the statistical information inherent in the data to
    handle incomplete and ambiguous training samples. It
    also discusses some experimental results from using
    this method for discovering knowledge on water demand
    prediction. Copyright (C) 1996 IJCAI Inc.},
  keywords    = {water demand prediction, knowledge discovery, rough
    sets},
}

Edm - a general framework for data mining based on evidence theory, S. S. Anand and D. A. Bell and J. G. Hughes
@Article{anand.ea:edm-general:96,
  author =       "S. S. Anand and D. A. Bell and J. G. Hughes",
  affiliation =  "Univ Ulster, Fac Informat, Sch Informat \& Software
                 Engn, Jordanstown, North Ireland",
  title =        "{EDM} - a general framework for data mining based on
                 evidence theory",
  journal =      "Data \& Knowledge Engineering",
  year =         "1996",
  volume =       "18",
  number =       "3",
  pages =        "189--223",
  abstract =     "Data Mining or Knowledge Discovery in Databases
                 [1,15,23] is currently one of the most exciting and
                 challenging areas where database techniques are coupled
                 with techniques from Artificial Intelligence and
                 mathematical sub-disciplines to great potential
                 advantage. It has been defined as the non-trivial
                 extraction of implicit, previously unknown and
                 potentially useful information from data. A lot of
                 research effort is being directed towards building
                 tools for discovering interesting patterns which are
                 hidden below the surface in databases. However, most of
                 the work being done in this field has been
                 problem-specific and no general framework has yet been
                 proposed for Data Mining. In this paper we seek to
                 remedy this by proposing EDM - Evidence-based Data
                 Mining - a general framework for Data Mining based on
                 Evidence Theory. Having a general framework for Data
                 Mining offers a number of advantages. It provides a
                 common method for representing knowledge which allows
                 prior knowledge from the user or knowledge discovered
                 by another discovery process to be incorporated into
                 the discovery process. A common knowledge
                 representation also supports the discovery of
                 meta-knowledge from knowledge discovered by different Data
                 Mining techniques. Furthermore, a general framework can
                 provide facilities that are common to most discovery
                 processes, e.g. incorporating domain knowledge and
                 dealing with missing values. The framework presented in
                 this paper has the following additional advantages. The
                 framework is inherently parallel. Thus, algorithms
                 developed within this framework will also be parallel
                 and will therefore be expected to be efficient for
                 large data sets - a necessity as most commercial data
                 sets, relational or otherwise, are very large. This is
                 compounded by the fact that the algorithms are complex.
                 Also, the parallelism within the framework allows its
                 use in parallel, distributed and heterogeneous
                 databases. The framework is easily updated and new
                 discovery methods can be readily incorporated within
                 the framework, making it 'general' in the functional
                 sense in addition to the representational sense
                 considered above. The framework provides an intuitive
                 way of dealing with missing data during the discovery
                 process using the concept of Ignorance borrowed from
                 Evidence Theory. The framework consists of a method for
                 representing data and knowledge, and methods for data
                 manipulation or knowledge discovery(1). We suggest an
                 extension of the conventional definition of mass
                 functions in Evidence Theory for use in Data Mining, as
                 a means to represent evidence of the existence of rules
                 in the database. The discovery process within EDM
                 consists of a series of operations on the mass
                 functions. Each operation is carried out by an EDM
                 operator. We provide a classification for the EDM
                 operators based on the discovery functions performed by
                 them and discuss aspects of the induction, domain and
                 combination operator classes. The application of EDM to
                 two separate Data Mining tasks is also addressed,
                 highlighting the advantages of using a general
                 framework for Data Mining in general and, in
                 particular, using one that is based on Evidence
                 Theory.",
  keywords =     "DATA MINING, KNOWLEDGE DISCOVERY IN DATABASES,
                 UNCERTAINTY HANDLING, EVIDENCE THEORY, PARALLEL
                 DISCOVERY",
}

A High-Performance Data Mining Server, S. S. Anand and D. A. Bell and J. G. Hughes and C. M. Shapcott
% NOTE(review): the end page is recorded as unknown ("??") in the source data.
@article{anand.ea:high-performance-server:96,
  author  = {S. S. Anand and D. A. Bell and J. G. Hughes and C. M. Shapcott},
  title   = {A High-Performance Data Mining Server},
  journal = {Lecture Notes in Computer Science},
  year    = {1996},
  volume  = {1067},
  pages   = {907--??},
  issn    = {0302-9743},
}

Data mining in parallel, S. S. Anand and C. Shapcott and D. Bell and J. Hughes
@InProceedings{anand.ea:parallel:95,
  author =       "S. S. Anand and C. Shapcott and D. Bell and J.
                 Hughes",
  title =        "Data mining in parallel",
  volume =       "44",
  series =       "Transputer and Occam Engineering",
  pages =        "113--124",
  booktitle =    "Proceedings of WoTUG-18: Transputer and occam
                 Developments",
  year =         "1995",
  publisher =    "IOS Press",
  address =      "Amsterdam",
  month =        apr,
  ISBN =         "90-5199-222-X",
}

Getting to grips with arrears: `data mining' systems at the Leeds, anonymous
@Article{anonymous:getting-to:94,
  author =       "Anonymous",
  title =        "Getting to grips with arrears: `data mining' systems
                 at the {Leeds}",
  journal =      "Expert Systems",
  year =         "1994",
  volume =       "11",
  number =       "2",
  pages =        "122--124",
  month =        may,
  keywords =     "Applications, Data mining, kdd, Attar Software, Xpert
                 Rule Analyser",
}

Data Mining: Intelligent Technology Gets down to Business, anonymous
@Article{anonymous:intelligent-technology:93,
  author =       "Anonymous",
  title =        "Data Mining: Intelligent Technology Gets down to
                 Business",
  journal =      "PC AI",
  year =         "1993",
  month =        nov # "--" # dec,
}

Lessons in Data Mining, Anonymous
@article{anonymous:lessons:97,
  author  = {Anonymous},
  title   = {Lessons in Data Mining},
  journal = {Byte Magazine},
  year    = {1997},
  month   = feb,
  volume  = {22},
  number  = {2},
  pages   = {40--??},
  issn    = {0360-5280},
}

SIGMOD '93. 1993 ACM SIGMOD. International Conference on Management of Data, Anonymous (Ed)
% Parent proceedings record; booktitle duplicates title so that crossref
% children can inherit it under classic BibTeX.
@proceedings{anonymous:sigmod-93:93,
  editor         = {Anonymous},
  title          = {{SIGMOD} '93. 1993 {ACM} {SIGMOD}. International
    Conference on Management of Data},
  booktitle      = {SIGMOD '93. 1993 ACM SIGMOD. International Conference
    on Management of Data},
  series         = {SIGMOD Record (ACM Special Interest Group on
    Management of Data)},
  volume         = {22(2)},
  publisher      = {ACM Press},
  address        = {New York, NY 10036, USA},
  year           = {1993},
  month          = jun,
  issn           = {0163-5808},
  confdate       = {26--28 May 1993},
  conflocation   = {Washington, DC, USA},
  confsponsor    = {ACM},
  classification = {C6160 (Database management systems (DBMS)); C4250
    (Database theory); C7250 (Information storage and
    retrieval); C6170 (Expert systems); C6120 (File
    organisation); C6140D (High level languages); C6130
    (Data handling techniques); C6150G (Diagnostic,
    testing, debugging and evaluating systems)},
  keywords       = {Benchmark programs; Database rules; Integrity; Join
    processing; Object-oriented databases; Memory-based
    implementations; DBMS implementation issues; Recovery;
    Knowledge discovery; Temporal reasoning; Data
    compression; Query optimisation; Secondary storage
    techniques; Search structures; Query languages;
    Interfaces; Intelligent/deductive DBMSs;
    Relational/parallel DBMS processing; Transaction
    management; Object/scientific DBMSs; Interoperability},
  thesaurus      = {Data compression; Database management systems;
    Database theory; Inference mechanisms; Knowledge based
    systems; Program testing; Query languages; Query
    processing; Storage management; System recovery;
    Transaction processing},
}

Supercomputers Knock At IS Doors, Anonymous
@Article{anonymous:supercomputers-knock-at-is-doors:92,
  author =       "Anonymous",
  title =        "Supercomputers Knock At {IS} Doors",
  journal =      "Datamation",
  volume =       "38",
  number =       "24",
  pages =        "79--??",
  day =          "01",
  month =        dec,
  year =         "1992",
  ISSN =         "0011-6963",
  abstract =     "Cost-effective massively parallel designs gain
                 converts for data mining and OLTP applications among
                 leading edge users and traditional systems suppliers.",
}

Computational learning theory: an introduction, Martin Anthony and Norman Biggs
@book{anthony.ea:computational-learning:92,
  author    = {Martin Anthony and Norman Biggs},
  title     = {Computational learning theory: an introduction},
  series    = {Cambridge Tracts in Theoretical Computer Science},
  volume    = {30},
  publisher = {Cambridge University Press},
  year      = {1992},
}

Knowledge Mining by Imprecise Querying: A Classification-based System, T. M. Anwar and H. W. Beck and S. B. Navathe
@inproceedings{anwar.ea:by-imprecise:92,
  author    = {T. M. Anwar and H. W. Beck and S. B. Navathe},
  title     = {Knowledge Mining by Imprecise Querying: {A}
    Classification-based System},
  booktitle = {Proceedings of the International Conference on Data
    Engineering},
  pages     = {622--630},
  address   = {Tempe, AZ},
  year      = {1992},
  month     = feb,
  abstract  = {Knowledge mining is the process of discovering new
    knowledge that is hitherto unknown. Users with a lack
    of knowledge of database schemas engage in the process
    of knowledge mining by posing imprecise queries. An
    approach to knowledge mining by imprecise querying is
    presented that utilizes conceptual clustering
    techniques. In contrast to numeric or fuzzy set
    approaches which ultimately rely on some distance
    metric and threshold to processing such queries,
    conceptual clustering retrieves instances which are
    structurally, semantically, and pragmatically similar
    to the query even though they may not match the
    requirements exactly. The query processor has both a
    deductive and inductive component. The deductive
    component finds precise matches in the traditional
    sense, and the inductive component identifies ways in
    which imprecise matches may be considered similar.
    Ranking on similarity is done using the database
    taxonomy, by which similar instances become members of
    the same class. Relative similarity is determined by
    depth in the taxonomy. The conceptual clustering
    algorithm, its use in query processing and an example
    are presented.},
}

Sales surge as mainframes find a role in client\slash server, E. L. Appleton
@article{appleton:sales-surge:95,
  author         = {E. L. Appleton},
  title          = {Sales surge as mainframes find a role in client\slash
    server},
  journal        = {Datamation},
  year           = {1995},
  month          = jun,
  volume         = {41},
  number         = {10},
  pages          = {48},
  issn           = {0011-6963},
  language       = {English},
  pubcountry     = {USA},
  classification = {D5010 (Computers and work stations); D5020 (Computer
    networks and intercomputer communications)},
  keywords       = {Mainframes; Client/server; Demand; Economy;
    Large-system market; Vendors; IBM Parallel Sysplex;
    UNIX server; NT server; Pyramid; HP T-500; Data mining;
    Parallelism; IBM Power Parallel; Amdahl ECL mainframe},
  thesaurus      = {Client-server systems; DP industry; Mainframes},
}

Predicting defects in Disk Drive Manufacturing: a case study in High-Dimensional Classification, Chidanand Apt\'e and Sholom Weiss and Gordon Grout
@InProceedings{apte.ea:predicting-defects:93,
  author =       "Chidanand Apt{\'e} and Sholom Weiss and Gordon Grout",
  title =        "Predicting defects in Disk Drive Manufacturing: a case
                 study in High-Dimensional Classification",
  booktitle =    "Proceedings of the 9th Conference on Artificial
                 Intelligence for Applications",
  pages =        "212--218",
  address =      "Orlando, Florida",
  year =         "1993",
}

A Linear Method for Deviation Detection in Large Databases, Andreas Arning and Rakesh Agrawal and Prabhakar Raghavan
@inproceedings{arning.ea:linear-method:96,
  author   = {Andreas Arning and Rakesh Agrawal and Prabhakar Raghavan},
  title    = {A Linear Method for Deviation Detection in Large Databases},
  pages    = {164},
  crossref = {simoudis.ea:proceedings-second:96},
}

Exploiting Background Knowledge in Automated Discovery, John M. Aronis and Foster J. Provost and Bruce G. Buchanan
@inproceedings{aronis.ea:exploiting-background:96,
  author   = {John M. Aronis and Foster J. Provost and Bruce G. Buchanan},
  title    = {Exploiting Background Knowledge in Automated Discovery},
  pages    = {355},
  crossref = {simoudis.ea:proceedings-second:96},
}

Increasing the Efficiency of Data Mining Algorithms with Breadth-First Marker Propagation, John M. Aronis and Foster J. Provost
@inproceedings{aronis.ea:increasing-efficiency:97,
  author   = {John M. Aronis and Foster J. Provost},
  title    = {Increasing the Efficiency of Data Mining Algorithms
    with Breadth-First Marker Propagation},
  pages    = {119},
  crossref = {heckerman.ea:proceedings-third:97},
}

Data mining for lead identification and explosion, S. Ash and S. Gothe
@Article{ash.ea:lead-identification:97,
  author =       "S. Ash and S. Gothe",
  affiliation =  "Tripos Inc, St Louis, Mo, 63144",
  title =        "Data mining for lead identification and explosion",
  journal =      "Abstracts of Papers of the American Chemical Society",
  year =         "1997",
  volume =       "213",
  number =       "Pt1",
  pages =        "57--CINF",
}

Managing Complexity in Large Data Bases Using Self-Organizing Maps, Barbro Back and Mikko Irjala and Kaisa Sere and Hannu Vanharanta
Available as
hypertext.
% NOTE(review): "Accounting" in the earlier-paper title cited at the end of
% the abstract was restored from the garbled "Accambis"; confirm against
% the report itself.
@TechReport{back.ea:managing-complexity:96,
  author =       "Barbro Back and Mikko Irjala and Kaisa Sere and Hannu
                 Vanharanta",
  title =        "Managing Complexity in Large Data Bases Using
                 Self-Organizing Maps",
  institution =  "TUCS - Turku Centre for Computer Science",
  number =       "TUCS-TR-48",
  month =        oct # " 23",
  year =         "1996",
  keywords =     "neural networks, self-organizing maps, data bases,
                 benchmarking",
  URL =          "http://www.tucs.abo.fi/publications/techreports/TR48.html",
  abstract =     "The amount of financial information in today's
                 sophisticated large data bases is huge and makes
                 comparisons between company performance - especially
                 over time - difficult or at least very time consuming.
                 The aim of this paper is to investigate whether neural
                 networks in the form of self-organizing maps can be
                 used to manage the complexity in large data bases. We
                 structure and analyze accounting numbers in a large
                 data base over several time periods. By using self
                 organizing maps, we overcome the problems associated
                 with finding the appropriate underlying distribution
                 and the functional form of the underlying data in the
                 structuring task that is often encountered, for
                 example, when using cluster analysis. The method chosen
                 also offers a way of visualizing the results. The data
                 base in this study consists of annual reports of more
                 than 120 world wide forest companies with data from a
                 five year time period. This paper is an extended
                 version of our paper Data Mining Accounting Numbers Using
                 Self Organising Maps presented at Finnish Artificial
                 Intelligence Conference in Vasa 20-23 August 1996.",
}

ReDuce: Automatic Structuring and Compression in Relational Databases, B. Bain and C. Sammut and A. Sharma and J. Shepherd
@InProceedings{bain.ea:reduce-automatic:96,
  author =       "B. Bain and C. Sammut and A. Sharma and J. Shepherd",
  title =        "{ReDuce}: {A}utomatic Structuring and Compression in
                 Relational Databases",
  booktitle =    "Proceedings of the MLnet Familiarization Workshop on
                 Data Mining with Inductive Logic Programming",
  pages =        "41--52",
  year =         "1996",
}

Knowledge from data using fuzzy methods, J. F. Baldwin
@Article{baldwin:using-fuzzy:96,
  author =       "J. F. Baldwin",
  affiliation =  "Univ Bristol, Dept Engn Math, Bristol, Avon, England",
  title =        "Knowledge from data using fuzzy methods",
  journal =      "Pattern Recognition Letters",
  year =         "1996",
  volume =       "17",
  number =       "6",
  pages =        "593--600",
  abstract =     "The basic concept of a data browser is explained and
                 some methods are described which are suitable for
                 extracting knowledge from data as an induction process.
                 The data browser gives data mining capabilities but
                 also provides a stage for computers and users to act
                 out their parts in this knowledge discovery process.",
}

From molecules to models to data mining, N. Basta
@Article{basta:molecules-to:96,
  author =       "N. Basta",
  affiliation =  "Us Dept Def, Off Infosec Comp Sci, Ft George G Meade,
                 Md, 20755",
  title =        "From molecules to models to data mining",
  journal =      "Chemical Engineering",
  year =         "1996",
  volume =       "103",
  number =       "2",
  pages =        "5",
}

Brute-Force Mining of High-Confidence Classification Rules, Roberto J. Bayardo, Jr.
@InProceedings{bayardo:brute-force-high-confidence:97,
  title =        "Brute-Force Mining of High-Confidence Classification
                 Rules",
  author =       "Bayardo, Jr., Roberto J.",
  pages =        "123",
  crossref =     "heckerman.ea:proceedings-third:97",
}

Discovery and Maintenance of Functional Dependencies by Independencies, S. Bell
@inproceedings{bell:maintenance-functional:95,
  author    = {S. Bell},
  title     = {Discovery and Maintenance of Functional Dependencies
    by Independencies},
  booktitle = {Proceedings of the Workshop on Knowledge Discovery in
    Databases},
  publisher = {AAAI Press},
  year      = {1995},
  pages     = {27--32},
}

From data properties to evidence, D. A. Bell
@Article{bell:properties-to:93,
  author =       "D. A. Bell",
  affiliation =  "Univ Ulster, Dept Informat Sci, Jordanstown Bt37 0Qb,
                 Antrim, North Ireland",
  title =        "From data properties to evidence",
  journal =      "{IEEE} Transactions on Knowledge and Data Engineering",
  year =         "1993",
  volume =       "5",
  number =       "6",
  pages =        "965--969",
  abstract =     "Information and knowledge in computerized information
                 systems are often characterized by uncertainty. The
                 facts needed for some realistic applications are
                 unavailable or are crudely estimated or judged. This
                 problem manifests itself frequently in information
                 systems centered on databases. We describe here an
                 exploration of an aspect of the problem of handling
                 uncertain evidence on which reasoning is to be based.
                 We focus upon the problem of making decisions among
                 propositions based on both uncertain data items (in
                 contrast to data in conventional databases) and
                 arguments which are not certain. The primary knowledge
                 discovery issue we address is a classification problem
                 - which classification does the available evidence
                 support? The method investigated here seeks to exploit
                 information available from conventional database
                 systems - namely, the integrity assertions or data
                 dependency information contained in the database. This
                 information, e.g., from functional dependencies and a
                 form of multivalued dependencies, allows us to rank
                 arguments in terms of their strengths. Hence, as a step
                 in the process of discovering classification knowledge,
                 using a database as a secondary knowledge discovery
                 exercise, we explicate latent knowledge pertinent to
                 arguments of relevance to the purpose at hand. This is
                 called evidence. Information is requested via user
                 prompts from an evidential reasoner. It is fed as
                 evidence to the reasoner. An object-oriented structure
                 for managing evidence is used to model the conclusion
                 space and to reflect the evidence structure. The
                 implementation of the evidence structure and an example
                 of its use are outlined.",
  keywords =     "CLASSIFICATION, DATA DEPENDENCIES, DATABASE, EVIDENCE
                 BASE, EVIDENTIAL REASONING, INTEGRITY CONSTRAINTS",
}

Value-added databases: knowledge discovery and evidential reasoning., D. Bell
@InProceedings{bell:value-added-evidential:94,
  title =        "Value-added databases: knowledge discovery and
                 evidential reasoning",
  author =       "D. Bell",
  booktitle =    "Proceedings of the International Workshop on Advances
                 in Databases and Information Systems - {ADBIS'94}",
  address =      "Moscow",
  year =         "1994",
  month =        may # " 23--26",
  pages =        "2--9",
  abstract =     "Results of research into methods of managing evidence
                 can be coupled with the power and capacity of data
                 management systems to give a potent approach to
                 discovering interesting but hidden patterns in large
                 collections of data. We present some pertinent results
                 from evidence theory and its applications, and suggest
                 an approach to the exploitation of these results in the
                 discovery of knowledge which is held in databases. In
                 this sense we {\em add value} to databases, which
                 presumably already justify their existence, and hence
                 further increase the attractiveness of very large
                 database systems.",
}

An Examination of Inductive Learning Algorithms for the Classification of Sleep Signals, John A. Bentrup and Sylvian R. Ray
Available as
compressed postscript.
@TechReport{bentrup.ea:examination-inductive:93,
  author =       "John A. Bentrup and Sylvian R. Ray",
  title =        "An Examination of Inductive Learning Algorithms for
                 the Classification of Sleep Signals",
  institution =  "Department of Computer Science, University of Illinois
                 at Urbana-Champaign",
  type =         "Report",
  number =       "UIUCDCS-R-93-1792",
  address =      "1304 Springfield Avenue, Urbana, IL 61801",
  month =        feb,
  year =         "1993",
  URL =          "ftp://a.cs.uiuc.edu/pub/TechReports/UIUCDCS-R-93-1792.ps.Z",
  note =         "Modified version to appear in Proceedings of the 30th
                 Annual Rocky Mountain Bioengineering Symposium (April
                 1993).",
  annote =       "Nine inductive learning algorithms are tested on sleep
                 signals of 161 subjects. Algorithms are ID3, C4, CART,
                 MDL, AIMS, Bayes, PLS(K), PRG, Nearest Neighbour and
                 COBWEB. Nice table summarising algorithms.",
}

Integrated Learning in a Real Domain, F. Bergadano and A. Giordana and L. Saitta
@incollection{bergadano.ea:integrated-learning:91,
  author    = {F. Bergadano and A. Giordana and L. Saitta},
  title     = {Integrated Learning in a Real Domain},
  editor    = {Gregory Piatetsky-Shapiro and William J. Frawley},
  booktitle = {Knowledge Discovery in Databases},
  edition   = {1st},
  pages     = {277--288},
  publisher = {AAAI Press / The MIT Press},
  address   = {Menlo Park, California},
  year      = {1991},
}

Applying Data Mining and Machine Learning Techniques to Submarine Intelligence Analysis, Ulla Bergsten and Johan Schubert and Per Svensson
@inproceedings{bergsten.ea:applying-machine:97,
  author   = {Ulla Bergsten and Johan Schubert and Per Svensson},
  title    = {Applying Data Mining and Machine Learning Techniques
    to Submarine Intelligence Analysis},
  pages    = {127},
  crossref = {heckerman.ea:proceedings-third:97},
}

Hot Topics: Customizing information. 2. How successful are we so far?, D. Berleant and H. Berghel
@article{berleant.ea:hot-topics:94,
  author         = {D. Berleant and H. Berghel},
  affiliation    = {Dept. of Comput. Syst. Eng., Arkansas Univ.,
    Fayetteville, AR, USA},
  title          = {Hot Topics: Customizing information. 2. {How}
    successful are we so far?},
  journal        = {Computer},
  year           = {1994},
  month          = oct,
  volume         = {27},
  number         = {10},
  pages          = {76--78},
  issn           = {0018-9162},
  classification = {C6130D (Document processing techniques); C7210
    (Information services and centres); C7250N (Front end
    systems for online searching)},
  keywords       = {Advanced information customization; Browsing; Data
    interchange; Digital library; Document customization;
    Filtering; Hypermedia; Hypertext; Information analysis;
    Information extraction; Information retrieval;
    Information science; Information-customizing
    interfaces; Interactivity; Knowledge discovery;
    Nonprescriptive structuring},
  thesaurus      = {Document handling; Full-text databases; Hypermedia;
    Information retrieval; Online front-ends},
}

Enactment in Information Farming, Mark Bernstein
@inproceedings{bernstein:enactment-information:93,
  author    = {Mark Bernstein},
  title     = {Enactment in Information Farming},
  booktitle = {Proceedings of ACM Hypertext'93},
  series    = {Technical Briefings},
  year      = {1993},
  pages     = {242--249},
  copyright = {(c) Copyright 1993 Association for Computing
    Machinery},
  keywords  = {Design, Rhetoric, Enactment, Collaboration,
    Information farming},
  abstract  = {Information farming views the cultivation of
    information as a continuing, collaborative activity
    performed by groups of people working together to
    achieve changing individual and common goals. Failure
    to differentiate information farming from related but
    distinct activities like information mining and data
    factories has been a fruitful source of
    misunderstanding and discord in the hypertext
    literature and in the design of hypertext environments.
    Dramatic enactment and visual salience -- not recall,
    precision, or usability -- assume primary roles in
    design for information gardening. In this technical
    briefing, we examine how enactment contribute to the
    success and failure of a variety of Hypergate and
    Storyspace features.},
}

Computational Methods for Intelligent Information Access, Michael W. Berry and Susan T. Dumais and Todd A. Letsche
% NOTE(review): the abstract field holds an availability remark rather than
% an abstract; left as-is so no new text is printed by styles.
@InProceedings{berry.ea:computational-methods:95,
  author =       "Michael W. Berry and Susan T. Dumais and Todd A.
                 Letsche",
  title =        "Computational Methods for Intelligent Information
                 Access",
  booktitle =    "Proceedings of Supercomputing'95",
  publisher =    "ACM/IEEE",
  address =      "San Diego, CA",
  month =        dec,
  year =         "1995",
  keywords =     "data mining, indexing, information, latent, matrices,
                 retrieval, semantic, singular value decomposition
                 (SVD), sparse, updating",
  abstract =     "ps/PDF on the CD with MPEG.",
}

Testing Complex Temporal Relationships Involving Multiple Granularities and Its Application to Data Mining, C. Bettini and X. Sean Wang and S. Jajodia
@InProceedings{bettini.ea:testing-complex:96,
  author =       "C. Bettini and X. Sean Wang and S. Jajodia",
  title =        "Testing Complex Temporal Relationships Involving
                 Multiple Granularities and Its Application to Data
                 Mining",
  editor =       "{ACM}",
  booktitle =    "Proceedings of the Fifteenth {ACM}
                 {SIGACT}-{SIGMOD}-{SIGART} Symposium on Principles of
                 Database Systems, {PODS} 1996, Montr{\'e}al, Canada,
                 June 3--5, 1996",
  volume =       "15",
  publisher =    "ACM Press",
  address =      "New York, NY 10036, USA",
  year =         "1996",
  series =       "Proceedings of the ACM SIGACT SIGMOD SIGART Symposium
                 on Principles of Database Systems",
  pages =        "68--78",
  annote =       "Held in conjunction with the 1996 ACM SIGMOD
                 international conference on management of data. Also
                 known as PODS 1996",
  keywords =     "database systems; PODS; ACM; SIGMOD; SIGART; SIGACT",
}

Time-dependent concepts: representation and reasoning using temporal description logics, C. Bettini
@Article{bettini:time-dependent-concepts:97,
  author =       "C. Bettini",
  address =      "Univ Milan, Dipartimento Sci Informaz, I-20122 Milan,
                 Italy",
  title =        "Time-dependent concepts: representation and reasoning
                 using temporal description logics",
  journal =      "Data \& Knowledge Engineering",
  year =         "1997",
  volume =       "22",
  number =       "1",
  pages =        "1--38",
  abstract =     "A time-dependent concept is a conceptual entity that
                 is defined in terms of temporal relationships with
                 other entities. For example, the concept of an action
                 is defined in terms of a set of temporal relationships
                 among states of a system. The concept of ``widow'', in
                 natural language, is defined in terms of events that
                 have occurred in the past. Time-dependent concepts
                 appear in several application areas, from natural
                 language to diagnosis, from planning to data mining. An
                 interesting issue in knowledge representation is how to
                 formally represent and reason with these concepts. In
                 this paper, we represent a family of formal
                 representation languages obtained as an interval-based
                 temporal extension of description logics. We illustrate
                 the expressiveness of these formalisms in representing
                 time-dependent concepts with respect to standard
                 description logics and other extensions. We give some
                 complexity results for reasoning problems and we
                 propose approximate algorithms to compute subsumption
                 among time-dependent concepts.",
  keywords =     "INTERVALS, temporal knowledge, temporal reasoning,
                 description logics, taxonomies, subsumption algorithms,
                 temporal objects",
}

Advanced Scout: Data Mining and Knowledge Discovery in NBA data, Inderpal Bhandari and Ed Colet and Jennifer Parker and Zachary Pines and Rajiv Pratap and Krishnakumar Ramanujam
@article{bhandari.ea:advanced-scout:97,
  author  = {Inderpal Bhandari and Ed Colet and Jennifer Parker and
             Zachary Pines and Rajiv Pratap and Krishnakumar
             Ramanujam},
  title   = {Advanced Scout: Data Mining and Knowledge Discovery in
             {NBA} data},
  journal = {Data Mining and Knowledge Discovery},
  volume  = {1},
  number  = {1},
  year    = {1997},
  annote  = {Advanced Scout is a PC-based data mining application
             used by National Basketball Association (NBA) coaching
             staffs to discover interesting patterns in basketball
             game data. We describe Advanced Scout software from the
             perspective of data mining and knowledge discovery.
             This paper highlights the pre-processing of raw data
             that the program performs, describes the data mining
             aspects of the software and how the interpretation of
             patterns supports the process of knowledge discovery.
             The underlying technique of attribute focusing as the
             basis of the algorithm is also described. The process
             of pattern interpretation is facilitated by allowing
             the user to relate patterns to video tape.},
}

A case-study of software process improvement during development, I. Bhandari and M. Halliday and E. Tarver and D. Brown and J. Chaar and R. Chillarege
@Article{bhandari.ea:case-study-software:93,
  author =       "I. Bhandari and M. Halliday and E. Tarver and D. Brown
                 and J. Chaar and R. Chillarege",
  address =      "Ibm Corp, Thomas J Watson Res Ctr, Yorktown Hts, Ny,
                 10598 Ibm Corp, Mid Hudson Valley Programming Lab,
                 Wappingers Falls, Ny, 12590",
  title =        "A case-study of software process improvement during
                 development",
  journal =      "IEEE Transactions on Software Engineering",
  year =         "1993",
  volume =       "19",
  number =       "12",
  pages =        "1157--1170",
  abstract =     "We present a case study of the use of a software
                 process improvement method which is based on the
                 analysis of defect data. The first step of the method
                 is the classification of software defects using
                 attributes which relate defects to specific process
                 activities. Such classification captures the semantics
                 of the defects in a fashion which is useful for process
                 correction. The second step utilizes a
                 machine-assisted approach to data exploration which
                 allows a project team to discover such knowledge from
                 defect data as is useful for process correction. We
                 show that such analysis of defect data can readily lead
                 a project team to improve their process during
                 development.",
  keywords =     "CYCLE, DATA EXPLORATION, DEFECT-BASED PROCESS
                 IMPROVEMENT, IN-PROCESS METRICS, KNOWLEDGE DISCOVERY",
}

Attribute focusing - machine-assisted knowledge discovery applied to software production process-control, I. Bhandari
@Article{bhandari:attribute-focusing:94,
  author =       "I. Bhandari",
  address =      "Ibm Corp, Thomas J Watson Res Ctr, Yorktown Hts, Ny,
                 10598",
  title =        "Attribute focusing - machine-assisted knowledge
                 discovery applied to software production
                 process-control",
  journal =      "Knowledge Acquisition",
  year =         "1994",
  volume =       "6",
  number =       "3",
  pages =        "271--294",
  abstract =     "How can people who are not trained in data analysis
                 discover knowledge from a database of attribute-valued
                 data? I address this question by presenting a
                 man-machine approach to knowledge discovery called
                 Attribute Focusing and its application to software
                 production process control. Attribute Focusing utilizes
                 an automatic filter to focus attention on that small
                 part of a large amount of data which is interesting. A
                 person studies that part in a manner which leads him to
                 discover knowledge about the physical situation to
                 which the data pertain. Specifically, the paper
                 describes: 1. A model of interestingness of data based
                 on the magnitude of data values, the association of
                 data values and basic knowledge of the limits of human
                 processing. 2. The use of that model of interestingness
                 by people to discover knowledge. 3. The application of
                 the Attribute Focusing approach to diagnose and correct
                 the software production process. Based on the results
                 that have been observed, the paper concludes that
                 man-machine approaches to knowledge discovery should be
                 emphasized much more than has been in the past, and
                 that Attribute Focusing is a powerful, practical
                 approach to such discovery.",
}

Data mining, N. Bissantz and J. Hagedorn
@Article{bissantz.ea:data-mining:93,
  author =       "N. Bissantz and J. Hagedorn",
  address =      "Ibm Corp, Thomas J Watson Res Ctr, Yorktown Hts, Ny,
                 10598 Ibm Corp, Mid Hudson Valley Programming Lab,
                 Wappingers Falls, Ny, 12590",
  title =        "Data mining",
  journal =      "Wirtschaftsinformatik",
  year =         "1993",
  volume =       "35",
  number =       "5",
  pages =        "481--487",
  internal-note = "NOTE(review): the address field is identical to the
                 affiliation recorded for
                 bhandari.ea:case-study-software:93 and is presumably a
                 copy-paste error; confirm the authors' affiliation",
}

Relational knowledge discovery in databases, H. Blockeel and L. De Raedt
@InProceedings{blockeel.ea:relational:96,
  author =       "H. Blockeel and De Raedt, L.",
  title =        "Relational knowledge discovery in databases",
  booktitle =    "Proceedings of the 6th International Workshop on
                 Inductive Logic Programming",
  editor =       "S. Muggleton",
  publisher =    "Stockholm University, Royal Institute of Technology",
  pages =        "1--13",
  year =         "1996",
}

Discovery, Confirmation and Incorporation of Causal Relationships from a Large Time-Oriented Clinical Database: The RX Project, Robert L. Blum
@article{blum:confirmation-incorporation:82,
  author  = {Robert L. Blum},
  title   = {Discovery, Confirmation and Incorporation of Causal
             Relationships from a Large Time-Oriented Clinical
             Database: The {RX} Project},
  journal = {Computers and Biomedical Research},
  year    = {1982},
  volume  = {15},
  pages   = {164--187},
}

Discovery and Representation of Causal Relationships from a Large Time-Oriented Clinical Database: The RX Project, Robert L. Blum
@Book{blum:representation-causal:82,
  author =       "Robert L. Blum",
  title =        "Discovery and Representation of Causal Relationships
                 from a Large Time-Oriented Clinical Database: The {RX}
                 Project",
  year =         "1982",
  publisher =    "Springer-Verlag",
  series =       "Lecture Notes in Medical Informatics",
  volume =       "19",
}

Occam's Razor, Anselm Blumer and Andrzej Ehrenfeucht and David Haussler and Manfred K. Warmuth
@Article{blumer.ea:occams-razor:87,
  author =       "Anselm Blumer and Andrzej Ehrenfeucht and David
                 Haussler and Manfred K. Warmuth",
  title =        "Occam's Razor",
  journal =      "Information Processing Letters",
  volume =       "24",
  pages =        "377--380",
  year =         "1987",
}

Process-Based Database Support for the Early Indicator Method, Christoph Breitner and J\"org Schl\"osser and R\"udiger Wirth
@inproceedings{breitner.ea:process-based-database:97,
  author   = {Christoph Breitner and J{\"{o}}rg Schl{\"{o}}sser and
              R{\"{u}}diger Wirth},
  title    = {Process-Based Database Support for the Early Indicator
              Method},
  crossref = {heckerman.ea:proceedings-third:97},
  pages    = {131},
}

SAMIA: a bottom-up learning method using a simulated annealing algorithm, Pierre Br\'ezellec and Henri Soldano
@InProceedings{brezellec.ea:samia-bottom-up:93,
  author =       "Pierre Br{\'e}zellec and Henri Soldano",
  title =        "{SAMIA}: a bottom-up learning method using a simulated
                 annealing algorithm",
  booktitle =    "Proceedings of the European Conference on Machine
                 Learning",
  series =       "Lecture Notes in Artificial Intelligence",
  pages =        "297--309",
  publisher =    "Springer-Verlag",
  year =         "1993",
}

Direct Access of an ILP Algorithm to a Database Management System, P. Brockhausen and K. Morik
@InProceedings{brockhausen.ea:direct-access:96,
  author =       "P. Brockhausen and K. Morik",
  title =        "Direct Access of an {ILP} Algorithm to a Database
                 Management System",
  booktitle =    "Proceedings of the MLnet Familiarization Workshop on
                 Data Mining with Inductive Logic Programming",
  pages =        "95--110",
  year =         "1996",
}

Applying classification algorithms in practice (preprint), C. E. Brodley and P. Smyth
Available as
hypertext.
@Article{brodley.ea:applying-classification:,
  author =       "C. E. Brodley and P. Smyth",
  title =        "Applying classification algorithms in practice
                 (preprint)",
  journal =      "Statistics and Computing",
  note =         "To appear",
  URL =          "http://yake.ecn.purdue.edu/~brodley/my-papers/publications.html",
}

Distributed Information Management in the National HPCC Software Exchange, Shirley Browne and Jack Dongarra and Geoffrey C. Fox and Ken Hawick and Ken Kennedy and Rick Stevens and Robert Olson and Tom Rowan
@InProceedings{browne.ea:distributed-information:95,
  author =       "Shirley Browne and Jack Dongarra and Geoffrey C. Fox
                 and Ken Hawick and Ken Kennedy and Rick Stevens and
                 Robert Olson and Tom Rowan",
  title =        "Distributed Information Management in the National
                 {HPCC} Software Exchange",
  booktitle =    "Proceedings of Supercomputing'95",
  publisher =    "ACM/IEEE",
  address =      "San Diego, CA",
  month =        dec,
  year =         "1995",
  keywords =     "data mining, information management, information
                 retrieval, HPCC, high performance computing, software
                 repository",
  abstract =     "Simple html document on CD.",
}

MineSet: An Integrated System for Data Mining, Cliff Brunk and James Kelly and Ron Kohavi
@InProceedings{brunk.ea:mineset-integrated:97,
  title =        "{MineSet}: An Integrated System for Data Mining",
  author =       "Cliff Brunk and James Kelly and Ron Kohavi",
  pages =        "135",
  crossref =     "heckerman.ea:proceedings-third:97",
}

A guide to the literature on learning probabilistic networks from data, W. Buntine
@Article{buntine:guide-to:96,
  author =       "W. Buntine",
  address =      "Thinkbank, 1678 Shattuck Ave, Suite 320, Berkeley, Ca,
                 94709",
  title =        "A guide to the literature on learning probabilistic
                 networks from data",
  journal =      "IEEE Transactions on Knowledge and Data Engineering",
  year =         "1996",
  volume =       "8",
  number =       "2",
  pages =        "195--210",
  abstract =     "This literature review discusses different methods
                 under the general rubric of learning Bayesian networks
                 from data, and includes some overlapping work on more
                 general probabilistic networks. Connections are drawn
                 between the statistical, neural network, and
                 uncertainty communities, and between the different
                 methodological communities, such as Bayesian,
                 description length, and classical statistics. Basic
                 concepts for learning and Bayesian networks are
                 introduced and methods are then reviewed. Methods are
                 discussed for learning parameters of a probabilistic
                 network, for learning the structure, and for learning
                 hidden variables. The presentation avoids formal
                 definitions and theorems, as these are plentiful in the
                 literature, and instead illustrates key concepts with
                 simplified examples.",
  keywords =     "EXPERT-SYSTEMS, BAYESIAN NETWORKS, GRAPHICAL MODELS,
                 INDEPENDENCE, COMPLEXITY, HIDDEN VARIABLES, LEARNING,
                 LEARNING STRUCTURE, PROBABILISTIC NETWORKS, KNOWLEDGE
                 DISCOVERY",
}

Attribute-Oriented Induction in Relational Databases, Yandong Cai and Nick Cercone and Jiawei Han
@InCollection{cai.ea:attribute-oriented-induction:91,
  editor =       "Gregory Piatetsky-Shapiro and William J. Frawley",
  booktitle =    "Knowledge Discovery in Databases",
  publisher =    "AAAI Press / The MIT Press",
  address =      "Menlo Park, California",
  edition =      "First",
  year =         "1991",
  author =       "Yandong Cai and Nick Cercone and Jiawei Han",
  title =        "Attribute-Oriented Induction in Relational Databases",
  pages =        "213--228",
}

An overview of machine learning, Jaime G. Carbonell and Ryszard S. Michalski and Tom M. Mitchell
@incollection{carbonell.ea:overview-machine:83,
  crossref = {michalski.ea:machine-learning:83},
  author   = {Jaime G. Carbonell and Ryszard S. Michalski and Tom M.
              Mitchell},
  title    = {An overview of machine learning},
  pages    = {3--24},
}

Assessing Credit Card Applications Using Machine Learning, Chris Carter and Jason Catlett
@Article{carter.ea:assessing-credit:87,
  author =       "Chris Carter and Jason Catlett",
  title =        "Assessing Credit Card Applications Using Machine
                 Learning",
  journal =      "IEEE Expert",
  pages =        "71--79",
  volume =       "2",
  number =       "3",
  year =         "1987",
  note =         "Fall 1987 issue",
}

A fast, online generalization algorithm for knowledge discovery, C. L. Carter and H. J. Hamilton
@Article{carter.ea:fast-online:95,
  author =       "C. L. Carter and H. J. Hamilton",
  address =      "Univ Regina, Dept Comp Sci, Regina, Sk S4S 0A2,
                 Canada",
  title =        "A fast, online generalization algorithm for knowledge
                 discovery",
  journal =      "Applied Mathematics Letters",
  year =         "1995",
  volume =       "8",
  number =       "2",
  pages =        "5--11",
  abstract =     "We present an O(n) algorithm for generalizing a
                 database relation using concept hierarchies, where n is
                 the number of tuples in the input relation. The
                 algorithm is based on a variant of Han et al.'s
                 attribute-oriented O(n log n) algorithm. Our algorithm
                 is an on-line algorithm; fast performance is achieved
                 because after encountering a tuple and generalizing it,
                 the location of the appropriate counter to increment is
                 calculated instead of searched for.",
  keywords =     "KNOWLEDGE DISCOVERY, DATA MINING, DATABASES, CONCEPT
                 HIERARCHIES, GENERALIZATION",
}

Megainduction: machine learning on very large databases, Jason Catlett
Available as
hypertext.
@phdthesis{catlett:megainduction-machine:91,
  author = {Jason Catlett},
  title  = {Megainduction: machine learning on very large
            databases},
  year   = {1991},
  URL    = {http://www.research.att.com/orgs/ssr/people/catlett/phd.html},
}

IEEE Transactions on Knowledge and Data Engineering Special issue on Learning and Discovery in Databases, N. Cercone and M. Tsuchiya (guest editors) (Eds)
@Article{cercone.ea:ieee-transactions:93,
  key =          "cercone.ea:ieee-transactions:93",
  title =        "{IEEE} Transactions on Knowledge and Data Engineering
                 Special issue on Learning and Discovery in Databases",
  journal =      "IEEE Transactions on Knowledge and Data Engineering",
  year =         "1993",
  volume =       "5",
  number =       "6",
  month =        dec,
  editor =       "N. Cercone and M. Tsuchiya",
  note =         "Special issue on Learning and Discovery in Databases.
                 Guest editors: N. Cercone and M. Tsuchiya",
}

Proposal and Empirical Comparison of a Parallelizable Distance-Based Discretization Method, Jes\'us Cerquides and Ramon L\'opez de M\`antaras
@InProceedings{cerquides.ea:proposal-empirical:97,
  title =        "Proposal and Empirical Comparison of a Parallelizable
                 Distance-Based Discretization Method",
  author =       "Jes{\'u}s Cerquides and L{\'o}pez de M{\`a}ntaras,
                 Ramon",
  pages =        "139",
  crossref =     "heckerman.ea:proceedings-third:97",
}

Experiments in Multistrategy Learning by Meta-Learning, Philip K. Chan and Salvatore J. Stolfo
@InProceedings{chan.ea:experiments-multistrategy:93,
  author =       "Philip K. Chan and Salvatore J. Stolfo",
  title =        "Experiments in Multistrategy Learning by
                 Meta-Learning",
  booktitle =    "Proceedings of the Second International Conference on
                 Information and Knowledge Management",
  pages =        "314--323",
  address =      "Washington, DC",
  year =         "1993",
}

Sharing Learned Models among Remote Database Partitions by Local Meta-Learning, Philip K. Chan and Salvatore J. Stolfo
@inproceedings{chan.ea:sharing-learned:96,
  author   = {Philip K. Chan and Salvatore J. Stolfo},
  title    = {Sharing Learned Models among Remote Database
              Partitions by Local Meta-Learning},
  crossref = {simoudis.ea:proceedings-second:96},
  pages    = {2},
}

Model uncertainty, data mining and statistical-inference, C. Chatfield
@Article{chatfield:model-uncertainty:95,
  author =       "C. Chatfield",
  address =      "Univ Bath, Sch Math Sci, Bath Ba2 7Ay, Avon, England",
  title =        "Model uncertainty, data mining and
                 statistical-inference",
  journal =      "Journal of the Royal Statistical Society. Series A
                 (Statistics in Society)",
  year =         "1995",
  volume =       "158",
  number =       "3",
  pages =        "419--466",
  abstract =     "This paper takes a broad, pragmatic view of
                 statistical inference to include all aspects of model
                 formulation. The estimation of model parameters
                 traditionally assumes that a model has a prespecified
                 known form and takes no account of possible uncertainty
                 regarding the model structure. This implicitly assumes
                 the existence of a 'true' model, which many would
                 regard as a fiction. In practice model uncertainty is a
                 fact of life and likely to be more serious than other
                 sources of uncertainty which have received far more
                 attention from statisticians. This is true whether the
                 model is specified on subject-matter grounds or, as is
                 increasingly the case, when a model is formulated,
                 fitted and checked on the same data set in an
                 iterative, interactive way. Modern computing power
                 allows a large number of models to be considered and
                 data-dependent specification searches have become the
                 norm in many areas of statistics. The term data mining
                 may be used in this context when the analyst goes to
                 great lengths to obtain a good fit. This paper reviews
                 the effects of model uncertainty, such as too narrow
                 prediction intervals, and the non-trivial biases in
                 parameter estimates which can follow data-based
                 modelling. Ways of assessing and overcoming the effects
                 of model uncertainty are discussed, including the use
                 of simulation and resampling methods, a Bayesian model
                 averaging approach and collecting additional data
                 wherever possible. Perhaps the main aim of the paper is
                 to ensure that statisticians are aware of the problems
                 and start addressing the issues even if there is no
                 simple, general theoretical fix.",
  keywords =     "MOVING AVERAGE MODELS, BOOTSTRAP, VALIDATION,
                 PREDICTION, COMPLEXITY, SELECTION, CHOICE,
                 AUTOREGRESSIVE MODEL, BAYESIAN MODEL AVERAGING, DATA
                 MINING, FORECASTING, MODEL BUILDING, RESAMPLING,
                 STATISTICAL INFERENCE, SUBSET SELECTION",
}

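Large Scale Data Mining: Challenges and Responses, Jaturon Chattratichat and John Darlington and Moustafa Ghanem and Harald H\"uning and Yike Guo and Martin K\"ohler and Janjao Sutiwaraphun and Hing Wing To and Dan Yang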
@InProceedings{chattratichat.ea:large-scale:97,
  title =        "Large Scale Data Mining: Challenges and Responses",
  author =       "Jaturon Chattratichat and John Darlington and Moustafa
                 Ghanem and Harald H{\"{u}}ning and Yike Guo and Martin
                 K{\"{o}}hler and Janjao Sutiwaraphun and Hing Wing To
                 and Dan Yang",
  pages =        "143",
  crossref =     "heckerman.ea:proceedings-third:97",
}

Bayesian Classification (AUTOCLASS): Theory and Results, P. Cheeseman and J. Stutz
@InCollection{cheeseman.ea:bayesian-classification:95,
  author =       "P. Cheeseman and J. Stutz",
  title =        "Bayesian Classification ({AUTOCLASS}): Theory and
                 Results",
  booktitle =    "Advances in Knowledge Discovery and Data Mining",
  editor =       "U. M. Fayyad and G. Piatetsky-Shapiro and P. Smyth and
                 R. Uthurusamy",
  year =         "1995",
}

Efficient Data Mining for Path Traversal Patterns in Distributed Systems, M. S. Chen and J. S. Park and P. S. Yu
@InProceedings{chen.ea:efficient-path:96,
  author =       "M. S. Chen and J. S. Park and P. S. Yu",
  title =        "Efficient Data Mining for Path Traversal Patterns in
                 Distributed Systems",
  booktitle =    "16th International Conference on Distributed Computing
                 Systems (16th ICDCS'96)",
  pages =        "385--393",
  publisher =    "IEEE",
  address =      "Hong Kong",
  month =        may,
  year =         "1996",
  keywords =     "Distributed Objects",
  note =         "IBM T. J. Watson Research Center, USA",
  internal-note = "NOTE(review): the page range was originally recorded
                 with a trailing question mark, i.e. the end page is
                 unverified; confirm against the proceedings",
}

Data mining: an overview from a database perspective, Ming-Syan Chen and Jiawei Han and Philip S. Yu
@Article{chen.ea:overview-database:96,
  author =       "Ming-Syan Chen and Jiawei Han and Philip S. Yu",
  address =      "Natl Taiwan Univ, Dept Elect Engn, Taipei 10764,
                 Taiwan Simon Fraser Univ, Sch Comp Sci, Burnaby, Bc V5A
                 1S6, Canada Ibm Corp, Thomas J Watson Res Ctr, Yorktown
                 Hts, Ny, 10598",
  title =        "Data mining: an overview from a database perspective",
  journal =      "IEEE Transactions on Knowledge and Data Engineering",
  year =         "1996",
  month =        dec,
  volume =       "8",
  number =       "6",
  pages =        "866--883",
  abstract =     "Mining information and knowledge from large databases
                 has been recognized by many researchers as a key
                 research topic in database systems and machine
                 learning, and by many industrial companies as an
                 important area with an opportunity of major revenues.
                 Researchers in many different fields have shown great
                 interest in data mining. Several emerging applications
                 in information providing services, such as data
                 warehousing and on-line services over the Internet,
                 also call for various data mining techniques to better
                 understand user behavior, to improve the service
                 provided, and to increase the business opportunities.
                 In response to such a demand, this article is to
                 provide a survey, from a database researcher's point of
                 view, on the data mining techniques developed recently.
                 A classification of the available data mining
                 techniques is provided, and a comparative study of such
                 techniques is presented.",
  keywords =     "data mining, knowledge discovery, association rules,
                 classification, data clustering, pattern matching
                 algorithms, data generalization and characterization,
                 data cubes, multiple-dimensional databases",
}

A parallel computing approach to creating engineering concept spaces for semantic retrieval - the illinois digital library initiative project, H. C. Chen and B. Schatz and T. Ng and J. Martinez and A. Kirchhoff and C. T. Lin
@Article{chen.ea:parallel-computing:96,
  author =       "H. C. Chen and B. Schatz and T. Ng and J. Martinez and
                 A. Kirchhoff and C. T. Lin",
  address =      "Univ Arizona, Karl Eller Grad Sch Management, Mis
                 Dept, Mcclelland Hall, Tucson, Az, 85721 Univ Illinois,
                 Natl Ctr Supercomp Applicat, Beckman Inst, Urbana, Il,
                 61801 Univ Arizona, Sci \& Engn Lib, Tucson, Az, 85712
                 Univ Arizona, Dept Lib \& Informat Studies, Tucson, Az,
                 85712",
  title =        "A parallel computing approach to creating engineering
                 concept spaces for semantic retrieval - the {Illinois}
                 {Digital} {Library} {Initiative} project",
  journal =      "IEEE Transactions on Pattern Analysis and Machine
                 Intelligence",
  year =         "1996",
  volume =       "18",
  number =       "8",
  pages =        "771--782",
  abstract =     "This research presents preliminary results generated
                 from the semantic retrieval research component of the
                 Illinois Digital Library Initiative (DLI) project.
                 Using a variation of the automatic thesaurus generation
                 techniques, to which we refer as the concept space
                 approach, we aimed to create graphs of domain-specific
                 concepts (terms) and their weighted co-occurrence
                 relationships for all major engineering domains.
                 Merging these concept spaces and providing traversal
                 paths across different concept spaces could potentially
                 help alleviate the vocabulary (difference) problem
                 evident in large-scale information retrieval. We have
                 experimented previously with such a technique for a
                 smaller molecular biology domain (Worm Community
                 System, with 10+ MBs of document collection) with
                 encouraging results. In order to address the
                 scalability issue related to large-scale information
                 retrieval and analysis for the current Illinois DLI
                 project, we recently conducted experiments using the
                 concept space approach on parallel supercomputers. Our
                 test collection included 2+ GBs of computer science and
                 electrical engineering abstracts extracted from the
                 INSPEC database. The concept space approach called for
                 extensive textual and statistical analysis (a form of
                 knowledge discovery) based on automatic indexing and
                 cooccurrence analysis algorithms, both previously
                 tested in the biology domain. Initial testing results
                 using a 512-node CM-5 and a 16-processor SGI Power
                 Challenge were promising. Power Challenge was later
                 selected to create a comprehensive computer engineering
                 concept space of about 270,000 terms and 4,000,000+
                 links using 24.5 hours of CPU time. Our system
                 evaluation involving 12 knowledgeable subjects revealed
                 that the automatically-created computer engineering
                 concept space generated significantly higher concept
                 recall than the human-generated INSPEC computer
                 engineering thesaurus. However, the INSPEC was more
                 precise than the automatic concept space. Our current
                 work mainly involves creating concept spaces for other
                 major engineering domains and developing robust graph
                 matching and traversal algorithms for cross-domain,
                 concept-based retrieval. Future work also will include
                 generating individualized concept spaces for assisting
                 user-specific concept-based information retrieval.",
  keywords =     "INFORMATION-RETRIEVAL, DOCUMENT-RETRIEVAL, CONNECTION
                 MACHINE, NEURAL NETWORKS, SYSTEMS, SEARCH, PERFORMANCE,
                 DATABASES, DESIGN, MODEL, SEMANTIC RETRIEVAL, CONCEPT
                 SPACE, CONCEPT ASSOCIATION, PARALLEL COMPUTING, DIGITAL
                 LIBRARY",
}

Semantics-Based Information Management and Retrieval: A Knowledge Discovery Approach, H. Chen and K. Lynch
@Article{chen.ea:semantics-based-information:92,
  author =       "H. Chen and K. Lynch",
  title =        "Semantics-Based Information Management and Retrieval:
                 {A} Knowledge Discovery Approach",
  journal =      "IEEE Transactions on Systems, Man, and Cybernetics",
  publisher =    "IEEE",
  year =         "1992",
  note =         "Forthcoming",
  abstract =     "We report results of a study that involved the
                 creation of knowledge bases from large, operational
                 textual databases. Two East-bloc computing knowledge
                 bases, both based on semantic network structure, were
                 created automatically using two statistical algorithms.
                 With the help of four East-bloc computing experts, we
                 evaluated the two knowledge bases in detail in a
                 concept-association experiment based on recall and
                 recognition tests. In our experiment, one of the
                 knowledge bases that exhibited the asymmetric link
                 property out-performed all four experts in recalling
                 relevant concepts in East-bloc computing. The knowledge
                 base, which contained about 20,000 concepts (nodes) and
                 280,000 weighted relationships (links), was
                 incorporated as a thesaurus-like component into an
                 intelligent retrieval system. The system allowed users
                 to perform semantics-based information management and
                 information retrieval via interactive, conceptual
                 relevance feedback. Current research efforts include
                 development of a meta knowledge base and design of
                 semantic network and neural network based inferencing
                 algorithms.",
}

Growing Simpler Decision Trees to Facilitate Knowledge Discovery, Kevin J. Cherkauer and Jude W. Shavlik
% Venue details are inherited from the cross-referenced proceedings entry.
@InProceedings{cherkauer.ea:growing-simpler:96,
  author   = {Kevin J. Cherkauer and Jude W. Shavlik},
  title    = {Growing Simpler Decision Trees to Facilitate Knowledge Discovery},
  pages    = {315},
  crossref = {simoudis.ea:proceedings-second:96},
}

Efficient mining of association rules in distributed databases, D. W. Cheung and V. T. Ng and A. W. Fu and Y. J. Fu
@Article{cheung.ea:efficient-association:96,
  author =       "D. W. Cheung and V. T. Ng and A. W. Fu and Y. J. Fu",
  address =      "Univ Hong Kong, Dept Comp Sci, Hong Kong, Hong Kong
                 Hong Kong Polytech Univ, Dept Comp, Hong Kong, Hong
                 Kong Chinese Univ Hong Kong, Dept Comp Sci \& Engn,
                 Hong Kong, Hong Kong Simon Fraser Univ, Sch Comp Sci,
                 Burnaby, Bc V5A 1S6, Canada",
  title =        "Efficient mining of association rules in distributed
                 databases",
  journal =      "IEEE Transactions on Knowledge and Data Engineering",
  year =         "1996",
  month =        dec,
  volume =       "8",
  number =       "6",
  pages =        "911--922",
  abstract =     "Many sequential algorithms have been proposed for
                 mining of association rules. However, very little work
                 has been done in mining association rules in
                 distributed databases. A direct application of
                 sequential algorithms to distributed databases is not
                 effective, because it requires a large amount of
                 communication overhead. In this study, an efficient
                 algorithm, DMA, is proposed. It generates a small
                 number of candidate sets and requires only O(n)
                 messages for support count exchange for each candidate
                 set, where n is the number of sites in a distributed
                 database. The algorithm has been implemented on an
                 experimental test bed and its performance is studied.
                 The results show that DMA has superior performance when
                 comparing with the direct application of a popular
                 sequential algorithm in distributed databases.",
  keywords =     "data mining, knowledge discovery, distributed data
                 mining, association rule, distributed database,
                 distributed algorithm, partitioned database",
}

Maintenance of Discovered Knowledge: A Case in Multi-Level Association Rules, David W. Cheung and Vincent T. Ng and Benjamin W. Tam
% Venue details are inherited from the cross-referenced proceedings entry.
@InProceedings{cheung.ea:maintenance-discovered:96,
  author   = {David W. Cheung and Vincent T. Ng and Benjamin W. Tam},
  title    = {Maintenance of Discovered Knowledge: {A} Case in Multi-Level Association Rules},
  pages    = {307},
  crossref = {simoudis.ea:proceedings-second:96},
}

Knowledge discovery in databases: a rule-based attribute-oriented approach, D. W.-l. Cheung and A. W.-C. Fu and J. Han
@InProceedings{cheung.ea:rule-based-attribute-oriented:94a,
  author       = {D. W.-l. Cheung and A. W.-C. Fu and J. Han},
  title        = {Knowledge discovery in databases: a rule-based attribute-oriented approach},
  booktitle    = {Proceedings of the 8th International Symposium on Methodologies for Intelligent Systems},
  editor       = {Zbigniew W. Ra{\'s} and Maria Zemankova},
  series       = {LNAI},
  volume       = {869},
  pages        = {164--173},
  publisher    = {Springer},
  address      = {Berlin},
  month        = oct,
  year         = {1994},
  key_modifier = {a},
}

Knowledge discovery in databases: a rule-based attribute-oriented approach, D. W.-l. Cheung and A. W.-C. Fu and J. Han
% Journal-style record of the same paper as
% cheung.ea:rule-based-attribute-oriented:94a (same title, volume 869,
% first page 164); the page range and author initials follow that entry.
@Article{cheung.ea:rule-based-attribute-oriented:94b,
  key_modifier = "b",
  author =       "D. W.-l. Cheung and A. W.-C. Fu and J. Han",
  title =        "Knowledge discovery in databases: a rule-based
                 attribute-oriented approach",
  journal =      "Lecture Notes in Computer Science",
  volume =       "869",
  pages =        "164--173",
  year =         "1994",
  ISSN =         "0302-9743",
}

Using Artificial Intelligence Planning to Automate Science Data Analysis for Large Image Databases, Steve Chien and Forest Fisher and Helen Mortensen and Edisanter Lo and Ronald Greeley
@InProceedings{chien.ea:using-artificial:97,
  title =        "Using Artificial Intelligence Planning to Automate
                 Science Data Analysis for Large Image Databases",
  author =       "Steve Chien and Forest Fisher and Helen Mortensen
                 and Edisanter Lo and Ronald Greeley",
  pages =        "147",
  crossref =     "heckerman.ea:proceedings-third:97",
}

A framework for query optimization to support data mining, R. Sunil Choenni and Arno P. J. M. Siebes
Available as
compressed postscript.
% CWI technical report; the report number is taken from the annote field
% and matches the file name in the URL.
@TechReport{choenni.ea:framework-query:96,
  author =       "R. Sunil Choenni and Arno P. J. M. Siebes",
  title =        "A framework for query optimization to support data
                 mining",
  institution =  "Centrum voor Wiskunde en Informatica (CWI)",
  number =       "CS-R9637",
  ISSN =         "0169-118X",
  month =        oct # " 31",
  year =         "1996",
  keywords =     "data mining systems, search strategies, query
                 optimization, physical database design.",
  URL =          "ftp://ftp.cwi.nl/pub/CWIreports/AA/CS-R9637.ps.Z",
  abstract =     "In order to extract knowledge from databases, data
                 mining algorithms heavily query the databases.
                 Inefficient processing of these queries will inevitably
                 have its impact on the performance of these algorithms,
                 making them less valuable. In this paper, we describe
                 an optimization framework for an efficient processing
                 of queries generated by different data mining
                 algorithms. In this framework, we show how to take
                 advantage of the physical organization of the database,
                 the operators and the control structures used in an
                 algorithm. Finally, we discuss how our framework fits
                 into conventional query optimization frameworks.",
  note =         "AA (Department of Algorithmics and Architecture)",
  annote =       "originally contained the following fields and values -
                 booktitle, 105 note, CS-R9637",
}

On multi-query optimization, R. (Sunil) Choenni and Martin L. Kersten and Johan F. P. van den Akker and Amani Saad
Available as
compressed postscript.
% CWI technical report; the report number is taken from the annote field
% and matches the file name in the URL.
@TechReport{choenni.ea:on-multi-query:96,
  author =       "R. (Sunil) Choenni and Martin L. Kersten and Johan F.
                 P. van den Akker and Amani Saad",
  title =        "On multi-query optimization",
  pages =        "19",
  institution =  "Centrum voor Wiskunde en Informatica (CWI)",
  number =       "CS-R9638",
  ISSN =         "0169-118X",
  month =        oct # " 31",
  year =         "1996",
  keywords =     "multi-query optimization, architectures, exploiting
                 interdependencies between queries.",
  URL =          "ftp://ftp.cwi.nl/pub/CWIreports/AA/CS-R9638.ps.Z",
  abstract =     "In some key database applications, such as data
                 mining, a sequence of interdependent queries may be
                 posed simultaneously to the DBMS. The optimization of
                 such sequences is called multi-query optimization, and
                 it attempts to exploit these dependencies in the
                 derivation of a query evaluation plan (qep). Although
                 it has been observed and demonstrated by several
                 researchers that exploitation of dependencies speed up
                 the query processing, limited research has been
                 reported how to benefit from multi-query optimization,
                 taking the capabilities of existing query optimizers
                 into account. This is exactly the topic of this paper.
                 Since existing optimizers are able to optimize queries
                 in which a restricted number of basic operations
                 appears, e.g., number of joins is limited to 10, and
                 the optimization of a query is relatively expensive, we
                 attempt to profit from multi query optimization under
                 the condition that queries are passed only once and
                 separately to the optimizer. We propose a two-step
                 optimization procedure. In the first step, we
                 determine, on the basis of the dependencies between
                 queries, in which order they should be specified and
                 what results should be stored. In the second step, each
                 query is passed separately to an optimizer.",
  note =         "AA (Department of Algorithmics and Architecture)",
  annote =       "originally contained the following fields and values -
                 note, CS-R9638, booktitle, 143",
}

Using a Hybrid Neural/Expert System for Data Base Mining in Market Survey Data, Victor Ciesielski and Gregory Palstra
% Venue details are inherited from the cross-referenced proceedings entry.
@InProceedings{ciesielski.ea:using-hybrid:96,
  author   = {Victor Ciesielski and Gregory Palstra},
  title    = {Using a Hybrid Neural/Expert System for Data Base Mining in Market Survey Data},
  pages    = {38},
  crossref = {simoudis.ea:proceedings-second:96},
}

Classification Problem Solving, W. J. Clancey
@InProceedings{clancey:classification-problem:84,
  author    = {W. J. Clancey},
  title     = {Classification Problem Solving},
  booktitle = {Proceedings of the National Conference on Artificial Intelligence},
  editor    = {R. J. Brachman},
  pages     = {49--55},
  publisher = {William Kaufmann},
  address   = {Austin, Texas},
  month     = aug,
  year      = {1984},
}

The CN2 Induction Algorithm, Peter Clark and Tim Niblett
@Article{clark.ea:cn2-induction:89,
  author  = {Peter Clark and Tim Niblett},
  title   = {The {CN2} Induction Algorithm},
  journal = {Machine Learning},
  volume  = {3},
  pages   = {261--283},
  year    = {1989},
}

Knowledge Representation in Machine Learning, Peter Clark
@InCollection{clark:representation-machine:89,
  author    = {Peter Clark},
  title     = {Knowledge Representation in Machine Learning},
  booktitle = {Machine and Human Learning, advances in European Research},
  editor    = {Yves Kodratoff and Alan Hutchinson},
  pages     = {35--49},
  publisher = {Michael Horwood},
  address   = {London},
  year      = {1989},
}

Security and Privacy Implications of Data Mining, Chris Clifton and Don Marks
Available as
postscript.
@InProceedings{clifton.ea:security-privacy:96,
  author        = {Chris Clifton and Don Marks},
  title         = {Security and Privacy Implications of Data Mining},
  booktitle     = {Workshop on Data Mining and Knowledge Discovery},
  organization  = {ACM SIGMOD},
  publisher     = {University of British Columbia Department of Computer Science},
  address       = {Montreal, Canada},
  number        = {96-08},
  pages         = {15--19},
  month         = jun # " 2",
  year          = {1996},
  URL           = {ftp://ftp.fas.sfu.ca/pub/cs/han/dmkd96/p15.ps},
  contributedby = {clifton(at)mitre.org},
}

Overfitting Explained, P. R. Cohen and D. Jensen
Available as
postscript.
@InProceedings{cohen.ea:overfitting-explained:97,
  author =       "P. R. Cohen and D. Jensen",
  title =        "Overfitting Explained",
  booktitle =    "Preliminary Papers of the Sixth International Workshop
                 on Artificial Intelligence and Statistics",
  year =         "1997",
  month =        jan,
  pages =        "115--122",
  abstract =     "Overfitting arises when model components are evaluated
                 against the wrong reference distribution. Most modeling
                 algorithms iteratively find the best of several
                 components and then test whether this component is good
                 enough to add to the model. We show that for
                 independently distributed random variables, the
                 reference distribution for any one variable
                 underestimates the reference distribution for the
                 highest-valued variable; thus variate values will
                 appear significant when they are not, and model
                 components will be added when they should not be added.
                 We relate this problem to the well-known statistical
                 theory of multiple comparisons or simultaneous
                 inference.",
  abstract_url = "http://eksl-www.cs.umass.edu/~jensen/papers/ais97b.html",
  URL =          "http://www-eksl.cs.umass.edu/papers/cohen-ais96b.ps",
}

The Role of Knowledge Mining in the Development and Evolution of New Applications, David Cohen and L. Berke and P. Bloom and D. Cohen and D. Tsur
@InProceedings{cohen.ea:role-development:94,
  author    = {David Cohen and L. Berke and P. Bloom and D. Cohen and D. Tsur},
  title     = {The Role of Knowledge Mining in the Development and Evolution of New Applications},
  booktitle = {Proceedings of the 10th International Conference on Data Engineering},
  editor    = {Ahmed K. Elmagarmid and Erich Neuhold},
  pages     = {166--167},
  publisher = {IEEE Computer Society Press},
  address   = {Houston, TX},
  month     = feb,
  year      = {1994},
}

Knowledge in context: a strategy for expert system maintenance, P. Compton and R. Jansen
@InProceedings{compton.ea:context-strategy:88,
  author    = {P. Compton and R. Jansen},
  title     = {Knowledge in context: a strategy for expert system maintenance},
  booktitle = {Proceedings of the 2nd {A}ustralian Joint Artificial Intelligence conference},
  series    = {Lecture Notes in Artificial Intelligence},
  volume    = {406},
  pages     = {292--306},
  publisher = {Springer},
  address   = {Adelaide},
  year      = {1988},
}

Knowledge discovery in molecular databases, D. Conklin and S. Fortier and J. Glasgow
@Article{conklin.ea:molecular:93,
  author =       "D. Conklin and S. Fortier and J. Glasgow",
  address =      "Queens Univ, Dept Comp \& Informat Sci, Kingston K7L
                 3N6, On, Canada Queens Univ, Dept Chem, Kingston K7L
                 3N6, On, Canada",
  title =        "Knowledge discovery in molecular databases",
  journal =      "IEEE Transactions on Knowledge and Data Engineering",
  year =         "1993",
  volume =       "5",
  number =       "6",
  pages =        "985--987",
  abstract =     "This paper describes an approach to knowledge
                 discovery in complex molecular databases. The machine
                 learning paradigm used is structured concept formation,
                 in which objects described in terms of components and
                 their interrelationships are clustered and organized in
                 a knowledge base. Symbolic images are used to represent
                 classes of structured objects. A discovered molecular
                 knowledge base is successfully used in the
                 interpretation of a high resolution electron density
                 map.",
  keywords =     "PROTEIN, CASE-BASED REASONING, CHEMICAL INFORMATION
                 RETRIEVAL, CONCEPTUAL CLUSTERING, DESCRIPTION LOGICS,
                 INDEXING, RELATIONAL MODELS, SCENE ANALYSIS, SPATIAL
                 CONCEPTS, SPATIAL REASONING, STRUCTURED CONCEPT
                 FORMATION",
}

Machine discovery of protein motifs, D. Conklin
@Article{conklin:machine-protein:95,
  author =       "D. Conklin",
  address =      "Zymogenet Inc, 1201 Eastlake Ave E, Seattle, Wa,
                 98102",
  title =        "Machine discovery of protein motifs",
  journal =      "Machine Learning",
  year =         "1995",
  volume =       "21",
  number =       "1--2",
  pages =        "125--150",
  abstract =     "The investigation of relations between protein
                 tertiary structure and amino acid sequence is a topic
                 of tremendous importance in molecular biology. The
                 automated discovery of recurrent patterns of structure
                 and sequence is an essential part of this
                 investigation. These patterns, known as protein motifs,
                 are abstractions of fragments drawn from proteins of
                 known sequence and tertiary structure. This paper has
                 two objectives. The first is to introduce and define
                 protein motifs, and provide a survey of previous
                 research on protein motif discovery. The second is to
                 present and apply a novel approach to protein motif
                 representation and discovery, which is based on a
                 spatial description logic and the symbolic machine
                 learning paradigm of structured concept formation. A
                 large database of protein fragments is processed using
                 this approach, and several interesting and significant
                 protein motifs are discovered.",
  keywords =     "SECONDARY STRUCTURE, SEQUENCE PATTERNS, PREDICTIVE
                 POWER, IDENTIFICATION, RECOGNITION, GENERATION,
                 DEFINITION, TEMPLATES, SETS, PROTEIN TERTIARY
                 STRUCTURE, MACHINE DISCOVERY, RELATIONAL LEARNING,
                 KNOWLEDGE REPRESENTATION, DESCRIPTION LOGICS,
                 INFORMATION RETRIEVAL, KNOWLEDGE DISCOVERY IN
                 DATABASES",
}

Scalable discovery of informative structural concepts using domain knowledge, D. J. Cook and L. B. Holder and S. Djoko
@Article{cook.ea:scalable-informative:96,
  author =       "D. J. Cook and L. B. Holder and S. Djoko",
  address =      "Univ Texas, Dept Comp Sci \& Engn, Arlington, Tx,
                 76019 Bell No Res, Sci Staff, Richardson, Tx",
  title =        "Scalable discovery of informative structural concepts
                 using domain knowledge",
  journal =      "IEEE Expert-Intelligent Systems \& Their
                 Applications",
  year =         "1996",
  volume =       "11",
  number =       "5",
  pages =        "59--68",
}

Substructure Discovery Using Minimum Description Length and Background Knowledge, D. J. Cook and L. B. Holder
Available as
postscript.
@Article{cook.ea:substructure-using:94,
  author =       "D. J. Cook and L. B. Holder",
  title =        "Substructure Discovery Using Minimum Description
                 Length and Background Knowledge",
  journal =      "Journal of Artificial Intelligence Research",
  year =         "1994",
  volume =       "1",
  pages =        "231--255",
  abstract =     "The ability to identify interesting and repetitive
                 substructures is an essential component to discovering
                 knowledge in structural data. We describe a new version
                 of our SUBDUE substructure discovery system based on
                 the minimum description length principle. The SUBDUE
                 system discovers substructures that compress the
                 original data and represent structural concepts in the
                 data. By replacing previously-discovered substructures
                 in the data, multiple passes of SUBDUE produce a
                 hierarchical description of the structural regularities
                 in the data. SUBDUE uses a computationally-bounded
                 inexact graph match that identifies similar, but not
                 identical, instances of a substructure and finds an
                 approximate measure of closeness of two substructures
                 when under computational constraints. In addition to
                 the minimum description length principle, other
                 background knowledge can be used by SUBDUE to guide the
                 search towards more appropriate substructures.
                 Experiments in a variety of domains demonstrate
                 SUBDUE's ability to find substructures capable of
                 compressing the original data and to discover
                 structural concepts important to the domain.",
  annote =       "The SUBDUE system discovers substructures that
                 compress the original data and represent structural
                 concepts in the data. By replacing
                 previously-discovered substructures in the data,
                 multiple passes of SUBDUE produce a hierarchical
                 description of the structural regularities in the
                 data.",
  URL =          "gopher://P.GP.CS.CMU.EDU:70/00/volume1/cook94a.ps",
}

What has Mill to Say About Data Mining ?, Tremaine A. O. Cornish and Anthony D. Elliman
@InProceedings{cornish.ea:what-has:95,
  author =       "Tremaine A. O. Cornish and Anthony D. Elliman",
  title =        "What has {Mill} to Say About Data Mining?",
  pages =        "347--353",
  booktitle =    "Proceedings of the Eleventh Conference on Artificial
                 Intelligence for Applications",
  month =        "20--23~" # feb,
  publisher =    "IEEE Computer Society Press",
  address =      "Los Alamitos",
  year =         "1995",
}

Historical perspectives on information-science, T. A. O. Cornish
@Article{cornish:historical-perspectives:96,
  author =       "T. A. O. Cornish",
  address =      "Brunel Univ, Dept Comp Sci \& Informat Syst, Uxbridge
                 Ub8 3Ph, Middx, England",
  title =        "Historical perspectives on information-science",
  journal =      "Systems Research and Information Science",
  year =         "1996",
  volume =       "7",
  number =       "2",
  pages =        "105--116",
  abstract =     "There is a general attitude in science and
                 particularly computer science, that if something is
                 more than five years old, then we have nothing to learn
                 from it. This paper seeks first to destroy the basis of
                 this myth with reference to areas of current research
                 which are still striving to live up to visions set many
                 years ago. Secondly to look at an area of research,
                 Knowledge Discovery in Databases and demonstrate that
                 it too has a great deal to learn from the distant past,
                 which has been all but overlooked.",
  keywords =     "KNOWLEDGE DISCOVERY, SYSTEMATIC, SCIENTIFIC, DATA
                 MINING, HISTORICAL, INFORMATION, SYSTEMS",
}

Data Mining of Multi-dimensional Remotely Sensed Images, Robert F. Cromp and William J. Campbell
@InProceedings{cromp.ea:multi-dimensional-remotely:93,
  author    = {Robert F. Cromp and William J. Campbell},
  title     = {Data Mining of Multi-dimensional Remotely Sensed Images},
  booktitle = {Proceedings of the 2nd International Conference on Information and Knowledge Management},
  editor    = {Bharat Bhargava and Timothy Finin and Yelena Yesha},
  pages     = {471--480},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
  month     = nov,
  year      = {1993},
}

Knowledge Discovery in Databases: Exploiting Knowledge-Level Redescription, J. Cupit and N. Shadbolt
% Journal-style record of the same paper as
% cupit.ea:exploiting-knowledge-level:96b (same title, volume 1076,
% first page 245); the page range follows that entry.
@Article{cupit.ea:exploiting-knowledge-level:96a,
  key_modifier = "a",
  author =       "J. Cupit and N. Shadbolt",
  title =        "Knowledge Discovery in Databases: Exploiting
                 Knowledge-Level Redescription",
  journal =      "Lecture Notes in Computer Science",
  volume =       "1076",
  pages =        "245--261",
  year =         "1996",
  ISSN =         "0302-9743",
}

Knowledge Discovery in Databases: Exploiting Knowledge-Level Redescription, James Cupit and Nigel Shadbolt
@InProceedings{cupit.ea:exploiting-knowledge-level:96b,
  key_modifier = "b",
  author =       "James Cupit and Nigel Shadbolt",
  title =        "Knowledge Discovery in Databases: Exploiting
                 Knowledge-Level Redescription",
  pages =        "245--261",
  editor =       "Nigel Shadbolt and Kieron O'Hara and Guus Schreiber",
  booktitle =    "Proceedings of the Ninth European Knowledge
                 Acquisition Workshop ({EKAW}-96)",
  month =        "14--17~" # may,
  series =       "LNAI",
  volume =       "1076",
  publisher =    "Springer",
  address =      "Berlin",
  year =         "1996",
}

Mining Knowledge in Noisy Audio Data, Andrzej Czyzewski
% Venue details are inherited from the cross-referenced proceedings entry.
@InProceedings{czyzewski:noisy-audio:96,
  author   = {Andrzej Czyzewski},
  title    = {Mining Knowledge in Noisy Audio Data},
  pages    = {220},
  crossref = {simoudis.ea:proceedings-second:96},
}

Distributed learning: An agent-based approach to data-mining, Winton Davies and Peter Edwards
@InProceedings{davies.ea:distributed-learning:95,
  author    = {Winton Davies and Peter Edwards},
  title     = {Distributed learning: {A}n agent-based approach to data-mining},
  booktitle = {Working Notes of the ICML '95 Workshop on Agents that Learn from Other Agents},
  editor    = {Diana Gordon},
  address   = {Tahoe City, CA},
  year      = {1995},
}

Knowledge discovery in an infrared database, B. J. Debska and B. Guzowskaswider
@Article{debska.ea:infrared-database:97,
  author =       "B. J. Debska and B. Guzowskaswider",
  address =      "Rzeszow Univ Technol, Dept Comp Chem, 6 Powstancow
                 Warszawy Av, Pl-35041 Rzeszow, Poland",
  title =        "Knowledge discovery in an infrared database",
  journal =      "Computers \& Chemistry",
  year =         "1997",
  volume =       "21",
  number =       "1",
  pages =        "51--59",
  abstract =     "The paper describes a process of knowledge acquisition
                 in the collection of infrared spectra (infrared
                 database). In fact it is a strategy for the automated
                 generation of correlation tables, i.e. correlations
                 between specific molecular subunits (substructures,
                 chemical groups) and their absorption frequencies. The
                 data in the tables are subsequently converted
                 automatically into rules that can be used to infer the
                 existence of molecular substructures from the IR
                 spectrum of an analysed compound. Copyright (C) 1996
                 Elsevier Science Ltd",
  keywords =     "SYSTEM, STRUCTURE IDENTIFICATION, SPECTROSCOPY
                 METHODS, KNOWLEDGE DISCOVERY, RULE KNOWLEDGEBASE",
}

Technology Overview: A Report on Data Mining, K. Decker and S. Focardi
Available as
hypertext.
% NOTE(review): institution inferred from the cscs.ch host in the URL;
% confirm against the report itself.
@TechReport{decker.ea:technology-overview:94,
  URL =          "http://www.cscs.ch/Official/PubTR95.html",
  title =        "Technology Overview: {A} Report on Data Mining",
  author =       "K. Decker and S. Focardi",
  institution =  "Swiss Scientific Computing Center (CSCS)",
  month =        feb,
  year =         "1994",
}

Mining Multivariate Time-Series Sensor Data to Discover Behavior Envelopes, Dennis DeCoste
% Venue details are inherited from the cross-referenced proceedings entry.
@InProceedings{decoste:multivariate-time-series:97,
  author   = {Dennis DeCoste},
  title    = {Mining Multivariate Time-Series Sensor Data to Discover Behavior Envelopes},
  pages    = {151},
  crossref = {heckerman.ea:proceedings-third:97},
}

Data Mining --- There's gold in those hills of data, E. X. Dejesus
@Article{dejesus:theres-gold:95,
  author =       "E. X. Dejesus",
  address =      "Univ Bath, Sch Math Sci, Bath Ba2 7Ay, Avon, England",
  title =        "Data Mining --- There's gold in those hills of data",
  journal =      "Byte",
  year =         "1995",
  volume =       "20",
  number =       "10",
  pages =        "81",
}

Clausal discovery, L. Deraedt and L. Dehaspe
@Article{deraedt.ea:clausal:97,
  author =       "L. Deraedt and L. Dehaspe",
  address =      "Katholieke Univ Leuven, Dept Comp Sci, Celestijnenlaan
                 200A, B-3001 Heverlee, Belgium",
  title =        "Clausal discovery",
  journal =      "Machine Learning",
  year =         "1997",
  volume =       "26",
  number =       "2--3",
  pages =        "99--146",
  abstract =     "The clausal discovery engine CLAUDIEN is presented.
                 CLAUDIEN is an inductive logic programming engine that
                 fits in the descriptive data mining paradigm. CLAUDIEN
                 addresses characteristic induction from
                 interpretations, a task which is related to existing
                 formalisations of induction in logic. In characteristic
                 induction from interpretations, the regularities are
                 represented by clausal theories, and the data using
                 Herbrand interpretations. Because CLAUDIEN uses clausal
                 logic to represent hypotheses, the regularities induced
                 typically involve multiple relations or predicates.
                 CLAUDIEN also employs a novel declarative bias
                 mechanism to define the set of clauses that may appear
                 in a hypothesis.",
  keywords =     "inductive logic programming, knowledge discovery in
                 databases, data mining, learning, induction, semantics
                 for induction, logic of induction, parallel learning",
}

An Interactive Visualization Environment for Data Exploration, Mark Derthick and John Kolojejchick and Steven F. Roth
@InProceedings{derthick.ea:interactive-environment:97,
  title =        "An Interactive Visualization Environment for Data
                 Exploration",
  author =       "Mark Derthick and John Kolojejchick and Steven F.
                 Roth",
  pages =        "2",
  crossref =     "heckerman.ea:proceedings-third:97",
}

Abstract-Driven Pattern Discovery in Databases, V. Dhar and A. Tuzhilin
@Article{dhar.ea:abstract-driven-pattern:93,
  author  = {V. Dhar and A. Tuzhilin},
  title   = {Abstract-Driven Pattern Discovery in Databases},
  journal = {IEEE Transactions on Knowledge and Data Engineering},
  volume  = {5},
  number  = {6},
  pages   = {926--938},
  month   = dec,
  year    = {1993},
}

A comparative review of selected methods for learning from examples, Thomas G. Dietterich and Ryszard S. Michalski
@InCollection{dietterich.ea:comparative-review:83,
  author   = {Thomas G. Dietterich and Ryszard S. Michalski},
  title    = {A comparative review of selected methods for learning
              from examples},
  pages    = {41--81},
  crossref = {michalski.ea:machine-learning:83},
}

A comparison of ID3 and backpropagation for English text-to-speech mapping. (Preprint), T. G. Dietterich and H. Hild and G. Bakiri
Available as
compressed postscript.
@Article{dietterich.ea:comparison-id3:95,
  author =       "T. G. Dietterich and H. Hild and G. Bakiri",
  title =        "A comparison of {ID3} and backpropagation for {English}
                 text-to-speech mapping",
  journal =      "Machine Learning",
  volume =       "18",
  number =       "1",
  pages =        "51--80",
  year =         "1995",
  note =         "Preprint available at the given URL",
  URL =          "ftp://ftp.cs.orst.edu/users/t/tgd/papers/mlj-nettalk.ps.gz",
}

Efficient Specific-to-General Rule Induction, Pedro Domingos
@InProceedings{domingos:efficient-specific-to-general:96,
  author   = {Pedro Domingos},
  title    = {Efficient Specific-to-General Rule Induction},
  pages    = {319},
  crossref = {simoudis.ea:proceedings-second:96},
}

Linear-Time Rule Induction, Pedro Domingos
@InProceedings{domingos:linear-time-rule:96,
  author   = {Pedro Domingos},
  title    = {Linear-Time Rule Induction},
  pages    = {96},
  crossref = {simoudis.ea:proceedings-second:96},
}

Why Does Bagging Work? A Bayesian Account and its Implications, Pedro Domingos
@InProceedings{domingos:why-does:97,
  author   = {Pedro Domingos},
  title    = {Why Does Bagging Work? {A} Bayesian Account and its
              Implications},
  pages    = {155},
  crossref = {heckerman.ea:proceedings-third:97},
}

Chemistry facing the phenomena of data mining idea mining and knowledge recovery, H. Dou
@Article{dou:chemistry-facing:96,
  author =       "H. Dou",
  address =      "Univ Aix Marseille 3, Crrm, Ctr St Jerome, F-13397
                 Marseille 20, France",
  title =        "Chemistry facing the phenomena of data mining idea
                 mining and knowledge recovery",
  journal =      "Analusis",
  year =         "1996",
  volume =       "24",
  number =       "2",
  pages =        "M 8--M 12",
  keywords =     "LAW",
}

Use of artificial-intelligence techniques for the description of processes in ni/al multilayers, M. Drobnic and M. Mozetic and T. Mozetic and M. Gams
@Article{drobnic.ea:use-artificial-intelligence:96,
  author =       "M. Drobnic and M. Mozetic and T. Mozetic and M. Gams",
  address =      "Jozef Stefan Inst, Jamova 39, Ljubljana 1001, Slovenia
                 Inst Surface Engn \& Optoelect, Ljubljana 1001,
                 Slovenia High Med Coll, Ljubljana 1001, Slovenia",
  title =        "Use of artificial-intelligence techniques for the
                 description of processes in {Ni/Al} multilayers",
  journal =      "Surface \& Coatings Technology",
  year =         "1996",
  volume =       "84",
  number =       "1--3",
  pages =        "491--494",
  abstract =     "Knowledge discovery is a novel research area in the
                 field of artificial intelligence. Its aim is to
                 discover empirical laws that govern the behavior of
                 complex systems using measurements of system variables.
                 In this paper a brief description of the GOLDHORN
                 knowledge discovery system is presented. GOLDHORN
                 discovers differential equations and has features for
                 handling noisy data, including some digital filters. In
                 the present case, this method was used to describe
                 analytically atomic migration in thin layers. A
                 multilayer structure of nickel and aluminum was
                 deposited on a copper substrate using the triode
                 sputtering system and hollow cathode CVD plasma
                 deposition. The composition of the elements in the
                 deposited layers was determined by Auger electron
                 spectroscopy (AES). The structure was then annealed for
                 different times. After annealing, the samples were
                 analyzed again. The AES data were then analyzed by the
                 GOLDHORN software package in order to obtain an
                 analytical description of atomic migration as a
                 function of the relative concentration of elements in a
                 layer. The analysis shows that the rate of migration of
                 Al in Ni depends on the relative concentrations of the
                 elements. Different phases appeared to be indicated via
                 the changes in the slope of the curve. Our results show
                 that knowledge discovery is a very useful tool for
                 analyzing complex processes such as atomic migration in
                 multilayer systems.",
  keywords =     "INTERFACE, MULTILAYER STRUCTURES, KNOWLEDGE
                 DISCOVERY",
}

Fast Committee Machines for Regression and Classification, Harris Drucker
@InProceedings{drucker:fast-committee:97,
  author   = {Harris Drucker},
  title    = {Fast Committee Machines for Regression and
              Classification},
  pages    = {159},
  crossref = {heckerman.ea:proceedings-third:97},
}

Cluster analysis: a survey, Benjamin S. Duran and Patrick L. Odell
@Book{duran.ea:cluster-analysis:74,
  author =       "Benjamin S. Duran and Patrick L. Odell",
  title =        "Cluster analysis: a survey",
  year =         "1974",
  publisher =    "Springer-Verlag",
  series =       "Lecture Notes in Economics and Mathematical Systems",
  volume =       "100",
}

Discovering dynamics, S. D\v{z}eroski and L. Todorovski
@InProceedings{dzeroski.ea:discovering-dynamics:93,
  author    = {S. D\v{z}eroski and L. Todorovski},
  title     = {Discovering dynamics},
  booktitle = {Proceedings of the AAAI-93 Workshop on Knowledge
               Discovery in Databases},
  publisher = {AAAI Press},
  pages     = {125--137},
  year      = {1993},
}

Inductive logic programming and knowledge discovery in databases, S. D\v{z}eroski
@InCollection{dzeroski:inductive-logic:95,
  author    = {S. D\v{z}eroski},
  title     = {Inductive logic programming and knowledge discovery in
               databases},
  booktitle = {Advances in Knowledge Discovery and Data Mining},
  editor    = {U. Fayyad and G. Piatetsky-Shapiro and P. Smyth and R.
               Uthurusamy},
  publisher = {The MIT Press},
  pages     = {118--152},
  year      = {1995},
}

Interactive Data Visualization at AT\&T Bell Labs, Stephen G. Eick and Brian S. Johnson
Available as
bsj\_bdy.htm.
@InProceedings{eick.ea:interactive-at:95,
  author    = {Stephen G. Eick and Brian S. Johnson},
  title     = {Interactive Data Visualization at {AT}\&{T} Bell
               Labs},
  booktitle = {Proceedings of ACM CHI'95 Conference on Human Factors
               in Computing Systems},
  series    = {Demonstrations: Visualization},
  volume    = {2},
  pages     = {17--18},
  year      = {1995},
  URL       = {http://www.acm.org/sigchi/chi95/proceedings/demos/bsj\_bdy.htm},
  copyright = {(c) Copyright 1995 Association for Computing
               Machinery},
  keywords  = {Visualization, Graphic interaction, Abstract data
               visualization, Database visualization, Data mining},
  abstract  = {Visualization is a key technology for understanding
               large bodies of data. Our approach to visualizing
               abstract, non-geometric data involves a
               reduced-representation overview, multiple linked views,
               filtering and focusing techniques to reduce visual
               clutter, color, and a highly-interactive user
               interface. The reduced representations allow users to
               see the entire data set in one view while still
               providing immediate access to relevant detail and
               answers to specific questions in the linked views. We
               have developed a software infrastructure embodying our
               design principles for producing novel, high-bandwidth
               visualizations of corporate datasets. Our approach to
               abstract data visualization is one the best off-ramps
               on the information superhighway.},
}

Essay: Anne Eisenberg --- Data mining and privacy invasion on the Net, Anne Eisenberg
@Article{eisenberg:essay-anne:96,
  author  = {Anne Eisenberg},
  title   = {Essay: Anne Eisenberg --- Data mining and privacy
             invasion on the Net},
  journal = {Scientific American},
  volume  = {274},
  number  = {3},
  pages   = {120--??},
  month   = mar,
  year    = {1996},
  ISSN    = {0036-8733},
}

In Defence of C4.5 Notes on Learning One-Level Decision Trees, Tapio Elomaa
Available as
compressed postscript.
@InProceedings{elomaa:defence-c4:,
  author =       "Tapio Elomaa",
  title =        "In Defence of {C4.5}: Notes on Learning One-Level
                 Decision Trees",
  editor =       "W. Cohen and H. Hirsh",
  booktitle =    "Machine Learning: Proceedings of the Eleventh
                 International Conference",
  publisher =    "Morgan Kaufmann",
  address =      "San Francisco, CA",
  month =        jul,
  year =         "1994",
  note =         "Conference held in New Brunswick, NJ",
  URL =          "ftp://ftp.cs.helsinki.fi/pub/Reports/by_Project/PMDM/In_Defence_of_C4.5__Notes_on_Learning_One-Level_Decision_Trees.ps.gz",
  abstract =     "We discuss the implications of Holte's recently
                 published article, which demonstrated that on the most
                 commonly used data very simple classification rules are
                 almost as accurate as decision trees produced by
                 Quinlan's C4.5. We consider, in particular, what is the
                 significance of Holte's results for the future of
                 top-down induction of decision trees. To an extent,
                 Holte questioned the sense of further research on
                 multilevel decision tree learning. We go in detail
                 through all the parts of Holte's study. We try to put
                 the results into perspective. We argue that the (in
                 absolute terms) small difference in accuracy between 1R
                 and C4.5 that was witnessed by Holte is still
                 significant. We claim that C4.5 possesses additional
                 accuracy-related advantages over 1R. In addition we
                 discuss the representativeness of the databases used by
                 Holte. We compare empirically the optimal accuracies of
                 multilevel and one-level decision trees and observe
                 some significant differences. We point out several
                 deficiencies of limited-complexity classifiers.",
}

Data Mining - Ein {\"U}berblick, Werner Emde and Dietrich Wettschereck and Stefan Wrobel
@Article{emde.ea:uberblick:96,
  author =       "Werner Emde and Dietrich Wettschereck and Stefan
                 Wrobel",
  title =        "Data Mining - Ein {\"U}berblick",
  journal =      "Unix/Mail",
  year =         "1996",
  note =         "to appear",
}

A Guided Tour through the Data Mining Jungle, Robert Engels and Guido Lindner and Rudi Studer
@InProceedings{engels.ea:guided-tour:97,
  author   = {Robert Engels and Guido Lindner and Rudi Studer},
  title    = {A Guided Tour through the Data Mining Jungle},
  pages    = {163},
  crossref = {heckerman.ea:proceedings-third:97},
}

Planning Tasks for Knowledge Discovery in Databases; Performing Task-Oriented User-Guidance, Robert Engels
@InProceedings{engels:planning-tasks:96,
  author   = {Robert Engels},
  title    = {Planning Tasks for Knowledge Discovery in Databases;
              Performing Task-Oriented User-Guidance},
  pages    = {170},
  crossref = {simoudis.ea:proceedings-second:96},
}

Evaluation und Erweiterung eines Verfahrens zum Finden von Regelmaessigkeiten in relationalen Datenbanken, Stefan Escher
Available as
compressed postscript.
@TechReport{escher:evaluation-und:97,
  author      = {Stefan Escher},
  title       = {Evaluation und Erweiterung eines Verfahrens zum Finden
                 von Regelmaessigkeiten in relationalen Datenbanken},
  institution = {Universitaet Stuttgart, Fakultaet Informatik,
                 Germany},
  number      = {DIP-1444},
  month       = jan # " 1",
  year        = {1997},
  URL         = {ftp://ftp.informatik.uni-stuttgart.de/pub/library/ncstrl.ustuttgart_fi/DIP-1444/DIP-1444.ps.gz},
  keywords    = {ILP, Data Mining, Knowledge Discovery in Databases},
  abstract    = {In den letzten Jahren wurden die Techniken zur
                 Datenerhebung und Speicherung stark weiterentwickelt.
                 Zum Beispiel fuehren Barcodes auf nahezu allen
                 Produkten und die Automatisierung von Betriebsablaeufen
                 zu immer groesseren Datenmengen, die interpretiert
                 werden muessen. Das Problem liegt darin, dass eine
                 grosse Menge von Information vorhanden ist, das darin
                 enthaltene Wissen jedoch aufgrund der grossen
                 Datenmenge nicht zugaenglich ist. Daraus ergibt sich
                 die Notwendigkeit zur Entdeckung von Wissen in grossen
                 Datenbanken (Knowledge Discovery in Databases, Data
                 Mining). Grundlage des in dieser Diplomarbeit
                 vorgestellten Verfahrens ist das angenaeherte
                 nichtmonotone ILP (Inductive Logic Programming).
                 Gefunden werden Hornformeln, wobei eine Menge von
                 Rumpfliteralen vom Benutzer angegeben werden muss. Die
                 Qualitaet von gefundenen Klauseln wird von den
                 Messwerten Support und Confidence bestimmt.
                 Hauptsaechlich beschaeftigt sich die Diplomarbeit mit
                 der Erweiterung eines bestehenden ILP-Verfahrens um
                 eine Komponente, die numerische Attribute behandeln
                 kann},
}

Refinement of Datalog Programs, F. Esposito and A. Laterza and D. Malerba and G. Semeraro
@InProceedings{esposito.ea:refinement-datalog:96,
  author =       "F. Esposito and A. Laterza and D. Malerba and G.
                 Semeraro",
  title =        "Refinement of {D}atalog Programs",
  booktitle =    "Proceedings of the MLnet Familiarization Workshop on
                 Data Mining with Inductive Logic Programming",
  pages =        "73--94",
  year =         "1996",
}

A Density-Based Algorithm for Discovering Clusters in Large Spatial Databases with Noise, Martin Ester and Hans-Peter Kriegel and Jorg Sander and Xiaowei Xu
@InProceedings{ester.ea:density-based-algorithm:96,
  title =        "A Density-Based Algorithm for Discovering Clusters in
                 Large Spatial Databases with Noise",
  pages =        "226",
  author =       "Martin Ester and Hans-Peter Kriegel and J{\"o}rg Sander
                 and Xiaowei Xu",
  crossref =     "simoudis.ea:proceedings-second:96",
}

Density-Connected Sets and their Application for Trend Detection in Spatial Databases, Martin Ester and Hans-Peter Kriegel and J{\"o}rg Sander and Xiaowei Xu
@InProceedings{ester.ea:density-connected-sets:97,
  author   = {Martin Ester and Hans-Peter Kriegel and J{\"{o}}rg
              Sander and Xiaowei Xu},
  title    = {Density-Connected Sets and their Application for Trend
              Detection in Spatial Databases},
  pages    = {10},
  crossref = {heckerman.ea:proceedings-third:97},
}

Knowledge discovery in large spatial databases - focusing techniques for efficient class identification, M. Ester and H. P. Kriegel and X. W. Xu
@Article{ester.ea:large-spatial:95,
  author   = {M. Ester and H. P. Kriegel and X. W. Xu},
  title    = {Knowledge discovery in large spatial databases -
              focusing techniques for efficient class
              identification},
  journal  = {Lecture Notes In Computer Science},
  volume   = {951},
  pages    = {67--82},
  year     = {1995},
  address  = {Univ Munich, Inst Comp Sci, Leopoldstr 11B, D-80802
              Munich, Germany},
  keywords = {PROTEIN},
  abstract = {Both, the number and the size of spatial databases are
              rapidly growing because of the large amount of data
              obtained from satellite images, X-ray crystallography
              or other scientific equipment. Therefore, automated
              knowledge discovery becomes more and more important in
              spatial databases. So far, most of the methods for
              knowledge discovery in databases (KDD) have been based
              on relational database systems. In this paper, we
              address the task of class identification in spatial
              databases using clustering techniques. We put special
              emphasis on the integration of the discovery methods
              with the DB interface, which is crucial for the
              efficiency of KDD on large databases. The key to this
              integration is the use of a well-known spatial access
              method, the R*-tree. The focusing component of a KDD
              system determines which parts of the database are
              relevant for the knowledge discovery task. We present
              several strategies for focusing: selecting
              representatives from a spatial database, focusing on
              the relevant clusters and retrieving all objects of a
              given cluster. We have applied the proposed techniques
              to real data from a large protein database used for
              predicting protein-protein docking. A performance
              evaluation on this database indicates that clustering
              on large spatial databases can be performed, both,
              efficiently and effectively.},
}

Discovering Functional and Inclusion Dependencies in Relational Databases, Martti Kantola et al.
@Article{etal:discovering-functional:92,
  crossref =     "ijis-special-issue:92",
  author =       "Martti Kantola and others",
  title =        "Discovering Functional and Inclusion Dependencies in
                 Relational Databases",
  pages =        "591--607",
}

Overcoming Process Delays with Decision Tree Induction, Bob Evans and Doug Fisher
@Article{evans.ea:overcoming-process:94,
  author   = {Bob Evans and Doug Fisher},
  title    = {Overcoming Process Delays with Decision Tree
              Induction},
  journal  = {IEEE Expert},
  pages    = {60--66},
  month    = feb,
  year     = {1994},
  keywords = {Knowledge Acquisition, Decision Trees, ID3},
}

CLARIT, David A. Evans
@InProceedings{evans:clarit:95,
  author    = {David A. Evans},
  title     = {{CLARIT}},
  booktitle = {Proceedings of the Eighteenth Annual International ACM
               SIGIR Conference on Research and Development in
               Information Retrieval},
  series    = {Systems Demonstrations: Abstracts},
  pages     = {360},
  year      = {1995},
  copyright = {(c) Copyright 1995 Association for Computing
               Machinery},
  abstract  = {The CLARIT system consists of a set of flexible tools
               for application in a wide range of information
               management problems. These tools integrate
               natural-language processing (NLP), automatic knowledge
               discovery, and traditional information retrieval
               techniques. An advanced functionality application for
               free-text database management is demonstrated,
               incorporating full NLP, a broad range of querying
               mechanisms, automatic or user controlled query
               expansion, document collection profiling, document
               summarization, automatic document classification, and
               integrated handling of scanned images. The application
               provides rapid analysis of potentially large queries
               over large-scale databases in monolithic or
               client/server processing modes.},
}

Constructing bayesian networks to predict uncollectible telecommunications accounts, K. J. Ezawa and S. W. Norton
@Article{ezawa.ea:constructing-bayesian:96,
  author =       "K. J. Ezawa and S. W. Norton",
  address =      "AT\&T Bell Labs, Consumer Lab, Tech Staff, 600 Mt Ave,
                 Rm 7E-523, Murray Hill, NJ, 07974",
  title =        "Constructing {Bayesian} networks to predict
                 uncollectible telecommunications accounts",
  journal =      "IEEE Expert-Intelligent Systems \& Their
                 Applications",
  year =         "1996",
  volume =       "11",
  number =       "5",
  pages =        "45--51",
  keywords =     "EXPERT-SYSTEMS",
}

Interaction Selection and Complexity Control for Learning in Binarized Domains, Gerald Fahner
@TechReport{fahner:interaction-selection:96,
  author      = {Gerald Fahner},
  title       = {Interaction Selection and Complexity Control for
                 Learning in Binarized Domains},
  institution = {International Computer Science Institute},
  address     = {Berkeley, CA},
  number      = {TR-96-001},
  month       = may,
  year        = {1996},
  keywords    = {learning algorithms, feature selection,
                 Walsh-functions, input-space representation, complexity
                 measures, capacity control, model comparison},
  abstract    = {We empirically investigate the potential of a novel,
                 greatly simplified classifier design for binarized
                 data. The generic model allocates a sparse, _digital_
                 hidden layer comprised of interaction nodes that
                 compute PARITY of selected submasks of input bits,
                 followed by a sigmoidal output node with adjustable
                 weights. Model identification incorporates
                 user-assigned complexity preferences. We discuss the
                 situations: a) when the input space obeys a metrics b)
                 when the inputs are discrete attributes We propose a
                 family of respective model priors that make search
                 through the combinatorial space of multi-input
                 interactions feasible. Model capacity and smoothness of
                 the approximation are controlled by two complexity
                 parameters. Model comparison over the parameter plane
                 discovers models with excellent performance. In some
                 cases interpretable structures are achieved. We point
                 out the significance of our novel data mining tool for
                 overcoming scaling problems, impacts on real-time
                 systems, and possible contributions to the development
                 of non-standard computing devices for inductive
                 inference.},
}

Data Mining with Sparse and Simplified Interaction Selection, Gerald Fahner
@InProceedings{fahner:with-sparse:96,
  author   = {Gerald Fahner},
  title    = {Data Mining with Sparse and Simplified Interaction
              Selection},
  pages    = {359},
  crossref = {simoudis.ea:proceedings-second:96},
}

FastMap: A Fast Algorithm for Indexing, Data-Mining and Visualization of Traditional and Multimedia Datasets, Christos Faloutsos and King-Ip Lin
@InProceedings{faloutsos.ea:fastmap-fast:95a,
  author       = {Christos Faloutsos and King-Ip Lin},
  title        = {{FastMap}: {A} Fast Algorithm for Indexing,
                  Data-Mining and Visualization of Traditional and
                  Multimedia Datasets},
  editor       = {Michael J. Carey and Donovan A. Schneider},
  booktitle    = {Proceedings of the 1995 {ACM} {SIGMOD} International
                  Conference on Management of Data},
  address      = {San Jose, California},
  pages        = {163--174},
  month        = "22--25~" # may,
  year         = {1995},
  key_modifier = {a},
}

FastMap: a fast algorithm for indexing, data-mining and visualization of traditional and multimedia datasets, C. Faloutsos and King-Ip Lin
@Article{faloutsos.ea:fastmap-fast:95b,
  author         = {C. Faloutsos and King-Ip Lin},
  title          = {{FastMap}: a fast algorithm for indexing, data-mining
                    and visualization of traditional and multimedia
                    datasets},
  journal        = {SIGMOD Record (ACM Special Interest Group on
                    Management of Data)},
  volume         = {24},
  number         = {2},
  pages          = {163--174},
  month          = jun,
  year           = {1995},
  ISSN           = {0163-5808},
  key_modifier   = {b},
  affiliation    = {AT\&T Bell Labs., Murray Hill, NJ, USA},
  classification = {C6160 (Database management systems (DBMS)); C6170K
                    (Knowledge engineering techniques); C7240 (Information
                    analysis and indexing); C6160S (Spatial and pictorial
                    databases); C4240 (Programming and algorithm theory)},
  keywords       = {FastMap; Fast algorithm; Indexing; Data-mining;
                    Visualization; Multimedia datasets; Traditional
                    datasets; Feature-extraction functions; Domain expert;
                    Highly fine-tuned spatial access methods; Best-match
                    query; K-dimensional space; Potential clusters; Pattern
                    recognition},
  thesaurus      = {Feature extraction; Indexing; Knowledge acquisition;
                    Multimedia computing; Pattern matching},
}

FastMap: A Fast Algorithm for Indexing, Data-Mining and Visualization of Traditional and Multimedia Datasets, Christos Faloutsos and King-Ip (David) Lin
Available as
compressed postscript.
@TechReport{faloutsos.ea:fastmap-fast:95c,
  key_modifier = "c",
  author =       "Christos Faloutsos and King-Ip (David) Lin",
  title =        "{FastMap}: {A} Fast Algorithm for Indexing, Data-Mining
                 and Visualization of Traditional and Multimedia
                 Datasets",
  institution =  "University of Maryland Institute for Advanced Computer
                 Studies Dept. of Computer Science, Univ. of Maryland",
  number =       "CS-TR-3383",
  address =      "College Park, MD",
  month =        jan,
  year =         "1995",
  URL =          "ftp://ftp.cs.umd.edu/pub/papers/papers/3383/3383.ps.Z",
  abstract =     "A very promising idea for fast searching in
                 traditional and multimedia databases is to map objects
                 into points in k-d space, using k feature-extraction
                 functions, provided by a domain expert [Jag91]. Thus,
                 we can subsequently use highly fine-tuned spatial
                 access methods (SAMs), to answer several types of
                 queries, including the 'Query By Example' type (which
                 translates to a range query); the 'all pairs' query
                 (which translates to a spatial join [BKSS94]); the
                 nearest-neighbor or best-match query, etc. \par
                 However, designing feature extraction functions can be
                 hard. It is relatively easier for a domain expert to
                 assess the similarity/distance of two objects. Given
                 only the distance information though, it is not obvious
                 how to map objects into points. \par This is exactly
                 the topic of this paper. We describe a fast algorithm
                 to map objects into points in some k-dimensional space
                 (k is user-defined), such that the dissimilarities are
                 preserved. There are two benefits from this mapping:
                 (a) efficient retrieval, in conjunction with a SAM, as
                 discussed before and (b) visualization and data-mining:
                 the objects can now be plotted as points in 2-d or 3-d
                 space, revealing potential clusters, correlations among
                 attributes and other regularities that data-mining is
                 looking for. \par We introduce an older method from
                 pattern recognition, namely, Multi-Dimensional Scaling
                 (MDS) [Tor52]; although unsuitable for indexing, we use
                 it as yardstick for our method. Then, we propose a much
                 faster algorithm to solve the problem in hand, while in
                 addition it allows for indexing. Experiments on real
                 and synthetic data indeed show that the proposed
                 algorithm is significantly faster than MDS, (being
                 linear, as opposed to quadratic, on the database size
                 N), while it manages to preserve distances and the
                 overall structure of the data-set. \par (Also
                 cross-referenced as UMIACS-TR-94-132)",
}

Combining Data Mining and Machine Learning for Effective User Profiling, Tom Fawcett and Foster Provost
@InProceedings{fawcett.ea:combining-machine:96,
  title =        "Combining Data Mining and Machine Learning for
                 Effective User Profiling",
  pages =        "8",
  author =       "Tom Fawcett and Foster Provost",
  crossref =     "simoudis.ea:proceedings-second:96",
}

Advances in Knowledge Discovery and Data Mining, U. M. Fayyad and G. Piatetsky-Shapiro and P. Smyth and R. Uthurusamy (Eds)
@Book{fayyad.ea:advances:96,
  editor =       "U. M. Fayyad and G. Piatetsky-Shapiro and P. Smyth and
                 R. Uthurusamy",
  title =        "Advances in Knowledge Discovery and Data Mining",
  publisher =    "AAAI Press / The MIT Press",
  address =      "Menlo Park",
  year =         "1996",
  ISBN =         "0-262-56097-6",
  descriptor =   "Data Mining, Daten",
}

The Attribute Selection Problem in Decision Tree Generation, U. M. Fayyad and K. B. Irani
@InProceedings{fayyad.ea:attribute-selection:92,
  author    = {U. M. Fayyad and K. B. Irani},
  title     = {The Attribute Selection Problem in Decision Tree
               Generation},
  booktitle = {Proc.\ of AAAI-92},
  pages     = {104--110},
  year      = {1992},
}

Automated cataloging and analysis of sky survey image databases: the SKICAT system, Usama M. Fayyad and Nicholas Weir and S. Djorgovski
@InProceedings{fayyad.ea:automated-cataloging:93,
  author =       "Usama M. Fayyad and Nicholas Weir and S. Djorgovski",
  title =        "Automated cataloging and analysis of sky survey image
                 databases: the {SKICAT} system",
  booktitle =    "Proc. of the second Int. Conf. on Information and
                 Knowledge Management",
  address =      "Washington DC",
  pages =        "527--536",
  year =         "1993",
}

Data Mining and Knowledge Discovery in Databases, Usama Fayyad and Ramasamy Uthurusamy
@Article{fayyad.ea:data-mining:96,
  author  = {Usama Fayyad and Ramasamy Uthurusamy},
  title   = {Data Mining and Knowledge Discovery in Databases},
  journal = {Communications of the ACM},
  volume  = {39},
  number  = {11},
  pages   = {24--27},
  month   = nov,
  year    = {1996},
  ISSN    = {0001-0782},
  address = {Microsoft Corp, Res, Redmond, Wa, 98052 Gm Corp,
             Knowledge \& Decis Support, Detroit, Mi, 48202},
}

From digitized images to online catalogs - data mining a sky survey, U. M. Fayyad and S. G. Djorgovski and N. Weir
@Article{fayyad.ea:digitized-images:96,
  author =       "U. M. Fayyad and S. G. Djorgovski and N. Weir",
  address =      "Microsoft Res, Redmond, Ca Caltech, Jpl, Machine
                 Learning Syst Grp, Pasadena, Ca, 91125",
  title =        "From digitized images to online catalogs - data mining
                 a sky survey",
  journal =      "AI Magazine",
  year =         "1996",
  volume =       "17",
  number =       "2",
  pages =        "51--66",
  abstract =     "The value of scientific digital-image libraries seldom
                 lies in the pixels of images. For large collections of
                 images, such as those resulting from astronomy sky
                 surveys, the typical useful product is an online
                 database cataloging entries of interest. We focus on
                 the automation of the cataloging effort of a major sky
                 survey and the availability of digital libraries in
                 general. The SKICAT system automates the reduction and
                 analysis of the three terabytes worth of images,
                 expected to contain on the order of 2 billion sky
                 objects. For the primary scientific analysis of these
                 data, it is necessary to detect, measure, and classify
                 every sky object. SKICAT integrates techniques for
                 image processing, classification learning, database
                 management, and visualization. The learning algorithms
                 are trained to classify the detected objects and can
                 classify objects too faint for visual classification
                 with an accuracy level exceeding 90 percent. This
                 accuracy level increases the number of classified
                 objects in the final catalog threefold relative to the
                 best results from digitized photographic sky surveys to
                 date. Hence, learning algorithms played a powerful and
                 enabling role and solved a difficult, scientifically
                 significant problem, enabling the consistent, accurate
                 classification and the ease of access and analysis of
                 an otherwise unfathomable data set.",
}

Mining Scientific Data, Usama Fayyad and David Haussler and Paul Stolorz
@article{fayyad.ea:scientific:96,
  author  = {Usama Fayyad and David Haussler and Paul Stolorz},
  title   = {Mining Scientific Data},
  journal = {Communications of the ACM},
  year    = {1996},
  month   = nov,
  volume  = {39},
  number  = {11},
  pages   = {51--57},
  issn    = {0001-0782},
}

From data mining to knowledge discovery in databases, U. Fayyad and G. Piatetsky-Shapiro and P. Smyth
@Article{fayyad.ea:to:96,
  author =       "U. Fayyad and G. Piatetsky-Shapiro and P. Smyth",
  address =      "Univ Calif Irvine, Dept Comp \& Informat Sci, Irvine,
                 Ca, 92717 Gte Labs Inc, Knowledge Discovery Databases
                 Kdd Project, Tech Staff, Waltham, Ma, 02254",
  title =        "From data mining to knowledge discovery in databases",
  journal =      "AI Magazine",
  year =         "1996",
  volume =       "17",
  number =       "3",
  pages =        "37--54",
  abstract =     "Data mining and knowledge discovery in databases have
                 been attracting a significant amount of research,
                 industry, and media attention of late. What is all the
                 excitement about? This article provides an overview of
                 this emerging field, clarifying how data mining and
                 knowledge discovery in databases are related both to
                 each other and to related fields, such as machine
                 learning, statistics, and databases. The article
                 mentions particular real-world applications, specific
                 data-mining techniques, challenges involved in
                 real-world applications of knowledge discovery, and
                 current and future research directions in the field.",
  keywords =     "NEURAL NETWORKS",
}

Knowledge Discovery and Data Mining: Towards a Unifying Framework, Usama Fayyad and Gregory Piatetsky-Shapiro and Padhraic Smyth
@inproceedings{fayyad.ea:towards-unifying:96,
  author   = {Usama Fayyad and Gregory Piatetsky-Shapiro and
              Padhraic Smyth},
  title    = {Knowledge Discovery and Data Mining: Towards a
              Unifying Framework},
  pages    = {82},
  crossref = {simoudis.ea:proceedings-second:96},
}

Data Mining and Knowledge Discovery in Databases: Applications in Astronomy and Planetary Science (Invited Talk), Usama M. Fayyad
@InProceedings{fayyad:applications-astronomy:96a,
  key_modifier = "a",
  author =       "Usama M. Fayyad",
  title =        "Data Mining and Knowledge Discovery in Databases:
                 Applications in Astronomy and Planetary Science
                 (Invited Talk)",
  pages =        "1590--1592",
  booktitle =    "Proceedings of the Thirteenth National Conference on
                 Artificial Intelligence and the Eighth Innovative
                 Applications of Artificial Intelligence Conference",
  month =        "4--8~" # aug,
  publisher =    "AAAI Press / MIT Press",
  address =      "Menlo Park",
  year =         "1996",
}

Data mining and knowledge discovery - making sense out of data, U. M. Fayyad
@Article{fayyad:making-sense:96b,
  key_modifier = "b",
  author =       "U. M. Fayyad",
  address =      "Microsoft Res, 1 Microsoft Way 9-S, Redmond, Wa,
                 98052",
  title =        "Data mining and knowledge discovery - making sense out
                 of data",
  journal =      "IEEE Expert-Intelligent Systems \& Their
                 Applications",
  year =         "1996",
  volume =       "11",
  number =       "5",
  pages =        "20--25",
}

Learning from Biased Data Using Mixture Models, A. J. Feelders
@inproceedings{feelders:learning-biased:96,
  author   = {A. J. Feelders},
  title    = {Learning from Biased Data Using Mixture Models},
  pages    = {102},
  crossref = {simoudis.ea:proceedings-second:96},
}

Mining Associations in Text in the Presence of Background Knowledge, Ronen Feldman and Haym Hirsh
@inproceedings{feldman.ea:associations-text:96,
  author   = {Ronen Feldman and Haym Hirsh},
  title    = {Mining Associations in Text in the Presence of
              Background Knowledge},
  pages    = {343},
  crossref = {simoudis.ea:proceedings-second:96},
}

Maximal Association Rules: A New Tool for Mining for Keyword Co-Occurrences in Document Collections, Ronen Feldman and Yonatan Aumann and Amihood Amir and Willi Kloesgen and Amir Zilberstein
@InProceedings{feldman.ea:maximal-association:97,
  title =        "Maximal Association Rules: {A} New Tool for Mining for
                 Keyword Co-Occurrences in Document Collections",
  author =       "Ronen Feldman and Yonatan Aumann and Amihood Amir and
                 Willi Kloesgen and Amir Zilberstein",
  pages =        "167",
  crossref =     "heckerman.ea:proceedings-third:97",
}

Visualization Techniques to Explore Data Mining Results for Document Collections, Ronen Feldman and Willi Kl{\"{o}}sgen and Amir Zilberstein
@inproceedings{feldman.ea:techniques-to:97,
  author   = {Ronen Feldman and Willi Kl{\"{o}}sgen and Amir
              Zilberstein},
  title    = {Visualization Techniques to Explore Data Mining
              Results for Document Collections},
  pages    = {16},
  crossref = {heckerman.ea:proceedings-third:97},
}

Applying AI Clustering to Engineering Tasks, Doug Fisher et al.
@Article{fisher.ea:applying-ai:93,
  author =       "Doug Fisher and others",
  title =        "Applying {AI} Clustering to Engineering Tasks",
  journal =      "IEEE Expert",
  year =         "1993",
  pages =        "51--60",
  month =        dec,
  keywords =     "COBWEB, Clustering, Applications, Time Series",
  annote =       "Application of COBWEB to fault diagnosis, Bridge
                 design and human gait analysis. 17 references",
}

Iterative Optimization and Simplification of Hierarchical Clusterings, Doug Fisher
Available as
hypertext.
@TechReport{fisher:iterative-optimization:,
  URL =          "http://cswww.vuse.vanderbilt.edu/~dfisher/tech-reports/tr-95-01.html",
  title =        "Iterative Optimization and Simplification of
                 Hierarchical Clusterings",
  type =         "Technical Report",
  number =       "CS-95-01",
  institution =  "Department of Computer Science, Vanderbilt University",
  year =         "1995",
  author =       "Doug Fisher",
  abstract =     "Clustering is often used for discovering structure in
                 data. Clustering systems differ in the objective
                 function used to evaluate clustering quality and the
                 control strategy used to search the space of
                 clusterings. Ideally, the search strategy should
                 consistently construct clusterings of high quality, but
                 be computationally inexpensive as well. In general, we
                 cannot have it both ways, but we can partition the
                 search so that a system inexpensively constructs a
                 `tentative' clustering for initial examination,
                 followed by iterative optimization, which continues to
                 search in background for improved clusterings. Given
                 this motivation, we evaluate an inexpensive `sorting'
                 strategy coupled with several control strategies for
                 iterative optimization, each of which repeatedly
                 modifies an initial clustering in search of a better
                 one. One of these optimization strategies, inspired by
                 work on macro-operator learning, appears to be novel in
                 the clustering literature. Once a clustering has been
                 constructed it is judged by analysts -- often according
                 to task-specific criteria. Several authors have
                 abstracted these criteria and posited a generic
                 performance task akin to pattern completion, where the
                 error rate over completed patterns is used to
                 `externally' judge clustering utility. Given this
                 performance task we adapt resampling-based pruning
                 strategies used by supervised learning systems to the
                 task of simplifying hierarchical clusterings, thus
                 promising to ease post-clustering analysis. Finally, we
                 propose a number of objective functions, based on
                 attribute-selection measures for decision-tree
                 induction, that might perform well on the error rate
                 and simplicity dimensions.",
  keywords =     "clustering, iterative optimization, cluster
                 validation, resampling, pruning, objective functions",
  annote =       "Institution and year inferred from the report number
                 and URL; confirm against the report itself.",
}

Iterative Optimization and Simplification of Hierarchical Clusterings, Doug Fisher
Available as
hypertext.
@Article{fisher:iterative-optimization:96,
  URL =          "http://cswww.vuse.vanderbilt.edu/~dfisher/jair-96/html-final/html-final.html",
  title =        "Iterative Optimization and Simplification of
                 Hierarchical Clusterings",
  author =       "Doug Fisher",
  year =         "1996",
  journal =      "Journal of Artificial Intelligence Research",
  volume =       "4",
  pages =        "147--180",
}

Inductive characterisation of database relations., P. A. Flach
Available as
compressed postscript.
@InProceedings{flach:inductive-characterisation:90,
  URL =          "ftp://ftp.gmd.de/MachineLearning/ILP/public/papers/flach-ITKreport23.ps.Z",
  title =        "Inductive characterisation of database relations",
  author =       "P. A. Flach",
  year =         "1990",
  booktitle =    "Proc. International Symposium on Methodologies for
                 Intelligent Systems",
  editor =       "Z. W. Ras and M. Zemankowa and M. L. Emrich",
  pages =        "371--378",
  publisher =    "North-Holland",
  address =      "Amsterdam",
  note =         "ITK Research Report No. 23",
}

10 hottest technologies in telecom, Patrick Flanagan
@article{flanagan:10-hottest:96,
  author         = {Patrick Flanagan},
  title          = {10 hottest technologies in telecom},
  journal        = {Telecommunications (Americas Edition)},
  journalabr     = {Telecommunications Am Ed},
  year           = {1996},
  month          = may,
  volume         = {30},
  number         = {5},
  pages          = {6},
  issn           = {0278-4831},
  classification = {716.1; 722.3; 723.1.1; 901},
  keywords       = {Asynchronous transfer mode; Automated network
                    management; Cable modems; Computer networks; Computer
                    programming languages; Data mining; Electric relays;
                    Internet appliances; Intranet; Java programming
                    language; Local area networks; Modems; Personal
                    communication systems; Personal satellite phones;
                    Technology; Telecommunication; Telecommunication
                    systems; Telecommunication technology; Voice over frame
                    relay; Voice/data communication systems},
}

A Genetic Algorithm-Based Approach to Data Mining, Ian W. Flockhart and Nicholas J. Radcliffe
@inproceedings{flockhart.ea:genetic-algorithm-based:96,
  author   = {Ian W. Flockhart and Nicholas J. Radcliffe},
  title    = {A Genetic Algorithm-Based Approach to Data Mining},
  pages    = {299},
  crossref = {simoudis.ea:proceedings-second:96},
}

Inductive Learning for Expert Systems, Richard Forsyth
@InCollection{forsyth:inductive-learning:89,
  author =       "Richard Forsyth",
  title =        "Inductive Learning for Expert Systems",
  booktitle =    "Expert Systems Principles and Case Studies",
  publisher =    "Chapman and Hall",
  address =      "New York",
  year =         "1989",
}

Knowledge discovery in databases: an overview, W. J. Frawley and G. Piatetsky-Shapiro and C. J. Matheus
@inproceedings{frawley.ea:overview:91,
  author    = {W. J. Frawley and G. Piatetsky-Shapiro and C. J. Matheus},
  title     = {Knowledge discovery in databases: an overview},
  booktitle = {Knowledge discovery in databases},
  editor    = {G. Piatetsky-Shapiro and W. J. Frawley},
  publisher = {AAAI Press/MIT Press},
  address   = {Menlo Park, CA/Cambridge, MA},
  year      = {1991},
  pages     = {1--27},
}

Knowledge Discovery in Databases: An Overview., W. Frawley and G. Piatetsky-Shapiro and C. Matheus
@Article{frawley.ea:overview:92a,
  key_modifier = "a",
  author =       "W. Frawley and G. Piatetsky-Shapiro and C. Matheus",
  title =        "Knowledge Discovery in Databases: An Overview",
  journal =      "AI Magazine",
  year =         "1992",
  volume =       "13",
  number =       "3",
  pages =        "57--70",
  abstract =     "After a decade of fundamental interdisciplinary
                 research in machine learning, the spadework in this
                 field has been done; the 1990s should see the
                 widespread exploitation of knowledge discovery as an
                 aid to assembling knowledge bases. The contributors to
                 the AAAI Press book Knowledge Discovery in Databases
                 were excited at the potential benefits of this
                 research. The editors hope that some of this excitement
                 will communicate itself to AI Magazine readers of this
                 article.",
  note =         "Fall (Autumn) issue. Reprint of the introductory
                 chapter of {\em Knowledge Discovery in Databases}
                 collection, AAAI/MIT Press, 1991.",
  annote =       "Same article as frawley.ea:overview:92b. Pages were
                 previously given here as 213--228; volume, number and
                 pages now follow that entry.",
}

Knowledge discovery in databases - an overview, W. J. Frawley and G. Piatetsky-Shapiro and C. J. Matheus
@Article{frawley.ea:overview:92b,
  key_modifier = "b",
  author =       "W. J. Frawley and G. Piatetsky-Shapiro and C. J.
                 Matheus",
  address =      "Gte Labs Inc, Distributed Cooperating Learning Syst
                 Project, Waltham, Ma, 02254 Gte Labs Inc, Knowledge
                 Discovery Databases Project, Waltham, Ma, 02254",
  title =        "Knowledge discovery in databases - an overview",
  journal =      "AI Magazine",
  year =         "1992",
  volume =       "13",
  number =       "3",
  pages =        "57--70",
  abstract =     "After a decade of fundamental interdisciplinary
                 research in machine learning, the spadework in this
                 field has been done; the 1990s should see the
                 widespread exploitation of knowledge discovery as an
                 aid to assembling knowledge bases. The contributors to
                 the AAAI Press book Knowledge Discovery in Databases
                 were excited at the potential benefits of this
                 research. The editors hope that some of this excitement
                 will communicate itself to AI Magazine readers of this
                 article.",
  annote =       "Conflicting page numbers in another bibtex entry!",
}

Using function to encode domain and contextual knowledge in statistical induction, W. Frawley
@InCollection{frawley:using-function:91,
  crossref =     "piatetsky-shapiro.ea:knowledge-discovery:91",
  editor =       "Gregory Piatetsky-Shapiro and William J. Frawley",
  booktitle =    "Knowledge Discovery in Databases",
  publisher =    "AAAI Press / The MIT Press",
  address =      "Menlo Park, California",
  edition =      "First",
  year =         "1991",
  author =       "W. Frawley",
  title =        "Using function to encode domain and contextual
                 knowledge in statistical induction",
  annote =       "Details of the FBI system for decision tree
                 induction",
}

A data-parallel primitive for high-performance knowledge discovery in large databases, A. A. Freitas and S. H. Lavington
Available as
compressed postscript.
@TechReport{freitas.ea:data-parallel-primitive:95,
  URL =          "ftp://ftp.essex.ac.uk/pub/csc/technical-reports/CSM-242.ps.Z",
  title =        "A data-parallel primitive for high-performance
                 knowledge discovery in large databases",
  author =       "A. A. Freitas and S. H. Lavington",
  type =         "Internal Report",
  number =       "CSM-242",
  institution =  "University of Essex, UK",
  month =        may,
  year =         "1995",
  abstract =     "Efficiency is crucial in KDD (Knowledge Discovery in
                 Databases), due to the huge amount of data stores in
                 current databases. We argue that high efficiency in KDD
                 can be achieved by combining two approaches, namely
                 encapsulating KDD functionally within standard DBMS
                 operations and using parallel processing. Hence, KDD
                 tasks can be executed on a back-end SQL server, e.g. a
                 parallel DB machine. We propose a KDD primitive (a set
                 of basic operations) which underlies the candidate-rule
                 evaluation procedures of many KDD algorithms. We
                 compare and analyse the time required to carry out this
                 primitive on three different computational
                 architecture, viz. a conventional workstation and two
                 parallel DB machines. The main advantages of
                 encapsulating a KDD primitive in a parallel DB server
                 are automatic parallelization and the run-time speed
                 which can be achieved through parallel processing.",
}

Parallel Data Mining for Very Large Relational Databases, A. A. Freitas and S. H. Lavington
@article{freitas.ea:parallel-very:96,
  author  = {A. A. Freitas and S. H. Lavington},
  title   = {Parallel Data Mining for Very Large Relational
             Databases},
  journal = {Lecture Notes in Computer Science},
  year    = {1996},
  volume  = {1067},
  pages   = {158--??},
  issn    = {0302-9743},
}

Speeding up Knowledge Discovery in Large Relational Databases by Means of a New Discretization Algorithm, A. A. Freitas and S. H. Lavington
@article{freitas.ea:speeding-up:96,
  author  = {A. A. Freitas and S. H. Lavington},
  title   = {Speeding up Knowledge Discovery in Large Relational
             Databases by Means of a New Discretization Algorithm},
  journal = {Lecture Notes in Computer Science},
  year    = {1996},
  volume  = {1094},
  pages   = {124--??},
  issn    = {0302-9743},
}

Lazy Decision Trees, Jerome Friedman and Ron Kohavi and Yeogirl Yun
Available as
ronnyk.
@inproceedings{friedman.ea:lazy-decision:96,
  author        = {Jerome Friedman and Ron Kohavi and Yeogirl Yun},
  title         = {Lazy Decision Trees},
  booktitle     = {Proceedings of the Thirteenth National Conference on
                   Artificial Intelligence},
  publisher     = {AAAI Press and the MIT Press},
  year          = {1996},
  month         = aug,
  pages         = {717--724},
  url           = {http://robotics.stanford.edu/users/ronnyk},
  contributedby = {Ronny Kohavi, ronnyk(at)sgi.com},
}

Mining Optimized Association Rules for Numeric Attributes, T. Fukuda and Y. Morimoto and S. Morishita and T. Tokuyama
@inproceedings{fukuda.ea:optimized-association:96,
  author    = {T. Fukuda and Y. Morimoto and S. Morishita and T.
               Tokuyama},
  title     = {Mining Optimized Association Rules for Numeric
               Attributes},
  booktitle = {Proceedings of the Fifteenth {ACM}
               {SIGACT}-{SIGMOD}-{SIGART} Symposium on Principles of
               Database Systems, {PODS} 1996, Montr{\'e}al, Canada,
               June 3--5, 1996},
  editor    = {{ACM}},
  series    = {Proceedings of the ACM SIGACT SIGMOD SIGART Symposium
               on Principles of Database Systems},
  volume    = {15},
  publisher = {ACM Press},
  address   = {New York, NY 10036, USA},
  year      = {1996},
  pages     = {182--191},
  keywords  = {database systems; PODS; ACM; SIGMOD; SIGART; SIGACT},
  annote    = {Held in conjunction with the 1996 ACM SIGMOD
               international conference on management of data. Also
               known as PODS 1996},
}

Data Mining using Two-dimensional Optimized Association Rules: Scheme, Algorithms, and Visualization, Takeshi Fukuda and Yasuhiko Morimoto and Shinichi Morishita and Takeshi Tokuyama
@inproceedings{fukuda.ea:using-two-dimensional:96,
  author    = {Takeshi Fukuda and Yasuhiko Morimoto and Shinichi
               Morishita and Takeshi Tokuyama},
  title     = {Data Mining using Two-dimensional Optimized
               Association Rules: Scheme, Algorithms, and
               Visualization},
  booktitle = {Proceedings of the 1996 {ACM} {SIGMOD} International
               Conference on Management of Data},
  editor    = {H. V. Jagadish and Inderpal Singh Mumick},
  address   = {Montreal, Quebec, Canada},
  year      = {1996},
  month     = {4--6~} # jun,
  pages     = {13--23},
}

Local Induction of Decision Trees: Towards Interactive Data Mining, Truxton Fulton and Steven Salzberg and Simon Kasif and David Waltz
@inproceedings{fulton.ea:local-induction:96,
  author   = {Truxton Fulton and Steven Salzberg and Simon Kasif and
              David Waltz},
  title    = {Local Induction of Decision Trees: Towards Interactive
              Data Mining},
  pages    = {14},
  crossref = {simoudis.ea:proceedings-second:96},
}

Knowledge discovery in international conflict databases, J. Furnkranz and J. Petrak and R. Trappl
@Article{furnkranz.ea:international-conflict:97,
  author =       "J. F{\"{u}}rnkranz and J. Petrak and R. Trappl",
  address =      "Austrian Res Inst Artificial Intelligence,
                 Schottengasse 3, a-1010 Vienna, Austria Austrian Res
                 Inst Artificial Intelligence, a-1010 Vienna, Austria",
  title =        "Knowledge discovery in international conflict
                 databases",
  journal =      "Applied Artificial Intelligence",
  year =         "1997",
  volume =       "11",
  number =       "2",
  pages =        "91--118",
  abstract =     "Artificial intelligence (AI) is heavily supported by
                 military institutions, while practically no effort goes
                 into the investigation of possible contributions of AI
                 to the avoidance and termination of crises and wars.
                 This article takes a first step in this direction by
                 investigating the use of machine learning techniques
                 for discovering knowledge in international conflict and
                 conflict management databases. We have applied
                 similarity-based case retrieval to the KOSIMO database
                 of international conflicts. Furthermore, we present
                 results of analyzing the CONFMAN database of successful
                 and unsuccessful conflict management attempts with an
                 inductive decision tree learning algorithm. The latter
                 approach seems to be particularly promising, as
                 conflict management events apparently are more
                 repetitive and thus better suited for machine-aided
                 analysis.",
  keywords =     "MEDIATION",
}

A Comparison of Pruning Methods for Relational Concept Learning, J. F{\"{u}}rnkranz
@inproceedings{furnkranz:comparison-pruning:94,
  author    = {J. F{\"{u}}rnkranz},
  title     = {A Comparison of Pruning Methods for Relational Concept
               Learning},
  booktitle = {Proceedings of the AAAI-94 Workshop on Knowledge
               Discovery in Databases},
  year      = {1994},
}

Induction of meta-knowledge about knowledge discovery, B. R. Gaines and P. Compton
@Article{gaines.ea:induction-meta-knowledge:93,
  author =       "B. R. Gaines and P. Compton",
  address =      "Univ Calgary, Inst Knowledge Sci, Calgary T2N 1N4, Ab,
                 Canada Univ New S Wales, Dept Comp Sci, Sydney, Nsw
                 2033, Australia",
  title =        "Induction of meta-knowledge about knowledge
                 discovery",
  journal =      "IEEE Transactions on Knowledge and Data Engineering",
  year =         "1993",
  volume =       "5",
  number =       "6",
  pages =        "990--992",
  abstract =     "A study is reported of the use of ripple-down rule
                 induction to develop a meta-model of ten years of
                 clinical data captured as part of the development of an
                 expert system for thyroid diagnosis. The study shows
                 how the suitability for inductive knowledge discovery
                 of such real-world data can be characterized in terms
                 of its stationarity, and how the best error rates
                 achievable and the amount of data necessary to achieve
                 them, can be estimated.",
  keywords =     "GARVAN THYROID DATABASE, INDUCT, INDUCTION, KNOWLEDGE
                 DISCOVERY, MACHINE LEARNING, MEDICAL DIAGNOSIS,
                 METAMODELING, META-KNOWLEDGE, RIPPLE-DOWN RULES, RULES
                 WITH EXCEPTIONS",
}

Improving Scalability in a Scientific Discovery System by Exploiting Parallelism, Gehad Galal and Diane J. Cook and Lawrence B. Holder
@inproceedings{galal.ea:improving-scalability:97,
  author   = {Gehad Galal and Diane J. Cook and Lawrence B. Holder},
  title    = {Improving Scalability in a Scientific Discovery System
              by Exploiting Parallelism},
  pages    = {171},
  crossref = {heckerman.ea:proceedings-third:97},
}

Connectionist Expert Systems, Stephen I. Gallant
@Article{gallant:connectionist-expert:88,
  author =       "Stephen I. Gallant",
  title =        "Connectionist Expert Systems",
  journal =      "Communications of the ACM",
  year =         "1988",
  volume =       "31",
  number =       "2",
  pages =        "153--168",
  annote =       "Volume was previously given as 32, which does not
                 match year 1988 for this journal; page range not
                 re-verified.",
}

Mining Entity-Identification Rules for Database Integration, M. Ganesh and Jaideep Srivastava and Travis Richardson
@inproceedings{ganesh.ea:entity-identification-rules:96,
  author   = {M. Ganesh and Jaideep Srivastava and Travis
              Richardson},
  title    = {Mining Entity-Identification Rules for Database
              Integration},
  pages    = {291},
  crossref = {simoudis.ea:proceedings-second:96},
}

Discovering interesting statements from a database, F. Gebhardt
@Article{gebhardt:discovering-interesting:94,
  author =       "F. Gebhardt",
  address =      "Gesell Math \& Datenverarbeitung Gmbh, Schloss
                 Birlinghoven, Postfach 1316, D-53731 St Augustin,
                 Germany",
  title =        "Discovering interesting statements from a database",
  journal =      "Applied Stochastic Models and Data Analysis",
  year =         "1994",
  volume =       "10",
  number =       "1",
  pages =        "1--14",
  abstract =     "Knowledge discovery aims at extracting new knowledge
                 from potentially large databases; this may be in the
                 form of interesting statements about the data. Two
                 interrelated classes of problem arise that are treated
                 here: to put the subjective notion of 'interesting'
                 into concrete terms and to deal with large numbers of
                 statements that are related to one another (one
                 rendering the other redundant or at least less
                 interesting). Four increasingly subjective facets of
                 'interestingness' are identified: the subject field
                 under consideration, the conspicuousness of a finding,
                 its novelty, and its deviation from prior knowledge. A
                 procedure is proposed, and tried out on two quite
                 different data sets, that allows for specifying
                 interestingness by various means and that ranks the
                 results in a way that takes interestingness (relevance,
                 evidence) as well as mutual relatedness (similarity,
                 affinity) into account- manifestations of the second
                 and third facets of interestingness in the given data
                 environment.",
  keywords =     "PROJECTION PURSUIT, KNOWLEDGE DISCOVERY IN DATABASES,
                 EXPLORATORY DATA ANALYSIS, INTERESTINGNESS, PROJECT
                 EXPLORA",
}

Excavate Your Data, Cheryl Gerber
Available as
hypertext.
@article{gerber:excavate-your:96,
  author   = {Cheryl Gerber},
  title    = {Excavate Your Data},
  journal  = {Datamation},
  year     = {1996},
  month    = may,
  volume   = {42},
  number   = {9},
  url      = {http://www.datamation.com/PlugIn/issues/1996/may1/05asoft3frame.html},
  abstract = {Datamining could be your No. 1 strategic weapon--and
              source of profit--in dissecting archival information.
              But with its roots in machine learning, this esoteric
              technology takes some time to master.},
}

ENIGMA: A System that Learns Diagnostic Knowledge, A. Giordana and L. Saitta and F. Bergadano and F. Brancadori and D. De Marchi
@article{giordana.ea:enigma-system:93,
  author  = {A. Giordana and L. Saitta and F. Bergadano and F.
             Brancadori and D. De Marchi},
  title   = {{ENIGMA}: {A} System that Learns Diagnostic
             Knowledge},
  journal = {IEEE Transactions on Knowledge and Data Engineering},
  year    = {1993},
  month   = feb,
  volume  = {5},
  number  = {1},
  pages   = {15--28},
}

Statistical Inference and Data Mining, Clark Glymour and David Madigan and Daryl Pregibon and Padhraic Smyth
@Article{glymour.ea:statistical-inference:96,
  author =       "Clark Glymour and David Madigan and Daryl Pregibon and
                 Padhraic Smyth",
  title =        "Statistical Inference and Data Mining",
  journal =      "Communications of the ACM",
  volume =       "39",
  number =       "11",
  pages =        "35--41",
  month =        nov,
  year =         "1996",
  ISSN =         "0001-0782",
  affiliation =  "Carnegie Mellon Univ, Pittsburgh, Pa, 15213; Univ Calif
                 San Diego, La Jolla, Ca, 92093; Washington Univ,
                 Seattle, Wa; AT\&T Bell Labs, Murray Hill, Nj, 07974",
}

Knowledge discovery in deductive databases with large deduction results: the first step, C. L. Goh and M. Tsukamoto and S. Nishio
@Article{goh.ea:deductive-with:96,
  author =       "C. L. Goh and M. Tsukamoto and S. Nishio",
  title =        "Knowledge discovery in deductive databases with large
                 deduction results: the first step",
  journal =      "IEEE Transactions on Knowledge and Data Engineering",
  volume =       "8",
  number =       "6",
  pages =        "952--956",
  month =        dec,
  year =         "1996",
  affiliation =  "Osaka Univ, Fac Engn, Dept Informat Syst Engn, 2-1
                 Yamadaoka, Suita, Osaka 565, Japan",
  abstract =     "Deductive databases have the ability to deduce new
                 facts from a set of facts using a set of rules. They
                 are also useful in the integration of artificial
                 intelligence and database. However, when recursive
                 rules are involved, the amount of deduced facts can
                 become too large to be practically stored, viewed or
                 analyzed. This seriously hinders the usefulness of
                 deductive databases. In order to overcome this problem,
                 we propose four methods to discover characteristic
                 rules from large amount of deduction results without
                 actually having to store all the deduction results.
                 This paper presents the first step in the application
                 of knowledge discovery techniques to deductive
                 databases with large deduction results.",
  keywords =     "attribute-oriented algorithm, characteristic rule,
                 data mining, deductive database, recursive rule",
}

COGIN: Symbolic Induction with Genetic Algorithms, D. P. Greene and S. F. Smith
@InProceedings{greene.ea:cogin-symbolic:92,
  author =       "D. P. Greene and S. F. Smith",
  title =        "{COGIN}: Symbolic Induction with Genetic Algorithms",
  booktitle =    "Proc.\ of AAAI-92",
  pages =        "111--116",
  year =         "1992",
  keywords =     "GA",
}

Visualization for knowledge discovery, G. Grinstein and J. C. Sieg and S. Smith and M. G. Williams
@Article{grinstein.ea:visualization-for:92,
  crossref =     "ijis-special-issue:92",
  author =       "G. Grinstein and J. C. Sieg and S. Smith and M. G.
                 Williams",
  title =        "Visualization for knowledge discovery",
  journal =      "International Journal of Intelligent Systems",
  year =         "1992",
  volume =       "7",
  number =       "7",
  pages =        "637--648",
  affiliation =  "Univ Massachusetts, Lowell, Ma, 01854",
  abstract =     "Although the fields of data visualization and
                 automated knowledge discovery (AKD) share many goals,
                 workers in each field have been reluctant to adopt the
                 tools and methods of the other field. Many AKD
                 researchers discourage the use of visualization tools
                 because they believe that dependence on human steering
                 will impede the development of numerical or analytical
                 descriptions of complex data. Many visualization
                 researchers are concerned that their present platforms
                 are being pushed to the limits of their performance by
                 the most advanced visualization techniques and are
                 therefore unwilling to incur the perceived overhead of
                 having a database system mediate access to the data. We
                 argue that these attitudes are somewhat short-sighted
                 and that the techniques of these two communities are
                 complementary. We discuss a specific visualization
                 system that we have developed and describe the
                 obstacles that must be overcome in integrating it into
                 an AKD system.",
  annote =       "Deals with Exvis system, which produces textures of
                 icons, each icon representing up to 15 variables + X,Y
                 co-ords. Also use of sound textures.",
}

Harnessing the Human in Knowledge Discovery, Georges G. Grinstein
@InProceedings{grinstein:harnessing-human:96,
  author =       "Georges G. Grinstein",
  title =        "Harnessing the Human in Knowledge Discovery",
  pages =        "384",
  crossref =     "simoudis.ea:proceedings-second:96",
}

Data Mining and Tree-Based Optimization, Robert Grossman and Haim Bodek and Dave Northcutt and Vince Poor
@InProceedings{grossman.ea:tree-based-optimization:96,
  author =       "Robert Grossman and Haim Bodek and Dave Northcutt
                 and Vince Poor",
  title =        "Data Mining and Tree-Based Optimization",
  pages =        "323",
  crossref =     "simoudis.ea:proceedings-second:96",
}

The Terabyte Challenge: An Open, Distributed Testbed for Managing and Mining Massive Data Sets, Robert Grossman
@InProceedings{grossman:terabyte-challenge:96,
  author =       "Robert Grossman",
  title =        "The Terabyte Challenge: An Open, Distributed Testbed
                 for Managing and Mining Massive Data Sets",
  booktitle =    "CD-ROM Proceedings of Supercomputing'96",
  publisher =    "IEEE",
  address =      "Pittsburgh, PA",
  month =        nov,
  year =         "1996",
  keywords =     "contest",
}

Data-base mining - discovering new knowledge and competitive advantage, F. H. Grupe and M. M. Owrang
@Article{grupe.ea:data-base-discovering:95,
  author =       "F. H. Grupe and M. M. Owrang",
  title =        "Data-base mining - discovering new knowledge and
                 competitive advantage",
  journal =      "Information Systems Management",
  year =         "1995",
  volume =       "12",
  number =       "4",
  pages =        "26--31",
  affiliation =  "Univ Nevada, Reno, Nv, 89557; American Univ,
                 Washington, Dc, 20016",
  abstract =     "Buried in the huge data bases assembled by large
                 organizations is information useful for generating new
                 facts and relationships that can provide significant
                 competitive advantage. This article describes how data
                 base mining extracts knowledge from existing data
                 bases, data base mining applications and their
                 limitations, and bottom-line benefits.",
}

Data mining, Hypergraph Transversals, and Machine Learning, Dimitrios Gunopulos and Roni Khardon and Heikki Mannila and Hannu Toivonen
@InProceedings{gunopulos.ea:hypergraph-transversals:97,
  author =       "Dimitrios Gunopulos and Roni Khardon and Heikki
                 Mannila and Hannu Toivonen",
  title =        "Data mining, Hypergraph Transversals, and Machine
                 Learning",
  booktitle =    "Proceedings of the Sixteenth {ACM}
                 {SIGACT}-{SIGMOD}-{SIGART} Symposium on Principles of
                 Database Systems",
  address =      "Tucson, Arizona",
  month =        "12--15~" # may,
  year =         "1997",
}

Data mining - mother lode or fools gold, B. Gunter
@Article{gunter:mother-lode:96,
  author =       "B. Gunter",
  title =        "Data mining - mother lode or fools gold",
  journal =      "Quality Progress",
  year =         "1996",
  volume =       "29",
  number =       "4",
  pages =        "113",
  affiliation =  "Pob 9, Hopewell, Nj, 08525",
}

Classification trees with Neural Network Feature Extraction, Heng Guo and Saul B. Gelfand
@Article{guo.ea:classification-trees:92,
  author =       "Heng Guo and Saul B. Gelfand",
  title =        "Classification trees with Neural Network Feature
                 Extraction",
  journal =      "IEEE Transactions on Neural Networks",
  year =         "1992",
  volume =       "3",
  number =       "6",
  pages =        "923--933",
  month =        nov,
  keywords =     "Neural Nets, binary decision trees, CART",
  annote =       "Uses small multilayer nets at the decision nodes of a
                 binary classification tree. Comparison with CART",
}

Deep Knowledge Discovery from Natural Language Texts, Udo Hahn and Klemens Schnattinger
@InProceedings{hahn.ea:deep-natural:97,
  author =       "Udo Hahn and Klemens Schnattinger",
  title =        "Deep Knowledge Discovery from Natural Language Texts",
  pages =        "175",
  crossref =     "heckerman.ea:proceedings-third:97",
}

Integrating and Mining Distributed Customer Databases, Ira J. Haimowitz and Ozden Gur-Ali and Henry Schwarz
% The given name must start with an uppercase special character; a
% lowercase first word would be parsed by BibTeX as a "von" particle.
@InProceedings{haimowitz.ea:integrating-distributed:97,
  title =        "Integrating and Mining Distributed Customer
                 Databases",
  author =       "Ira J. Haimowitz and {\"{O}}zden G{\"{u}}r-Ali and
                 Henry Schwarz",
  pages =        "179",
  crossref =     "heckerman.ea:proceedings-third:97",
}

Analyzing fd inference in relational databases, J. Hale and S. Shenoi
@Article{hale.ea:analyzing-fd:96,
  author =       "J. Hale and S. Shenoi",
  title =        "Analyzing {FD} inference in relational databases",
  journal =      "Data \& Knowledge Engineering",
  year =         "1996",
  volume =       "18",
  number =       "2",
  pages =        "167--183",
  affiliation =  "Univ Tulsa, Dept Math \& Comp Sci, Keplinger Hall,
                 Tulsa, Ok, 74104; Univ Tulsa, Dept Math \& Comp Sci,
                 Tulsa, Ok, 74104",
  abstract =     "This paper deals with the general problem of analyzing
                 fuzzy inference based on functional dependencies (FDs)
                 in database relations. Fuzzy inference, the ability to
                 infer fuzzy set values, generalizes imprecise
                 (set-valued) inference and precise inference. Likewise,
                 fuzzy relational databases generalize their classical
                 and imprecise counterparts by supporting fuzzy
                 information storage and retrieval. Inference analysis
                 is performed using a special abstract model which
                 maintains vital links to classical, imprecise and fuzzy
                 relational database models. These links increase the
                 utility of the inference formalism in practical
                 applications involving ``catalytic inference
                 analysis'', including knowledge discovery and database
                 security.",
  keywords =     "DATABASE INFERENCE, FUNCTIONAL DEPENDENCIES, KNOWLEDGE
                 DISCOVERY, DATABASE SECURITY, FUZZY SETS",
}

A practical formalism for imprecise inference control, J. Hale and J. Threet and S. Shenoi
@Article{hale.ea:practical-formalism:94,
  author =       "J. Hale and J. Threet and S. Shenoi",
  title =        "A practical formalism for imprecise inference
                 control",
  journal =      "IFIP Transactions A: Computer Science and Technology",
  year =         "1994",
  volume =       "60",
  pages =        "139--156",
  affiliation =  "Univ Tulsa, Dept Math \& Comp Sci, Keplinger Hall,
                 Tulsa, Ok, 74104",
  abstract =     "This paper describes a powerful, yet practical,
                 formalism for modeling and controlling imprecise
                 FD-based inference in relational database systems. The
                 formalism provides a canonical representation of
                 inference which unifies precise inference and the
                 primitive imprecise inference mechanisms of abduction
                 and partial deduction. Whereas other imprecise
                 (partial) inference models estimate the probability of
                 making inferences, the formalism supports the analysis
                 of the actual imprecise values inferred in a database
                 extension. Imprecise inference is analyzed by
                 transforming a precise database augmented with
                 additional ``catalytic'' relations, conveying possibly
                 imprecise a priori knowledge, into an equivalent
                 imprecise database. The analysis of imprecise inference
                 and the related inference control methodology are
                 highly flexible and robust. They can be directly
                 applied to classical, MLS, and imprecise databases.
                 With minimal modifications, they also can be used in
                 knowledge discovery or database mining.",
  keywords =     "DATABASE MANAGEMENT, GENERAL, ARTIFICIAL INTELLIGENCE,
                 DEDUCTION AND THEOREM PROVING",
}

Estimating dblearns potential for knowledge discovery in databases, H. J. Hamilton and D. R. Fudger
@Article{hamilton.ea:estimating-dblearns:95,
  author =       "H. J. Hamilton and D. R. Fudger",
  title =        "Estimating {DBLEARN}'s potential for knowledge
                 discovery in databases",
  journal =      "Computational Intelligence",
  year =         "1995",
  volume =       "11",
  number =       "2",
  pages =        "280--296",
  affiliation =  "Univ Regina, Dept Comp Sci, Regina, Sk S4S 0A2,
                 Canada",
  abstract =     "We propose a procedure for estimating DBLEARN's
                 potential for knowledge discovery, given a relational
                 database and concept hierarchies. This procedure is
                 most useful for evaluating alternative concept
                 hierarchies for the same database. The DBLEARN
                 knowledge discovery program uses an attribute-oriented
                 inductive-inference method to discover potentially
                 significant high-level relationships in a database. A
                 concept forest, with at most one concept hierarchy for
                 each attribute, defines the possible generalizations
                 that DBLEARN can make for a database. The potential for
                 discovery in a database is estimated by examining the
                 complexity of the corresponding concept forest. Two
                 heuristic measures are defined based on the number,
                 depth, and height of the interior nodes. Higher values
                 for these measures indicate more complex concept
                 forests and arguably more potential for discovery.
                 Experimental results using a variety of concept forests
                 and four commercial databases show that in practice
                 both measures permit quite reliable decisions to be
                 made; thus, the simplest may be most appropriate.",
  keywords =     "KNOWLEDGE DISCOVERY, CONCEPT HIERARCHIES, DISCOVERY
                 POTENTIAL, DATABASES, MACHINE LEARNING",
}

Knowledge Discovery in Databases: An Attribute-oriented Approach, Jiawei Han and Yandong Cai and Nick Cercone
@InProceedings{han.ea:attribute-oriented-approach:92,
  author =       "Jiawei Han and Yandong Cai and Nick Cercone",
  title =        "Knowledge Discovery in Databases: An
                 Attribute-oriented Approach",
  booktitle =    "Proceedings of the 18th {VLDB} Conference",
  address =      "Vancouver, British Columbia, Canada",
  pages =        "547--559",
  month =        aug,
  year =         "1992",
  keywords =     "dblearn",
  annote =       "simple hierarchies are used to generate attribute
                 summaries",
}

Data-driven discovery of quantitative rules in relational databases, J. W. Han and Y. D. Cai and N. Cercone
@Article{han.ea:data-driven-quantitative:93,
  author =       "J. W. Han and Y. D. Cai and N. Cercone",
  title =        "Data-driven discovery of quantitative rules in
                 relational databases",
  journal =      "IEEE Transactions on Knowledge and Data Engineering",
  year =         "1993",
  month =        feb,
  volume =       "5",
  number =       "1",
  pages =        "29--40",
  affiliation =  "Simon Fraser Univ, Sch Comp Sci, Ctr Syst Sci, Burnaby
                 V5A 1S6, Bc, Canada",
  abstract =     "A quantitative rule is a rule associated with
                 quantitative information which assesses the
                 representativeness of the rule in the database. In this
                 paper, an efficient induction method is developed for
                 learning quantitative rules in relational databases.
                 With the assistance of knowledge about concept
                 hierarchies, data relevance, and expected rule forms,
                 attribute-oriented induction can be performed on the
                 database, which integrates database operations with the
                 learning process and provides a simple, efficient way
                 of learning quantitative rules from large databases.
                 Our method learns both characteristic rules and
                 classification rules. Quantitative information
                 facilitates quantitative reasoning, incremental
                 learning, and learning in the presence of noise.
                 Moreover, learning qualitative rules can be treated as
                 a special case of learning quantitative rules. Our
                 paper shows that attribute-oriented induction provides
                 an efficient and effective mechanism for learning
                 various kinds of knowledge rules from relational
                 databases.",
  keywords =     "KNOWLEDGE DISCOVERY IN DATABASES, MACHINE LEARNING,
                 ATTRIBUTE-ORIENTED INDUCTION, QUANTITATIVE RULES,
                 CHARACTERISTIC RULES, CLASSIFICATION RULES, DATA-DRIVEN
                 LEARNING ALGORITHMS",
}

DBLEARN: A Knowledge Discovery System for Large Databases, J. Han and Y. Cai and N. Cercone and Y. Huang
@InProceedings{han.ea:dblearn-system:92,
  author =       "J. Han and Y. Cai and N. Cercone and Y. Huang",
  title =        "{DBLEARN}: {A} Knowledge Discovery System for Large
                 Databases",
  booktitle =    "Int. Conf. on Information and Knowledge Management,
                 Baltimore",
  year =         "1992",
  month =        nov,
}

DBLearn: A System Prototype for Knowledge Discovery in Relational Databases, Jiawei Han and Yongjian Fu and Yue Huang and Yandong Cai and N. Cercone
@Article{han.ea:dblearn-system:94,
  author =       "Jiawei Han and Yongjian Fu and Yue Huang and Yandong
                 Cai and N. Cercone",
  title =        "{DBLearn}: {A} System Prototype for Knowledge
                 Discovery in Relational Databases",
  journal =      "SIGMOD Record (ACM Special Interest Group on
                 Management of Data)",
  volume =       "23",
  number =       "2",
  pages =        "516",
  month =        jun,
  year =         "1994",
  ISSN =         "0163-5808",
  affiliation =  "Sch. of Comput. Sci., Simon Fraser Univ., Burnaby, BC,
                 Canada",
  classification = "C6160D (Relational DBMS); C6160K (Deductive
                 databases)",
  keywords =     "DBLearn; System prototype; Knowledge discovery;
                 Relational databases; Data mining system; Knowledge
                 rule extraction; High level learning interfaces;
                 Automatic refinement; Concept hierarchies; Efficient
                 discovery algorithms; Performance; Knowledge mining;
                 Object-oriented databases; Deductive databases; Spatial
                 databases",
  thesaurus =    "Deductive databases; Knowledge acquisition; Relational
                 databases; Very large databases",
  xxcrossref =   "Anonymous:1994:ASI",
}

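DBMiner: Interactive Mining of Multiple-Level Knowledge in Relational Databases, Jiawei Han and Yongjian Fu and Wei Wang and Jenny Chiang and Osmar R. Zaiane and Krzysztof Koperski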
@InProceedings{han.ea:dbminer-interactive:96,
  author =       "Jiawei Han and Yongjian Fu and Wei Wang and Jenny
                 Chiang and Osmar R. Za{\"\i}ane and Krzysztof
                 Koperski",
  title =        "{DBMiner}: Interactive Mining of Multiple-Level
                 Knowledge in Relational Databases",
  editor =       "H. V. Jagadish and Inderpal Singh Mumick",
  booktitle =    "Proceedings of the 1996 {ACM} {SIGMOD} International
                 Conference on Management of Data",
  address =      "Montreal, Quebec, Canada",
  pages =        "550",
  month =        "4--6~" # jun,
  year =         "1996",
}

DBMiner: A System for Mining Knowledge in Large Relational Databases, Jiawei Han and Yongjian Fu and Wei Wang and Jenny Chiang and Wan Gong and Krzysztof Koperski and Deyi Li and Yijun Lu and Amynmohamed Rajan and Nebojsa Stefanovic and Betty Xia and Osmar R. Zaiane
@InProceedings{han.ea:dbminer-system:96,
  title =        "{DBMiner}: {A} System for Mining Knowledge in Large
                 Relational Databases",
  pages =        "250",
  author =       "Jiawei Han and Yongjian Fu and Wei Wang and Jenny
                 Chiang and Wan Gong and Krzysztof Koperski and Deyi Li
                 and Yijun Lu and Amynmohamed Rajan and Nebojsa
                 Stefanovic and Betty Xia and Osmar R. Za{\"\i}ane",
  crossref =     "simoudis.ea:proceedings-second:96",
}

Intelligent query answering by knowledge discovery techniques, J. W. Han and Y. Huang and N. Cercone and Y. J. Fu
@Article{han.ea:intelligent-query:96,
  author =       "J. W. Han and Y. Huang and N. Cercone and Y. J. Fu",
  title =        "Intelligent query answering by knowledge discovery
                 techniques",
  journal =      "IEEE Transactions on Knowledge and Data Engineering",
  year =         "1996",
  volume =       "8",
  number =       "3",
  pages =        "373--390",
  affiliation =  "Simon Fraser Univ, Sch Comp Sci, Burnaby, Bc V5A 1S6,
                 Canada; Univ Regina, Dept Comp Sci, Regina, Sk S4S 0A2,
                 Canada",
  abstract =     "Knowledge discovery facilitates querying database
                 knowledge and intelligent query answering in database
                 systems. In this paper, we investigate the application
                 of discovered knowledge, concept hierarchies, and
                 knowledge discovery tools for intelligent query
                 answering in database systems. A knowledge-rich data
                 model is constructed to incorporate discovered
                 knowledge and knowledge discovery tools. Queries are
                 classified into data queries and knowledge queries.
                 Both types of queries can be answered directly by
                 simple retrieval or intelligently by analyzing the
                 intent of query and providing generalized, neighborhood
                 or associated information using stored or discovered
                 knowledge. Techniques have been developed for
                 intelligent query answering using discovered knowledge
                 and/or knowledge discovery tools, which includes
                 generalization, data summarization, concept clustering,
                 rule discovery, query rewriting, deduction, lazy
                 evaluation, application of multiple-layered databases,
                 etc. Our study shows that knowledge discovery
                 substantially broadens the spectrum of intelligent
                 query answering and may have deep implications on query
                 answering in data- and knowledge-base systems.",
  keywords =     "RELATIONAL DATABASES, MODEL, DATABASE AND
                 KNOWLEDGE-BASE SYSTEMS, KNOWLEDGE DISCOVERY IN
                 DATABASES, KNOWLEDGE-RICH DATA MODEL, INTELLIGENT QUERY
                 ANSWERING, MULTIPLE LAYERED DATABASES, QUERY ANALYSIS
                 AND QUERY PROCESSING",
}

Mining Multi-Dimensional Association Rules Using Data Cubes, Jiawei Han and Micheline Kamber and Jenny Chiang
Available as
compressed postscript.
% Technical reports need the "institution" field; standard styles do
% not read "school" for this entry type.
@TechReport{han.ea:multi-dimensional-association:97,
  number =       "TR 97-06",
  author =       "Jiawei Han and Micheline Kamber and Jenny Chiang",
  title =        "Mining Multi-Dimensional Association Rules Using Data
                 Cubes",
  month =        feb,
  year =         "1997",
  org =          "SFU-CMPT",
  institution =  "School of Computing Science, Simon Fraser University",
  abstract =     "Methods for mining association rules have been studied
                 extensively. However, most previous studies have been
                 confined to the mining of single dimensional and single
                 variable association rules. There are applications in
                 relational databases and data warehouses which require
                 the mining of multi-dimensional association rules. In
                 this paper, we study efficient methods for mining
                 multi-dimensional association rules using a data cube
                 structure, a popular data structure used in data
                 warehouses. Efficient algorithms are developed for
                 mining multi-dimensional association rules by either
                 using an existing data cube, when available, or
                 construction of a data cube on the fly. In both cases,
                 the algorithms outperform the direct application of a
                 table-based Apriori algorithm to the mining of
                 multi-dimensional association rules. The extension of
                 the method for mining multi-level, multi-dimensional
                 association rules and meta-rule guided mining is also
                 discussed in the paper.",
  URL =          "ftp://fas.sfu.ca/pub/cs/TR/1997/CMPT97-06.ps.Z",
}

Discovery of Multiple-Level Association Rules from Large Databases, Jiawei Han and Yongjian Fu
Available as
compressed postscript.
% Technical reports need the "institution" field; standard styles do
% not read "school" for this entry type.
@TechReport{han.ea:multiple-level-association:95,
  number =       "TR 95-05",
  author =       "Jiawei Han and Yongjian Fu",
  title =        "Discovery of Multiple-Level Association Rules from
                 Large Databases",
  month =        mar,
  year =         "1995",
  org =          "SFU-CMPT",
  institution =  "School of Computing Science, Simon Fraser University",
  pages =        "35",
  abstract =     "Discovery of association rules from large databases
                 has been a focused topic recently in the research into
                 database mining. Previous studies discover association
                 rules at a single concept level, however, mining
                 association rules at multiple concept levels may lead
                 to finding more informative and refined knowledge from
                 data. In this paper, we study efficient methods for
                 mining multiple-level association rules from large
                 transaction databases. A top-down progressive deepening
                 method is proposed by extension of some existing
                 (single-level) association rule mining algorithms. In
                 particular, a group of algorithms for mining
                 multiple-level association rules are developed and
                 their relative performance are tested on different
                 kinds of transaction data. Relaxation of the rule
                 conditions for finding flexible multiple-level
                 association rules is also discussed. Our study shows
                 that efficient algorithms can be developed for the
                 discovery of interesting and strong multiple-level
                 association rules from large databases.",
  URL =          "ftp://ftp.fas.sfu.ca/pub/cs/techreports/1995/CMPT95-05.ps.Z",
}

Resource and Knowledge Discovery in Global Information Systems: A Multiple Layered Database Approach, Jiawei Han and Osmar R. Zaiane and Yongjian Fu
Available as
compressed postscript.
% Technical reports need the "institution" field; standard styles do
% not read "school" for this entry type.
@TechReport{han.ea:resource-global:94,
  number =       "TR 94-10",
  author =       "Jiawei Han and Osmar R. Zaiane and Yongjian Fu",
  title =        "Resource and Knowledge Discovery in Global Information
                 Systems: {A} Multiple Layered Database Approach",
  month =        nov,
  year =         "1994",
  org =          "SFU-CMPT",
  institution =  "School of Computing Science, Simon Fraser University",
  pages =        "30",
  keywords =     "Resource Discovery, Knowledge Discovery, Data Mining,
                 Multiple Layered Database, Internet, World Wide Web,
                 Global Information Network",
  abstract =     "With huge amounts of information connected to the
                 global information network (Internet), efficient and
                 effective discovery of resource and knowledge from the
                 ``global information base'' has become an imminent
                 research issue, especially with the advent of the
                 Information Highway. In this article, a multiple
                 layered database (MLDB) approach is proposed to handle
                 the resource and knowledge discovery in global
                 information base. A multiple layered database is a
                 database formed by generalization and transformation of
                 the information, layer-by-layer, starting from the
                 original information base (treated as layer-0, the
                 primitive layer). Information retrieval, data mining,
                 and data analysis techniques can be used to extract and
                 transform information from a lower layer database to a
                 higher one. Layer-1 and higher layers of an MLDB can be
                 modeled by an extended-relational or object-oriented
                 model, constructed automatically, and updated
                 incrementally. Information at all the layers except the
                 primitive one can be stored, managed and retrieved by
                 the available database technology; resources can be
                 found by controlled search through different layers of
                 the database; and knowledge discovery can be performed
                 efficiently in such a multiple layered database.",
  URL =          "ftp://ftp.fas.sfu.ca/pub/cs/techreports/1994/CMPT94-10.ps.Z",
  note =         "(also CSS/LCCR TR94-24)",
}

Data Mining Techniques, Jiawei Han
@InProceedings{han:techniques:96,
  author =       "Jiawei Han",
  title =        "Data Mining Techniques",
  editor =       "H. V. Jagadish and Inderpal Singh Mumick",
  booktitle =    "Proceedings of the 1996 {ACM} {SIGMOD} International
                 Conference on Management of Data",
  address =      "Montreal, Quebec, Canada",
  pages =        "545",
  month =        "4--6~" # jun,
  year =         "1996",
}

Towards efficient induction mechanisms in database-systems, J. W. Han
@Article{han:towards-efficient:94,
  author =       "J. W. Han",
  title =        "Towards efficient induction mechanisms in
                 database-systems",
  journal =      "Theoretical Computer Science",
  year =         "1994",
  volume =       "133",
  number =       "2",
  pages =        "361--385",
  affiliation =  "Simon Fraser Univ, Sch Comp Sci, Burnaby V5A 1S6, Bc,
                 Canada",
  abstract =     "With the wide availability of huge amounts of data in
                 database systems, the extraction of knowledge in
                 databases by efficient and powerful induction or
                 knowledge discovery mechanisms has become an important
                 issue in the construction of new generation database
                 and knowledge-base systems. In this article, an
                 attribute-oriented induction method for knowledge
                 discovery in databases is investigated, which provides
                 an efficient, set-oriented induction mechanism for
                 extraction of different kinds of knowledge rules, such
                 as characteristic rules, discriminant rules, data
                 evolution regularities and high level dependency rules
                 in large relational databases. Our study shows that the
                 method is robust in the existence of noise and database
                 updates, is extensible to knowledge discovery in
                 advanced and/or special purpose databases, such as
                 object-oriented databases, active databases, spatial
                 databases, etc., and has wide applications.",
}

Mega-Classification: Discovering Motifs in Massive Datastreams, N. L. Harris and L. Hunter and D. J. States
@inproceedings{harris.ea:mega-classification-discovering:92,
  author    = {Harris, N. L. and Hunter, L. and States, D. J.},
  title     = {Mega-Classification: Discovering Motifs in Massive Datastreams},
  booktitle = {Proc.\ of AAAI-92},
  pages     = {837--842},
  year      = {1992},
}

Knowledge discovery from telecommunication network alarm databases, K. Hatonen and M. Klemettinen and H. Mannila and P. Ronkainen and H. Toivonen
Available as
hypertext.
% NOTE(review): the abstract may be truncated in this collection; the closing
% delimiters were restored so that this entry and the following ones parse.
@inproceedings{hatonen.ea:telecommunication-network:96,
  author      = {Hatonen, K. and Klemettinen, M. and Mannila, H. and
    Ronkainen, P. and Toivonen, H.},
  title       = {Knowledge discovery from telecommunication network
    alarm databases},
  editor      = {Su, Stanley Y. W.},
  booktitle   = {Proceedings of the twelfth International Conference on
    Data Engineering, February 26--March 1, 1996, New
    Orleans, Louisiana},
  publisher   = {IEEE Computer Society Press},
  address     = {1109 Spring Street, Suite 300, Silver Spring, MD
    20910, USA},
  year        = {1996},
  pages       = {115--122},
  affiliation = {Dept. of Comput. Sci., Helsinki Univ., Finland},
  keywords    = {Knowledge discovery, data mining, frequent episodes,
    telecommunication alarm databases, telecommunication
    network management, SGML},
  URL         = {http://www.cs.Helsinki.FI/research/pmdm/datamining/ICDE96.html},
  abstract    = {A telecommunication network produces daily large
    amounts of alarm data. The data contains hidden
    valuable knowledge about the behavior of the network.
    This knowledge can be used in filtering redundant
    alarms, locating problems in the network, and possibly
    in predicting severe faults. We describe the TASA
    (Telecommunication Network Alarm Sequence Analyzer)
    system for discovering and browsing knowledge from
    large alarm databases.},
}

Learning qualitative models of dynamic systems, D. T. Hau and E. W. Coiera
@article{hau.ea:learning-qualitative:97,
  author   = {Hau, D. T. and Coiera, E. W.},
  title    = {Learning qualitative models of dynamic systems},
  journal  = {Machine Learning},
  year     = {1997},
  volume   = {26},
  number   = {2--3},
  pages    = {177--211},
  address  = {Johns Hopkins Univ, Sch Med, Baltimore, Md, 21205
    Hewlett Packard Labs, Bristol Bs12 6Qz, Avon, England},
  abstract = {The automated construction of dynamic system models is
    an important application area for ILP. We describe a
    method that learns qualitative models from time-varying
    physiological signals. The goal is to understand the
    complexity of the learning task when faced with
    numerical data, what signal processing techniques are
    required, and how this affects learning. The
    qualitative representation is based on Kuipers' QSIM.
    The learning algorithm for model construction is based
    on Coiera's GENMODEL. We show that QSIM models are
    efficiently PAC learnable from positive examples only,
    and that GENMODEL is an ILP algorithm for efficiently
    constructing a QSIM model. We describe both GENMODEL
    which performs RLGG on qualitative states to learn a
    QSIM model, and the front-end processing and segmenting
    stages that transform a signal into a set of
    qualitative states. Next we describe results of
    experiments on data from six cardiac bypass patients.
    Useful models were obtained, representing both normal
    and abnormal physiological states. Model variation
    across time and across different levels of temporal
    abstraction and fault tolerance is explored. The
    assumption made by many previous workers that the
    abstraction of examples from data can be separated from
    the learning task is not supported by this study.
    Firstly, the effects of noise in the numerical data
    manifest themselves in the qualitative examples.
    Secondly, the models learned are directly dependent on
    the initial qualitative abstraction chosen.},
  keywords = {SIMULATION, inductive logic programming, qualitative
    modelling, system identification, PAC learning,
    physiological modelling, cardiovascular system, data
    mining, patient monitoring},
}

Digging For Gold, Emma Haughton
@article{haughton:digging-gold:94,
  author   = {Haughton, Emma},
  title    = {Digging For Gold},
  journal  = {Computing},
  month    = jan,
  year     = {1994},
  pages    = {20--21},
  keywords = {Logica, applications, neural networks, Data Mariner,
    Netmap, 4thought, autonet, recognition research},
}

KDD for Science Data Analysis: Issues and Examples, Usama Fayyad and David Haussler and Paul Stolorz
@inproceedings{haussler.ea:kdd-science:96,
  author   = {Fayyad, Usama and Haussler, David and Stolorz, Paul},
  title    = {{KDD} for Science Data Analysis: Issues and Examples},
  pages    = {50},
  crossref = {simoudis.ea:proceedings-second:96},
}

Quantifying Inductive Bias: AI Learning Algorithms and Valiant's Learning Framework, D. Haussler
@article{haussler:quantifying-inductive:88,
  author  = {Haussler, D.},
  title   = {Quantifying Inductive Bias: {AI} Learning Algorithms
    and Valiant's Learning Framework},
  journal = {Artificial Intelligence},
  volume  = {36},
  number  = {2},
  pages   = {177--221},
  month   = sep,
  year    = {1988},
}

Proceedings of the Third International Conference on Knowledge Discovery and Data Mining (KDD-97), David Heckerman and Heikki Mannila and Daryl Pregibon and Ramasamy Uthurusamy (Eds)
% booktitle duplicates title so that entries using this one as a crossref
% parent inherit a booktitle under classic BibTeX (title is not mapped).
@proceedings{heckerman.ea:proceedings-third:97,
  editor    = {Heckerman, David and Mannila, Heikki and Pregibon, Daryl
    and Uthurusamy, Ramasamy},
  title     = {Proceedings of the Third International Conference on
    Knowledge Discovery and Data Mining ({KDD}-97)},
  booktitle = {Proceedings of the Third International Conference on
    Knowledge Discovery and Data Mining ({KDD}-97)},
  publisher = {AAAI Press},
  year      = {1997},
}

The Data Gold Rush --- Here's how corporations, researchers, and scientists are using data-mining techniques to discover everything from new customers to new galaxies, Sara Reese Hedberg
@article{hedberg:gold-rush:95,
  author  = {Hedberg, Sara Reese},
  title   = {The Data Gold Rush --- Here's how corporations,
    researchers, and scientists are using data-mining
    techniques to discover everything from new customers to
    new galaxies},
  journal = {Byte Magazine},
  year    = {1995},
  month   = oct,
  volume  = {20},
  number  = {10},
  pages   = {83--??},
  ISSN    = {0360-5280},
}

Parallelism speeds data mining, S. R. Hedberg
@article{hedberg:parallelism-speeds:95,
  author  = {Hedberg, S. R.},
  title   = {Parallelism speeds data mining},
  journal = {IEEE Parallel \& Distributed Technology},
  year    = {1995},
  volume  = {3},
  number  = {4},
  pages   = {3--6},
  address = {Zymogenet Inc, 1201 Eastlake Ave E, Seattle, Wa,
    98102},
}

Searching for the mother lode - tales of the first data miners, S. R. Hedberg
@article{hedberg:searching-mother:96,
  author  = {Hedberg, S. R.},
  title   = {Searching for the mother lode - tales of the first
    data miners},
  journal = {IEEE Expert-Intelligent Systems \& Their
    Applications},
  year    = {1996},
  volume  = {11},
  number  = {5},
  pages   = {4--7},
  address = {Mit, Spoken Language Syst Grp, Cambridge, Ma, 02139},
}

Troubleshooting CFM 56-3 Engines for the Boeing 737 --- Using CBR and Data-Mining, R. Heider
@article{heider:troubleshooting-cfm:96,
  author  = {Heider, R.},
  title   = {Troubleshooting {CFM} 56-3 Engines for the {Boeing
    737} --- Using {CBR} and Data-Mining},
  journal = {Lecture Notes in Computer Science},
  year    = {1996},
  volume  = {1168},
  pages   = {512--??},
  ISSN    = {0302-9743},
}

GA-Based Rule Enhancement in Concept Learning, Jukka Hekanaho
@inproceedings{hekanaho:ga-based-rule:97,
  author   = {Hekanaho, Jukka},
  title    = {{GA}-Based Rule Enhancement in Concept Learning},
  crossref = {heckerman.ea:proceedings-third:97},
  pages    = {183},
}

Target-Independent Mining for Scientific Data: Capturing Transients and Trends for Phenomena Mining, Thomas H. Hinke and John Rushing and Heggere Ranganath and Sara J. Graves
@inproceedings{hinke.ea:target-independent-scientific:97,
  author   = {Hinke, Thomas H. and Rushing, John and Ranganath, Heggere
    and Graves, Sara J.},
  title    = {Target-Independent Mining for Scientific Data:
    Capturing Transients and Trends for Phenomena Mining},
  crossref = {heckerman.ea:proceedings-third:97},
  pages    = {187},
}

Zeta: A Global Method for Discretization of Continuous Variables, K. M. Ho and P. D. Scott
@inproceedings{ho.ea:zeta-global:97,
  author   = {Ho, K. M. and Scott, P. D.},
  title    = {Zeta: {A} Global Method for Discretization of
    Continuous Variables},
  crossref = {heckerman.ea:proceedings-third:97},
  pages    = {191},
}

Knowledge Discovery in RNA Sequence Families of HIV Using Scalable Computers, Ivo L. Hofacker and Martijn A. Huynen and Peter F. Stadler and Paul E. Stolorz
@inproceedings{hofacker.ea:rna-sequence:96,
  author   = {Hofacker, Ivo L. and Huynen, Martijn A. and Stadler, Peter F.
    and Stolorz, Paul E.},
  title    = {Knowledge Discovery in {RNA} Sequence Families of
    {HIV} Using Scalable Computers},
  crossref = {simoudis.ea:proceedings-second:96},
  pages    = {20},
}

Inferring Hierarchical Clustering Structures by Deterministic Annealing, Thomas Hofmann and Joachim M. Buhmann
@inproceedings{hofmann.ea:inferring-hierarchical:96,
  author   = {Hofmann, Thomas and Buhmann, Joachim M.},
  title    = {Inferring Hierarchical Clustering Structures by
    Deterministic Annealing},
  crossref = {simoudis.ea:proceedings-second:96},
  pages    = {363},
}

Induction: processes of inference, learning and discovery, John H. Holland and Keith J. Holyoak and Richard E. Nisbett and Paul R. Thagard
@book{holland.ea:induction-processes:86,
  author    = {Holland, John H. and Holyoak, Keith J. and Nisbett, Richard E.
    and Thagard, Paul R.},
  title     = {Induction: processes of inference, learning and
    discovery},
  series    = {Computational models of cognition and perception},
  publisher = {MIT Press},
  address   = {Cambridge},
  year      = {1986},
}

Adaptation in natural and artificial systems, John H. Holland
@book{holland:adaptation-natural:75,
  author    = {Holland, John H.},
  title     = {Adaptation in natural and artificial systems},
  publisher = {University of Michigan Press},
  address   = {Ann Arbor},
  year      = {1975},
}

Escaping brittleness: the possibilities of general purpose algorithms applied to parallel rule-based systems, John H. Holland
@incollection{holland:escaping-brittleness:86,
  author   = {Holland, John H.},
  title    = {Escaping brittleness: the possibilities of general
    purpose algorithms applied to parallel rule-based
    systems},
  pages    = {593--623},
  crossref = {michalski.ea:machine-learning:86},
}

Architectural Support for Data Mining, Marcel Holsheimer and Martin L. Kersten
Available as
compressed postscript.
% NOTE(review): number and year are inferred from the CWI report id CS-R9429
% in the URL (CS-RYYNN numbering, as in CS-R9406 from 1994) -- confirm.
@techreport{holsheimer.ea:architectural-support:,
  author      = {Holsheimer, Marcel and Kersten, Martin L.},
  title       = {Architectural Support for Data Mining},
  institution = {CWI Amsterdam},
  address     = {PO Box 94079, 1090 GB, Amsterdam, The Netherlands},
  number      = {CS-R9429},
  year        = {1994},
  URL         = {ftp://ftp.cwi.nl/pub/CWIreports/AA/CS-R9429.ps.Z},
  abstract    = {One of the main obstacles in applying data mining
    techniques to large, real-world databases is the lack
    of efficient data management. In this paper, we present
    the design and implementation of an effective two-level
    architecture for a data mining environment. It consists
    of a mining tool and a parallel DBMS server. The mining
    tool organizes and controls the search process, while
    the DBMS provides optimal response times for the few
    query types being used by the tool. Key elements of our
    architecture are its use of fast and simple database
    operations, its re-use of results obtained by previous
    queries, its maximal use of main-memory to keep the
    database hot-set resident, and its parallel computation
    of queries.

    Apart from a clear separation of responsibilities, we show
    that this architecture leads to competitive performance on
    large data sets. Moreover, this architecture provides a
    flexible experimentation platform for further studies in
    optimization of repetitive database queries and quality
    driven rule discovery schemes.

    CR subject classification (1991): Data storage
    representations (E.2), Database systems (H.2.4) parallel
    systems, query processing, Information search and retrieval
    (H.3.3), Learning (I.2.6) induction, knowledge acquisition

    Keywords \& Phrases: data mining, parallel databases,
    inductive learning, knowledge discovery in databases},
}

A perspective on databases and data mining, Marcel Holsheimer and Martin L. Kersten and Heikki Mannila and Hannu Toivonen
Available as
compressed postscript.
% CWI report CS-R9531: typed as a technical report (like the other CWI
% reports in this file); the ISSN moved out of the address field.
@techreport{holsheimer.ea:perspective-on:95,
  author      = {Holsheimer, Marcel and Kersten, Martin L. and Mannila, Heikki
    and Toivonen, Hannu},
  title       = {A perspective on databases and data mining},
  institution = {Centrum voor Wiskunde en Informatica (CWI)},
  number      = {CS-R9531},
  pages       = {10},
  month       = apr # " 30",
  year        = {1995},
  ISSN        = {0169-118X},
  keywords    = {Association rules, database techniques, generalization
    hierarchies.},
  URL         = {ftp://ftp.cwi.nl/pub/CWIreports/AA/CS-R9531.ps.Z},
  abstract    = {We discuss the use of database methods for data
    mining. Recently impressive results have been achieved
    for some data mining problems using highly specialized
    and clever data structures. We study how well one can
    manage by using general purpose database management
    systems. We illustrate our ideas by investigating the
    use of a dbms for a well-researched area: the discovery
    of association rules. We present a simple algorithm,
    consisting of only union and intersection operations,
    and show that it achieves quite good performance on an
    efficient dbms. Our method can incorporate inheritance
    hierarchies to the association rule algorithm easily.
    We also present a technique that effectively reduces
    the number of database operations when searching large
    search spaces that contain only few interesting items.
    Our work shows that database techniques are promising
    for data mining: general architectures can achieve
    reasonable results.},
  note        = {AA (Department of Algorithmics and Architecture)},
  annote      = {Originally contained the following fields and values -
    booktitle, 128, note,CS-R9531},
}

Data Mining, The Search for Knowledge in Databases, Marcel Holsheimer and Arno Siebes
Available as
compressed postscript.
% NOTE(review): the abstract may be truncated in this collection; the closing
% delimiters were restored so that this entry and the following ones parse.
@techreport{holsheimer.ea:search:94,
  author      = {Holsheimer, Marcel and Siebes, Arno},
  title       = {Data Mining, The Search for Knowledge in Databases},
  institution = {CWI, Amsterdam},
  address     = {PO Box 94079, 1090 GB Amsterdam, The Netherlands},
  type        = {Report},
  number      = {CS-R9406},
  year        = {1994},
  pages       = {88},
  keywords    = {Data Mining, Knowledge Engineering, Database
    applications, Information search and Retrieval.},
  URL         = {ftp://ftp.cwi.nl/pub/CWIreports/AA/CS-R9406.ps.Z},
  annote      = {Large (88 pages), Includes information on ID3, AQ15,
    CN2, DBLearn, Meta-Dendral, Radix/Rx, Bacon and KEDS.},
  abstract    = {Data mining is the search for relationships and global
    patterns that exist in large databases, but are
    `hidden' among the vast amounts of data, such as a
    relationship between patient data and their medical
    diagnosis. These relationships represent valuable
    knowledge about the database and objects in the
    database and, if the database is a faithful mirror, of
    the real world registered by the database.},
}

Extraction and applications of statistical relationships in relational databases, W. C. Hou
@article{hou:extraction-applications:96,
  author   = {Hou, W. C.},
  title    = {Extraction and applications of statistical
    relationships in relational databases},
  journal  = {IEEE Transactions on Knowledge and Data Engineering},
  year     = {1996},
  volume   = {8},
  number   = {6},
  pages    = {939--945},
  address  = {So Illinois Univ, Dept Comp Sci, Carbondale, Il,
    62901},
  abstract = {In this paper, we discuss modeling and extraction of
    statistical relationships among attributes. Different
    methods are used for extraction of different types of
    relationships. A complete methodology for extraction is
    developed by integrating widely accepted statistical
    methods. Statistical relationships manifest embedded
    relationships in data and thus lend themselves
    naturally to estimating unknown attribute values and
    detecting unlikely values. We will carefully examine
    these applications and evaluate the usefulness of
    statistical relationships in these applications using a
    real-life database.},
  keywords = {data mining, estimating unknown attribute values,
    integration of data mining techniques, integrity
    constraints, knowledge discovery in databases,
    statistical relationships},
}

Set-Oriented Mining for Association Rules in Relational Databases, M. Houtsma and A. Swami
@inproceedings{houtsma.ea:set-oriented-association:95,
  author    = {Houtsma, M. and Swami, A.},
  title     = {Set-Oriented Mining for Association Rules in
    Relational Databases},
  editor    = {Yu, P. S. and Chen, A. L. P.},
  booktitle = {Proceedings of the 11th International Conference on
    Data Engineering},
  publisher = {IEEE Computer Society Press},
  address   = {Los Alamitos, CA, USA},
  month     = mar,
  year      = {1995},
  pages     = {25--34},
}

Set-oriented data mining in relational databases, M. Houtsma and A. Swami
@article{houtsma.ea:set-oriented-relational:95,
  author   = {Houtsma, M. and Swami, A.},
  title    = {Set-oriented data mining in relational databases},
  journal  = {Data \& Knowledge Engineering},
  year     = {1995},
  volume   = {17},
  number   = {3},
  pages    = {245--262},
  address  = {Telemat Res Ctr, Pob 217, 7500 Ae Enschede,
    Netherlands Univ Twente, 7500 Ae Enschede, Netherlands
    Ibm Corp, Almaden Res Ctr, San Jose, Ca},
  abstract = {Data mining is an important real-life application for
    businesses. It is critical to find efficient ways of
    mining large data sets. In order to benefit from the
    experience with relational databases, a set-oriented
    approach to mining data is needed. In such an approach,
    the data mining operations are expressed in terms of
    relational or set-oriented operations. Query
    optimization technology can then be used for efficient
    processing. In this paper, we describe set-oriented
    algorithms for mining association rules. Such
    algorithms imply performing multiple joins and thus may
    appear to be inherently less efficient than
    special-purpose algorithms. We develop new algorithms that can
    be expressed as SQL queries, and discuss optimization
    of these algorithms. After analytical evaluation, an
    algorithm named SETM emerges as the algorithm of
    choice. Algorithm SETM uses only simple database
    primitives, viz., sorting and merge-scan join.
    Algorithm SETM is simple, fast, and stable over the
    range of parameter values. It is easily parallelized
    and we suggest several additional optimizations. The
    set-oriented nature of Algorithm SETM makes it possible
    to develop extensions easily and its performance makes
    it feasible to build interactive data mining tools for
    large databases.},
  keywords = {DATA MINING, OPTIMIZATION, SET-ORIENTED ALGORITHMS},
}

Learning in relational databases - a rough set approach, X. H. Hu and N. Cercone
@article{hu.ea:learning-relational:95,
  author   = {Hu, X. H. and Cercone, N.},
  title    = {Learning in relational databases - a rough set
    approach},
  journal  = {Computational Intelligence},
  year     = {1995},
  volume   = {11},
  number   = {2},
  pages    = {323--338},
  address  = {Univ Regina, Dept Comp Sci, Regina, Sk S4S 0A2,
    Canada},
  abstract = {Knowledge discovery in databases, or data mining, is
    an important direction in the development of data and
    knowledge-based systems. Because of the huge amount of
    data stored in large numbers of existing databases, and
    because the amount of data generated in electronic
    forms is growing rapidly, it is necessary to develop
    efficient methods to extract knowledge from databases.
    An attribute-oriented rough set approach has been
    developed for knowledge discovery in databases. The
    method integrates machine-learning paradigm, especially
    learning-from-examples techniques, with rough set
    techniques. An attribute-oriented concept tree
    ascension technique is first applied in generalization,
    which substantially reduces the computational
    complexity of database learning processes. Then the
    cause-effect relationship among the attributes in the
    database is analyzed using rough set techniques, and
    the unimportant or irrelevant attributes are
    eliminated. Thus concise and strong rules with little
    or no redundant information can be learned efficiently.
    Our study shows that attribute-oriented induction
    combined with rough set theory provide an efficient and
    effective mechanism for knowledge discovery in database
    systems.},
  keywords = {KNOWLEDGE DISCOVERY IN DATABASES, MACHINE LEARNING,
    ROUGH SET, ATTRIBUTE-ORIENTED INDUCTION},
}

Mining Knowledge Rules from Databases: A Rough Set Approach, X. Hu and N. Cercone
@inproceedings{hu.ea:rules-rough:96,
  author    = {Hu, X. and Cercone, N.},
  title     = {Mining Knowledge Rules from Databases: {A} Rough Set
    Approach},
  booktitle = {Proceedings of the 12th International Conference on
    Data Engineering},
  publisher = {IEEE Computer Society},
  address   = {Washington - Brussels - Tokyo},
  month     = feb,
  year      = {1996},
  pages     = {96--105},
}

Knowledge Discovery in Databases: An Attribute-Oriented Rough Set Approach, Tony Xiaohua Hu
Available as
compressed postscript.
@phdthesis{hu:attribute-oriented-rough:,
  author = {Hu, Tony Xiaohua},
  title  = {Knowledge Discovery in Databases: An
    Attribute-Oriented Rough Set Approach},
  URL    = {http://www.cs.bham.ac.uk/~anp/dm_docs/tony_xiaohua_hu.ps.gz},
}

Conceptual Clustering and Concept Hierarchies in Knowledge Discovery., Xiaohua Hu
Available as
compressed postscript.
@mastersthesis{hu:conceptual-clustering:93,
  author   = {Hu, Xiaohua},
  title    = {Conceptual Clustering and Concept Hierarchies in
    Knowledge Discovery},
  school   = {Simon Fraser University},
  month    = jan,
  year     = {1993},
  URL      = {ftp://fas.sfu.ca/pub/cs/theses/1993/XiaohuaHuMSc.ps.Z},
  abstract = {Knowledge discovery is the nontrivial extraction of
    implicit, previously unknown, and potentially useful
    information from data. Knowledge discovery from a
    database is a form of machine learning where the
    discovered knowledge is represented in a high-level
    language. The growth in the size and number of existing
    databases far exceeds human abilities to analyse the
    data, which creates both a need and an opportunity for
    extracting knowledge from databases. In this thesis, I
    propose two algorithms for knowledge discovery in
    database systems. One algorithm finds knowledge rules
    associated with concepts in the different levels of the
    conceptual hierarchy; the algorithm is developed based
    on earlier attribute-oriented conceptual ascension
    techniques. The other algorithm combines a conceptual
    clustering technique and machine learning. It can find
    three kinds of rules, characteristic rules, inheritance
    rules, and domain knowledge, even in the absence of a
    conceptual hierarchy. The two algorithms are
    implemented as a component of the database learning
    system (DBLEARN) using C under Sybase/Unix environment.
    The test of DBLEARN on NSERC's grant information system
    shows that our method can discover many meaningful
    knowledge rules very quickly. The application of
    knowledge discovery in database is very wide. I will
    discuss how to apply DBLEARN to a lot of
    data-intensified areas such as Hospital's patient
    information system, customer database of telephone
    company, airplane company and bank, inventory system
    of department store and so on to find some interesting
    rules hidden among the data, and how the people in
    these companies can use these learned rules to help
    them.},
  annote   = {M.Sc Thesis. From Simon Fraser University. The
    supervisors are Nick Cercone and Jiawei Han. It
    discusses extensions to DBLearn. 88 pages.},
}

Object aggregation and cluster identification - a knowledge discovery approach, X. H. Hu
@article{hu:object-aggregation:94,
  author   = {Hu, X. H.},
  title    = {Object aggregation and cluster identification - a
    knowledge discovery approach},
  journal  = {Applied Mathematics Letters},
  year     = {1994},
  volume   = {7},
  number   = {4},
  pages    = {29--34},
  address  = {Univ Regina, Dept Comp Sci, Regina S4S 0A2,
    Saskatchewan, Canada},
  abstract = {A method for object aggregation and cluster
    identification has been proposed for knowledge
    discovery in databases. By integrating conceptual
    clustering and machine learning (especially
    learning-from-examples) paradigms, the method
    classifies the data into different clusters, extracts
    the characteristics of each cluster, and discovers
    knowledge rules based on the relationships among
    different clusters. Different kinds of knowledge rules,
    including hierarchical, equivalence and inheritance
    rules can be discovered efficiently.},
  keywords = {KNOWLEDGE DISCOVERY IN DATABASES, CONCEPTUAL
    CLUSTERING},
}

From Large to Huge: A Statistician's Reactions to KDD \& DM, Peter J. Huber
@inproceedings{huber:large-to:97,
  author   = {Huber, Peter J.},
  title    = {From Large to Huge: {A} Statistician's Reactions to
    {KDD} \& {DM}},
  crossref = {heckerman.ea:proceedings-third:97},
  pages    = {304},
}

Knowledge acquisition planning for inference from large databases., L. Hunter
% Conference paper (HICSS-23): typed as inproceedings, with the conference
% name in booktitle instead of journal.
@inproceedings{hunter:acquisition-planning:90,
  author    = {Hunter, L.},
  title     = {Knowledge acquisition planning for inference from
    large databases},
  booktitle = {Proceedings of the 23rd Hawaii International Conference
    on System Sciences ({HICSS}-23)},
  volume    = {2},
  pages     = {35--44},
  publisher = {IEEE},
  year      = {1990},
  keywords  = {AI, database data base, mining, HICSS HICSS23
    HICSS90},
}

A Radial Basis Function Approach to Financial Time Series Analysis, James M. Hutchinson
Available as
compressed postscript.
@techreport{hutchinson:radial-basis:93,
  author      = {Hutchinson, James M.},
  title       = {A Radial Basis Function Approach to Financial Time
    Series Analysis},
  institution = {Artificial Intelligence Laboratory, Massachusetts
    Institute of Technology (MIT)},
  address     = {Cambridge, Massachusetts},
  year        = {1993},
  month       = dec,
  pages       = {160},
  URL         = {ftp://publications.ai.mit.edu/ai-publications/1000-1499/AITR-1457.ps.Z},
  abstract    = {Nonlinear multivariate statistical techniques on fast
    computers offer the potential to capture more of the
    dynamics of the high dimensional, noisy systems
    underlying financial markets than traditional models,
    while making fewer restrictive assumptions. This thesis
    presents a collection of practical techniques to
    address important estimation and confidence issues for
    Radial Basis Function networks arising from such a data
    driven approach, including efficient methods for
    parameter estimation and pruning, a pointwise
    prediction error estimator, and a methodology for
    controlling the ``data mining'' problem. Novel
    applications in the finance area are described,
    including customized, adaptive option pricing and stock
    price prediction.},
}

Data Mining: Extending the Information Warehouse Framework,
Available as
datamine.
@misc{ibm:white-paper,
  key   = {ibm:white-paper},
  title = {Data Mining: Extending the Information Warehouse Framework},
  url   = {http://booksrv2.raleigh.ibm.com/cgi-bin/bookmgr/bookmgr.cmd/BOOKS/datamine},
  note  = {IBM white paper on data mining},
}

Mining Data, IceBreaker
Available as
ice_it.htm.
@Unpublished{icebreaker:mining-data:96,
  title =        "Mining Data",
  author =       "IceBreaker",
  note =         "Web page",
  URL =          "http://www.bdt.com/icemfg/ice_it.htm",
  keywords =     "Data Mining",
  month =        sep,
  year =         "1996",
}

Discovering Knowledge in Commercial Databases Using Modern Heuristic Techniques, B. de la Iglesia and J. C. W. Debuse and V. J. Rayward-Smith
@InProceedings{iglesia.ea:discovering-commercial:96,
  title =        "Discovering Knowledge in Commercial Databases Using
                 Modern Heuristic Techniques",
  pages =        "44",
  author =       "B. de la Iglesia and J. C. W. Debuse and V. J.
                 Rayward-Smith",
  crossref =     "simoudis.ea:proceedings-second:96",
}

IJIS Special issue on Knowledge Discovery in Databases and Knowledge Bases, G. Piatetsky-Shapiro (guest editor) (Ed)
@Article{ijis-special-issue:92,
  key =          "ijis-special-issue:92",
  title =        "{IJIS} Special issue on Knowledge Discovery in
                 Databases and Knowledge Bases",
  journal =      "International Journal of Intelligent Systems",
  year =         "1992",
  volume =       "7",
  number =       "7",
  month =        sep,
  editor =       "G. Piatetsky-Shapiro",
  note =         "Special issue on Knowledge Discovery in Databases and
                 Knowledge Bases, edited selection of best papers from
                 AAAI KDD-91 workshop",
  annote =       "G. Piatetsky-Shapiro was guest editor of this issue",
}

A database perspective on knowledge discovery, T. Imielinski and H. Mannila
@Article{imielinski.ea:database-perspective:96,
  author =       "T. Imielinski and H. Mannila",
  address =      "Rutgers State Univ, Dept Comp Sci, New Brunswick, Nj,
                 08903 Univ Helsinki, Fin-00014 Helsinki, Finland",
  title =        "A database perspective on knowledge discovery",
  journal =      "Communications of the ACM",
  year =         "1996",
  month =        nov,
  volume =       "39",
  number =       "11",
  pages =        "58--64",
}

DataMine: Application Programming Interface and Query Language for Database Mining, Tomasz Imielinski and Aashu Virmani and Amin Abdulghani
@InProceedings{imielinski.ea:datamine-application:96,
  title =        "{DataMine}: Application Programming Interface and
                 Query Language for Database Mining",
  pages =        "256",
  author =       "Tomasz Imielinski and Aashu Virmani and Amin
                 Abdulghani",
  crossref =     "simoudis.ea:proceedings-second:96",
}

Understanding Data Pattern Processing, W. H. Inmon and S. Osterfelt
@Book{inmon.ea:understanding-pattern:91,
  title =        "Understanding Data Pattern Processing",
  author =       "W. H. Inmon and S. Osterfelt",
  publisher =    "QED Technical Publishing Group",
  year =         "1991",
  address =      "Wellesley, MA",
  annote =       "Piatetsky : a business-oriented, nontechnical book",
}

The Data Warehouse and Data Mining, W. H. Inmon
@article{inmon:warehouse:96,
  author  = {W. H. Inmon},
  title   = {The Data Warehouse and Data Mining},
  journal = {Communications of the ACM},
  year    = {1996},
  month   = nov,
  volume  = {39},
  number  = {11},
  pages   = {49--50},
  issn    = {0001-0782},
}

Discovery of Relevant New Features by Generating Non-Linear Decision Trees, Andreas Ittner and Michael Schlosser
@inproceedings{ittner.ea:relevant-new:96,
  author   = {Andreas Ittner and Michael Schlosser},
  title    = {Discovery of Relevant New Features by Generating Non-Linear Decision Trees},
  pages    = {108},
  crossref = {simoudis.ea:proceedings-second:96},
}

Data-Driven Discovery of Quantitative Rules in Relational Databases, J. Han and Y. Cai and N. Cercone
@Article{j.ea:data-driven-quantitative:93,
  author =       "J. Han and Y. Cai and N. Cercone",
  title =        "Data-Driven Discovery of Quantitative Rules in
                 Relational Databases",
  journal =      "IEEE Transactions on Knowledge and Data Engineering",
  pages =        "29--40",
  volume =       "5",
  number =       "1",
  month =        feb,
  year =         "1993",
}

Adjusting for Multiple Comparisons in Decision Tree Pruning, David Jensen and Matt Schmill
@inproceedings{jensen.ea:adjusting-multiple:97,
  author   = {David Jensen and Matt Schmill},
  title    = {Adjusting for Multiple Comparisons in Decision Tree Pruning},
  pages    = {195},
  crossref = {heckerman.ea:proceedings-third:97},
}

SIPping from the Data Firehose, George H. John and Brian Lent
@inproceedings{john.ea:sipping-firehose:97,
  author   = {George H. John and Brian Lent},
  title    = {{SIP}ping from the Data Firehose},
  pages    = {199},
  crossref = {heckerman.ea:proceedings-third:97},
}

Static Versus Dynamic Sampling for Data Mining, George H. John and Pat Langley
@inproceedings{john.ea:static-versus:96,
  author   = {George H. John and Pat Langley},
  title    = {Static Versus Dynamic Sampling for Data Mining},
  pages    = {367},
  crossref = {simoudis.ea:proceedings-second:96},
}

Stock selection using rule induction, G. H. John and P. Miller and R. Kerber
@Article{john.ea:stock-selection:96,
  author =       "G. H. John and P. Miller and R. Kerber",
  address =      "Ibm Corp, Data Min Grp, Armonk, Ny, 10504 Stanford
                 Univ, Dept Comp Sci, Stanford, Ca, 94305 Lockheed
                 Martin Corp, Ctr Artificial Intelligence, Palo Alto,
                 Ca, 94304",
  title =        "Stock selection using rule induction",
  journal =      "IEEE Expert-Intelligent Systems \& Their
                 Applications",
  year =         "1996",
  volume =       "11",
  number =       "5",
  pages =        "52--58",
}

Genetic-algorithm-based learning, Kenneth De Jong
@InCollection{jong:genetic-algorithm-based-learning:90,
  author =       "De Jong, Kenneth",
  title =        "Genetic-algorithm-based learning",
  crossref =     "kodratoff.ea:machine-learning:90",
  pages =        "611--638",
}

Seer: Maximum Likelihood Regression for Learning-Speed Curves, Carl Myers Kadie (Ph.D. thesis)
Available as
compressed postscript.
@PhdThesis{kadie:seer-maximum:,
  author =       "Carl Myers Kadie",
  title =        "Seer: Maximum Likelihood Regression for Learning-Speed
                 Curves",
  URL =          "ftp://ftp.cs.uiuc.edu/pub/TechReports/UIUCDCS-R-95-1874.ps.Z",
  school =       "Department of Computer Science, University of Illinois
                 at Urbana-Champaign",
  year =         "1995",
  annote =       "The research presented here focuses on modeling
                 machine-learning performance",
}

Mining Generalized Term Associations: Count Propagation Algorithm, Jonghyun Kahng and Wen-Hsiang Kevin Liao and Dennis McLeod
@InProceedings{kahng.ea:generalized-term:97,
  title =        "Mining Generalized Term Associations: Count
                 Propagation Algorithm",
  author =       "Jonghyun Kahng and Wen-Hsiang Kevin Liao and Dennis
                 McLeod",
  pages =        "203",
  crossref =     "heckerman.ea:proceedings-third:97",
}

Metarule-Guided Mining of Multi-Dimensional Association Rules Using Data Cubes, Micheline Kamber and Jiawei Han and Jenny Y. Chiang
@inproceedings{kamber.ea:metarule-guided-multi-dimensional:97,
  author   = {Micheline Kamber and Jiawei Han and Jenny Y. Chiang},
  title    = {Metarule-Guided Mining of Multi-Dimensional Association Rules Using Data Cubes},
  pages    = {207},
  crossref = {heckerman.ea:proceedings-third:97},
}

Discovering functional and inclusion dependencies in relational databases, M. Kantola and H. Mannila and K. J. Raiha and H. Siirtola
@Article{kantola.ea:discovering-functional:92,
  author =       "M. Kantola and H. Mannila and K. J. Raiha and H.
                 Siirtola",
  address =      "Univ Tampere, Tampere, Finland Univ Helsinki, Sf-00100
                 Helsinki 10, Finland",
  title =        "Discovering functional and inclusion dependencies in
                 relational databases",
  journal =      "International Journal of Intelligent Systems",
  year =         "1992",
  month =        sep,
  volume =       "7",
  number =       "7",
  pages =        "591--607",
  abstract =     "We consider the problem of discovering the functional
                 and inclusion dependencies that a given database
                 instance satisfies. This technique is used in a
                 database design tool that uses example databases to
                 give feedback to the designer. If the examples show
                 deficiencies in the design, the designer can directly
                 modify the examples. The tool then infers new
                 dependencies and the database schema can be modified,
                 if necessary. The discovery of the functional and
                 inclusion dependencies can also be used in analyzing an
                 existing database. The problem of inferring functional
                 dependencies has several connections to other topics in
                 knowledge discovery and machine learning. In this
                 article we discuss the use of examples in the design of
                 databases, and give an overview of the complexity
                 results and algorithms that have been developed for
                 this problem.",
  keywords =     "DESIGN",
}

Scalable, Distributed Data Mining-An Agent Architecture, Hillol Kargupta and Ilker Hamzaoglu and Brian Stafford
@inproceedings{kargupta.ea:scalable-distributed:97,
  author   = {Hillol Kargupta and Ilker Hamzaoglu and Brian Stafford},
  title    = {Scalable, Distributed Data Mining-An Agent Architecture},
  pages    = {211},
  crossref = {heckerman.ea:proceedings-third:97},
}

Mining for Knowledge in Databases: Goals and General Description of the INLEN system, Kenneth A. Kaufman and Ryszard S. Michalski and Larry Kerschberg
@incollection{kaufman.ea:goals-general:91,
  author    = {Kenneth A. Kaufman and Ryszard S. Michalski and Larry Kerschberg},
  title     = {Mining for Knowledge in Databases: Goals and General
               Description of the {INLEN} system},
  editor    = {Gregory Piatetsky-Shapiro and William J. Frawley},
  booktitle = {Knowledge Discovery in Databases},
  edition   = {1st},
  publisher = {AAAI Press / The MIT Press},
  address   = {Menlo Park, California},
  year      = {1991},
  crossref  = {piatetsky-shapiro.ea:knowledge-discovery:91},
}

A Method for Reasoning with Structured and Continuous Attributes in the INLEN-2 Multistrategy Knowledge Discovery System, Kenneth A. Kaufman and Ryszard S. Michalski
@inproceedings{kaufman.ea:method-reasoning:96,
  author   = {Kenneth A. Kaufman and Ryszard S. Michalski},
  title    = {A Method for Reasoning with Structured and Continuous
              Attributes in the {INLEN}-2 Multistrategy Knowledge
              Discovery System},
  pages    = {232},
  crossref = {simoudis.ea:proceedings-second:96},
}

Reverse Engineering Databases for Knowledge Discovery, Stephen Mc Kearney and Huw Roberts
@InProceedings{kearney.ea:reverse-engineering:96,
  title =        "Reverse Engineering Databases for Knowledge
                 Discovery",
  pages =        "375",
  author =       "Mc Kearney, Stephen and Huw Roberts",
  crossref =     "simoudis.ea:proceedings-second:96",
}

Supporting Data Mining of Large Databases by Visual Feedback Queries, D. A. Keim and H.-P. Kriegel and T. Seidl
@TechReport{keim.ea:supporting-large:93,
  author =       "D. A. Keim and H.-P. Kriegel and T. Seidl",
  title =        "Supporting Data Mining of Large Databases by Visual
                 Feedback Queries",
  institution =  "Institute for Computer Science, University of Munich",
  address =      "Muenchen",
  year =         "1993",
  descriptor =   "Anfrage-Bearbeitung, Benutzerschnittstelle, Datenbank,
                 Feedback, Visualisierungskomponente",
}

Supporting Data Mining of Large Databases by Visual Feedback Queries, D. A. Keim and H.-P. Kriegel and T. Seidl
Available as
postscript.
@inproceedings{keim.ea:supporting-large:94,
  author    = {D. A. Keim and H.-P. Kriegel and T. Seidl},
  title     = {Supporting Data Mining of Large Databases by Visual Feedback Queries},
  editor    = {Ahmed K. Elmagarmid and Erich Neuhold},
  booktitle = {Proceedings of the 10th International Conference on Data Engineering},
  publisher = {IEEE Computer Society Press},
  address   = {Houston, TX},
  year      = {1994},
  month     = feb,
  pages     = {302--313},
  url       = {http://www.dbs.informatik.uni-muenchen.de/dbs/projekt/papers/datamining.ps},
}

Visualization techniques for mining large databases: a comparison, D. A. Keim and H. P. Kriegel
@Article{keim.ea:techniques-large:96,
  author =       "D. A. Keim and H. P. Kriegel",
  address =      "Univ Munich, Inst Comp Sci, Oettingenstr 67, D-80538
                 Munich, Germany",
  title =        "Visualization techniques for mining large databases: a
                 comparison",
  journal =      "IEEE Transactions on Knowledge and Data Engineering",
  year =         "1996",
  month =        dec,
  volume =       "8",
  number =       "6",
  pages =        "923--938",
  abstract =     "Visual data mining techniques have proven to be of
                 high value in exploratory data analysis, and they also
                 have a high potential for mining large databases. In
                 this article, we describe and evaluate a new
                 visualization-based approach to mining large databases.
                 The basic idea of our visual data mining techniques is
                 to represent as many data items as possible on the
                 screen at the same time by mapping each data value to a
                 pixel of the screen and arranging the pixels
                 adequately. The major goal of this article is to
                 evaluate our visual data mining techniques and to
                 compare them to other well-known visualization
                 techniques for multidimensional data: the parallel
                 coordinate and stick figure visualization techniques.
                 For the evaluation of visual data mining techniques, in
                 the first place the perception of properties of the
                 data counts, and only in the second place the CPU time
                 and the number of secondary storage accesses are
                 important. In addition to testing the visualization
                 techniques using real data, we developed a testing
                 environment for database visualizations similar to the
                 benchmark approach used for comparing the performance
                 of database systems. The testing environment allows the
                 generation of test data sets with predefined data
                 characteristics which are important for comparing the
                 perceptual abilities of visual data mining
                 techniques.",
  keywords =     "SPACE, data mining, explorative data analysis,
                 visualizing large databases, visualizing
                 multidimensional, multivariate data",
}

Using Visualization to Support Data Mining of Large Existing Databases, D. A. Keim and H.-P. Kriegel
@article{keim.ea:using-to:94,
  author  = {D. A. Keim and H.-P. Kriegel},
  title   = {Using Visualization to Support Data Mining of Large Existing Databases},
  journal = {Lecture Notes in Computer Science},
  year    = {1994},
  volume  = {871},
  pages   = {210--??},
  issn    = {0302-9743},
}

VisDB: Database Exploration using Multidimensional Visualization, D. A. Keim and H. Kriegel
Available as
postscript.
@Article{keim.ea:visdb-database:94,
  author =       "D. A. Keim and H.-P. Kriegel",
  title =        "Vis{DB}: Database Exploration using Multidimensional
                 Visualization",
  journal =      "IEEE Computer Graphics and Applications",
  year =         "1994",
  URL =          "http://www.dbs.informatik.uni-muenchen.de/dbs/projekt/papers/visdb.ps",
}

Databases and Visualization, D. A. Keim
Available as
postscript.
@inproceedings{keim:databases-and:96,
  author    = {D. A. Keim},
  title     = {Databases and Visualization},
  booktitle = {Proc. ACM SIGMOD Int. Conf. on Management of Data},
  address   = {Montreal, Canada},
  year      = {1996},
  note      = {Tutorial},
  url       = {http://www.dbs.informatik.uni-muenchen.de/~daniel/Sigmod96TutorialNotes.ps},
  annote    = {Comprehensive tutorial on Database visualisation for
               exploratory analysis},
}

Pixel-Oriented Database Visualizations, D. A. Keim
@article{keim:pixel-oriented-database:96,
  author  = {D. A. Keim},
  title   = {Pixel-Oriented Database Visualizations},
  journal = {SIGMOD Record (ACM Special Interest Group on Management of Data)},
  year    = {1996},
  month   = dec,
  volume  = {25},
  number  = {4},
  pages   = {35--39},
}

Pixel-oriented Visualization Techniques for Exploring Very Large Databases, D. A. Keim
Available as
postscript.
@Article{keim:pixel-oriented-techniques:96,
  author =       "D. A. Keim",
  title =        "Pixel-oriented Visualization Techniques for Exploring
                 Very Large Databases",
  journal =      "Journal of Computational and Graphical Statistics",
  month =        mar,
  year =         "1996",
  URL =          "http://www.dbs.informatik.uni-muenchen.de/dbs/projekt/papers/StatisticsPaper.ps",
}

A Probabilistic Approach to Fast Pattern Matching in Time Series Databases, Eamonn Keogh and Padhraic Smyth
@inproceedings{keogh.ea:probabilistic-approach:97,
  author   = {Eamonn Keogh and Padhraic Smyth},
  title    = {A Probabilistic Approach to Fast Pattern Matching in Time Series Databases},
  pages    = {24},
  crossref = {heckerman.ea:proceedings-third:97},
}

On the symbiosis of a data mining environment and a DBMS, Martin L. Kersten and Marcel Holsheimer
Available as
compressed postscript.
@TechReport{kersten.ea:on-symbiosis:95,
  author =       "Martin L. Kersten and Marcel Holsheimer",
  title =        "On the symbiosis of a data mining environment and a
                 {DBMS}",
  institution =  "Centrum voor Wiskunde en Informatica (CWI)",
  number =       "CS-R9521",
  pages =        "12",
  ISSN =         "0169-118X",
  month =        mar # " 30",
  year =         "1995",
  keywords =     "data mining, parallel databases, knowledge discovery
                 in databases.",
  URL =          "ftp://ftp.cwi.nl/pub/CWIreports/AA/CS-R9521.ps.Z",
  abstract =     "One of the main obstacles in applying data mining
                 techniques to large, real-world databases is the lack
                 of efficient data management. In this paper, we outline
                 a two-level architecture, consisting of a mining tool
                 and a database server. Key elements in its success are
                 a clear separation of concerns: the mining tool
                 organizes and controls the search process, while all
                 data-handling is performed by the parallel main memory
                 DBMS. Data is stored as a set of binary tables. The
                 interaction consists of queries for statistical
                 information. Properties of the DBMS and the search
                 algorithm are exploited for optimization of the data
                 handling. In particular, results of previous
                 computations are re-used, and I/O activity is reduced
                 by keeping a small hot-set of binary tables in
                 main-memory. As test results show, this system handles
                 large datasets at a competitive performance.",
  note =         "AA (Department of Algorithmics and Architecture)",
  annote =       "Originally contained the fields and values -
                 note,CS-R9521 booktitle,92",
}

Clustering Sequences of Complex Objects, A. Ketterlin
@inproceedings{ketterlin:clustering-sequences:97,
  author   = {A. Ketterlin},
  title    = {Clustering Sequences of Complex Objects},
  pages    = {215},
  crossref = {heckerman.ea:proceedings-third:97},
}

Privacy and knowledge discovery - a response, Y. T. Khaw and H. Y. Lee
@Article{khaw.ea:privacy-response:95,
  author =       "Y. T. Khaw and H. Y. Lee",
  address =      "Natl Comp Board, Inst Informat Technol, Ncb Bldg, 71
                 Sci Pk Dr, Singapore 0511, Singapore",
  title =        "Privacy and knowledge discovery - a response",
  journal =      "IEEE Expert-Intelligent Systems \& Their
                 Applications",
  year =         "1995",
  volume =       "10",
  number =       "2",
  pages =        "58",
}

Learning rules with local exceptions, Jyrki Kivinen and Heikki Mannila and Esko Ukkonen
@techreport{kivinen.ea:learning-rules:93,
  author      = {Jyrki Kivinen and Heikki Mannila and Esko Ukkonen},
  title       = {Learning rules with local exceptions},
  institution = {University of Helsinki},
  year        = {1993},
}

Finding interesting rules from large sets of discovered association rules, Mika Klemettinen and Heikki Mannila and Pirjo Ronkainen and Hannu Toivonen and A. Inkeri Verkamo
Available as
compressed postscript.
@inproceedings{klemettinen.ea:finding-interesting:94,
  author    = {Mika Klemettinen and Heikki Mannila and Pirjo
               Ronkainen and Hannu Toivonen and A. Inkeri Verkamo},
  title     = {Finding interesting rules from large sets of
               discovered association rules},
  editor    = {Nabil R. Adam and Bharat K. Bhargava and Yelena Yesha},
  booktitle = {Third International Conference on Information and
               Knowledge Management (CIKM'94)},
  publisher = {ACM Press},
  year      = {1994},
  month     = nov,
  pages     = {401--407},
  url       = {ftp://ftp.cs.helsinki.fi/pub/Reports/by_Project/PMDM/Finding_Interesting_Rules_from_Large_Sets_of_Discovered_Association_Rules.ps.gz},
  keywords  = {Knowledge discovery, Data mining, Association rules,
               Rule selection, Visualization},
  abstract  = {Association rules, introduced by Agrawal, Imielinski,
               and Swami, are rules of the form ``for 90 \% of the
               rows of the relation, if the row has value 1 in the
               columns in set $W$, then it has 1 also in column $B$''.
               Efficient methods exist for discovering association
               rules from large collections of data. The number of
               discovered rules can, however, be so large that
               browsing the rule set and finding interesting rules
               from it can be quite difficult for the user. We show
               how a simple formalism of {\em rule templates} makes it
               possible to easily describe the structure of
               interesting rules. We also give examples of
               visualization of rules, and show how a visualization
               tool interfaces with rule templates.},
}

Knowledge discovery in databases and data mining, W. Kloesgen
@article{kloesgen:knowledge-discovery:96,
  author  = {W. Kloesgen},
  title   = {Knowledge discovery in databases and data mining},
  journal = {Lecture Notes in Computer Science},
  year    = {1996},
  volume  = {1079},
  pages   = {623--??},
  issn    = {0302-9743},
}

Efficient Discovery of Interesting statements in Databases, Willi Klösgen
@techreport{klosgen:efficient-interesting:93,
  author      = {Willi Kl{\"o}sgen},
  title       = {Efficient Discovery of Interesting statements in Databases},
  institution = {GMD},
  year        = {1993},
}

Problems for knowledge discovery in databases and their treatment in the statistics interpreter explora, W. Klosgen
@Article{klosgen:problems-their:92,
  crossref =     "ijis-special-issue:92",
  author =       "W. Kl{\"o}sgen",
  address =      "German Natl Res Ctr Comp Sci, St Augustin 1, Germany",
  title =        "Problems for knowledge discovery in databases and
                 their treatment in the statistics interpreter
                 {Explora}",
  journal =      "International Journal of Intelligent Systems",
  year =         "1992",
  month =        sep,
  volume =       "7",
  number =       "7",
  pages =        "649--673",
  abstract =     "In this article we describe some goals and problems of
                 KDD. Approaches are presented which have been
                 implemented in the Statistics Interpreter Explora, a
                 prototype assistant system for discovering interesting
                 findings in recurrent datasets. We introduce patterns
                 to identify what is interesting in data and give some
                 examples of patterns for difference-, change-, and
                 trend-detection. Then we summarize what must be
                 specified to define a pattern. Besides some descriptive
                 parts, this includes a procedural verification method.
                 Object-oriented programming techniques can simplify the
                 specializations of general patterns. We identify search
                 as a constituent principle of discovery and introduce
                 object structures as a basis to induce a graph
                 structure on the search space. We mention several
                 strategies for graph search and describe approaches for
                 dealing with the aggregation, redundancy, and
                 overlapping problems. Then we address the presentation
                 of findings in natural language and graphical form,
                 focusing on the methods to design good graphical
                 presentations by knowledge-based techniques. Finally,
                 we discuss the paradigm of an adaptive discovery
                 assistant, including the problem of how to reuse the
                 discovered knowledge for further discovery.",
}

What makes a compelling empirical-evaluation, K. Knight
@Article{knight:what-makes:96,
  author =       "K. Knight",
  address =      "Univ So Calif, Inst Sci Informat, 4676 Admiralty Way,
                 Marina Del Rey, Ca, 90292 Univ Massachusetts, Dept Comp
                 Sci, Amherst, Ma, 01003 Inst Study Learning \&
                 Expertise, Palo Alto, Ca, 94306 Stanford Univ,
                 Stanford, Ca, 94305",
  title =        "What makes a compelling empirical-evaluation",
  journal =      "IEEE Expert-Intelligent Systems \& Their
                 Applications",
  year =         "1996",
  volume =       "11",
  number =       "5",
  pages =        "10--14",
}

Analysing Binary Associations, Arno J. Knobbe and Pieter W. Adriaans
@inproceedings{knobbe.ea:analysing-binary:96,
  author   = {Arno J. Knobbe and Pieter W. Adriaans},
  title    = {Analysing Binary Associations},
  pages    = {311},
  crossref = {simoudis.ea:proceedings-second:96},
}

Extraction of Spatial Proximity Patterns by Concept Generalization, Edwin M. Knorr and Raymond T. Ng
@inproceedings{knorr.ea:extraction-spatial:96,
  author   = {Edwin M. Knorr and Raymond T. Ng},
  title    = {Extraction of Spatial Proximity Patterns by Concept Generalization},
  pages    = {347},
  crossref = {simoudis.ea:proceedings-second:96},
}

Finding aggregate proximity relationships and commonalities in spatial data mining, E. M. Knorr and R. T. Ng
@Article{knorr.ea:finding-aggregate:96,
  author =       "E. M. Knorr and R. T. Ng",
  address =      "Univ British Columbia, Dept Comp Sci, 2366 Main Mall,
                 Vancouver, Bc V6T 1Z4, Canada",
  title =        "Finding aggregate proximity relationships and
                 commonalities in spatial data mining",
  journal =      "IEEE Transactions on Knowledge and Data Engineering",
  year =         "1996",
  month =        dec,
  volume =       "8",
  number =       "6",
  pages =        "884--897",
  abstract =     "In this paper, we study two spatial knowledge
                 discovery problems involving proximity relationships
                 between clusters and features. The first problem is:
                 Given a cluster of points, how can we efficiently find
                 features (represented as polygons) that are closest to
                 the majority of points in the cluster? We measure
                 proximity in an aggregate sense due to the nonuniform
                 distribution of points in a cluster (e.g., houses on a
                 map), and the different shapes and sizes of features
                 (e.g., natural or man-made geographic features). The
                 second problem is: Given n clusters of points, how can
                 we extract the aggregate proximity commonalities (i.e.,
                 features) that apply to most, if not all, of the n
                 clusters? Regarding the first problem, the main
                 contribution of the paper is the development of
                 Algorithm CRH which uses geometric approximations
                 (i.e., circles, rectangles, and convex hulls) to filter
                 and select features. Highly scalable and incremental,
                 Algorithm CRH can examine over 50,000 features and
                 their spatial relationships with a given cluster in
                 approximately one second of CPU time. Regarding the
                 second problem, the key contribution is the development
                 of Algorithm GenCom that makes use of concept
                 generalization to effectively derive many meaningful
                 commonalities that cannot be found otherwise.",
  keywords =     "spatial knowledge discovery, concept generalization,
                 proximity relationships, geometric filtering, GIS",
}

A Unified Notion of Outliers: Properties and Computation, Edwin M. Knorr and Raymond T. Ng
@inproceedings{knorr.ea:unified-notion:97,
  author   = {Edwin M. Knorr and Raymond T. Ng},
  title    = {A Unified Notion of Outliers: Properties and Computation},
  pages    = {219},
  crossref = {heckerman.ea:proceedings-third:97},
}

Machine Learning, an Artificial Intelligence approach, Yves Kodratoff and Ryszard S. Michalski (Eds)
@Book{kodratoff.ea:machine-learning:90,
  editor =       "Yves Kodratoff and Ryszard S. Michalski",
  title =        "Machine Learning, an {Artificial Intelligence}
                 approach",
  booktitle =    "Machine Learning, an {Artificial Intelligence}
                 approach",
  publisher =    "Morgan Kaufmann",
  year =         "1990",
  volume =       "3",
  address =      "San Mateo, California",
}

Automatic Parameter Selection by Minimizing Estimated Error, Ron Kohavi and George John
Available as
ronnyk.
@inproceedings{kohavi.ea:automatic-parameter:95,
  author        = {Ron Kohavi and George John},
  title         = {Automatic Parameter Selection by Minimizing Estimated Error},
  editor        = {Armand Prieditis and Stuart Russell},
  booktitle     = {Machine Learning: Proceedings of the Twelfth
                   International Conference},
  publisher     = {Morgan Kaufmann},
  year          = {1995},
  month         = jul,
  pages         = {304--312},
  url           = {http://robotics.stanford.edu/users/ronnyk},
  contributedby = {Ronny Kohavi, ronnyk(at)sgi.com},
}

Bias Plus Variance Decomposition for Zero-One Loss Functions, Ron Kohavi and David H. Wolpert
Available as
ronnyk.
@InProceedings{kohavi.ea:bias-plus:96,
  author =       "Ron Kohavi and David H. Wolpert",
  title =        "Bias Plus Variance Decomposition for Zero-One Loss
                 Functions",
  booktitle =    "Machine Learning: Proceedings of the Thirteenth
                 International Conference",
  editor =       "Lorenza Saitta",
  publisher =    "Morgan Kaufmann",
  pages =        "275--283",
  month =        jul,
  year =         "1996",
  URL =          "http://robotics.stanford.edu/users/ronnyk",
  contributedby = "Ronny Kohavi, ronnyk(at)sgi.com",
}

Error-Based and Entropy-Based Discretization of Continuous Features, Ron Kohavi and Mehran Sahami
Available as
ronnyk.
@InProceedings{kohavi.ea:error-based-entropy-based:96,
  author =       "Ron Kohavi and Mehran Sahami",
  title =        "Error-Based and Entropy-Based Discretization of
                 Continuous Features",
  crossref =     "simoudis.ea:proceedings-second:96",
  booktitle =    "Proceedings of the Second International Conference on
                 Knowledge Discovery and Data Mining",
  pages =        "114--119",
  year =         "1996",
  affiliation =  "Silicon Graphics Inc.; Stanford University",
  URL =          "http://robotics.stanford.edu/users/ronnyk",
  url2 =         "ftp://starry.stanford.edu/pub/ronnyk/disc2.ps",
  contributedby = "Ronny Kohavi, ronnyk(at)sgi.com",
}

Feature Subset Selection Using the Wrapper Method: Overfitting and Dynamic Search Space Topology, Ron Kohavi and Dan Sommerfield
Available as
postscript.
@InProceedings{kohavi.ea:feature-subset:95,
  author =       "Ron Kohavi and Dan Sommerfield",
  title =        "Feature Subset Selection Using the Wrapper Method:
                 Overfitting and Dynamic Search Space Topology",
  booktitle =    "First International Conference on Knowledge Discovery
                 and Data Mining (KDD-95)",
  editor =       "Usama M Fayyad and Ramasamy Uthurusamy",
  month =        aug,
  year =         "1995",
  URL =          "ftp://starry.stanford.edu/pub/ronnyk/fssWrapper.ps",
  keywords =     "feature subset selection, relevant/irrelevant
                 features, accuracy estimation, cross-validation",
}

Option Decision Trees with Majority Votes, Ron Kohavi and Clayton Kunz
Available as
ronnyk.
@InProceedings{kohavi.ea:option-decision:97,
  author =       "Ron Kohavi and Clayton Kunz",
  title =        "Option Decision Trees with Majority Votes",
  booktitle =    "Machine Learning: Proceedings of the Fourteenth
                 International Conference",
  editor =       "Doug Fisher",
  publisher =    "Morgan Kaufmann Publishers, Inc.",
  month =        jul,
  year =         "1997",
  URL =          "http://robotics.stanford.edu/users/ronnyk",
  contributedby = "Ronny Kohavi, ronnyk(at)sgi.com",
}

Data Mining Using MLC++: A Machine Learning Library in C++, Ron Kohavi and Dan Sommerfield and James Dougherty
Available as
compressed postscript.
@InProceedings{kohavi.ea:using-mlc:96,
  author =       "Ron Kohavi and Dan Sommerfield and James Dougherty",
  title =        "Data Mining Using {MLC}++: {A} Machine Learning
                 Library in {C}++",
  booktitle =    "Tools with Artificial Intelligence",
  publisher =    "IEEE Computer Society Press",
  pages =        "234--245",
  year =         "1996",
  note =         "Received the best paper award",
  URL =          "ftp://starry.stanford.edu/pub/ronnyk/mlc96.ps.Z",
  contributedby = "Ronny Kohavi, ronnyk(at)sgi.com",
}

Wrappers for Feature Subset Selection, Ron Kohavi and George H. John
Available as
ronnyk.
@Article{kohavi.ea:wrappers-feature:,
  author =       "Ron Kohavi and George H. John",
  title =        "Wrappers for Feature Subset Selection",
  journal =      "Artificial Intelligence",
  volume =       "97",
  number =       "1--2",
  pages =        "273--324",
  year =         "1997",
  URL =          "http://robotics.stanford.edu/users/ronnyk",
  contributedby = "Ronny Kohavi, ronnyk(at)sgi.com",
}

Scaling Up the Accuracy of Naive-Bayes Classifiers: a Decision-Tree Hybrid, Ron Kohavi
Available as
postscript.
@InProceedings{kohavi:scaling-up:96,
  author =       "Ron Kohavi",
  title =        "Scaling Up the Accuracy of {N}aive-{B}ayes
                 Classifiers: a Decision-Tree Hybrid",
  crossref =     "simoudis.ea:proceedings-second:96",
  booktitle =    "Proceedings of the Second International Conference on
                 Knowledge Discovery and Data Mining",
  pages =        "202--207",
  year =         "1996",
  URL =          "ftp://starry.stanford.edu/pub/ronnyk/nbtree.ps",
  url2 =         "http://robotics.stanford.edu/users/ronnyk",
  contributedby = "Ronny Kohavi, ronnyk(at)sgi.com",
}

Wrappers for Performance Enhancement and Oblivious Decision Graphs, Ron Kohavi
Available as
ronnyk.
@PhdThesis{kohavi:wrappers-performance:95,
  author =       "Ron Kohavi",
  title =        "Wrappers for Performance Enhancement and Oblivious
                 Decision Graphs",
  school =       "Stanford University, Computer Science Department",
  address =      "Stanford, CA",
  year =         "1995",
  note =         "STAN-CS-TR-95-1560",
  URL =          "ftp://starry.stanford.edu/pub/ronnyk",
  contributedby = "Ronny Kohavi, ronnyk(at)sgi.com",
}

Predictive Data Mining with Finite Mixtures, Petri Kontkanen and Petri Myllymaki and Henry Tirri
@InProceedings{kontkanen.ea:predictive-with:96,
  author =       "Petri Kontkanen and Petri Myllymaki and Henry Tirri",
  title =        "Predictive Data Mining with Finite Mixtures",
  crossref =     "simoudis.ea:proceedings-second:96",
  pages =        "176",
}

Discovery of spatial association rules in geographic information databases, K. Koperski and J. W. Han
@Article{koperski.ea:spatial-association:95,
  author =       "K. Koperski and J. W. Han",
  title =        "Discovery of spatial association rules in geographic
                 information databases",
  journal =      "Lecture Notes In Computer Science",
  volume =       "951",
  pages =        "47--66",
  year =         "1995",
  address =      "Simon Fraser Univ, Sch Comp Sci, Burnaby, Bc V5A 1S6,
                 Canada",
  abstract =     "Spatial data mining, i.e., discovery of interesting,
                 implicit knowledge in spatial databases, is an
                 important task for understanding and use of spatial
                 data- and knowledge- bases. In this paper, an efficient
                 method for mining strong spatial association rules in
                 geographic information databases is proposed and
                 studied. A spatial association rule is a rule
                 indicating certain association relationship among a set
                 of spatial and possibly some nonspatial predicates. A
                 strong rule indicates that the patterns in the rule
                 have relatively frequent occurrences in the database
                 and strong implication relationships. Several
                 optimization techniques are explored, including a
                 two-step spatial computation technique (approximate
                 computation on large sets, and refined computations on
                 small promising patterns), shared processing in the
                 derivation of large predicates at multiple concept
                 levels, etc. Our analysis shows that interesting
                 association rules can be discovered efficiently in
                 large spatial databases.",
}

Quantifiable Data Mining Using Principal Component Analysis, Flip Korn and Alexandros Labrinidis and Yannis Kotidis and Christos Faloutsos and Alex Kaplunovich and Dejan Perkovic
Available as
compressed postscript.
@TechReport{korn.ea:quantifiable-using:97,
  author =       "Flip Korn and Alexandros Labrinidis and Yannis Kotidis
                 and Christos Faloutsos and Alex Kaplunovich and Dejan
                 Perkovic",
  title =        "Quantifiable Data Mining Using Principal Component
                 Analysis",
  number =       "CS-TR-3754",
  institution =  "University of Maryland Institute for Advanced Computer
                 Studies Dept. of Computer Science, Univ. of Maryland",
  address =      "College Park, MD",
  year =         "1997",
  month =        feb,
  URL =          "ftp://ftp.cs.umd.edu/pub/papers/papers/3754/3754.ps.Z",
  abstract =     "Association Rule Mining algorithms operate on a data
                 matrix (e.g., customers x products) to derive rules. We
                 propose a single-pass algorithm for mining linear rules
                 in such a matrix based on Principal Component Analysis.
                 PCA detects correlated columns of the matrix, which
                 correspond to, e.g., products that sell together.\par
                 The first contribution of this work is that we propose
                 to quantify the ``goodness'' of a set of discovered
                 rules. We define the ``guessing error'': the
                 root-mean-square error of the reconstructed values of
                 the cells of the given matrix, when we pretend that
                 they are unknown. The second contribution is a novel
                 method to guess missing/hidden values from the linear
                 rules that our method derives. For example, if somebody
                 bought \$10 of milk and \$3 of bread, our rules can
                 ``guess'' the amount spent on, say, butter. Thus, we
                 can perform a variety of important tasks such as
                 forecasting, `what-if' scenarios, outlier detection,
                 and visualization. Moreover, we show that we can
                 compute the principal components with a single pass
                 over the dataset.\par Experiments on real datasets
                 (e.g., NBA statistics) demonstrate that the proposed
                 method consistently achieves a ``guessing error'' of up
                 to 5 times lower than the straightforward
                 competitor.\par (Also cross-referenced as
                 UMIACS-TR-97-13)",
}

Mining for Causes of Cancer: Machine Learning Experiments at Various Levels of Detail, Stefan Kramer and Bernhard Pfahringer and Christoph Helma
@InProceedings{kramer.ea:causes-cancer:97,
  author =       "Stefan Kramer and Bernhard Pfahringer and Christoph
                 Helma",
  title =        "Mining for Causes of Cancer: Machine Learning
                 Experiments at Various Levels of Detail",
  crossref =     "heckerman.ea:proceedings-third:97",
  pages =        "223",
}

Efficient Search for Strong Partial Determinations, Stefan Kramer and Bernhard Pfahringer
@InProceedings{kramer.ea:efficient-search:96,
  author =       "Stefan Kramer and Bernhard Pfahringer",
  title =        "Efficient Search for Strong Partial Determinations",
  crossref =     "simoudis.ea:proceedings-second:96",
  pages =        "371",
}

The Complexity of Data Mining on the Web, Evangelos Kranakis and Danny Krizanc and Andrzej Pelc and David Peleg
@InProceedings{kranakis.ea:complexity-on:96,
  author =       "Evangelos Kranakis and Danny Krizanc and Andrzej Pelc
                 and David Peleg",
  title =        "The Complexity of Data Mining on the Web",
  booktitle =    "Proceedings of the 15th Annual {ACM} Symposium on
                 Principles of Distributed Computing",
  pages =        "153",
  month =        may,
  year =         "1996",
  publisher =    "ACM",
  address =      "New York",
}

Data-Mining Dynamite --- Supercharge your data-mining projects with data cleansing, data warehouses, parallel processing, and mega-storage, Cheryl D. Krivda
@Article{krivda:data-mining-dynamite:95,
  author =       "Cheryl D. Krivda",
  title =        "Data-Mining Dynamite --- Supercharge your data-mining
                 projects with data cleansing, data warehouses, parallel
                 processing, and mega-storage",
  journal =      "Byte Magazine",
  year =         "1995",
  month =        oct,
  volume =       "20",
  number =       "10",
  pages =        "97--??",
  ISSN =         "0360-5280",
}

Unearthing Underground Data, Cheryl D Krivda
Available as
9605mine.htm.
@Article{krivda:unearthing-underground:96,
  author =       "Cheryl D Krivda",
  title =        "Unearthing Underground Data",
  journal =      "LAN Magazine",
  year =         "1996",
  note =         "May 20--June 2",
  URL =          "http://www.lanmag.com/9605mine.htm",
}

Multi-class problems and discretization in ICL, W. Van Laer and S. D\v{z}eroski and L. De Raedt
@InProceedings{laer.ea:multi-class-problems:96,
  author =       "Van Laer, W. and D{\v{z}}eroski, S. and De Raedt, L.",
  title =        "Multi-class problems and discretization in {ICL}",
  booktitle =    "Proceedings of the MLnet Familiarization Workshop on
                 Data Mining with Inductive Logic Programming",
  pages =        "53--60",
  year =         "1996",
}

Self-Organizing Maps of Document Collections: A New Approach to Interactive Exploration, Krista Lagus and Timo Honkela and Samuel Kaski and Teuvo Kohonen
@InProceedings{lagus.ea:self-organizing-maps:96,
  author =       "Krista Lagus and Timo Honkela and Samuel Kaski and
                 Teuvo Kohonen",
  title =        "Self-Organizing Maps of Document Collections: {A} New
                 Approach to Interactive Exploration",
  crossref =     "simoudis.ea:proceedings-second:96",
  pages =        "238",
}

Discrete Sequence Prediction and Its Applications, P. Laird
@InProceedings{laird:discrete-sequence:92,
  author =       "P. Laird",
  title =        "Discrete Sequence Prediction and Its Applications",
  booktitle =    "Proceedings of AAAI-92",
  pages =        "135--140",
  year =         "1992",
}

Imputation of Missing Data Using Machine Learning Techniques, Kamakshi Lakshminarayan and Steven A. Harp and Robert Goldman and Tariq Samad
@InProceedings{lakshminarayan.ea:imputation-missing:96,
  author =       "Kamakshi Lakshminarayan and Steven A. Harp and Robert
                 Goldman and Tariq Samad",
  title =        "Imputation of Missing Data Using Machine Learning
                 Techniques",
  crossref =     "simoudis.ea:proceedings-second:96",
  pages =        "140",
}

An Empirical Test of the Weighted Effect Approach to Generalized Prediction Using Recursive Neural Nets, Rense Lange
@InProceedings{lange:empirical-test:96,
  author =       "Rense Lange",
  title =        "An Empirical Test of the Weighted Effect Approach to
                 Generalized Prediction Using Recursive Neural Nets",
  crossref =     "simoudis.ea:proceedings-second:96",
  pages =        "183",
}

Data-Driven Approaches to Empirical Discovery, Pat Langley and Jan M. Zytkow
@Article{langley.ea:data-driven-approaches:89,
  author =       "Pat Langley and Jan M. Zytkow",
  title =        "Data-Driven Approaches to Empirical Discovery",
  journal =      "Artificial Intelligence",
  volume =       "40",
  pages =        "283--312",
  year =         "1989",
  month =        sep,
}

Rediscovering chemistry with the Bacon system, Pat Langley and Gary L. Bradshaw and Herbert A. Simon
@InCollection{langley.ea:rediscovering-chemistry:86,
  author =       "Pat Langley and Gary L. Bradshaw and Herbert A.
                 Simon",
  title =        "Rediscovering chemistry with the {Bacon} system",
  pages =        "307--329",
  crossref =     "michalski.ea:machine-learning:86",
}

The Search for Regularity: Four Aspects of Scientific Discovery, Pat Langley and Jan M. Zytkow and Herbert A. Simon and Gary L. Bradshaw
@InCollection{langley.ea:search-regularity:86,
  author =       "Pat Langley and Jan M. Zytkow and Herbert A. Simon and
                 Gary L. Bradshaw",
  title =        "The Search for Regularity: Four Aspects of Scientific
                 Discovery",
  pages =        "425--469",
  crossref =     "michalski.ea:machine-learning:86",
}

Induction of Condensed Determinations, Pat Langley
@InProceedings{langley:induction-condensed:96,
  author =       "Pat Langley",
  title =        "Induction of Condensed Determinations",
  crossref =     "simoudis.ea:proceedings-second:96",
  pages =        "327",
}

A Context-Sensitive Discretization of Numeric Attributes for Classification Learning, Changhwan Lee and Dong-Guk Shin
@InProceedings{lee.ea:context-sensitive-discretization:94,
  author =       "Changhwan Lee and Dong-Guk Shin",
  title =        "A Context-Sensitive Discretization of Numeric
                 Attributes for Classification Learning",
  booktitle =    "ECAI 94. Proceedings of the 11th European Conference
                 on Artificial Intelligence",
  publisher =    "John Wiley and Sons, Ltd",
  year =         "1994",
  pages =        "428--432",
}

Database summarization using fuzzy isa hierarchies, D. H. Lee and M. H. Kim
@Article{lee.ea:database-summarization:97,
  author =       "D. H. Lee and M. H. Kim",
  address =      "Chonnam Natl Univ, Dept Comp Sci, Kwangju, South Korea
                 Korea Adv Inst Sci \& Technol, Dept Comp Sci, Taejon
                 305701, South Korea",
  title =        "Database summarization using fuzzy {ISA} hierarchies",
  journal =      "{IEEE} Transactions on Systems, Man, and Cybernetics,
                 Part B: Cybernetics",
  year =         "1997",
  volume =       "27",
  number =       "1",
  pages =        "68--78",
  abstract =     "Summary discovery is one of the major components of
                 knowledge discovery in databases, which provides the
                 user with comprehensive information for grasping the
                 essence from a large amount of information in a
                 database. In this paper, we propose an interactive
                 top-down summary discovery process which utilizes fuzzy
                 ISA hierarchies as domain knowledge. We define a
                 generalized tuple as a representational form of a
                 database summary including fuzzy concepts. By virtue of
                 fuzzy ISA hierarchies where fuzzy ISA relationships
                 common in actual domains are naturally expressed, the
                 discovery process comes up with more accurate database
                 summaries. We also present an informativeness measure
                 for distinguishing generalized tuples that delivers
                 much information to users, based on Shannon's
                 information theory.",
  keywords =     "data mining, fuzzy set application, summary
                 discovery",
}

A Hypothesis Refinement Method for Summary Discovery in Databases, Do Heon Lee and Myoung Ho Kim
@InProceedings{lee.ea:hypothesis-refinement:93,
  author =       "Do Heon Lee and Myoung Ho Kim",
  title =        "A Hypothesis Refinement Method for Summary Discovery
                 in Databases",
  booktitle =    "Proceedings of the 2nd International Conference on
                 Information and Knowledge Management",
  editor =       "Bharat Bhargava and Timothy Finin and Yelena Yesha",
  publisher =    "ACM Press",
  address =      "New York, NY, USA",
  pages =        "274--282",
  year =         "1993",
  month =        nov,
}

Visualization support for data mining, H. Y. Lee and H. L. Ong
Available as
IEEEpub.zip.
@Article{lee.ea:support:96,
  author =       "H. Y. Lee and H. L. Ong",
  address =      "Inst Informat Technol, Japan Singapore Artificial
                 Intelligence Ctr, 11 Sci Pk Rd, Singapore 117685,
                 Singapore",
  title =        "Visualization support for data mining",
  journal =      "{IEEE} Expert: Intelligent Systems \& Their
                 Applications",
  year =         "1996",
  volume =       "11",
  number =       "5",
  pages =        "69--75",
  annote =       "Discusses Winviz System",
  URL =          "http://jsaic.iti.gov.sg/pubs/papers/papers_archive/IEEEpub.zip",
  keywords =     "visualisation, Winviz, parallel co-ordinates",
}

EURISKO: A program that learns new heuristics and domain concepts. The nature of heuristics III: Background and examples, D. Lenat
@Article{lenat:eurisko-program:83,
  author =       "D. Lenat",
  title =        "{EURISKO}: {A} program that learns new heuristics and
                 domain concepts. {T}he nature of heuristics {III}:
                 Background and examples",
  journal =      "Artificial Intelligence",
  volume =       "21",
  pages =        "61--98",
  year =         "1983",
}

Discovering Trends in Text Databases, Brian Lent and Rakesh Agrawal and Ramakrishnan Srikant
@InProceedings{lent.ea:discovering-trends:97,
  author =       "Brian Lent and Rakesh Agrawal and Ramakrishnan
                 Srikant",
  title =        "Discovering Trends in Text Databases",
  crossref =     "heckerman.ea:proceedings-third:97",
  pages =        "227",
}

A Framework for Integrating Fault Diagnosis and Incremental Knowledge Acquisition in Connectionist Expert Systems, J. H. Lim and H. C. Lui and P. Z. Wang
@InProceedings{lim.ea:framework-integrating:92,
  author =       "J. H. Lim and H. C. Lui and P. Z. Wang",
  title =        "A Framework for Integrating Fault Diagnosis and
                 Incremental Knowledge Acquisition in Connectionist
                 Expert Systems",
  year =         "1992",
  booktitle =    "Proceedings of AAAI-92",
  pages =        "159--164",
}

Data mining - tools and techniques, P. R. Limb and G. J. Meggs
@Article{limb.ea:tools-techniques:94,
  author =       "P. R. Limb and G. J. Meggs",
  address =      "British Telecommun Labs, Martlesham Heath, Ipswich 1P5
                 7Re, Suffolk, England",
  title =        "Data mining - tools and techniques",
  journal =      "{BT} Technology Journal",
  year =         "1994",
  volume =       "12",
  number =       "4",
  pages =        "32--41",
  abstract =     "The amount of data collected by large
                 telecommunications companies like BT is vast. In order
                 to turn this voluminous data into valuable information
                 it is necessary to apply analysis techniques to build
                 models and characteristics of data. This paper gives an
                 overview of a range of techniques used for data
                 analysis collectively known as data mining. Three broad
                 categories of data mining techniques are suggested and
                 the reader is introduced to popular algorithms within
                 each category. References to additional algorithms are
                 also presented so that the reader may gain more
                 detailed information if required.",
}

Dimensionality reduction via discretization, H. Liu and R. Setiono
@Article{liu.ea:dimensionality-reduction:96,
  author =       "H. Liu and R. Setiono",
  address =      "Natl Univ Singapore, Dept Informat Syst \& Comp Sci,
                 Singapore 0511, Singapore",
  title =        "Dimensionality reduction via discretization",
  journal =      "Knowledge-Based Systems",
  year =         "1996",
  volume =       "9",
  number =       "1",
  pages =        "67--72",
  abstract =     "The existence of numeric data and large numbers of
                 records in a database present a challenging task in
                 terms of explicit concepts extraction from the raw
                 data. The paper introduces a method that reduces data
                 vertically and horizontally, keeps the discriminating
                 power of the original data, and paves the way for
                 extracting concepts. The method is based on
                 discretization (vertical reduction) and feature
                 selection (horizontal reduction). The experimental
                 results show that (a) the data can be effectively
                 reduced by the proposed method; (b) the predictive
                 accuracy of a classifier (C4.5) can be improved after
                 data and dimensionality reduction; and (c) the
                 classification rules learned are simpler.",
  keywords =     "DIMENSIONALITY REDUCTION, DISCRETIZATION, KNOWLEDGE
                 DISCOVERY",
}

Using General Impressions to Analyze Discovered Classification Rules, Bing Liu and Wynne Hsu and Shu Chen
@InProceedings{liu.ea:using-general:97,
  author =       "Bing Liu and Wynne Hsu and Shu Chen",
  title =        "Using General Impressions to Analyze Discovered
                 Classification Rules",
  crossref =     "heckerman.ea:proceedings-third:97",
  pages =        "31",
}

Expert systems, clinical-data analyses, and knowledge discovery - the posch ai project, J. M. Long and J. R. Slagle
@Article{long.ea:expert-systems:92,
  author =       "J. M. Long and J. R. Slagle",
  address =      "Univ Minnesota, Dept Surg, Box 290 Umhc, 420 Delaware
                 St Se, Minneapolis, Mn, 55455",
  title =        "Expert systems, clinical-data analyses, and knowledge
                 discovery - the {POSCH} {AI} project",
  journal =      "Annals Of The New York Academy Of Sciences",
  year =         "1992",
  volume =       "670",
  pages =        "146--154",
}

Application of Clausal Discovery to Temporal Databases, D. Lorenzo
@InProceedings{lorenzo:application-clausal:96,
  author =       "D. Lorenzo",
  title =        "Application of Clausal Discovery to Temporal
                 Databases",
  booktitle =    "Proceedings of the MLnet Familiarization Workshop on
                 Data Mining with Inductive Logic Programming",
  pages =        "25--40",
  year =         "1996",
}

Effective data mining using neural networks, H. J. Lu and R. Setiono and H. Liu
@Article{lu.ea:effective-using:96,
  author =       "H. J. Lu and R. Setiono and H. Liu",
  address =      "Natl Univ Singapore, Dept Informat Syst \& Comp Sci,
                 Lower Kent Ridge Rd, Singapore 119260, Singapore",
  title =        "Effective data mining using neural networks",
  journal =      "{IEEE} Transactions on Knowledge and Data Engineering",
  year =         "1996",
  volume =       "8",
  number =       "6",
  pages =        "957--961",
  abstract =     "Classification is one of the data mining problems
                 receiving great attention recently in the database
                 community. This paper presents an approach to discover
                 symbolic classification rules using neural networks.
                 Neural networks have not been thought suited for data
                 mining because how the classifications were made is not
                 explicitly stated as symbolic rules that are suitable
                 for verification or interpretation by humans. With the
                 proposed approach, concise symbolic rules with high
                 accuracy can be extracted from a neural network. The
                 network is first trained to achieve the required
                 accuracy rate. Redundant connections of the network are
                 then removed by a network pruning algorithm. The
                 activation values of the hidden units in the network
                 are analyzed, and classification rules are generated
                 using the result of this analysis. The effectiveness of
                 the proposed approach is clearly demonstrated by the
                 experimental results on a set of standard data mining
                 test problems.",
  keywords =     "data mining, neural networks, rule extraction, network
                 pruning, classification",
}

NeuroRule: A Connectionist Approach to Data Mining, H. Lu and R. Setiono and H. Liu
Available as
postscript.
@InProceedings{lu.ea:neurorule-connectionist:95,
  author =       "H. Lu and R. Setiono and H. Liu",
  title =        "{NeuroRule}: {A} Connectionist Approach to Data
                 Mining",
  booktitle =    "Proceedings of VLDB95",
  year =         "1995",
  URL =          "http://www.iscs.nus.sg/~liuh/vldb95.ps",
  keywords =     "Neural Networks, Data Mining, Classification Rules",
}

Efd - a hybrid knowledge statistical-based system for the detection of fraud, J. A. Major and D. R. Riedinger
@Article{major.ea:efd-hybrid:92,
  author =       "J. A. Major and D. R. Riedinger",
  address =      "Travelers Insurance Co, Hartford, Ct, 06183",
  title =        "{EFD} - a hybrid knowledge statistical-based system
                 for the detection of fraud",
  crossref =     "ijis-special-issue:92",
  journal =      "International Journal of Intelligent Systems",
  year =         "1992",
  volume =       "7",
  number =       "7",
  pages =        "687--703",
  abstract =     "EFD (Electronic Fraud Detection) assists Investigative
                 Consultants in the Managed Care \& Employee Benefits
                 Security Unit of The Travelers Insurance Companies in
                 the detection and preinvestigative analysis of
                 healthcare provider fraud. The task EFD performs,
                 scanning a large population of health insurance claims
                 in search of likely fraud, has never been done
                 manually. Furthermore, the available database has few
                 positive examples. Thus, neither existing knowledge
                 engineering techniques nor statistical methods are
                 sufficient for designing the identification process. To
                 overcome these problems, EFD uses knowledge discovery
                 techniques on two levels. First, EFD integrates expert
                 knowledge with statistical information assessment to
                 identify cases of unusual provider behavior. The heart
                 of EFD is 27 behavioral heuristics, knowledge-based
                 ways of viewing and measuring provider behavior. Rules
                 operate on them to identify providers whose behavior
                 merits a closer look by the Investigative Consultants.
                 Second, machine learning is used to develop new rules
                 and improve the identification process. Pilot
                 operations involved analysis of nearly 22 000 providers
                 in six metropolitan areas. The pilot is implemented in
                 SAS Institute's SAS(R) System, AICorp's Knowledge Base
                 Management System (KBMS(R)), and Borland
                 International's Turbo Prolog(R).",
  keywords =     "Statistics, Frontiers, Finance, natural language
                 reports",
}

CUPID - An Iterative Knowledge Discovery Framework, Max Bramer and Jason Mallen
Available as
hypertext.
@Misc{mallen:cupid-iterative:,
  author =       "Max Bramer and Jason Mallen",
  title =        "{CUPID} - An Iterative Knowledge Discovery Framework",
  year =         "1994",
  note =         "Presented at ES94 (12/10/94)",
  address =      "University of Portsmouth, UK",
  URL =          "http://osiris.sis.port.ac.uk/technical_reports_index/kdpap.html",
  abstract =     "This paper describes the novel Knowledge Discovery
                 system CUPID. Knowledge Discovery from Databases (KDD)
                 is concerned with utilising techniques borrowed from
                 fields such as machine learning (ML), statistics and
                 databases to search for relationships and global
                 patterns that may exist in large databases, but are
                 `hidden' among the vast amounts of data. The discovered
                 knowledge can be helpful for building knowledge based
                 systems and data analysis. The underlying principle
                 behind CUPID is the use of a quantitative measure for
                 the `interest' of a hypothesis. This measure provides a
                 method of ranking competing hypotheses and thus allows
                 the system to store the `best' or `most interesting'
                 rules describing a database. CUPID is based on the
                 ITRule algorithm of (Smyth \& Goodman, 1992) and
                 extends that algorithm with added functionality. CUPID
                 provides four fundamental features. One, background
                 knowledge in the form of attribute value generalisation
                 hierarchies may be utilised. Two, prior domain
                 knowledge which may be incorrect and incomplete may be
                 provided by a domain expert. Three, knowledge may be
                 re-used. Four, noise in the data set is handled in a
                 well founded manner.",
}

Induction of Decision trees from Complex Structured Data, Michel Manago and Yves Kodratoff
@InCollection{manago.ea:induction-decision:91,
  author =       "Michel Manago and Yves Kodratoff",
  title =        "Induction of Decision trees from Complex Structured
                 Data",
  booktitle =    "Knowledge Discovery in Databases",
  editor =       "Gregory Piatetsky-Shapiro and William J. Frawley",
  pages =        "289--306",
  publisher =    "AAAI Press / The MIT Press",
  address =      "Menlo Park, California",
  edition =      "1st",
  year =         "1991",
  crossref =     "piatetsky-shapiro.ea:knowledge-discovery:91",
}

Algorithms for inferring functional-dependencies from relations, H. Mannila and K. J. Raiha
@Article{mannila.ea:algorithms-inferring:94,
  author =       "H. Mannila and K. J. R{\"a}ih{\"a}",
  address =      "Univ Helsinki, Dept Comp Sci, Pob 26, Sf-90014
                 Helsinki, Finland Univ Tampere, Dept Comp Sci, Sf-33101
                 Tampere, Finland",
  title =        "Algorithms for inferring functional-dependencies from
                 relations",
  journal =      "Data \& Knowledge Engineering",
  year =         "1994",
  volume =       "12",
  number =       "1",
  pages =        "83--99",
  abstract =     "The dependency inference problem is to find a cover of
                 the set of functional dependencies that hold in a given
                 relation. The problem has applications in relational
                 database design, in query optimization, and in
                 artificial intelligence. The problem is exponential in
                 the number of attributes. We develop two algorithms
                 with better best case behavior than the simple one. One
                 algorithm reduces the problem to computing the
                 transversal of a hypergraph. The other is based on
                 repeatedly sorting the relation with respect to a set
                 of attributes.",
  keywords =     "ARMSTRONG RELATIONS, DESIGN, FUNCTIONAL DEPENDENCIES,
                 MACHINE DISCOVERY, DATA MINING, ALGORITHMS",
}

Discovering Frequent Episodes in Sequences, H. Mannila and H. Toivonen and A. I. Verkamo
Available as
compressed postscript.
@InProceedings{mannila.ea:discovering-frequent-episodes-in-sequences:95,
  author =       "H. Mannila and H. Toivonen and A. I. Verkamo",
  title =        "Discovering Frequent Episodes in Sequences",
  booktitle =    "Proceedings of the First International Conference on
                 Knowledge Discovery and Data Mining (KDD-95)",
  year =         "1995",
  address =      "Montreal, Canada",
  month =        aug,
  publisher =    "AAAI Press",
  editor =       "U. M. Fayyad and R. Uthurusamy",
  URL =          "ftp://ftp.cs.helsinki.fi/pub/Reports/by_Project/PMDM/Finding_Frequent_Episodes_in_Sequences.ps.gz",
  keywords =     "Knowledge discovery, Data mining, Sequence analysis,
                 Episode discovery",
  abstract =     "Sequences of events describing the behavior and
                 actions of users or systems can be collected in several
                 domains. In this paper we consider the problem of
                 recognizing frequent episodes in such sequences of
                 events. An episode is defined to be a collection of
                 events that occur within time intervals of a given size
                 in a given partial order. Once such episodes are known,
                 one can produce rules for describing or predicting the
                 behavior of the sequence. We describe an efficient
                 algorithm for the discovery of all frequent episodes
                 from a given class of episodes, and present
                 experimental results.",
}

Discovering Generalized Episodes Using Minimal Occurrences, Heikki Mannila and Hannu Toivonen
@InProceedings{mannila.ea:discovering-generalized:96,
  author =       "Heikki Mannila and Hannu Toivonen",
  title =        "Discovering Generalized Episodes Using Minimal
                 Occurrences",
  pages =        "146",
  crossref =     "simoudis.ea:proceedings-second:96",
}

Efficient algorithms for discovering association rules, Heikki Mannila and Hannu Toivonen and A. Inkeri Verkamo
Available as
compressed postscript.
@InProceedings{mannila.ea:efficient-algorithms:94,
  author =       "Heikki Mannila and Hannu Toivonen and A. Inkeri
                 Verkamo",
  title =        "Efficient algorithms for discovering association
                 rules",
  booktitle =    "AAAI Workshop on Knowledge Discovery in Databases
                 (KDD-94)",
  editor =       "Usama M. Fayyad and Ramasamy Uthurusamy",
  pages =        "181--192",
  publisher =    "AAAI Press",
  address =      "Seattle, Washington",
  month =        jul,
  year =         "1994",
  URL =          "ftp://ftp.cs.helsinki.fi/pub/Reports/by_Project/PMDM/Efficient_Algorithms_for_Discovering_Association_Rules.ps.gz",
  keywords =     "Knowledge discovery, Data mining, Association rules",
  abstract =     "Association rules are statements of the form ``for 90
                 \% of the rows of the relation, if the row has value 1
                 in the columns in set $W$, then it has 1 also in column
                 $B$''. Agrawal, Imielinski, and Swami introduced the
                 problem of mining association rules from large
                 collections of data, and gave a method based on
                 successive passes over the database. We give an
                 improved algorithm for the problem. The method is based
                 on careful combinatorial analysis of the information
                 obtained in previous passes; this makes it possible to
                 eliminate unnecessary candidate rules. Experiments on a
                 university course enrollment database indicate that the
                 method outperforms the previous one by a factor of 5.
                 We also show that sampling is in general a very
                 efficient way of finding such rules.",
}

Multiple Uses of Frequent Sets and Condensed Representations: Extended Abstract, Heikki Mannila and Hannu Toivonen
@InProceedings{mannila.ea:multiple-uses:96,
  author =       "Heikki Mannila and Hannu Toivonen",
  title =        "Multiple Uses of Frequent Sets and Condensed
                 Representations: Extended Abstract",
  pages =        "189",
  crossref =     "simoudis.ea:proceedings-second:96",
}

Data mining and machine learning (abstract), Heikki Mannila
@InProceedings{mannila:machine-learning:96,
  author =       "Heikki Mannila",
  title =        "Data mining and machine learning (abstract)",
  booktitle =    "Proc. 13th International Conference on Machine
                 Learning",
  pages =        "555",
  year =         "1996",
  publisher =    "Morgan Kaufmann",
}

Data mining - here we go again - guest-editors introduction, B. Mark
@Article{mark:here-we:96,
  author =       "B. Mark",
  address =      "Natl Semicond Architecture Lab, 2900 Semicond Dr, M-S
                 E-100, Santa Clara, Ca, 95052",
  title =        "Data mining - here we go again - guest-editors
                 introduction",
  journal =      "IEEE Expert-Intelligent Systems \& Their
                 Applications",
  year =         "1996",
  volume =       "11",
  number =       "5",
  pages =        "18--19",
}

Inference in mls database-systems, D. G. Marks
@Article{marks:inference-mls:96,
  author =       "D. G. Marks",
  address =      "Us Dept Def, Off Infosec Comp Sci, Ft George G Meade,
                 Md, 20755",
  title =        "Inference in {MLS} database-systems",
  journal =      "IEEE Transactions on Knowledge and Data Engineering",
  year =         "1996",
  volume =       "8",
  number =       "1",
  pages =        "46--55",
  abstract =     "Database systems that contain information of varying
                 degrees of sensitivity pose the threat that some of the
                 Low data may infer High data. This study derives
                 conditions sufficient to identify such inference
                 threats. First, it is reasoned that a database can only
                 control material implications, as specified in formal
                 logic systems. These material implications are found
                 using Knowledge Discovery techniques. Material
                 implications allow reasoning about outside knowledge,
                 and provide the first assurance that outside knowledge
                 does not assist in circumventing the inference
                 controls. Database queries specify the properties of
                 sets of data and are compared to help determine
                 inferences. These queries are grouped into equivalence
                 classes based upon their inference characteristics. A
                 unique graph based model is developed for the
                 equivalence classes that 1) makes such comparisons
                 easy, and 2) allows implementation of an algorithm
                 capable of finding those material implication rules
                 where High data is inferred from Low data. This is the
                 first method that offers assurance and sufficiency
                 arguments that the mechanism is at least strong enough
                 to protect the High data in the database from inference
                 attacks that require Low data.",
  keywords =     "INFERENCE, DATABASE SECURITY, KNOWLEDGE DISCOVERY,
                 MLS, QUERY PATTERNS",
}

A Comparison of Approaches for Maximizing Business Payoff of Prediction Models, Brij Masand and Gregory Piatetsky-Shapiro
@InProceedings{masand.ea:comparison-approaches:96,
  author =       "Brij Masand and Gregory Piatetsky-Shapiro",
  title =        "A Comparison of Approaches for Maximizing Business
                 Payoff of Prediction Models",
  pages =        "195",
  crossref =     "simoudis.ea:proceedings-second:96",
}

Systems for knowledge discovery in databases, C. J. Matheus and P. K. Chan and G. Piatetsky-Shapiro
@Article{matheus.ea:systems:93,
  crossref =     "cercone.ea:ieee-transactions:93",
  author =       "C. J. Matheus and P. K. Chan and G.
                 Piatetsky-Shapiro",
  address =      "Gte Labs Inc, Tech Staff, 40 Sylvan Rd, Waltham, Ma,
                 02254 Gte Labs Inc, Knowledge Discovery Databases
                 Project, Waltham, Ma, 02254 Columbia Univ, Dept Comp
                 Sci, New York, Ny, 10027",
  title =        "Systems for knowledge discovery in databases",
  journal =      "IEEE Transactions on Knowledge and Data Engineering",
  year =         "1993",
  month =        dec,
  volume =       "5",
  number =       "6",
  pages =        "903--913",
  abstract =     "The automated discovery of knowledge in databases is
                 becoming increasingly important as the world's wealth
                 of data continues to grow exponentially.
                 Knowledge-discovery systems face challenging problems
                 from real-world databases which tend to be dynamic,
                 incomplete, redundant, noisy, sparse, and very large.
                 This paper addresses these problems and describes some
                 techniques for handling them. A model of an idealized
                 knowledge-discovery system is presented as a reference
                 for studying and designing new systems. This model is
                 used in the comparison of three systems: CoverStory,
                 EXPLORA, and the Knowledge Discovery Workbench. The
                 deficiencies of existing systems relative to the model
                 reveal several open problems for future research.",
  annote =       "Discusses Coverstory, Explora and KDW",
  keywords =     "DATABASES, DISCOVERY, KDD SYSTEMS, MACHINE LEARNING",
}

Data mining and the con in econometrics - the us demand for money revisited, M. McAleer and M. R. Veall
@Article{mcaleer.ea:con-econometrics:95,
  author =       "M. McAleer and M. R. Veall",
  address =      "Univ Western Australia, Dept Econ, Nedlands, Wa 6009,
                 Australia Mcmaster Univ, Dept Econ, Hamilton, On,
                 Canada",
  title =        "Data mining and the con in econometrics - the {US}
                 demand for money revisited",
  journal =      "Mathematics and Computers in Simulation",
  year =         "1995",
  volume =       "39",
  number =       "3--4",
  pages =        "329--333",
}

MDL-Based Decision Tree Pruning, Manish Mehta and Jorma Rissanen and Rakesh Agrawal
Available as
postscript.
@InProceedings{mehta.ea:mdl-based-decision:95,
  author =       "Manish Mehta and Jorma Rissanen and Rakesh Agrawal",
  title =        "{MDL}-Based Decision Tree Pruning",
  booktitle =    "Proceedings of the First International Conference on
                 Knowledge Discovery and Data Mining (KDD'95)",
  pages =        "216--221",
  month =        aug,
  year =         "1995",
  URL =          "http://www.almaden.ibm.com/cs/people/ragrawal/papers/kdd95_mdl.ps",
  abstract-url = "http://www.almaden.ibm.com/cs/people/ragrawal/abstracts.html#mra95",
  keywords =     "Data Mining, Classification, Decision-Trees, MDL",
  abstract =     "This paper explores the application of the Minimum
                 Description Length principle for pruning decision
                 trees. We present a new algorithm that intuitively
                 captures the primary goal of reducing the
                 misclassification error. An experimental comparison is
                 presented with three other pruning algorithms. The
                 results show that the MDL pruning algorithm achieves
                 good accuracy, small trees, and fast execution times.",
}

SLIQ: A Fast Scalable Classifier for Data Mining, M. Mehta and R. Agrawal and J. Rissanen
@Article{mehta.ea:sliq-fast:96,
  author =       "M. Mehta and R. Agrawal and J. Rissanen",
  title =        "{SLIQ}: {A} Fast Scalable Classifier for Data Mining",
  journal =      "Lecture Notes in Computer Science",
  volume =       "1057",
  pages =        "18--32",
  year =         "1996",
  ISSN =         "0302-9743",
}

Mining geophysical-data for knowledge, E. Mesrobian and R. Muntz and E. Shek and S. Nittel and M. Larouche and M. Kriguer and C. Mechoso and J. Farrara and P. Stolorz and H. Nakamura
@Article{mesrobian.ea:geophysical-data:96,
  author =       "E. Mesrobian and R. Muntz and E. Shek and S. Nittel
                 and M. Larouche and M. Kriguer and C. Mechoso and J.
                 Farrara and P. Stolorz and H. Nakamura",
  address =      "Univ Calif Los Angeles, Dept Comp Sci, Los Angeles,
                 Ca, 90024 Univ Calif Los Angeles, Dept Atmospher Sci,
                 Los Angeles, Ca, 90024 Univ Tokyo, Dept Earth \&
                 Planetary Phys, Tokyo, Japan Univ Calif Los Angeles,
                 Data Min Lab, Los Angeles, Ca, 90024",
  title =        "Mining geophysical-data for knowledge",
  journal =      "IEEE Expert-Intelligent Systems \& Their
                 Applications",
  year =         "1996",
  volume =       "11",
  number =       "5",
  pages =        "34--44",
}

The AQ15 inductive learning system: an overview and experiments, Ryszard S. Michalski and Igor Mozetic and Jiarong Hong and Nada Lavrac
@TechReport{michalski.ea:aq15-inductive:86,
  author =       "Ryszard S. Michalski and Igor Mozetic and Jiarong Hong
                 and Nada Lavrac",
  title =        "The {AQ15} inductive learning system: an overview and
                 experiments",
  number =       "UIUCDCS-R-86-1260",
  institution =  "University of Illinois",
  year =         "1986",
  month =        jul,
}

Clustering, R. S. Michalski and R. E. Stepp
@Article{michalski.ea:clustering:92,
  author =       "R. S. Michalski and R. E. Stepp",
  title =        "Clustering",
  pages =        "168--176",
  key =          "Encyclopedia-of-ai:clustering",
  crossref =     "shapiro:encyclopedia-artificial:92",
}

Mining for knowledge in Databases: The INLEN Architecture, Initial Implementation and First Results., R. S. Michalski and L. Kerschberg and K. A. Kaufman
@Article{michalski.ea:inlen-architecture:92,
  author =       "R. S. Michalski and L. Kerschberg and K. A. Kaufman",
  title =        "Mining for knowledge in Databases: The {INLEN}
                 Architecture, Initial Implementation and First
                 Results",
  journal =      "Journal of Intelligent Information Systems",
  year =         "1992",
  volume =       "1",
  number =       "1",
  pages =        "85--113",
}

Learning from observation: conceptual clustering, Ryszard S. Michalski and Robert E. Stepp
@InCollection{michalski.ea:learning-observation:83,
  crossref =     "michalski.ea:machine-learning:83",
  author =       "Ryszard S. Michalski and Robert E. Stepp",
  title =        "Learning from observation: conceptual clustering",
  pages =        "331--363",
  booktitle =    "Machine Learning, an {Artificial Intelligence}
                 approach",
}

Machine Learning, an Artificial Intelligence approach, Ryszard S. Michalski and Jaime G. Carbonell and Tom M. Mitchell (Eds)
@Book{michalski.ea:machine-learning:83,
  title =        "Machine Learning, an {Artificial Intelligence}
                 approach",
  editor =       "Ryszard S. Michalski and Jaime G. Carbonell and Tom M.
                 Mitchell",
  volume =       "1",
  publisher =    "Morgan Kaufmann",
  address =      "San Mateo, California",
  year =         "1983",
}

Machine Learning, an Artificial Intelligence approach, Ryszard S. Michalski and Jaime G. Carbonell and Tom M. Mitchell (Eds)
@Book{michalski.ea:machine-learning:86,
  title =        "Machine Learning, an {Artificial Intelligence}
                 approach",
  editor =       "Ryszard S. Michalski and Jaime G. Carbonell and Tom M.
                 Mitchell",
  volume =       "2",
  publisher =    "Morgan Kaufmann",
  address =      "San Mateo, California",
  year =         "1986",
}

The multi-purpose incremental learning system AQ15 and its testing application to three medical domains, Ryszard S. Michalski and Igor Mozetic and Jiarong Hong and Nada Lavrac
@InProceedings{michalski.ea:multi-purpose-incremental:86,
  author =       "Ryszard S. Michalski and Igor Mozetic and Jiarong Hong
                 and Nada Lavrac",
  title =        "The multi-purpose incremental learning system {AQ15}
                 and its testing application to three medical domains",
  booktitle =    "Proceedings of the 5th national conference on
                 Artificial Intelligence",
  address =      "Philadelphia",
  year =         "1986",
  pages =        "1041--1045",
}

A theory and methodology of inductive learning, Ryszard S. Michalski
@InCollection{michalski:theory-methodology:83,
  crossref =     "michalski.ea:machine-learning:83",
  author =       "Ryszard S. Michalski",
  title =        "A theory and methodology of inductive learning",
  pages =        "83--134",
}

Fast Robust Visual Data Mining, Ted Mihalisin and John Timlin
@InProceedings{mihalisin.ea:fast-robust:97,
  author =       "Ted Mihalisin and John Timlin",
  title =        "Fast Robust Visual Data Mining",
  crossref =     "heckerman.ea:proceedings-third:97",
  pages =        "231",
}

An empirical comparison of selection measures for decision tree induction, J. Mingers
@Article{mingers:empirical-comparison:89,
  author =       "J. Mingers",
  title =        "An empirical comparison of selection measures for
                 decision tree induction",
  publisher =    "Kluwer Academic",
  address =      "Boston",
  journal =      "Machine Learning",
  year =         "1989",
  volume =       "3",
  number =       "4",
  pages =        "319--342",
}

A framework for representing knowledge, Marvin Minsky
@InCollection{minsky:framework-representating:75,
  author =       "Marvin Minsky",
  title =        "A framework for representing knowledge",
  booktitle =    "The Psychology of Computer Vision",
  editor =       "Patrick Henry Winston",
  publisher =    "McGraw-Hill",
  address =      "New York",
  pages =        "211--277",
  year =         "1975",
}

Learning by experimentation: acquiring and refining problem-solving heuristics, Tom M. Mitchell and Paul E. Utgoff and Ranan Banerji
@InCollection{mitchell.ea:learning-by:83,
  crossref =     "michalski.ea:machine-learning:83",
  author =       "Tom M. Mitchell and Paul E. Utgoff and Ranan Banerji",
  title =        "Learning by experimentation: acquiring and refining
                 problem-solving heuristics",
  pages =        "163--190",
}

Learning-theory toward genome informatics, S. Miyano
@Article{miyano:learning-theory-toward:95,
  author =       "S. Miyano",
  address =      "Kyushu Univ, Fundamental Informat Sci Res Inst,
                 Fukuoka 812, Japan",
  title =        "Learning-theory toward genome informatics",
  journal =      "IEICE Transactions on Information and Systems",
  year =         "1995",
  volume =       "E78-D",
  number =       "5",
  pages =        "560--567",
  abstract =     "This paper discusses some problems in Molecular
                 Biology for which learning paradigms are strongly
                 desired. We also present a framework of knowledge
                 discovery by PAC-learning paradigm together with its
                 theory and practice developed in our work for discovery
                 from amino acid sequences.",
  keywords =     "PAC-LEARNING, COMPUTATIONAL COMPLEXITY, KNOWLEDGE
                 ACQUISITION, GENOME INFORMATICS",
}

A rough set framework for data mining of propositional default rules, T. Mollestad and A. Skowron
@Article{mollestad.ea:rough-set:96,
  author =       "T. Mollestad and A. Skowron",
  title =        "A rough set framework for data mining of propositional
                 default rules",
  journal =      "Lecture Notes in Computer Science",
  volume =       "1079",
  pages =        "448--457",
  year =         "1996",
  ISSN =         "0302-9743",
}

The Field Matching Problem: Algorithms and Applications, Alvaro E. Monge and Charles P. Elkan
@InProceedings{monge.ea:field-matching:96,
  author =       "Alvaro E. Monge and Charles P. Elkan",
  title =        "The Field Matching Problem: Algorithms and
                 Applications",
  pages =        "267",
  crossref =     "simoudis.ea:proceedings-second:96",
}

Encouraging experimental results on learning CNF, Raymond J. Mooney
@TechReport{mooney:encouraging-experimental:92,
  title =        "Encouraging experimental results on learning {CNF}",
  author =       "Raymond J. Mooney",
  institution =  "University of Texas",
  year =         "1992",
  month =        oct,
}

A Multistrategy Approach to Relational Knowledge Discovery in Databases, K. Morik and P. Brockhausen
@InProceedings{morik.ea:multistrategy-approach:96,
  author =       "K. Morik and P. Brockhausen",
  booktitle =    "Proceedings of the 3rd International Workshop on
                 Multistrategy Learning",
  publisher =    "AAAI Press",
  title =        "A Multistrategy Approach to Relational Knowledge
                 Discovery in Databases",
  pages =        "17--28",
  year =         "1996",
}

Applications of Machine Learning, Katharina Morik
@InProceedings{morik:applications-machine:92,
  author =       "Katharina Morik",
  title =        "Applications of Machine Learning",
  booktitle =    "Proc.\ 6th European Knowledge Acquisition Workshop",
  year =         "1992",
  publisher =    "Springer-Verlag, Berlin",
  pages =        "9--13",
  annote =       "Brief overview of ML applications, Future directions
                 of ML - Integration of ML into databases (data
                 mining), Multi-strategy learning, inductive logic
                 programming.",
}

Data mining using probabilistic structure analysis, J. A. Morrell
@Article{morrell:using-probabilistic:97,
  author =       "J. A. Morrell",
  address =      "Monsanto Co, St Louis, Mo, 63198",
  title =        "Data mining using probabilistic structure analysis",
  journal =      "Abstracts of Papers of the American Chemical Society",
  year =         "1997",
  volume =       "213",
  number =       "Pt1",
  pages =        "69--CINF",
}

Advances in databases: 14th British National Conference on Databases, BNCOD 14, Edinburgh, Scotland, United Kingdom, July 3--5, 1996: proceedings, R. (Ronald) Morrison and Jessie Kennedy (Eds)
@Proceedings{morrison.ea:advances-14th:96,
  editor =       "R. (Ronald) Morrison and Jessie Kennedy",
  booktitle =    "Advances in databases: 14th British National
                 Conference on Databases, {BNCOD} 14, Edinburgh,
                 Scotland, United Kingdom, July 3--5, 1996:
                 proceedings",
  title =        "Advances in databases: 14th British National
                 Conference on Databases, {BNCOD} 14, Edinburgh,
                 Scotland, United Kingdom, July 3--5, 1996:
                 proceedings",
  volume =       "1094",
  publisher =    "Springer-Verlag Inc.",
  address =      "New York, NY, USA",
  pages =        "xi + 229",
  year =         "1996",
  ISBN =         "3-540-61442-7 (paperback)",
  ISSN =         "0302-9743",
  LCCN =         "QA76.9.D3 B75 1996",
  series =       "Lecture Notes in Computer Science",
  annote =       "Schema integration meta-knowledge classification and
                 reuse / R. M. Duwairi, N. J. Fiddian, W. A. Gray --
                 View mechanism for schema evolution in object-oriented
                 DBMS / Zohra Bellahsene -- An active rule language for
                 ROCK and ROLL / Andrew Dinn \ldots{} [et al.] --
                 Integrity constraints in multiversion databases / Anne
                 Doucet \ldots{} [et al.] -- The development of a
                 semantic integrity constraint subsystem for a
                 distributed database / H. Ibrahim, W. A. Gray, N. J.
                 Fiddian -- Understanding the tension between transition
                 rules and confidentiality / Xavier C. Delannoy --
                 Extending ER for dynamic behaviour and refinement /
                 Simon Wiseman, Bryony Pomeroy -- Speeding up knowledge
                 discovery in large relational databases by means of a
                 new discretization algorithm / Alex Alves Freitas,
                 Simon H. Lavington -- Integration of load measurement
                 parameters into the cost evaluation of database queries
                 / Guntram Flach, Holger Meyer -- High performance OO
                 traversals in Monet / Peter A. Boncz, Fred Kwakkel,
                 Martin L. Kersten -- A modular compiler architecture
                 for a data manipulation language / Suzanne M. Embury,
                 Peter M. D. Gray -- Querying graph databases using a
                 functional language extended with second order
                 facilities / Robert Ayres, Peter J. H. King -- SQL+i:
                 adding temporal indeterminacy to the database language
                 SQL / Antony Griffiths, Babis Theodoulidis -- Pearls,
                 swines and sows' ears: interface research inside a
                 multinational bank / Matthew Chalmers --
                 Dissemination-based information systems: your data may
                 be where you least expect it / Stanley B. Zdonik --
                 Microsoft database technologies: an inside view / Nigel
                 Stanley -- Predicate maintained queries: an active
                 OODBMS for financial applications / Mark Butterfield,
                 Nicholas Caine, Stephen Ross-Talbot -- Universal data
                 management / A. Bailey.",
  keywords =     "Database management -- Congresses.",
}

Case-based reasoning - market, applications, and fit with other technologies, S. Mott
@Article{mott:case-based-reasoning:93,
  author =       "S. Mott",
  address =      "Cognit Syst Inc, 234 Church St, New Haven, Ct, 06903",
  title =        "Case-based reasoning - market, applications, and fit
                 with other technologies",
  journal =      "Expert Systems With Applications",
  year =         "1993",
  volume =       "6",
  number =       "1",
  pages =        "97--104",
  abstract =     "Case-based reasoning (CBR), the hit of the American
                 Association of Artificial Intelligence annual
                 conference in 1991 and 1992 is now enjoying a surge of
                 interest in its first year of commercial availability.
                 Knowledge-based system designers, developers,
                 integrators, and tool vendors are now seriously
                 considering the role and utility of CBR in leveraging
                 the vast experience within organizations for more
                 effective decision making. The potential market for CBR
                 appears enormous, particularly in more complex
                 problem-solving domains, but the areas of most immediate
                 interest are in applications where efficient
                 information processing needs are urgent, such as
                 automated help desks. Early experiments pairing CBR
                 with rule-based systems will soon lead to hybrid
                 combinations with other ``close approximation''
                 technologies, such as neural networks, fuzzy logic
                 systems, genetic algorithms, and so forth. CBR appears
                 headed for a sustaining role not only as a useful
                 complement in knowledge-based information processing
                 technology but also as an engine for ``mainstream''
                 information tasks of the future (e.g., intelligent text
                 processing and retrieval, data mining, and projective
                 reasoning). This article will discuss this emerging
                 role for CBR and its implications from a marketing
                 perspective.",
}

Fast Sequential and Parallel Algorithms for Association Rule Mining: A Comparison, Andreas Mueller
Available as
compressed postscript.
@TechReport{mueller:fast-sequential:95,
  author =       "Andreas Mueller",
  title =        "Fast Sequential and Parallel Algorithms for
                 Association Rule Mining: {A} Comparison",
  institution =  "Dept. of Computer Science, Univ. of Maryland",
  number =       "CS-TR-3515",
  address =      "College Park, MD",
  month =        aug,
  year =         "1995",
  URL =          "ftp://ftp.cs.umd.edu/pub/papers/papers/3515/3515.ps.Z",
  abstract =     "The field of knowledge discovery in databases, or
                 \emph{Data Mining}, has received increasing attention
                 during recent years as large organizations have begun to
                 realize the potential value of the information that is
                 stored implicitly in their databases. One specific data
                 mining task is the mining of Association Rules,
                 particularly from retail data. The task is to determine
                 patterns (or rules) that characterize the shopping
                 behavior of customers from a large database of previous
                 consumer transactions. The rules can then be used to
                 focus marketing efforts such as product placement and
                 sales promotions.\par Because early algorithms required
                 an unpredictably large number of IO operations,
                 reducing IO cost has been the primary target of the
                 algorithms presented in the literature. One of the most
                 recent proposed algorithms, called PARTITION, uses a
                 new TID-list data representation and a new partitioning
                 technique. The partitioning technique reduces IO cost
                 to a constant amount by processing one database portion
                 at a time in memory. We implemented an algorithm called
                 SPTID that incorporates both TID-lists and partitioning
                 to study their benefits. For comparison, a
                 non-partitioning algorithm called SEAR, which is based
                 on a new prefix-tree data structure, is used. Our
                 experiments with SPTID and SEAR indicate that TID-lists
                 have inherent inefficiencies; furthermore, because all
                 of the algorithms tested tend to be CPU-bound, trading
                 CPU-overhead against I/O operations by partitioning did
                 not lead to better performance.\par In order to scale
                 mining algorithms to the huge databases (e.g., multiple
                 Terabytes) that large organizations will manage in the
                 near future, we implemented parallel versions of SEAR
                 and SPEAR (its partitioned counterpart). The
                 performance results show that, while both algorithms
                 parallelize easily and obtain good speedup and scale-up
                 results, the parallel SEAR version performs better than
                 parallel SPEAR, despite the fact that it uses more
                 communication.",
}

Neural Networks, an introduction, Berndt M{\"u}ller and Joachim Reinhardt
@Book{muller.ea:neural-networks:91,
  title =        {Neural Networks, an introduction},
  author =       {M{\"u}ller, Berndt and Reinhardt, Joachim},
  series =       {Physics of Neural Networks},
  publisher =    {Springer-Verlag},
  address =      {Berlin},
  year =         {1991},
}

Exploring the Decision Forest: An Empirical Investigation of Occam's Razor in Decision Tree Induction, Patrick M. Murphy and Michael J. Pazzani
Available as
postscript.
@Article{murphy.ea:exploring-decision:,
  author =       "Patrick M. Murphy and Michael J. Pazzani",
  title =        "Exploring the Decision Forest: An Empirical
                 Investigation of {Occam's} Razor in Decision Tree
                 Induction",
  journal =      "Journal of Artificial Intelligence Research",
  volume =       "1",
  pages =        "257--275",
  year =         "1994",
  URL =          "gopher://P.GP.CS.CMU.EDU:70/00/volume1/murphy94a.ps",
  abstract =     "We report on a series of experiments in which all
                 decision trees consistent with the training data are
                 constructed. These experiments were run to gain an
                 understanding of the properties of the set of
                 consistent decision trees and the factors that affect
                 the accuracy of individual trees. In particular, we
                 investigated the relationship between the size of a
                 decision tree consistent with some training data and
                 the accuracy of the tree on test data. The experiments
                 were performed on a massively parallel Maspar computer.
                 The results of the experiments on several artificial
                 and two real world problems indicate that, for many of
                 the problems investigated, smaller consistent decision
                 trees are on average less accurate than the average
                 accuracy of slightly larger trees.",
}

OC1: Randomized Induction of Oblique Decision Trees, S. K. Murthy and S. Kasif and S. Salzberg and R. Beigel
@InProceedings{murthy.ea:oc1-randomized:93,
  title =        {{OC1}: Randomized Induction of Oblique Decision
                 Trees},
  author =       {Murthy, S. K. and Kasif, S. and Salzberg, S. and
                 Beigel, R.},
  booktitle =    {Proceedings of the Eleventh National Conference on
                 Artificial Intelligence},
  address =      {Washington, D.C.},
  pages =        {322--327},
  year =         {1993},
}

On Growing Better Decision Trees from Data, Sreerama K. Murthy
% NOTE(review): this entry was truncated in the collection (unterminated
% abstract, no closing brace). The abstract below is only the part that
% survived. The school was added from the author's known affiliation and
% the year is still missing -- confirm both against the thesis itself.
@PhdThesis{murthy:on-growing:,
  author =       "Sreerama K. Murthy",
  title =        "On Growing Better Decision Trees from Data",
  school =       "Johns Hopkins University",
  abstract =     "This thesis investigates the problem of growing
                 decision trees from data, for the purposes of
                 classification and prediction.",
}

Belief Network Induction, Ron Musick
Available as
~musick.
@PhdThesis{musick:belief-network:,
  author =       {Musick, Ron},
  title =        {Belief Network Induction},
  school =       {University of California, Berkeley},
  url =          {http://http.cs.berkeley.edu/~musick},
  abstract =     {This dissertation describes BNI (Belief Network
                 Inductor), a tool that automatically induces a belief
                 network from a database. The fundamental thrust of this
                 research program has been to provide a theoretically
                 sound method of inducing a model from data, and
                 performing inference over that model. Along with a
                 solid grounding in probability theory, BNI has proven
                 to be a quick, practical method of inducing data models
                 that are highly accurate. The results include a belief
                 network that stores beta distributions in the
                 conditional probability tables, coupled with theorems
                 demonstrating how to maintain these distributions
                 through inference; techniques for applying neural
                 network and other learning techniques to the task of
                 conditional probability table learning; and a decision
                 theoretic sampling theory which addresses scalability
                 issues by characterizing the size of the sample needed
                 to produce high quality inferences. The setting for
                 this work is in database mining.},
}

Belief Network Induction, Ron Musick
Available as
postscript.
@TechReport{musick:belief-network:95,
  title =        {Belief Network Induction},
  author =       {Musick, Ron},
  type =         {Technical Report},
  number =       {UCB//CSD-95-863},
  institution =  {EECS Computer Science Division, University of
                 California, Berkeley},
  month =        dec,
  year =         {1995},
  pages =        {104},
  url =          {ftp://tr-ftp.cs.berkeley.edu/pub/tech-reports/csd/csd-95-863/all.ps},
  abstract =     {This dissertation describes BNI (Belief Network
                 Inductor), a tool that automatically induces a belief
                 network from a database. The fundamental thrust of this
                 research program has been to provide a theoretically
                 sound method of inducing a model from data, and
                 performing inference over that model. Along with a
                 solid grounding in probability theory, BNI has proven
                 to be a quick, practical method of inducing data models
                 that are highly accurate. The results include a belief
                 network that stores beta distributions in the
                 conditional probability tables, coupled with theorems
                 demonstrating how to maintain these distributions
                 through inference; techniques for applying neural
                 network and other learning techniques to the task of
                 conditional probability table learning; and a decision
                 theoretic sampling theory which addresses scalability
                 issues by characterizing the size of the sample needed
                 to produce high quality inferences. The setting for
                 this work is in database mining. Database mining is one
                 of the fastest growing topics in Artificial
                 Intelligence today, with industry providing at least as
                 much impetus as research labs and universities. The
                 general goal is to extract interesting quantities or
                 relationships that are ``hidden'' in large corporate or
                 scientific databases, with the potential benefits of a
                 successful technology being enormous. For example,
                 models can be built that characterize what types of
                 customers will respond to what types of marketing
                 schemes, retailers will be able to predict sales to
                 help determine correct inventory levels and
                 distribution schedules, and insurance companies will be
                 able to predict expected claim costs and better
                 classify who will buy what type of coverage.},
}

Rethinking the Learning of Belief Network Probabilities, Ron Musick
@InProceedings{musick:rethinking-learning:96,
  author =       {Musick, Ron},
  title =        {Rethinking the Learning of Belief Network
                 Probabilities},
  crossref =     {simoudis.ea:proceedings-second:96},
  pages =        {120},
}

Development of Multi-Criteria Metrics for Evaluation of Data Mining Algorithms, Gholamreza Nakhaeizadeh and Alexander Schnabl
@InProceedings{nakhaeizadeh.ea:development-multi-criteria:97,
  author =       {Nakhaeizadeh, Gholamreza and Schnabl, Alexander},
  title =        {Development of Multi-Criteria Metrics for Evaluation
                 of Data Mining Algorithms},
  crossref =     {heckerman.ea:proceedings-third:97},
  pages =        {37},
}

A logical language for data and knowledge bases, Shamim Naqvi and Shalom Tsur
@Book{naqvi.ea:logical-language:89,
  title =        {A logical language for data and knowledge bases},
  author =       {Naqvi, Shamim and Tsur, Shalom},
  series =       {Principles of computer science},
  publisher =    {Computer Science Press},
  address =      {Rockville},
  year =         {1989},
}

Revisable knowledge discovery in databases, A. Narayanan
@Article{narayanan:revisable:96,
  author =       "A. Narayanan",
  affiliation =  "Univ Exeter, Dept Comp Sci, Exeter Ex4 4Pt, Devon,
                 England",
  title =        "Revisable knowledge discovery in databases",
  journal =      "International Journal of Intelligent Systems",
  year =         "1996",
  volume =       "11",
  number =       "2",
  pages =        "75--96",
  abstract =     "This article introduces the idea of using nonmonotonic
                 inheritance networks for the storage and maintenance of
                 knowledge discovered in data (revisable knowledge
                 discovery in databases). While existing data mining
                 strategies for knowledge discovery in databases
                 typically involve initial structuring through the use
                 of identification trees and the subsequent extraction
                 of rules from these trees for use in rule-based expert
                 systems, such strategies have difficulty in coping with
                 additional information which may conflict with that
                 already used for the automatic generation of rules. In
                 the worst case, the entire automatic sequence may have
                 to be repeated. If nonmonotonic inheritance networks
                 are used instead of rules for storing knowledge
                 discovered in databases, additional conflicting
                 information can be inserted directly into such
                 structures, thereby bypassing the need for
                 recompilation. (C) 1996 John Wiley \& Sons, Inc.",
  keywords =     "LOGIC, CIRCUMSCRIPTION, INHERITANCE",
}

High-Performance Computing Approaches for Using the WWW to Access a Large-Scale Environmental Dataset Repository, Bahram Nassersharif and Richard Marciano and Sui-ky Ling and Eugene Ho and Curt Edmonds
@InProceedings{nassersharif.ea:high-performance-computing:95,
  author =       "Bahram Nassersharif and Richard Marciano and Sui-ky
                 Ling and Eugene Ho and Curt Edmonds",
  title =        "High-Performance Computing Approaches for Using the
                 {WWW} to Access a Large-Scale Environmental Dataset
                 Repository",
  booktitle =    "Proceedings of Supercomputing'95",
  publisher =    "ACM/IEEE",
  address =      "San Diego, CA",
  month =        dec,
  year =         "1995",
  keywords =     "data mining, mass storage, supercomputing, World Wide
                 Web, Oracle, data repositories, access tools, remote
                 sensing, environmental data, global warming, NALC,
                 ecology",
  abstract =     "Simple html document on CD with MPEG.",
}

Efficient and Effective Clustering Methods for Spatial Data Mining, Raymond T. Ng and Jiawei Han
@InProceedings{ng.ea:efficient-and-effective-clustering-methods-for-spatial-data-mining:94,
  author =       "Raymond T. Ng and Jiawei Han",
  title =        "Efficient and Effective Clustering Methods for
                 Spatial Data Mining",
  booktitle =    "Proceedings of the Twentieth International Conference
                 on Very Large Data Bases",
  year =         "1994",
  address =      "Santiago, Chile",
  pages =        "144--155",
  abstract =     "Spatial data mining is the discovery of interesting
                 relationships and characteristics that may exist
                 implicitly in spatial databases. In this paper, we
                 explore whether clustering methods have a role to play
                 in spatial data mining. To this end, we develop a new
                 clustering method called CLARANS which is based on
                 randomized search. We also develop two spatial data
                 mining algorithms that use CLARANS. Our analysis and
                 experiments show that with the assistance of CLARANS,
                 these two algorithms are very effective and can lead to
                 discoveries that are difficult to find with current
                 spatial data mining algorithms. Furthermore,
                 experiments conducted to compare the performance of
                 CLARANS with that of existing clustering methods show
                 that CLARANS is the most efficient.",
}

Efficient and Effective Clustering Methods for Spatial Data Mining, R. T. Ng and J. Han
@InProceedings{ng.ea:efficient-effective:94,
  author =       "R. T. Ng and J. Han",
  title =        "Efficient and Effective Clustering Methods for Spatial
                 Data Mining",
  editor =       "Jorge B. Bocca and Matthias Jarke and Carlo Zaniolo",
  booktitle =    "20th International Conference on Very Large Data
                 Bases, September 12--15, 1994, Santiago, Chile
                 proceedings",
  publisher =    "Morgan Kaufmann Publishers",
  address =      "Los Altos, CA 94022, USA",
  pages =        "144--155",
  year =         "1994",
  annote =       "Also known as VLDB'94",
  keywords =     "very large data bases; VLDB",
}

Principles of Artificial Intelligence, Nils J. Nilsson
@Book{nilsson:principles-artificial-intelligence:82,
  title =        {Principles of {Artificial Intelligence}},
  author =       {Nilsson, Nils J.},
  series =       {Symbolic Computation},
  publisher =    {Springer-Verlag},
  year =         {1982},
}

Some privacy issues in knowledge discovery - the oecd personal privacy guidelines, D. E. Oleary
@Article{oleary:some-privacy:95,
  author =       "D. E. O'Leary",
  affiliation =  "Univ So Calif, 3660 Trousdale Pkwy, Los Angeles, Ca,
                 90089",
  title =        "Some privacy issues in knowledge discovery - the {OECD}
                 personal privacy guidelines",
  journal =      "{IEEE} Expert: Intelligent Systems \& Their
                 Applications",
  year =         "1995",
  volume =       "10",
  number =       "2",
  pages =        "48--52",
  keywords =     "ethics, privacy, ethical issues",
}

Using domain knowledge to guide database knowledge discovery, M. M. Owrang and F. H. Grupe
@Article{owrang.ea:using-domain:96,
  author =       "M. M. Owrang and F. H. Grupe",
  affiliation =  "American Univ, Dept Comp Sci, 400 Massachusetts Ave
                 Nw, Washington, Dc, 20016 Univ Nevada, Dept Accounting
                 \& Comp Informat Syst, Reno, Nv, 89557",
  title =        "Using domain knowledge to guide database knowledge
                 discovery",
  journal =      "Expert Systems with Applications",
  year =         "1996",
  volume =       "10",
  number =       "2",
  pages =        "173--180",
  abstract =     "Modern database technologies process large volumes of
                 data to discover new knowledge. Some large databases
                 make discovery computationally expensive. Additional
                 knowledge, known as domain or background knowledge,
                 hidden in the database can often guide and restrict the
                 search for interesting knowledge. This paper discusses
                 mechanisms by which domain knowledge can be used
                 effectively in discovering knowledge from databases. In
                 particular we look at the use of domain knowledge to
                 reduce the search as well as to optimize the hypotheses
                 which represent the interesting knowledge to be
                 discovered. Also, we discuss how to use domain
                 knowledge to test the validity of the discovered
                 knowledge. Although domain knowledge can be used to
                 improve database searches, it should not block the
                 discovery of unexpected knowledge. We provide some
                 guidelines to use domain knowledge properly.",
}

Pattern Discovery in Temporal Databases: A Temporal Logic Approach, Balaji Padmanabhan and Alexander Tuzhilin
@InProceedings{padmanabhan.ea:pattern-temporal:96,
  author =       {Padmanabhan, Balaji and Tuzhilin, Alexander},
  title =        {Pattern Discovery in Temporal Databases: {A} Temporal
                 Logic Approach},
  crossref =     {simoudis.ea:proceedings-second:96},
  pages =        {351},
}

An effective hash-based algorithm for mining association rules, Jong Soo Park and Ming-Syan Chen and P. S. Yu
@Article{park.ea:effective-hash-based:95,
  title =        {An effective hash-based algorithm for mining
                 association rules},
  author =       {Park, Jong Soo and Chen, Ming-Syan and Yu, P. S.},
  journal =      {SIGMOD Record (ACM Special Interest Group on
                 Management of Data)},
  year =         {1995},
  month =        jun,
  volume =       {24},
  number =       {2},
  pages =        {175--186},
  issn =         {0163-5808},
  affiliation =  {IBM Thomas J. Watson Res. Center, Yorktown Heights,
                 NY, USA},
  keywords =     {Effective hash-based algorithm; Association rules
                 mining; Sales transactions; Candidate set generation;
                 Performance bottleneck},
  classification = {C7170 (Marketing computing); C7180 (Retailing and
                 distribution computing); C6170K (Knowledge engineering
                 techniques); C6160 (Database management systems
                 (DBMS))},
  thesaurus =    {Database management systems; Knowledge acquisition;
                 Pattern matching; Retail data processing; Sales
                 management},
  xxcrossref =   {Anonymous:1995:ASI},
}

An Effective Hash Based Algorithm for Mining Association Rules, Jong Soo Park and Ming-Syan Chen and Philip S. Yu
@InProceedings{park.ea:effective-hash:95,
  author =       {Park, Jong Soo and Chen, Ming-Syan and Yu, Philip S.},
  title =        {An Effective Hash Based Algorithm for Mining
                 Association Rules},
  booktitle =    {Proceedings of the 1995 {ACM} {SIGMOD} International
                 Conference on Management of Data},
  editor =       {Carey, Michael J. and Schneider, Donovan A.},
  address =      {San Jose, California},
  pages =        {175--186},
  month =        {22--25~} # may,
  year =         {1995},
}

Intelligent Database Tools \& Applications, K. Parsaye and M. Chignell
@Book{parsaye.ea:intelligent-database:93,
  title =        {Intelligent Database Tools \& Applications},
  author =       {Parsaye, K. and Chignell, M.},
  publisher =    {John Wiley},
  year =         {1993},
}

OLAP and Data Mining: Bridging the Gap, Kamran Parsaye
Available as
parsfeb.htm.
@Article{parsaye:olap-bridging:97,
  author =       "Kamran Parsaye",
  title =        "{OLAP} and Data Mining: Bridging the Gap",
  journal =      "Database Programming and Design",
  year =         "1997",
  month =        feb,
  URL =          "http://www.dbpd.com/parsfeb.htm",
  annote =       "OLAP and data mining--while very different--are both
                 integral to the decision-support process. By carefully
                 linking them, you can make sure one activity reinforces
                 the other.",
}

Knowledge Discovery from Epidemiological Databases, G. Pavilion
@Article{pavilion:epidemiological:96,
  title =        {Knowledge Discovery from Epidemiological Databases},
  author =       {Pavilion, G.},
  journal =      {Lecture Notes in Computer Science},
  year =         {1996},
  volume =       {1057},
  pages =        {201--??},
  issn =         {0302-9743},
}

Beyond Concise and Colorful: Learning Intelligible Rules, Michael J. Pazzani and Subramani Mani and W. Rodman Shankle
@InProceedings{pazzani.ea:beyond-concise:97,
  author =       {Pazzani, Michael J. and Mani, Subramani and Shankle,
                 W. Rodman},
  title =        {Beyond Concise and Colorful: Learning Intelligible
                 Rules},
  crossref =     {heckerman.ea:proceedings-third:97},
  pages =        {235},
}

Conditional fuzzy c-means, W. Pedrycz
@Article{pedrycz:conditional-fuzzy:96,
  author =       "W. Pedrycz",
  affiliation =  "Univ Manitoba, Dept Elect \& Comp Engn, Winnipeg, Mb
                 R3T 5V6, Canada",
  title =        "Conditional Fuzzy {C}-Means",
  journal =      "Pattern Recognition Letters",
  year =         "1996",
  volume =       "17",
  number =       "6",
  pages =        "625--631",
  abstract =     "A Fuzzy C-Means-based clustering method guided by an
                 auxiliary (conditional) variable is introduced. The
                 method reveals a structure within a family of patterns
                 by considering their vicinity in a feature space along
                 with the similarity of the values assumed by a certain
                 conditional variable. The usefulness of the algorithm
                 is exemplified in the problems of data mining.",
  keywords =     "FUZZY CLUSTERING, FUZZY C-MEANS, CONDITIONAL VARIABLE,
                 DATA MINING, RADIAL BASIS FUNCTIONS",
}

Leveraging the information asset, J. Perna
@Article{perna:leveraging-information:95,
  title =        {Leveraging the information asset},
  author =       {Perna, J.},
  journal =      {SIGMOD Record (ACM Special Interest Group on
                 Management of Data)},
  year =         {1995},
  month =        jun,
  volume =       {24},
  number =       {2},
  pages =        {451--452},
  issn =         {0163-5808},
  affiliation =  {IBM Canada Ltd., Toronto, Ont., Canada},
  keywords =     {Information asset; Corporate asset; Database users;
                 Competitive advantage; Unwanted store inventory;
                 Capital outlay; Store sales data; Real time access;
                 Production point of sale information; Database mining;
                 Multivendor database connectivity; Heterogeneous
                 clients; Customer needs; Marketplace},
  classification = {C7100 (Business and administration); C6160 (Database
                 management systems (DBMS)); C7250L (Non-bibliographic
                 retrieval systems); C7250R (Information retrieval
                 techniques)},
  thesaurus =    {Business data processing; Database management systems;
                 Information retrieval; Real-time systems},
  xxcrossref =   {Anonymous:1995:ASI},
}

Parallel Halo Finding in N-body Cosmology Simulations, David W. Pfitzner and John K. Salmon
@InProceedings{pfitzner.ea:parallel-halo:96,
  author =       {Pfitzner, David W. and Salmon, John K.},
  title =        {Parallel Halo Finding in {N}-body Cosmology
                 Simulations},
  crossref =     {simoudis.ea:proceedings-second:96},
  pages =        {26},
}

KDD-93: Progress and Challenges in Knowledge Discovery in Databases, G. Piatetsky-Shapiro and C. Matheus and P. Smyth and R. Uthurusamy
Available as
kdd-93-report.tex.
@Unpublished{piatetsky-shapiro.ea:kdd-93-progress:,
  author =       "G. Piatetsky-Shapiro and C. Matheus and P. Smyth and
                 R. Uthurusamy",
  title =        "{KDD}-93: Progress and Challenges in Knowledge
                 Discovery in Databases",
  note =         "To be published in AI Magazine",
  URL =          "http://info.gte.com/~kdd/kdd-93-report.tex",
  keywords =     "Knowledge, Discovery, Databases",
  annote =       "(a long report on AAAI-93 KDD Workshop), to be
                 published in AI Magazine.",
}

Kdd-93 - progress and challenges in knowledge discovery in databases, G. Piatetsky-Shapiro and C. Matheus and P. Smyth and R. Uthurusamy
@Article{piatetsky-shapiro.ea:kdd-93-progress:94,
  author =       "G. Piatetsky-Shapiro and C. Matheus and P. Smyth and
                 R. Uthurusamy",
  affiliation =  "Gte Labs Inc, Knowledge Discovery Databases Project,
                 Waltham, Ma, 02254 Jet Prop Lab, Tech Grp, Pasadena,
                 Ca, 91109 Gm Corp, Res Labs, Detroit, Mi, 48202",
  title =        "{KDD}-93: Progress and Challenges in Knowledge
                 Discovery in Databases",
  journal =      "{AI} Magazine",
  year =         "1994",
  volume =       "15",
  number =       "3",
  pages =        "77--82",
  abstract =     "Over 60 researchers from 10 countries took part in the
                 Third Knowledge Discovery in Databases (KDD) Workshop,
                 held during the Eleventh National Conference on
                 Artificial Intelligence in Washington, D.C. A major
                 trend evident at the workshop was the transition to
                 applications in the core KDD area of discovery of
                 relatively simple patterns in relational databases; the
                 most successful applications are appearing in the areas
                 of greatest need, where the databases are so large that
                 manual analysis is impossible. Progress has been
                 facilitated by the availability of commercial KDD tools
                 for both generic discovery and domain-specific
                 applications such as marketing. At the same time,
                 progress has been slowed by problems such as lack of
                 statistical rigor, overabundance of patterns, and poor
                 integration. Besides applications, the main themes of
                 this workshop were (1) the discovery of dependencies
                 and models and (2) integrated and interactive KDD
                 systems.",
}

Knowledge Discovery in Databases, Gregory Piatetsky-Shapiro and William Frawley (Eds)
@Book{piatetsky-shapiro.ea:knowledge-discovery:91,
  editor =       "Gregory Piatetsky-Shapiro and William J. Frawley",
  title =        "Knowledge Discovery in Databases",
  publisher =    "The MIT Press",
  address =      "Cambridge, MA",
  pages =        "xii + 525",
  year =         "1991",
  ISBN =         "0-262-66070-9 (paper)",
  LCCN =         "Q325.5 .K68 1991",
}

An Overview of Issues in Developing Industrial Data Mining and Knowledge Discovery Applications, Gregory Piatetsky-Shapiro and Ron Brachman and Tom Khabaza and Willi Kloesgen and Evangelos Simoudis
@InProceedings{piatetsky-shapiro.ea:overview-issues:96,
  author =       {Piatetsky-Shapiro, Gregory and Brachman, Ron and
                 Khabaza, Tom and Kloesgen, Willi and Simoudis,
                 Evangelos},
  title =        {An Overview of Issues in Developing Industrial Data
                 Mining and Knowledge Discovery Applications},
  crossref =     {simoudis.ea:proceedings-second:96},
  pages =        {89},
}

Knowledge discovery workbench for exploring business databases, G. Piatetsky-Shapiro and C. J. Matheus
@Article{piatetsky-shapiro.ea:workbench-exploring:92,
  author =       "G. Piatetsky-Shapiro and C. J. Matheus",
  affiliation =  "Gte Labs Inc, Waltham, Ma, 02254",
  title =        "Knowledge discovery workbench for exploring business
                 databases",
  journal =      "International Journal of Intelligent Systems",
  year =         "1992",
  volume =       "7",
  number =       "7",
  pages =        "675--686",
  abstract =     "We describe the Knowledge Discovery Workbench, an
                 interactive system for database exploration. We then
                 illustrate KDW capabilities in data clustering,
                 summarization, classification, and discovery of
                 changes. We also examine extracting dependencies from
                 data and using them to order the multitude of data
                 patterns.",
}

Discovery, Analysis and Presentation of Strong Rules, Gregory Piatetsky-Shapiro
@InCollection{piatetsky-shapiro:analysis-presentation:91,
  author =       "Gregory Piatetsky-Shapiro",
  title =        "Discovery, Analysis and Presentation of Strong Rules",
  editor =       "Gregory Piatetsky-Shapiro and William J. Frawley",
  booktitle =    "Knowledge Discovery in Databases",
  publisher =    "AAAI Press / The MIT Press",
  address =      "Menlo Park, California",
  edition =      "First",
  year =         "1991",
  pages =        "229--248",
}

Discovery and Analysis of Strong Rules in Databases, G Piatetsky-Shapiro
@InProceedings{piatetsky-shapiro:analysis-strong:89,
  author =       "G. Piatetsky-Shapiro",
  title =        "Discovery and Analysis of Strong Rules in Databases",
  booktitle =    "Advanced Database System Symposium, Kyoto",
  year =         "1989",
  month =        dec,
}

Data mining and knowledge discovery in business databases, G. Piatetsky-Shapiro
@Article{piatetsky-shapiro:business:96,
  title =        {Data mining and knowledge discovery in business
                 databases},
  author =       {Piatetsky-Shapiro, G.},
  journal =      {Lecture Notes in Computer Science},
  year =         {1996},
  volume =       {1079},
  pages =        {56--??},
  issn =         {0302-9743},
}

Introduction, Gregory Piatetsky-Shapiro
@Article{piatetsky-shapiro:introduction:92,
  author =       {Piatetsky-Shapiro, Gregory},
  title =        {Introduction},
  crossref =     {ijis-special-issue:92},
  pages =        {587--589},
  year =         {1992},
  annote =       {Definition of KDD. Scientific Discovery, Commercial
                 Discovery, overview of papers.},
}

KDD-93: Proceedings of AAAI-93 Knowledge Discovery in Databases workshop, G. Piatetsky-Shapiro (Ed)
@TechReport{piatetsky-shapiro:kdd-93-proceedings:93,
  editor =       {Piatetsky-Shapiro, G.},
  title =        {{KDD}-93: Proceedings of {AAAI}-93 Knowledge Discovery
                 in Databases workshop},
  institution =  {AAAI},
  number =       {WS-02},
  year =         {1993},
  month =        jul,
  note =         {AAAI Press technical report},
  key =          {piatetsky-shapiro:kdd-93-proceedings:93},
}

KDD Frequently Asked Questions, Gregory Piatetsky-Shapiro (Ed)
@Misc{piatetsky-shapiro:kdd-frequently:94,
  editor =       {Piatetsky-Shapiro, Gregory},
  title =        {{KDD} Frequently Asked Questions},
  howpublished = {Published via WWW http://info.gte.com/~kdd/FAQ.txt},
  year =         {1994},
  month =        {18th } # apr,
  key =          {piatetsky-shapiro:kdd-frequently:94},
}

KDD Nugget 94-7, Gregory Piatetsky-Shapiro (Ed)
@Misc{piatetsky-shapiro:kdd-nugget:94-7,
  title =        "{KDD} Nugget 94-7",
  key =          "piatetsky-shapiro:kdd-nugget:94-7",
  editor =       "Gregory Piatetsky-Shapiro",
  howpublished = "Mailing list",
  month =        "18th " # apr,
  year =         "1994",
  annote =       "Contents: * G. Piatetsky-Shapiro, Time: Attack of the
                 Data Miners Business Week: Gold Mine of Data in
                 Customer Service ComputerWorld: Data is money, but
                 people are special US Census Bureau is now on WWW at
                 http://www.census.gov/ * Tej Anand, AT\&T Data Mining
                 Conference * Larry Ai, TRW Smart Charts for
                 Pharmaceuticals * Edwin Pednault, MDL workshop at
                 ML/COLT 94 * Roberto Zicari, CFP: Theory and Practice
                 of Object Systems",
}

Knowledge discovery in databases, G. Piatetsky-Shapiro
@Article{piatetsky-shapiro:knowledge-discovery:91b,
  key_modifier = "b",
  author =       "G. Piatetsky-Shapiro",
  affiliation =  "Gte Labs Inc, Waltham, Ma, 02254",
  title =        "Knowledge discovery in databases",
  journal =      "{IEEE} Expert: Intelligent Systems \& Their
                 Applications",
  year =         "1991",
  month =        oct,
  volume =       "6",
  number =       "5",
  pages =        "74--76",
  note =         "Discussion of second AAAI workshop on KDD",
}

Knowledge discovery in personal data vs privacy - a minisymposium, G. Piatetsky-Shapiro
@Article{piatetsky-shapiro:personal-vs:95,
  author =       "G. Piatetsky-Shapiro",
  address =      "GTE Labs Inc, Waltham, MA, 02254",
  title =        "Knowledge discovery in personal data vs privacy - a
                 minisymposium",
  journal =      "IEEE Expert-Intelligent Systems \& Their
                 Applications",
  year =         "1995",
  volume =       "10",
  number =       "2",
  pages =        "46--47",
}

Knowledge discovery in databases - progress report, G. Piatetsky-Shapiro
@Article{piatetsky-shapiro:progress-report:94,
  author =       "G. Piatetsky-Shapiro",
  address =      "GTE Labs Inc, 40 Sylvan Rd, Waltham, MA, 02254",
  title =        "Knowledge discovery in databases - progress report",
  journal =      "Knowledge Engineering Review",
  year =         "1994",
  volume =       "9",
  number =       "1",
  pages =        "57--60",
}

Knowledge Discovery in Real Databases: A Report on the IJCAI-89 Workshop, Gregory Piatetsky-Shapiro
@article{piatetsky-shapiro:real-report:91a,
  author       = {Gregory Piatetsky-Shapiro},
  title        = {Knowledge Discovery in Real Databases: {A} Report on the
                  {IJCAI}-89 Workshop},
  journal      = {AI Magazine},
  volume       = {11},
  number       = {5},
  pages        = {68--70},
  month        = jan,
  year         = {1991},
  key_modifier = {a},
}

Special issue - knowledge discovery in data-bases and knowledge bases - introduction, G. Piatetsky-Shapiro
@Article{piatetsky-shapiro:special-issue:92,
  author =       "G. Piatetsky-Shapiro",
  address =      "GTE Labs Inc, 40 Sylvan Rd, Waltham, MA, 02254",
  title =        "Special issue - knowledge discovery in data-bases and
                 knowledge bases - introduction",
  journal =      "International Journal of Intelligent Systems",
  year =         "1992",
  volume =       "7",
  number =       "7",
  pages =        "587--589",
}

An application of ILP in a musical database: Learning to compose the two-voice counterpoint, U. Pompe and I. Kononenko and T. Mak\v{s}e
@InProceedings{pompe.ea:application-ilp:96,
  author =       "U. Pompe and I. Kononenko and T. Mak\v{s}e",
  title =        "An application of {ILP} in a musical database:
                 {Learning} to compose the two-voice counterpoint",
  booktitle =    "Proceedings of the MLnet Familiarization Workshop on
                 Data Mining with Inductive Logic Programming",
  pages =        "1--11",
  year =         "1996",
}

Starlight, star bright - data-mining the cosmos, D. Price
@Article{price:starlight-star:95,
  author =       "D. Price",
  address =      "Univ Nevada, Reno, NV, 89557; American Univ,
                 Washington, DC, 20016",
  title =        "Starlight, star bright - data-mining the cosmos",
  journal =      "IEEE Expert-Intelligent Systems \& Their
                 Applications",
  year =         "1995",
  volume =       "10",
  number =       "4",
  pages =        "10--13",
}

Comparative benchmarking studies of various algorithms, ELENA project
Available as
compressed postscript.
% The ELENA project is a corporate author, so its name carries an extra
% pair of braces to stop BibTeX splitting it into first and last names.
% No journal or year is recorded here, hence the Misc entry type.
@Misc{project:comparative-benchmarking:,
  author =       "{ELENA project}",
  title =        "Comparative benchmarking studies of various
                 algorithms",
  annote =       "Elena project is at:
                 http://www.dice.ucl.ac.be/neural-nets/ELENA/ELENA.html",
  URL =          "ftp://ftp.dice.ucl.ac.be/pub/neural-nets/ELENA/databases/Benchmarks.ps.Z",
}

Data Mining and Model Simplicity: A Case Study in Diagnosis, Gregory M. Provan and Moninder Singh
@inproceedings{provan.ea:model-simplicity:96,
  author   = {Gregory M. Provan and Moninder Singh},
  title    = {Data Mining and Model Simplicity: {A} Case Study in Diagnosis},
  pages    = {57},
  crossref = {simoudis.ea:proceedings-second:96},
}

Analysis and Visualization of Classifier Performance: Comparison under Imprecise Class and Cost Distributions, Foster Provost and Tom Fawcett
% The abstract text recorded here stops after its first paragraph. The
% closing quote and brace were missing, which made the parser swallow the
% entries that follow; they are restored below.
@InProceedings{provost.ea:analysis-classifier:97,
  title =        "Analysis and Visualization of Classifier Performance:
                 Comparison under Imprecise Class and Cost
                 Distributions",
  author =       "Foster Provost and Tom Fawcett",
  pages =        "43",
  crossref =     "heckerman.ea:proceedings-third:97",
  abstract =     "When mining data with inductive methods, we often
                 experiment with a wide variety of learning algorithms,
                 using different algorithm parameters, varying output
                 threshold values, and using different training
                 regimens. Such experimentation yields a large number of
                 classifiers to be evaluated and compared. In order to
                 compare the performance of classifiers it is necessary
                 to know the conditions under which they will be used;
                 using accuracy alone is inadequate because class
                 distributions and misclassification costs are rarely
                 uniform.",
}

Inductive Policy, F. J. Provost and B. G. Buchanan
@inproceedings{provost.ea:inductive-policy:92,
  author    = {F. J. Provost and B. G. Buchanan},
  title     = {Inductive Policy},
  booktitle = {Proc.\ of AAAI-92},
  pages     = {255--262},
  year      = {1992},
}

Scaling Up Inductive Algorithms: An Overview, Foster Provost and Venkateswarlu Kolluri
@inproceedings{provost.ea:scaling-up:97,
  author   = {Foster Provost and Venkateswarlu Kolluri},
  title    = {Scaling Up Inductive Algorithms: An Overview},
  pages    = {239},
  crossref = {heckerman.ea:proceedings-third:97},
}

The Royal Tree Problem, a Benchmark for Single and Multiple Population Genetic Programming, William F. Punch and Douglas Zongker and Erik D. Goodman
@InCollection{punch.ea:royal-tree:96,
  author =       "William F. Punch and Douglas Zongker and Erik D.
                 Goodman",
  title =        "The Royal Tree Problem, a Benchmark for Single and
                 Multiple Population Genetic Programming",
  booktitle =    "Advances in Genetic Programming 2",
  publisher =    "MIT Press",
  year =         "1996",
  editor =       "Peter J. Angeline and Kinnear, Jr., K. E.",
  pages =        "299--316",
  chapter =      "15",
  address =      "Cambridge, MA, USA",
  keywords =     "genetic algorithms, genetic programming",
  ISBN =         "0-262-01158-1",
  abstract =     "We have previously shown how a genetic algorithm (GA)
                 can be used to perform _data mining_, the discovery of
                 particular/important data within large datasets, by
                 finding optimal data classifications using known
                 examples. However, these approaches, while successful,
                 limited data relationships to those that were _fixed_
                 before the GA run. We report here on an extension of
                 our previous work, substituting a genetic program (GP)
                 for a GA. The GP could optimize data classification, as
                 did the GA, but could also determine the functional
                 relationships among the features. This gave improved
                 performance and new information on important
                 relationships among features. We discuss the overall
                 approach, and compare the effectiveness of the GA vs.
                 GP on a biochemistry problem, the determination of the
                 involvement of bound water molecules in protein
                 interactions.",
  note =         "Also available as GARAGe96-01-01",
  size =         "18 pages",
}

FOIL: A Midterm Report, J. Ross Quinlan and R. M. Cameron-Jones
@article{quinlan.ea:foil-midterm:,
  author   = {J. Ross Quinlan and R. M. Cameron-Jones},
  title    = {{FOIL}: {A} Midterm Report},
  abstract = {FOIL is a learning system that constructs Horn clause programs
              from examples. This paper summarises the development of FOIL
              from 1989 up to early 1993 and evaluates its effectiveness on a
              non-trivial sequence of learning tasks taken from a Prolog
              programming text. Although many of these are handled reasonably
              well, the experiment highlights some weaknesses of the current
              implementation. Areas for further research are identified.},
}

C4.5: Programs for Machine Learning, J. Ross Quinlan
@book{quinlan:c4-5:92,
  author    = {J. Ross Quinlan},
  title     = {{C4}.5: Programs for Machine Learning},
  publisher = {Morgan Kaufmann},
  year      = {1992},
}

Comparing connectionist and symbolic learning methods, J. Ross Quinlan
% Unpublished entries require a note and ignore the institution field,
% so the affiliation is repeated in the note to make it reach the output.
@Unpublished{quinlan:comparing-connectionist:,
  author =       "J. Ross Quinlan",
  title =        "Comparing connectionist and symbolic learning
                 methods",
  institution =  "University of Sydney",
  note =         "Unpublished manuscript, University of Sydney",
}

Determining literals in inductive logic programming, J. Ross Quinlan
@InProceedings{quinlan:determining-literals:91,
  author =       "J. Ross Quinlan",
  title =        "Determining literals in inductive logic programming",
  booktitle =    "Proceedings of the 12th International Joint Conference
                 on Artificial Intelligence",
  pages =        "746--750",
  address =      "Sydney, Australia",
  year =         "1991",
}

The effect of noise on concept learning, J. Ross Quinlan
@incollection{quinlan:effect-noise:86,
  author   = {J. Ross Quinlan},
  title    = {The effect of noise on concept learning},
  pages    = {149--166},
  crossref = {michalski.ea:machine-learning:86},
}

An empirical comparison of genetic and decision-tree classifiers, J. Ross Quinlan
% The citation key keeps its historical spelling so that existing
% citations still resolve; only the title text is corrected.
@InProceedings{quinlan:empirical-comparision:88,
  author =       "J. Ross Quinlan",
  title =        "An empirical comparison of genetic and decision-tree
                 classifiers",
  booktitle =    "Proceedings of the 5th International Conference on
                 Machine Learning",
  pages =        "135--141",
  address =      "Ann Arbor",
  year =         "1988",
}

Induction of Decision Trees, J. Ross Quinlan
@article{quinlan:induction-decision:86,
  author  = {J. Ross Quinlan},
  title   = {Induction of Decision Trees},
  journal = {Machine Learning},
  volume  = {1},
  pages   = {81--106},
  year    = {1986},
}

Learning efficient classification procedures and their application to chess end games, J. Ross Quinlan
@incollection{quinlan:learning-efficient:83,
  author   = {J. Ross Quinlan},
  title    = {Learning efficient classification procedures and their
              application to chess end games},
  pages    = {463--482},
  crossref = {michalski.ea:machine-learning:83},
}

Cooperation through Hierarchical Competition in Genetic Data Mining, N J Radcliffe and P D Surry
Available as
compressed postscript.
% NOTE(review): the report number and year are inferred from the file
% name in the URL (tr/94/tr9409) and the institution from the FTP host
% and the former note -- confirm against the report's title page.
@TechReport{radcliffe.ea:cooperation-through:,
  author =       "N. J. Radcliffe and P. D. Surry",
  title =        "Cooperation through Hierarchical Competition in
                 Genetic Data Mining",
  institution =  "Edinburgh Parallel Computing Centre",
  number =       "EPCC-TR94-09",
  year =         "1994",
  URL =          "ftp://ftp.epcc.ed.ac.uk/pub/tr/94/tr9409.ps.Z",
  note =         "Parallel Computing Centre, Edinburgh",
}

Information Sharing and Knowledge Discovery in Large Scientific Databases : Introduction, Sudha Ram
@InProceedings{ram:information-sharing:94,
  author =       "Sudha Ram",
  title =        "Information Sharing and Knowledge Discovery in Large
                 Scientific Databases : Introduction",
  pages =        "397",
  editor =       "Jay F. Nunamaker and Ralph H. Sprague",
  booktitle =    "Proceedings of the 27th Annual Hawaii International
                 Conference on System Sciences. Volume 3 : Information
                 Systems: {DSS}/Knowledge-Based Systems",
  month =        jan,
  publisher =    "IEEE Computer Society Press",
  address =      "Los Alamitos, CA, USA",
  year =         "1994",
}

Information sharing and knowledge discovery in large scientific databases - introduction, S. Ram
@InProceedings{ram:information-sharing:95,
  author =       "S. Ram",
  title =        "Information sharing and knowledge discovery in large
                 scientific databases - introduction",
  pages =        "252",
  editor =       "Jay F. Nunamaker and Ralph H. Sprague",
  booktitle =    "Proceedings of the 28th Annual Hawaii International
                 Conference on System Sciences. Volume 3: Information
                 Systems - Decision Support and Knowledge-Based
                 Systems",
  month =        jan,
  publisher =    "IEEE Computer Society Press",
  address =      "Los Alamitos, CA, USA",
  year =         "1995",
}

A Knowledge-Based Equation Discovery System for Engineering Domains, R. Bharat Rao and Stephen Y. Lu
@article{rao.ea:knowledge-based-equation:93,
  author  = {R. Bharat Rao and Stephen Y. Lu},
  title   = {A Knowledge-Based Equation Discovery System for Engineering
             Domains},
  journal = {IEEE Expert},
  pages   = {37--42},
  month   = aug,
  year    = {1993},
  annote  = {Deals with KEDS system},
}

Visualizing Bagged Decision Trees, J. Sunil Rao and William J. E. Potts
@inproceedings{rao.ea:visualizing-bagged:97,
  author   = {J. Sunil Rao and William J. E. Potts},
  title    = {Visualizing Bagged Decision Trees},
  pages    = {243},
  crossref = {heckerman.ea:proceedings-third:97},
}

Calculating salience and breadth of knowledge, L. F. Rau
@Article{rau:calculating-salience:93,
  author =       "L. F. Rau",
  address =      "GE Co, Ctr Res \& Dev, Artificial Intelligence Lab,
                 Schenectady, NY, 12301",
  title =        "Calculating salience and breadth of knowledge",
  journal =      "IEEE Transactions on Knowledge and Data Engineering",
  year =         "1993",
  volume =       "5",
  number =       "6",
  pages =        "996--998",
  abstract =     "As computer programs grow to contain more information,
                 it will become more important, when faced with a new
                 system, to be able to ask, ``What do you know about?''
                 This correspondence paper overviews some recently
                 completed research [1] investigating three questions:
                 1) what it means for a computer to know what it knows
                 about, 2) how a computer can construct a representation
                 of what it knows about, and 3) how such a
                 representation can be used for practical applications
                 that advance the state-of-the-art in understanding the
                 content of large databases.",
  keywords =     "ARTIFICIAL INTELLIGENCE, COGNITIVE MODELING, COMPUTER
                 SCIENCE, DATABASE MANAGEMENT, INFORMATION MANAGEMENT,
                 INFORMATION RETRIEVAL, KNOWLEDGE DISCOVERY",
}

Genetic Programming for Improved Data Mining: An Application to the Biochemistry of Protein Interactions, M. L. Raymer and W. F. Punch and E. D. Goodman and L. A. Kuhn
Available as
postscript.
@inproceedings{raymer.ea:genetic-programming:96,
  author    = {M. L. Raymer and W. F. Punch and E. D. Goodman and L. A. Kuhn},
  title     = {Genetic Programming for Improved Data Mining: An Application
               to the Biochemistry of Protein Interactions},
  editor    = {John R. Koza and David E. Goldberg and David B. Fogel and
               Rick L. Riolo},
  booktitle = {Genetic Programming 1996: Proceedings of the First Annual
               Conference},
  publisher = {MIT Press},
  address   = {Stanford University, CA, USA},
  pages     = {375--380},
  month     = {28--31 } # jul,
  year      = {1996},
  keywords  = {Genetic Programming, Genetic Algorithms},
  url       = {http://isl.cps.msu.edu/GA/papers/GARAGe96-04-01.ps},
  size      = {6 pages},
  note      = {GP-96 Also available as TR GARAGe96-04-01},
}

Parallelism speeds data mining, S. Reese Hedberg
@Article{reese-hedberg:parallelism-speeds:95,
  author =       "S. {Reese Hedberg}",
  title =        "Parallelism speeds data mining",
  journal =      "IEEE Parallel and Distributed Technology: Systems and
                 Applications",
  volume =       "3",
  number =       "4",
  pages =        "3--6",
  month =        "Winter",
  year =         "1995",
  ISSN =         "1063-6552",
  classification = "C6110P (Parallel programming); C6160K (Deductive
                 databases); C6170K (Knowledge engineering techniques);
                 C7120 (Financial computing); C7130 (Public
                 administration)",
  keywords =     "6-processor; artificial intelligence; banks; cash;
                 casinos; data mining; data pattern recognition; data
                 processing; deductive databases; financial data
                 processing; genetic algorithms; government; knowledge
                 acquisition; knowledge discovery; machine learning;
                 money laundering; multidimensional database querying;
                 neural networks; online application processing tools;
                 parallel processing; parallelism; pattern; prediction;
                 query processing; rule-based; siftware; statistical
                 techniques; stored data; Sun server; systems;
                 techniques; transactions; US Department of Treasury",
}

Performing Effective Feature Selection by Investigating the Deep Structure of the Data, Marco Richeldi and Pier Luca Lanzi
@inproceedings{richeldi.ea:performing-effective:96,
  author   = {Marco Richeldi and Pier Luca Lanzi},
  title    = {Performing Effective Feature Selection by Investigating the
              Deep Structure of the Data},
  pages    = {379},
  crossref = {simoudis.ea:proceedings-second:96},
}

Approaches to Knowledge Representation: An Introduction, G. A. Ringland and D. A. Duce (Eds)
@Book{ringland.ea:approaches-to:88,
  editor =       "G. A. Ringland and D. A. Duce",
  title =        "Approaches to Knowledge Representation: An
                 Introduction",
  publisher =    "Research Studies Press Ltd.",
  year =         "1988",
  address =      "Letchworth, England",
}

Discretization of Numerical Attributes, Knut Magne Risvik
Available as
postscript.
@misc{risvik:discretization-numerical:97,
  author        = {Knut Magne Risvik},
  title         = {Discretization of Numerical Attributes},
  howpublished  = {Unpublished article. Report from undergraduate student
                   project.},
  month         = apr,
  year          = {1997},
  url           = {http://www.pvv.ntnu.no/~kmr/report/discretization.ps},
  contributedby = {Knut Magne Risvik, kmr(at)idi.ntnu.no},
}

Learning Decision Lists, Ronald L. Rivest
@article{rivest:learning-decision:87,
  author  = {Ronald L. Rivest},
  title   = {Learning Decision Lists},
  journal = {Machine Learning},
  volume  = {2},
  pages   = {229--246},
  year    = {1987},
}

Dealing with Duplicate Tuples in Multi-Join Query Processing, Roberto J. Bayardo, Jr.
Available as
compressed postscript.
% The author is written in "Last, Jr., First" form so that BibTeX sees
% Bayardo as the surname and Jr. as the suffix. The citation key is left
% unchanged so that existing citations still resolve.
@TechReport{roberto-j-bayardo:dealing-with:96,
  author =       "Bayardo, Jr., Roberto J.",
  title =        "Dealing with Duplicate Tuples in Multi-Join Query
                 Processing",
  institution =  "The University of Texas at Austin, Department of
                 Computer Sciences",
  type =         "Technical Report",
  number =       "UTEXAS.CS//CS-TR-96-11",
  pages =        "7",
  month =        may,
  year =         "1996",
  keywords =     "query processing, multi-join queries, query
                 optimization",
  URL =          "ftp://ftp.cs.utexas.edu/pub/techreports/tr96-11.ps.Z",
  abstract =     "This paper presents and evaluates several schemes for
                 handling duplicate tuple elimination during
                 optimization and execution of large select-project-join
                 queries. The primary issues investigated are (1)
                 precisely when to apply duplicate tuple removal during
                 query evaluation, and (2) how an optimizer should
                 predict the effects of removing duplicates. We also
                 develop a realistic model of multiple join queries
                 inspired by a proposed data-mining application.
                 Through experiments on this model, we find two critical
                 techniques for high performance execution of
                 select-project-join queries: First, the optimizer
                 should decide where duplicates are removed within the
                 query plan independent of the projections creating
                 them. Second, join algorithms should remove duplicates
                 when sorting or hashing their input, and the optimizer
                 should be capable of predicting its effects.",
}

Handling discovered structure in database-systems, J. F. Roddick and N. G. Craske and T. J. Richards
@Article{roddick.ea:handling-discovered:96,
  author =       "J. F. Roddick and N. G. Craske and T. J. Richards",
  address =      "Univ S Australia, Sch Comp \& Informat Sci, Adv Comp
                 Res Ctr, Levels Campus, the Levels, SA 5095, Australia;
                 Monash Univ, Dept Comp Technol, Caulfield, Vic 3145,
                 Australia; Qualitat Solut \& Res Pty Ltd, Bundoora, Vic
                 3083, Australia",
  title =        "Handling discovered structure in database-systems",
  journal =      "IEEE Transactions on Knowledge and Data Engineering",
  year =         "1996",
  volume =       "8",
  number =       "2",
  pages =        "227--240",
  abstract =     "Most database systems research assumes that the
                 database schema is determined by a database
                 administrator. With the recent increase in interest in
                 knowledge discovery from databases and the predicted
                 increase in the volume of data expected to be stored it
                 is appropriate to reexamine this assumption and
                 investigate how derived or induced, rather than
                 database administrator supplied, structure can be
                 accommodated and used by database systems. This paper
                 investigates some of the characteristics of inductive
                 learning and knowledge discovery as they pertain to
                 database systems and the constraints that would be
                 imposed on appropriate inductive learning algorithms is
                 discussed. A formal method of defining induced
                 dependencies (both static and temporal) is proposed as
                 the inductive analogue to functional dependencies. The
                 Boswell database system exemplifying some of these
                 characteristics is also briefly discussed.",
  keywords =     "DESIGN, RULE, INDUCTIVE DATA MODELS, KNOWLEDGE
                 DISCOVERY, TEMPORAL INFERENCE, BOSWELL",
}

Knowledge Discovery in Reaction Databases, John R. Rose and Herbert Gelernter
@inproceedings{rose.ea:reaction:93,
  author    = {John R. Rose and Herbert Gelernter},
  title     = {Knowledge Discovery in Reaction Databases},
  editor    = {Bharat Bhargava and Timothy Finin and Yelena Yesha},
  booktitle = {Proceedings of the 2nd International Conference on Information
               and Knowledge Management},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
  pages     = {714--716},
  month     = nov,
  year      = {1993},
}

How Good Were Those Probability Predictions? The Expected Recommendation Loss (ERL) Scoring Rule, David B. Rosen
% The abstract text recorded here stops after its first paragraph. The
% closing quote and brace were missing, which made the parser swallow the
% entries that follow; they are restored below. The book details that
% used to sit in the note now occupy their own fields, and the entry is
% typed as a chapter in a collection because no journal is involved.
@InCollection{rosen:how-good:,
  title =        "How Good Were Those Probability Predictions? {The}
                 Expected Recommendation Loss ({ERL}) Scoring Rule",
  author =       "David B. Rosen",
  booktitle =    "Maximum Entropy and Bayesian Methods",
  editor =       "G. Heidbreder",
  publisher =    "Kluwer",
  address =      "Dordrecht, The Netherlands",
  year =         "1996",
  note =         "To appear. Proceedings of the Thirteenth International
                 Workshop, August 1993. 8 pages.",
  abstract =     "We present a new way to choose an appropriate scoring
                 rule for evaluating the performance of a _soft
                 classifier_, i.e. of a supplier of predicted
                 (inferred/estimated/learned/guessed) probabilities. A
                 scoring rule (probability loss function) is a function
                 of a single such prediction and the corresponding
                 outcome event (true class); its expectation over the
                 data space is the generalization performance of
                 ultimate interest, while its sum or average over some
                 benchmark test data set is an empirical performance
                 measure.",
}

Discriminative vs Informative Learning, Y. Dan Rubinstein and Trevor Hastie
@inproceedings{rubinstein.ea:discriminative-vs:97,
  author   = {Y. Dan Rubinstein and Trevor Hastie},
  title    = {Discriminative vs Informative Learning},
  pages    = {49},
  crossref = {heckerman.ea:proceedings-third:97},
}

SE-Trees Outperform Decision Trees in Noisy Domains, Ron Rymon
@inproceedings{rymon:se-trees-outperform:96,
  author   = {Ron Rymon},
  title    = {{SE}-Trees Outperform Decision Trees in Noisy Domains},
  pages    = {331},
  crossref = {simoudis.ea:proceedings-second:96},
}

Deriving Queries From Examples Using Genetic Programming, Tae-Wan Ryu and Christoph F. Eick
Available as
postscript.
@InProceedings{ryu.ea:deriving-queries:96,
  author =       "Tae-Wan Ryu and Christoph F. Eick",
  title =        "Deriving Queries From Examples Using Genetic
                 Programming",
  booktitle =    "The Second International Conference on Knowledge
                 Discovery and Data Mining (KDD-96)",
  editor =       "Evangelos Simoudis and Jia Wei Han and Usama Fayyad",
  year =         "1996",
  month =        aug # " 2--4",
  keywords =     "Genetic Programming, Genetic Algorithms, MASSON",
  pages =        "303",
  address =      "Portland, Oregon, USA",
  publisher =    "AAAI",
  URL =          "http://www.cs.uh.edu/~twryu/papers/kdd96.ps",
  size =         "14 pages",
  abstract =     "This paper centers on the problem of extracting
                 intensional information for a set of objects from an
                 object-oriented database. In our approach, the
                 extracted intensional information for the given set of
                 objects are described by object-oriented queries that
                 compute this set of objects. The paper discusses the
                 architecture of a knowledge discovery system, called
                 MASSON, which employs genetic programming to find such
                 queries, moreover, we will show how interesting queries
                 that describe commonalities within a set of objects are
                 automatically generated, modified, evaluated, and
                 selected; we will also discuss how the search for the
                 _best_ query is conducted by the MASSON system. We also
                 report on an experiment that evaluated the knowledge
                 discovery capability of MASSON.",
  annote =       "KDD-96
                 http://www.aaai.org:80/Press/Proceedings/KDD/1996/kdd-96.html",
  affiliation =  "University of Houston",
  crossref =     "simoudis.ea:proceedings-second:96",
}

MASSON: Discovering Commonalities in Collection of Objects using Genetic Programming, Tae-Wan Ryu and Christoph F. Eick
Available as
postscript.
@InProceedings{ryu.ea:masson--discovering:96,
  author =       "Tae-Wan Ryu and Christoph F. Eick",
  title =        "{MASSON}: Discovering Commonalities in Collection of
                 Objects using Genetic Programming",
  booktitle =    "Genetic Programming 1996: Proceedings of the First
                 Annual Conference",
  editor =       "John R. Koza and David E. Goldberg and David B. Fogel
                 and Rick L. Riolo",
  year =         "1996",
  month =        "28--31 " # jul,
  keywords =     "Genetic Programming, Genetic Algorithms",
  pages =        "200--208",
  address =      "Stanford University, CA, USA",
  publisher =    "MIT Press",
  URL =          "http://www.cs.uh.edu/~twryu/papers/gp96.ps",
  size =         "9 pages",
  abstract =     "For the current flood of data, automatic tools for
                 searching or analyzing data are necessary, especially
                 for complex databases. Accordingly, knowledge discovery
                 in databases is getting more and more attention. This
                 paper centers on the problem of discovering the common
                 characteristics that are shared by a set of objects
                 belonging to an object-oriented database. In our
                 approach, commonalities within a set of objects are
                 described by object-oriented queries that compute this
                 set of objects. The paper discusses the architecture of
                 a knowledge discovery system, called MASSON, which
                 employs genetic programming to find such queries, and
                 presents an example run of the system to illustrate how
                 the system works; we will show how interesting queries
                 that describe commonalities within a set of objects are
                 automatically generated, modified, evaluated, and
                 selected; we will also discuss how the search for the
                 _best_ query is conducted by the MASSON system.
                 Specific problems such as the generation of constants
                 in queries, how to cope with type violations and other
                 constraints when creating object-oriented queries, and
                 query evaluation are discussed in some detail.",
}

Learning Limited Dependence Bayesian Classifiers, Mehran Sahami
@inproceedings{sahami:learning-limited:96,
  author   = {Mehran Sahami},
  title    = {Learning Limited Dependence Bayesian Classifiers},
  pages    = {335},
  crossref = {simoudis.ea:proceedings-second:96},
}

On Comparing Classifiers: A Critique of Current Research and Methods, Steven Salzberg
Available as
postscript.
@article{salzberg:on-comparing:,
  author   = {Steven Salzberg},
  title    = {On Comparing Classifiers: {A} Critique of Current Research and
              Methods},
  url      = {http://www.cs.jhu.edu/salzberg/critique.ps},
  abstract = {Experimental machine learning research needs to scrutinize its
              approach to experimental design. If not done very carefully,
              comparative studies of classification algorithms can easily
              result in statistically invalid conclusions. This paper
              describes several phenomena that can, if ignored, invalidate an
              experimental comparison. It also divides machine learning
              research into several different types, and discusses why
              comparative analysis is more important for some than for
              others.},
  annote   = {homepage with decision tree papers is at:
              http://www.cs.jhu.edu/salzberg/home.html},
}

Learning concepts by asking questions, Claude Sammut and Ranan B. Banerji
@incollection{sammut.ea:learning-concepts:86,
  author   = {Claude Sammut and Ranan B. Banerji},
  title    = {Learning concepts by asking questions},
  pages    = {167--191},
  crossref = {michalski.ea:machine-learning:86},
}

Knowledge Discovery in Temporal Databases: The Initial Step, Mohamed H. Saraee and Babis Theodoulidis
@inproceedings{saraee.ea:temporal-initial:95,
  author    = {Mohamed H. Saraee and Babis Theodoulidis},
  title     = {Knowledge Discovery in Temporal Databases: The Initial Step},
  booktitle = {Knowledge Discovery Workshop of the International Conference
               on Deductive and Object Oriented Databases Workshop (DOOD)},
  address   = {Singapore},
  month     = dec,
  year      = {1995},
}

Data mining and forecasting in large-scale telecommunication networks, R. Sasisekharan and V. Seshadri and S. M. Weiss
@Article{sasisekharan.ea:forecasting-large-scale:96,
  author =       "R. Sasisekharan and V. Seshadri and S. M. Weiss",
  address =      "At\&T Bell Labs, Tech Staff, Middletown, Nj, 07748
                 Rutgers State Univ, Dept Comp Sci, New Brunswick, Nj,
                 08903",
  title =        "Data mining and forecasting in large-scale
                 telecommunication networks",
  journal =      "{IEEE} Expert: Intelligent Systems \& Their
                 Applications",
  year =         "1996",
  volume =       "11",
  number =       "1",
  pages =        "37--43",
}

Bottom-up induction of functional dependencies from relations, I. Savnik and P. A. Flach
Available as
compressed postscript.
@inproceedings{savnik.ea:bottom-up-induction:93,
  author    = {I. Savnik and P. A. Flach},
  title     = {Bottom-up induction of functional dependencies from relations},
  editor    = {G. Piatetsky-Shapiro},
  booktitle = {Proc. of AAAI-93 Workshop: Knowledge Discovery in Databases},
  pages     = {174--185},
  month     = jul,
  year      = {1993},
  url       = {ftp://martin.ijs.si/pub/CSD/Reports/CSD-TR-93-3.ps.gz},
  keywords  = {Functional dependency, Knowledge Discovery, Databases},
}

Finding Latent Variable Models in Large Databases, Richard Scheines and Peter Spirtes
@article{scheines.ea:finding-latent:92,
  author   = {Richard Scheines and Peter Spirtes},
  title    = {Finding Latent Variable Models in Large Databases},
  pages    = {609--621},
  crossref = {ijis-special-issue:92},
}

CoverStory -- Automated news finding in marketing, J. D. Schmitz and G. D. Armstrong and J. D. C. Little
@Article{schmitz.ea:coverstory--automated:90,
  author =       "J. D. Schmitz and G. D. Armstrong and J. D. C. Little",
  title =        "{CoverStory} -- Automated news finding in marketing",
  journal =      "Decision Support Systems Transaction",
  year =         "1990",
  pages =        "46--54",
  keywords =     "marketing, sales data, cranberry, ocean spray",
}

Extracting support data for a given task, B. Sch{\"o}lkopf and C. Burges and V. Vapnik
@InProceedings{scholkopf.ea:extracting-support:95,
  author =       "B. Sch{\"o}lkopf and C. Burges and V. Vapnik",
  title =        "Extracting support data for a given task",
  booktitle =    "Proceedings, First International Conference on
                 Knowledge Discovery and Data Mining",
  publisher =    "AAAI Press",
  address =      "Menlo Park, CA",
  editor =       "U. M. Fayyad and R. Uthurusamy",
  year =         "1995",
}

Book Review of Computer Systems That Learn., Alberto Segre and Geoffrey Gordon
@Article{serge.ea:book-review:93,
  author =       "Alberto Segre and Geoffrey Gordon",
  title =        "Book Review of Computer Systems That Learn.",
  journal =      "Artificial Intelligence",
  year =         "1993",
  volume =       "62",
  pages =        "363--378",
  annote =       "Review / overview of Computer Systems That Learn by
                 Sholom M. Weiss and Casimir A. Kulikowski. Identifies
                 dimensions of classification of learning methods. Looks
                 at Neural Nets, Statistical Methods and Machine
                 Learning approaches (ID3, CART, C4). 17 References.",
}

Using single layered neural networks for the extraction of conjunctive rules and hierarchical classifications, Sabrina Sestito and Tharam Dillon
@article{sestito.ea:using-single:91,
  author  = {Sabrina Sestito and Tharam Dillon},
  title   = {Using single layered neural networks for the extraction of
             conjunctive rules and hierarchical classifications},
  journal = {Journal of Applied Intelligence},
  volume  = {1},
  pages   = {157--173},
  year    = {1991},
}

Advances in knowledge acquisition: 9th European Knowledge Acquisition Workshop, EKAW '96, Nottingham, United Kingdom, May 14--17, 1996: proceedings, Nigel Shadbolt and Kieron O'Hara and Guus Schreiber (Eds)
@Proceedings{shadbolt.ea:advances-acquisition:96,
  editor =       "Nigel Shadbolt and Kieron O'Hara and Guus Schreiber",
  booktitle =    "Advances in knowledge acquisition: 9th European
                 Knowledge Acquisition Workshop, {EKAW} '96, Nottingham,
                 United Kingdom, May 14--17, 1996: proceedings",
  title =        "Advances in knowledge acquisition: 9th European
                 Knowledge Acquisition Workshop, {EKAW} '96, Nottingham,
                 United Kingdom, May 14--17, 1996: proceedings",
  volume =       "1076",
  publisher =    "Springer-Verlag Inc.",
  address =      "New York, NY, USA",
  pages =        "xii + 369",
  year =         "1996",
  ISBN =         "3-540-61273-4 (softcover)",
  ISSN =         "0302-9743",
  LCCN =         "QA76.73.E95 E92 1996",
  series =       "Lecture Notes in Artificial Intelligence and Lecture
                 Notes in Computer Science",
  annote =       "Assumptions of problem-solving methods / Richard
                 Benjamins and Christine Pierret-Golbreich --
                 Problem-solving methods: making assumptions for
                 efficiency reasons / Dieter Fensel and Remco Straatman
                 -- The thin end of the wedge: efficiency and the
                 generalised directive model methodology / Kieron O'Hara
                 and Nigel Shadbolt -- Principles for libraries of task
                 decomposition methods: conclusions from a case-study /
                 Klas Orsvarn -- A purpose driven method for language
                 comparison / The REVISE Project -- A conceptual and
                 formal model of a diagnostic reasoner / Richard
                 Benjamins and Manfred Aben -- Ontology construction for
                 technical domains / Jan Benjamin \ldots{} [et al.] --
                 Text clustering to help knowledge acquisition from
                 documents / Stephane Lapalut -- A quality-based
                 terminological reasoning model for text knowledge
                 acquisitions / Udo Hahn, Manfred Klenner and Klemens
                 Schnattinger -- Extracting conceptual knowledge from
                 text using explicit relation markers / Paul R. Bowden,
                 Peter Halstead and Tony G. Rose -- Structuring
                 information in a distributed hypermedia system / Celia
                 Ghedini Ralha -- Diagrammatic knowledge acquisition:
                 elicitation, analysis and issues / Peter C.-H. Cheng --
                 An approach to measuring theory quality / Edgar Sommer
                 -- Some late-breaking news from the data mines and a
                 preview of the KOALA system: a prospector's report /
                 Franz Schmalhofer and Christoph Kozieja -- A knowledge
                 acquisition tool for multi-perspective concept
                 formation / Joao Jose Furtado Vasco, Colette Faucher
                 and Eugene Chouraqui -- Knowledge discovery in
                 databases: exploiting knowledge-level redescription /
                 James Cupit and Nigel Shadbolt -- Towards painless
                 knowledge acquisition / Derek Sleeman and Fraser
                 Mitchell -- The acquisition of a shared task model /
                 Frances Brazier, Jan Treur and Niek Wijngaards -- The
                 group elicitation method: an introduction / Guy Boy --
                 Formalising the repair of schedules through knowledge
                 acquisition / Janet Efstathiou -- Intelligent tools for
                 planning knowledge base development and verification /
                 Steve A. Chien -- Configuring service recovery planning
                 with the CommonKADS library / V. Arlanzon, A. Bernaras
                 and I. Laresgoiti -- Domain and system influences in
                 problem solving models for planning / Hugh Cottam and
                 Nigel Shadbolt.",
  keywords =     "Knowledge acquisition (Expert systems) --
                 Congresses.",
}

Data-based acquisition and incremental modification of classification rules, N. Shan and W. Ziarko
@Article{shan.ea:data-based-acquisition:95,
  author =       "N. Shan and W. Ziarko",
  address =      "Univ Regina, Dept Comp Sci, Regina, Sk S4S 0A2,
                 Canada",
  title =        "Data-based acquisition and incremental modification of
                 classification rules",
  journal =      "Computational Intelligence",
  year =         "1995",
  volume =       "11",
  number =       "2",
  pages =        "357--370",
  abstract =     "One of the most important problems in the application
                 of knowledge discovery systems is the identification
                 and subsequent updating of rules. Many applications
                 require that the classification rules be derived from
                 data representing exemplar occurrences of data patterns
                 belonging to different classes. The problem of
                 identifying such rules in data has been researched
                 within the field of machine learning, and more recently
                 in the context of rough set theory and knowledge
                 discovery in databases. In this paper we present an
                 incremental methodology for finding all maximally
                 generalized rules and for adaptive modification of them
                 when new data become available. The methodology is
                 developed in the context of rough set theory and is
                 based on the earlier idea of discernibility matrix
                 introduced by Skowron.",
  keywords =     "ROUGH SETS, DECISION RULES, KNOWLEDGE DISCOVERY,
                 MACHINE LEARNING, INCREMENTAL LEARNING, ADAPTIVE
                 SYSTEMS",
}

Discovering Classification Knowledge in Databases Using Rough Sets, Ning Shan and Wojciech Ziarko and Howard J. Hamilton and Nick Cercone
@inproceedings{shan.ea:discovering-classification:96,
  author   = {Ning Shan and Wojciech Ziarko and Howard J. Hamilton and
              Nick Cercone},
  title    = {Discovering Classification Knowledge in Databases Using
              Rough Sets},
  pages    = {271},
  crossref = {simoudis.ea:proceedings-second:96},
}

The mathematical theory of communication, Claude E. Shannon and Warren Weaver
@book{shannon.ea:mathematical-theory:49,
  author    = {Claude E. Shannon and Warren Weaver},
  title     = {The mathematical theory of communication},
  publisher = {University of Illinois Press},
  year      = {1949},
  annote    = {Book by the father of information theory.},
}

Encyclopedia of artificial intelligence, Stuart C. Shapiro (Ed)
@book{shapiro:encyclopedia-artificial:92,
  editor    = {Stuart C. Shapiro},
  title     = {Encyclopedia of artificial intelligence},
  publisher = {Wiley},
  year      = {1992},
}

Combining Explanation-based and Neural Learning: An algorithm and Empirical Results, Jude W. Shavlik and Geoffrey G. Towell
@TechReport{shavlik.ea:combining-explanation-based:89,
  author =       "Jude W. Shavlik and Geoffrey G. Towell",
  title =        "Combining Explanation-based and Neural Learning: An
                 algorithm and Empirical Results",
  institution =  "University of Wisconsin",
  number =       "859",
  month =        jun,
  year =         "1989",
}

Symbolic and Neural Learning Algorithms: An Experimental Comparison (Revised), Jude W. Shavlik and Raymond J. Mooney and Geoffrey G. Towell
@TechReport{shavlik.ea:symbolic-neural:90,
  author =       "Jude W. Shavlik and Raymond J. Mooney and Geoffrey G.
                 Towell",
  title =        "Symbolic and Neural Learning Algorithms: An
                 Experimental Comparison (Revised)",
  institution =  "Department of Computer Sciences, University of
                 Wisconsin",
  number =       "955",
  month =        aug,
  year =         "1990",
  keywords =     "Empirical Learning, Connectionism, Neural Networks,
                 Inductive Learning, ID3, Perceptron, Backpropagation",
  annote =       "To Appear in Machine Learning, Volume 6, 1991.
                 Comparison of ID3, Backprop and Perceptron on 5 large,
                 real-world data sets.",
}

Scalable Exploratory Data Mining of Distributed Geoscientific Data, Eddie C. Shek and Richard R. Muntz and Edmond Mesrobian and Kenneth Ng
@inproceedings{shek.ea:scalable-exploratory:96,
  author   = {Eddie C. Shek and Richard R. Muntz and Edmond Mesrobian and
              Kenneth Ng},
  title    = {Scalable Exploratory Data Mining of Distributed
              Geoscientific Data},
  pages    = {32},
  crossref = {simoudis.ea:proceedings-second:96},
}

A metapattern-based automated discovery loop for integrated data mining - unsupervised learning of relational patterns, W. M. Shen and B. Leng
@Article{shen.ea:metapattern-based-automated:96,
  author =       "W. M. Shen and B. Leng",
  address =      "Univ So Calif, Inst Informat Sci, 4676 Admiralty Way,
                 Marina Del Rey, Ca, 90292 Univ So Calif, Dept Comp Sci,
                 Marina Del Rey, Ca, 90292 Inference Corp, Chicago, Il,
                 60631",
  title =        "A metapattern-based automated discovery loop for
                 integrated data mining - unsupervised learning of
                 relational patterns",
  journal =      "{IEEE} Transactions on Knowledge and Data Engineering",
  year =         "1996",
  month =        dec,
  volume =       "8",
  number =       "6",
  pages =        "898--910",
  abstract =     "Metapattern (also known as metaquery) is a new
                 approach for integrated data mining systems. Different
                 from a typical ''tool-box'' like integration, where
                 components must be picked and chosen by users without
                 much help, metapatterns provide a common representation
                 for intercomponent communication as well as a human
                 interface for hypothesis development and search
                 control. One weakness of this approach, however, is
                 that the task of generating fruitful metapatterns is
                 still a heavy burden for human users. In this paper, we
                 describe a metapattern generator and an integrated
                 discovery loop that can automatically generate
                 metapatterns. Experiments in both artificial and
                 real-world databases have shown that this new system
                 goes beyond the existing machine learning technologies,
                 and can discover relational patterns without requiring
                 humans to prelabel the data as positive or negative
                 examples for some given target concepts. With this
                 technology, future data mining systems could discover
                 high-quality, human comprehensible knowledge in a much
                 more efficient and focused manner, and data mining
                 could be managed easily by both expert and less expert
                 users.",
  keywords =     "induction, deduction, human interaction, integration,
                 unsupervised learning, relational concepts, metaquery,
                 metapattern",
}

Metapattern Generation for Integrated Data Mining, Wei-Min Shen and Bing Leng
@inproceedings{shen.ea:metapattern-generation:96,
  author   = {Wei-Min Shen and Bing Leng},
  title    = {Metapattern Generation for Integrated Data Mining},
  pages    = {152},
  crossref = {simoudis.ea:proceedings-second:96},
}

Complementary Discrimination Learning with Decision Lists, W. M. Shen
@inproceedings{shen:complementary-discrimination:92,
  author    = {W. M. Shen},
  title     = {Complementary Discrimination Learning with Decision Lists},
  booktitle = {Proc.\ of AAAI-92},
  pages     = {153--158},
  year      = {1992},
}

Discovering Regularities from Knowledge Bases, Wei-Min Shen
@article{shen:discovering-regularities:92,
  author   = {Wei-Min Shen},
  title    = {Discovering Regularities from Knowledge Bases},
  pages    = {623--635},
  crossref = {ijis-special-issue:92},
}

A survey of techniques for inference under uncertainty, F. K. J. Sheridan
@article{sheridan:survey-techniques:91,
  author  = {F. K. J. Sheridan},
  title   = {A survey of techniques for inference under uncertainty},
  journal = {Artificial Intelligence Review},
  volume  = {5},
  pages   = {89--119},
  year    = {1991},
}

Trend Recognition with Time Series Database, Hiromitsu Shimakawa and Kouji Kikkawa
@InProceedings{shimakawa.ea:trend-recognition:,
  author =       "Hiromitsu Shimakawa and Kouji Kikkawa",
  title =        "Trend Recognition with Time Series Database",
  booktitle =    "FUTURE DATABASES 92",
  volume =       "3",
  publisher =    "World Scientific Publishing Co. Pte. Ltd.",
  address =      "Singapore",
  year =         "1992",
  chapter =      "46",
  pages =        "373--383",
  annote =       "2ND FAR-EAST WORKSHOP ON FUTURE DATABASE SYSTEMS
                 KYOTO,JAPAN D920426-28",
}

Evaluating the Interestingness of Characteristic Rules, Micheline Kamber and Rajjan Shinghal
@InProceedings{shinghal:evaluating-interestingness:96,
  title =        "Evaluating the Interestingness of Characteristic
                 Rules",
  pages =        "263",
  author =       "Micheline Kamber and Rajjan Shinghal",
  crossref =     "simoudis.ea:proceedings-second:96",
}

Data mining applications in bt, R. Shortland and R. Scarfe
@Article{shortland.ea:applications-bt:94,
  author =       "R. Shortland and R. Scarfe",
  address =      "British Telecommun Labs, Martlesham Heath, Ipswich 1P5
                 7Re, Suffolk, England",
  title =        "Data mining applications in {BT}",
  journal =      "{BT} Technology Journal",
  year =         "1994",
  volume =       "12",
  number =       "4",
  pages =        "17--22",
  abstract =     "With the increased use of computers there is an ever
                 increasing volume of data being generated and stored.
                 This can lead to companies becoming 'data rich and
                 information poor'. This paper describes how BT has used
                 data mining techniques to convert volume data into
                 high-value information which can be used to aid
                 decision making in a number of key business processes.
                 The benefit of actively using data, as opposed to
                 passively storing it, is demonstrated via a number of
                 case studies which cover areas as diverse as fault
                 diagnosis, fraud detection, market segmentation, credit
                 vetting and litigation assessment.",
}

KESO: Minimizing Database Interaction, Arno Siebes and Martin L. Kersten
@inproceedings{siebes.ea:keso-minimizing:97,
  author   = {Arno Siebes and Martin L. Kersten},
  title    = {{KESO}: Minimizing Database Interaction},
  pages    = {247},
  crossref = {heckerman.ea:proceedings-third:97},
}

Homogeneous Discoveries Contain no Surprises: Inferring Risk-profiles from Large Databases, Arno Siebes
Available as
compressed postscript.
@TechReport{siebes:homogeneous-discoveries:,
  author =       "Arno Siebes",
  title =        "Homogeneous Discoveries Contain no Surprises:
                 Inferring Risk-profiles from Large Databases",
  institution =  "Centrum voor Wiskunde en Informatica ({CWI})",
  address =      "Amsterdam, The Netherlands",
  number =       "CS-R9430",
  year =         "1994",
  URL =          "ftp://ftp.cwi.nl/pub/CWIreports/AA/CS-R9430.ps.Z",
  abstract =     "Many models of reality are probabilistic. For example,
                 not everyone orders crisps with their beer, but a
                 certain percentage does. Inferring such probabilistic
                 knowledge from databases is one of the major challenges
                 for data mining. Recently Agrawal et al. investigated a
                 class of such problems. In this paper a new class of
                 such problems is investigated, viz., inferring
                 risk-profiles. The proto-typical example of this class
                 is: ``what is the probability that a given
                 policy-holder will file a claim with the insurance
                 company in the next year''. A risk-profile is then a
                 description of a group of insurants that have the same
                 probability for filing a claim. It is shown in this
                 paper that homogeneous descriptions are the most
                 plausible risk-profiles. Moreover, under modest
                 assumptions it is shown that covers of such homogeneous
                 descriptions are essentially unique. A direct
                 consequence of this result is that it suffices to
                 search for the homogeneous description with the highest
                 associated probability. The main result of this paper
                 is thus that we show that the inference problem for
                 risk-profiles reduces to the well studied problem of
                 maximising a quality function.",
  annote =       "CR subject classification (1991): Computer based
                 methods in probability and statistics (G.3), Database
                 applications (H.2.8), Information search and retrieval
                 (H.3.3) clustering, search process, Learning (I.2.6)
                 concept learning, induction, knowledge acquisition",
  keywords =     "Data Mining, Probabilistic Knowledge, Probabilistic
                 Search, Probability Theory",
}

What makes patterns interesting in knowledge discovery systems, A. Silberschatz and A. Tuzhilin
@Article{silberschatz.ea:what-makes:96,
  author =       "A. Silberschatz and A. Tuzhilin",
  address =      "At\&T Bell Labs, Lucent Technol, 600 Mt Ave, Murray
                 Hill, Nj, 07974 Nyu, Stern Sch Business, Dept Informat
                 Syst, New York, Ny, 10012",
  title =        "What makes patterns interesting in knowledge discovery
                 systems",
  journal =      "{IEEE} Transactions on Knowledge and Data Engineering",
  year =         "1996",
  volume =       "8",
  number =       "6",
  pages =        "970--974",
  abstract =     "One of the central problems in the field of knowledge
                 discovery is the development of good measures of
                 interestingness of discovered patterns. Such measures
                 of interestingness are divided into objective
                 measures-those that depend only on the structure of a
                 pattern and the underlying data used in the discovery
                 process, and the subjective measures-those that also
                 depend on the class of users who examine the pattern.
                 The focus of this paper is on studying subjective
                 measures of interestingness. These measures are
                 classified into actionable and unexpected, and the
                 relationship between them is examined. The unexpected
                 measure of interestingness is defined in terms of the
                 belief system that the user has. Interestingness of a
                 pattern is expressed in terms of how it affects the
                 belief system. The paper also discusses how this
                 unexpected measure of interestingness can be used in
                 the discovery process.",
  keywords =     "measures of interestingness, patterns, actionability,
                 unexpectedness, belief systems",
}

Proceedings of the Second International Conference on Knowledge Discovery and Data Mining (KDD-96), Evangelos Simoudis and Jia Wei Han and Usama Fayyad (Eds)
@Proceedings{simoudis.ea:proceedings-second:96,
  title =        "Proceedings of the Second International Conference on
                 Knowledge Discovery and Data Mining ({KDD}-96)",
  booktitle =    "Proceedings of the Second International Conference on
                 Knowledge Discovery and Data Mining ({KDD}-96)",
  year =         "1996",
  editor =       "Evangelos Simoudis and Jia Wei Han and Usama Fayyad",
  publisher =    "AAAI Press",
}

Reality check for data mining, E. Simoudis
@Article{simoudis:reality-check:96,
  author =       "E. Simoudis",
  address =      "Ibm Corp, Almaden Res Ctr, 650 Harry Rd, San Jose, Ca,
                 95120",
  title =        "Reality check for data mining",
  journal =      "{IEEE} Expert: Intelligent Systems \& Their
                 Applications",
  year =         "1996",
  volume =       "11",
  number =       "5",
  pages =        "26--33",
}

Extracting laws from decision tables - a rough set approach, A. Skowron
@Article{skowron:extracting-laws:95,
  author =       "A. Skowron",
  address =      "Warsaw Univ, Inst Math, Banacha 2, Pl-02097 Warsaw,
                 Poland",
  title =        "Extracting laws from decision tables - a rough set
                 approach",
  journal =      "Computational Intelligence",
  year =         "1995",
  volume =       "11",
  number =       "2",
  pages =        "371--388",
  abstract =     "We present some methods, based on the rough set and
                 Boolean reasoning approaches, for extracting laws from
                 decision tables. First we discuss several procedures
                 for decision rules synthesis from decision tables. Next
                 we show how to apply some near-to-functional relations
                 between data to data filtration. Two methods of
                 searching for new classifiers (features) are described:
                 searching for new classifiers in a given set of logical
                 formulas, and searching for some functions
                 approximating near-to-functional relations.",
  keywords =     "REASONING UNDER UNCERTAINTY, ROUGH SETS, KNOWLEDGE
                 DISCOVERY, MACHINE LEARNING",
}

Anytime Exploratory Data Analysis for Massive Data Sets, Padhraic Smyth and David Wolpert
@inproceedings{smyth.ea:anytime-exploratory:97,
  author   = {Padhraic Smyth and David Wolpert},
  title    = {Anytime Exploratory Data Analysis for Massive Data Sets},
  pages    = {54},
  crossref = {heckerman.ea:proceedings-third:97},
}

Detecting Atmospheric Regimes Using Cross-Validated Clustering, Padhraic Smyth and Michael Ghil and Kayo Ide and Joe Roden and Andrew Fraser
@inproceedings{smyth.ea:detecting-atmospheric:97,
  author   = {Padhraic Smyth and Michael Ghil and Kayo Ide and Joe Roden
              and Andrew Fraser},
  title    = {Detecting Atmospheric Regimes Using Cross-Validated
              Clustering},
  pages    = {61},
  crossref = {heckerman.ea:proceedings-third:97},
}

An information theoretic approach to rule induction from databases, P. Smyth and R. M. Goodman
@Article{smyth.ea:information-theoretic:92,
  author =       "P. Smyth and R. M. Goodman",
  address =      "Caltech, Jet Propuls Lab 238420, Commun Syst Res Sect,
                 Pasadena, Ca, 91109 Caltech, Dept Elect Engn, Pasadena,
                 Ca, 91125",
  title =        "An information theoretic approach to rule induction
                 from databases",
  journal =      "{IEEE} Transactions on Knowledge and Data Engineering",
  year =         "1992",
  volume =       "4",
  number =       "4",
  pages =        "301--316",
  abstract =     "The knowledge acquisition bottleneck in obtaining
                 rules directly from an expert is well known. Hence, the
                 problem of automated rule acquisition from data is a
                 well-motivated one, particularly for domains where a
                 database of sample data exists. In this paper we
                 introduce a novel algorithm for the induction of rules
                 from examples. The algorithm is novel in the sense that
                 it not only learns rules for a given concept
                 (classification), but it simultaneously learns rules
                 relating multiple concepts. This type of learning,
                 known as generalized rule induction is considerably
                 more general than existing algorithms which tend to be
                 classification oriented. Initially we focus on the
                 problem of determining a quantitative, well-defined
                 rule preference measure. In particular, we propose a
                 quantity called the J-measure as an information
                 theoretic alternative to existing approaches. The
                 J-measure quantifies the information content of a rule
                 or a hypothesis. We will outline the information
                 theoretic origins of this measure and examine its
                 plausibility as a hypothesis preference measure. We
                 then define the ITRULE algorithm which uses the newly
                 proposed measure to learn a set of optimal rules from a
                 set of data samples, and we conclude the paper with an
                 analysis of experimental results on real-world data.",
  keywords =     "PRINCIPLE, INFERENCE, EXPERT, CROSS ENTROPY, EXPERT
                 SYSTEMS, INFORMATION THEORY, MACHINE LEARNING,
                 KNOWLEDGE ACQUISITION, KNOWLEDGE DISCOVERY, RULE-BASED
                 SYSTEMS, RULE INDUCTION",
  month =        aug,
  annote =       "Describes the ITRULE system which uses the J-measure
                 of average information content of a rule. Also develops
                 a bound on the J-measure which is used to stop
                 specialisation of a rule when no further improvement
                 can be made. Info on the application of ITRULE to
                 computer analysis of Bach can be found at
                 http://www.gold.net/online/archive/940929_Bach.html",
}

Clustering Using Monte Carlo Cross-Validation, Padhraic Smyth
@inproceedings{smyth:clustering-using:96,
  author   = {Padhraic Smyth},
  title    = {Clustering Using Monte Carlo Cross-Validation},
  pages    = {126},
  crossref = {simoudis.ea:proceedings-second:96},
}

Learning to Extract Text-Based Information from the World Wide Web, Stephen Soderland
@inproceedings{soderland:learning-to:97,
  author   = {Stephen Soderland},
  title    = {Learning to Extract Text-Based Information from the World
              Wide Web},
  pages    = {251},
  crossref = {heckerman.ea:proceedings-third:97},
}

Neural and Intelligent Systems Integration: Fifth and Sixth Generation Integrated Reasoning Information Systems, Branko Soucek
@Book{soucek:neural-intelligent:91,
  author =       "Branko Soucek",
  title =        "Neural and Intelligent Systems Integration: Fifth and
                 Sixth Generation Integrated Reasoning Information
                 Systems",
  series =       "Sixth Generation Computer Technologies Series",
  pages =        "688",
  publisher =    "Wiley-Interscience",
  year =         "1991",
  ISBN =         "0-471-53676-8",
  price =        "\$89.95 (cloth)",
  keywords =     "book, text",
  abstract =     "** Description ** Combines new techniques of software
                 automation, system adaptation, module selection,
                 self-organization and automated discovery. Presents
                 results from the IRIS Group--findings from American,
                 European, Korean and Japanese projects on this emerging
                 discipline. Explores methods of combining well-defined
                 intelligent modules for integration into intelligent
                 systems. Modules include intelligent algorithms and
                 programs, neural networks and computing elements, fuzzy
                 data comparators and correlators, sparse distributed
                 memories, expert systems, intelligent databases,
                 associative and parallel processing units, and data
                 acquisition, control and robot units.\par ** Partial
                 Contents ** NEURAL, GENETIC, AND INTELLIGENT ALGORITHMS
                 AND COMPUTING ELEMENTS. From Modules to
                 Application-Oriented Integrated Systems (B. Soucek).
                 Neural Network Models of Concept Learning (P. Schyns).
                 Teaching Network Connections for Real-Time Object
                 Recognition (S. Wilson). Neural Networks on Parallel
                 Computers (H. Yoon, et al.). Neural Bit-Slice Computing
                 Element (J. Yestrebsky, et al.). INTEGRATED
                 NEURAL-KNOWLEDGE-FUZZY HYBRIDS. Fuzzy Data Comparator
                 with Neural Network Postprocessor: A Hardware
                 Implementation (P. Basehore, et al.). Injecting Symbol
                 Processing Into a Connectionist Model (S. Romaniuk \&
                 L. Hall). INTEGRATED REASONING, INFORMING, AND SERVING
                 SYSTEMS. An Advanced Software Paradigm for Intelligent
                 Systems Integration (T. Ichiko). Intelligent Data Base
                 and Automatic Discovery (K. Parsaye, et al.).
                 Index.\par ** Market ** Practicing Engineers and
                 Scientists, Students, Researchers.",
}

Using genetic algorithms for supervised concept learning, William M. Spears and Kenneth De Jong
@InProceedings{spears.ea:using-genetic:90,
  author =       "Spears, William M. and De Jong, Kenneth",
  title =        "Using genetic algorithms for supervised concept
                 learning",
  booktitle =    "Proceedings of tools for AI",
  organization = "IEEE",
  year =         "1990",
}

Mining Association Rules with Item Constraints, Ramakrishnan Srikant and Quoc Vu and Rakesh Agrawal
@inproceedings{srikant.ea:association-rules:97,
  author =       {Ramakrishnan Srikant and Quoc Vu and Rakesh Agrawal},
  title =        {Mining Association Rules with Item Constraints},
  crossref =     {heckerman.ea:proceedings-third:97},
  pages =        {67},
}

Mining Quantitative Association Rules in Large Relational Tables, Ramakrishnan Srikant and Rakesh Agrawal
@inproceedings{srikant.ea:quantitative-association:96,
  author =       {Ramakrishnan Srikant and Rakesh Agrawal},
  title =        {Mining Quantitative Association Rules in Large
                 Relational Tables},
  booktitle =    {Proceedings of the 1996 {ACM} {SIGMOD} International
                 Conference on Management of Data},
  editor =       {H. V. Jagadish and Inderpal Singh Mumick},
  address =      {Montreal, Quebec, Canada},
  pages =        {1--12},
  month =        "4--6~" # jun,
  year =         {1996},
}

JAM: Java Agents for Meta-Learning over Distributed Databases, Salvatore Stolfo and Andreas L. Prodromidis and Shelley Tselepis and Wenke Lee and Dave W. Fan and Philip K. Chan
@inproceedings{stolfo.ea:jam-java:97,
  author =       {Salvatore Stolfo and Andreas L. Prodromidis and
                 Shelley Tselepis and Wenke Lee and Dave W. Fan and
                 Philip K. Chan},
  title =        {{JAM}: Java Agents for Meta-Learning over Distributed
                 Databases},
  crossref =     {heckerman.ea:proceedings-third:97},
  pages =        {74},
}

Harnessing Graphical Structure in Markov Chain Monte Carlo Learning, Paul E. Stolorz and Philip C. Chew
@inproceedings{stolorz.ea:harnessing-graphical:96,
  author =       {Paul E. Stolorz and Philip C. Chew},
  title =        {Harnessing Graphical Structure in Markov Chain Monte
                 Carlo Learning},
  crossref =     {simoudis.ea:proceedings-second:96},
  pages =        {134},
}

Quakefinder: A Scalable Data Mining System for Detecting Earthquakes from Space, Paul Stolorz and Christopher Dean
@inproceedings{stolorz.ea:quakefinder-scalable:96,
  author =       {Paul Stolorz and Christopher Dean},
  title =        {Quakefinder: {A} Scalable Data Mining System for
                 Detecting Earthquakes from Space},
  crossref =     {simoudis.ea:proceedings-second:96},
  pages =        {208},
}

Fast Spatio-Temporal Data Mining of Large Geophysical Datasets, P. Stolorz
@InProceedings{stolorz:fast-spatio-temporal-data-mining-of-large-geophysical-datasets:95,
  author =       "P. Stolorz",
  title =        "Fast Spatio-Temporal Data Mining of Large Geophysical
                 Datasets",
  booktitle =    "Proceedings of the First International Conference on
                 Knowledge Discovery and Data Mining (KDD)",
  year =         "1995",
  address =      "Montreal, Canada",
  month =        aug,
  publisher =    "AAAI Press",
  editor =       "U. M. Fayyad and R. Uthurusamy",
}

Image Feature Reduction through Spoiling: Its Application to Multiple Matched Filters for Focus of Attention, Timothy M. Stough and Carla E. Brodley
@inproceedings{stough.ea:image-feature:97,
  author =       {Timothy M. Stough and Carla E. Brodley},
  title =        {Image Feature Reduction through Spoiling: Its
                 Application to Multiple Matched Filters for Focus of
                 Attention},
  crossref =     {heckerman.ea:proceedings-third:97},
  pages =        {255},
}

A Visual Interactive Framework for Attribute Discretization, Ramesh Subramonian and Ramana Venkata and Joyce Chen
@inproceedings{subramonian.ea:visual-interactive:97,
  author =       {Ramesh Subramonian and Ramana Venkata and Joyce Chen},
  title =        {A Visual Interactive Framework for Attribute
                 Discretization},
  crossref =     {heckerman.ea:proceedings-third:97},
  pages =        {82},
}

Exceptional Knowledge Discovery in Databases Based on Information Theory, Einoshin Suzuki and Masamichi Shimura
@inproceedings{suzuki.ea:exceptional-based:96,
  author =       {Einoshin Suzuki and Masamichi Shimura},
  title =        {Exceptional Knowledge Discovery in Databases Based on
                 Information Theory},
  crossref =     {simoudis.ea:proceedings-second:96},
  pages =        {275},
}

Autonomous Discovery of Reliable Exception Rules, Einoshin Suzuki
@inproceedings{suzuki:autonomous-reliable:97,
  author =       {Einoshin Suzuki},
  title =        {Autonomous Discovery of Reliable Exception Rules},
  crossref =     {heckerman.ea:proceedings-third:97},
  pages =        {259},
}

Data Mining with Silicon Graphics Technology, Arun Swami
Available as
hypertext.
@misc{swami:with-silicon:,
  author =       {Arun Swami},
  title =        {Data Mining with Silicon Graphics Technology},
  howpublished = {www publication},
  url =          {http://www-europe.sgi.com/Technology/data-mining.html},
}

Undiscovered Public Knowledge: A Ten-Year Update, Don R. Swanson and Neil R. Smalheiser
@inproceedings{swanson.ea:undiscovered-public:96,
  author =       {Don R. Swanson and Neil R. Smalheiser},
  title =        {Undiscovered Public Knowledge: {A} Ten-Year Update},
  crossref =     {simoudis.ea:proceedings-second:96},
  pages =        {295},
}

Uncertainty and decisions in medical informatics, P. Szolovits
@Article{szolovits:uncertainty-decisions:95,
  author =       "P. Szolovits",
  address =      "Mit, Comp Sci Lab, 545 Technol Sq, Cambridge, Ma,
                 02139",
  title =        "Uncertainty and decisions in medical informatics",
  journal =      "Methods Of Information In Medicine",
  year =         "1995",
  volume =       "34",
  number =       "1--2",
  pages =        "111--121",
  abstract =     "This paper presents a tutorial introduction to the
                 handling of uncertainty and decision-making in medical
                 reasoning systems. It focuses on the central role of
                 uncertainty in all of medicine and identifies the major
                 themes that arise in research papers. It then reviews
                 simple Bayesian formulations of the problem and pursues
                 the generalization to the Bayesian network methods that
                 are popular today. Decision making is presented from
                 the decision analysis viewpoint, with brief mention of
                 recently-developed methods. The paper concludes with
                 review of more abstract characterization of
                 uncertainty, and anticipates the growing importance of
                 analytic and ''data mining'' techniques as growing
                 amounts of clinical data become widely available.",
  keywords =     "COMPUTER, NETWORKS, MODEL, DECISION SUPPORT,
                 UNCERTAINTY, BAYES, GRAPH MODELS, DECISION TREES,
                 INFLUENCE DIAGRAMS",
}

Visualization techniques for data mining, G. D. Tattersall and P. R. Limb
@Article{tattersall.ea:techniques:94,
  author =       "G. D. Tattersall and P. R. Limb",
  address =      "British Telecommun Labs, Ipswich 1P5 7Re, Suffolk,
                 England",
  title =        "Visualization techniques for data mining",
  journal =      "Bt Technology J.",
  year =         "1994",
  volume =       "12",
  number =       "4",
  pages =        "23--31",
  abstract =     "BT collects and stores large quantities of data from a
                 variety of sources. These large data sets typically
                 describe different states of a system and are difficult
                 to interpret because there is no obvious way of
                 abstracting and presenting data features in a
                 meaningful way for a human observer. Data mining is a
                 term which has recently become popular to describe
                 techniques for the exploration and exploitation of
                 data. In particular, a large part of data mining
                 involves the visualisation of data and subsequent
                 utilisation of machine-learning techniques for
                 classification of data. This paper describes some
                 techniques for data visualisation which enable the user
                 to enhance understanding of the structure and
                 properties of (often multidimensional) data prior to
                 applying machine-learning techniques for further
                 analysis and exploration.",
}

Program Evolution for Data Mining, Astro Teller and Manuela Veloso
Available as
postscript.
@article{teller.ea:program-evolution:95,
  author =       {Astro Teller and Manuela Veloso},
  title =        {Program Evolution for Data Mining},
  journal =      {The International Journal of Expert Systems},
  volume =       {8},
  number =       {3},
  pages =        {216--236},
  year =         {1995},
  editor =       {Sushil Louis},
  publisher =    {JAI Press},
  note =         {Third Quarter. Special Issue on Genetic Algorithms and
                 Knowledge Bases.},
  keywords =     {genetic algorithms, genetic programming, memory},
  url =          {http://www.cs.cmu.edu/afs/cs/usr/astro/public/papers/Astro-ESJ.ps},
  url_2 =        {ftp://cs.ucl.ac.uk/genetic/papers/Astro-ESJ.ps.Z},
  abstract =     {Around the world there are innumerable databases of
                 information. The quantity of information available has
                 created a high demand for automatic methods for
                 searching these databases and extracting specific kinds
                 of information. Unfortunately, the information in these
                 databases increasingly contains signals that have no
                 corresponding classification symbols. Examples include
                 databases of images, sounds, etc. A few systems have
                 been written to help solve these search and retrieve
                 issues. But we can not write a new system for every
                 kind of signal we want to recognize and extract. Some
                 work has been done on automating (i.e. learning) the
                 task of identifying desired signal elements. It would
                 be useful to automate (learn) not just a part of the
                 classification function, but the entire signal
                 identification program. It would be helpful if we could
                 use the same learning architecture to automatically
                 create these programs for distinguishing many different
                 classes of the same signal type. It would be better
                 still if we could use the same learning architecture to
                 create these programs even for signal types as
                 different as images and sound waves. We introduce PADO
                 (Parallel Architecture Discovery and Orchestration), a
                 learning architecture designed to deliver this. PADO
                 has at its core a variant of genetic programming (GP)
                 that extends the paradigm to explore the space of
                 algorithms. PADO learns the entire classification
                 algorithm for an arbitrary signal type with arbitrary
                 signal class distinctions. This architecture has been
                 designed specifically for signal understanding and
                 classification. The architecture of PADO and its
                 achievements on the recovery of visual and acoustic
                 signal classes from test databases are the subjects of
                 this article.},
}

Interactive Knowledge Discovery from Marketing Questionnaire Using Simulated Breeding and Inductive Learning Methods, Takao Terano and Yoko Ishino
@inproceedings{terano.ea:interactive-marketing:96,
  author =       {Takao Terano and Yoko Ishino},
  title =        {Interactive Knowledge Discovery from Marketing
                 Questionnaire Using Simulated Breeding and Inductive
                 Learning Methods},
  crossref =     {simoudis.ea:proceedings-second:96},
  pages =        {279},
}

Data Mining \& Visualization: It's All In the Interaction, Kurt Thearling
Available as
dmviz.shtml.
@InProceedings{thearling:its-all:97,
  author =       "Kurt Thearling",
  title =        "Data Mining \& Visualization: It's All In the
                 Interaction",
  booktitle =    "Integration of Data Mining and Data Visualization
                 workshop, held in conjunction with both the KDD'97 and
                 Visualization '97",
  year =         "1997",
  URL =          "http://www.santafe.edu/~kurt/dmviz.shtml",
  note =         "Position Paper",
  annote =       "What is the point of visualization? It's pretty
                 simple: to let the user understand what is going on.
                 Since data mining usually involves extracting
                 ``hidden'' information from a database, the
                 understanding process can get a bit complicated. The
                 key is to put the user in a context they feel
                 comfortable in and then let them poke and prod until
                 they understand what they didn't see before.",
}

An Efficient Algorithm for the Incremental Updation of Association Rules in Large Databases, Shiby Thomas and Sreenath Bodagala and Khaled Alsabti and Sanjay Ranka
@inproceedings{thomas.ea:efficient-algorithm:97,
  author =       {Shiby Thomas and Sreenath Bodagala and Khaled Alsabti
                 and Sanjay Ranka},
  title =        {An Efficient Algorithm for the Incremental Updation of
                 Association Rules in Large Databases},
  crossref =     {heckerman.ea:proceedings-third:97},
  pages =        {263},
}

The MONK's Problems, A Performance Comparison of Different Learning Algorithms, S. B. Thrun et al.
@TechReport{thrun.ea:monks-problems:91,
  author =       "S. B. Thrun and others",
  title =        "The {MONK}'s Problems, {A} Performance Comparison of
                 Different Learning Algorithms",
  institution =  "Carnegie Mellon University",
  number =       "CMU-CS-91-197",
  month =        dec,
  year =         "1991",
  annote =       "Over 100 pages, this report is really 9 short reports
                 each evaluating the performance of a set of learning
                 algorithms on standard test data. Contains descriptions
                 of all the algorithms used. Packed full of references.
                 [from the abstract]This report summarizes a comparison
                 of different learning techniques which was performed at
                 the 2nd European Summer School on Machine Learning,
                 held in Belgium during summer 1991. A variety of
                 symbolic and non-symbolic learning techniques - namely
                 AQ17-DCL, AQ17-HCI, AQ17-FCLS, AQ14-NT, AQ15-GA,
                 Assistant Professional, mFOIL, ID5R, IDL,ID5R-hat,
                 TDIDT, ID3, AQR, CN2, CLASSWEB, ECOBWEB, PRISM,
                 Backpropagation, and Cascade Correlation - are compared
                 on three classification problems, the MONK's
                 problems.",
}

The Extraction of Refined Rules from Knowledge-Based Neural Networks, Geoffrey G. Towell and Jude W. Shavlik
@article{towell.ea:extraction-refined:93,
  author =       {Geoffrey G. Towell and Jude W. Shavlik},
  title =        {The Extraction of Refined Rules from Knowledge-Based
                 Neural Networks},
  journal =      {Machine Learning},
  volume =       {13},
  number =       {1},
  pages =        {71--101},
  year =         {1993},
}

Knowledge Acquisition Driven by Constructive and Interactive Induction, Katsuhiko Tsujino and Vlad G. Dabija and Shogo Nishida
@article{tsujino.ea:acquisition-driven:,
  title =        {Knowledge Acquisition Driven by Constructive and
                 Interactive Induction},
  author =       {Katsuhiko Tsujino and Vlad G. Dabija and Shogo
                 Nishida},
  keywords =     {Kaiser, meta-knowledge},
}

The application of rough sets-based data mining technique to differential diagnosis of meningoenchephalitis, S. Tsumoto and W. Ziarko
@article{tsumoto.ea:application-rough:96,
  author =       {S. Tsumoto and W. Ziarko},
  title =        {The application of rough sets-based data mining
                 technique to differential diagnosis of
                 meningoenchephalitis},
  journal =      {Lecture Notes in Computer Science},
  volume =       {1079},
  pages =        {438--??},
  year =         {1996},
  issn =         {0302-9743},
}

Automated Discovery of Medical Expert System Rules from Clinical Databases Based on Rough Sets, Shusaku Tsumoto and Hiroshi Tanaka
@inproceedings{tsumoto.ea:automated-medical:96,
  author =       {Shusaku Tsumoto and Hiroshi Tanaka},
  title =        {Automated Discovery of Medical Expert System Rules
                 from Clinical Databases Based on Rough Sets},
  crossref =     {simoudis.ea:proceedings-second:96},
  pages =        {63},
}

Bayesian Inference for Identifying Solar Active Regions, Michael Turmon and Saleem Mukhtar and Judit Pap
@inproceedings{turmon.ea:bayesian-inference:97,
  author =       {Michael Turmon and Saleem Mukhtar and Judit Pap},
  title =        {Bayesian Inference for Identifying Solar Active
                 Regions},
  crossref =     {heckerman.ea:proceedings-third:97},
  pages =        {267},
}

Deductive databases: achievements and future directions, Jeffrey D. Ullman and Carlo Zaniolo
@Article{ullman.ea:deductive-achievements:90,
  author =       "Jeffrey D. Ullman and Carlo Zaniolo",
  title =        "Deductive databases: achievements and future
                 directions",
  journal =      "SIGMOD Record (ACM Special Interest Group on
                 Management of Data)",
  volume =       "19",
  number =       "4",
  pages =        "75--82",
  month =        dec,
  year =         "1990",
  ISSN =         "0163-5808",
  abstract =     "The key concepts behind deductive databases and their
                 newly developed enabling technology are reviewed. The
                 declarative programming approach used for such
                 databases is examined at length. Current research on
                 extending the functionality and usability of deductive
                 databases and on providing a synthesis of deductive
                 databases with procedural and object-oriented
                 approaches are described.",
  affiliation =  "Stanford Univ",
  affiliationaddress = "Stanford, CA, USA",
  classification = "723; C6160Z (Other DBMS); C6170 (Expert systems)",
  keywords =     "Database Systems; Reviews; Computer Programming;
                 Deductive Databases; Declarative Programming;
                 Object-Oriented Programming; Procedural Programming,
                 Procedural databases; Declarative queries; Deductive
                 databases; Rule-based style; Knowledge mining;
                 Computer-aided design; Enabling technology;
                 Object-oriented approaches",
  thesaurus =    "Deductive databases",
}

Efficient Implementation of Data Cubes Via Materialized Views, Jeffrey D. Ullman
@inproceedings{ullman:efficient-implementation:96,
  author =       {Jeffrey D. Ullman},
  title =        {Efficient Implementation of Data Cubes Via
                 Materialized Views},
  crossref =     {simoudis.ea:proceedings-second:96},
  pages =        {386},
}

Papers on Rough Sets Theory and Information Systems, a team working in University of Warsaw and Warsaw University of Technology.
Available as
Reports.
@Misc{university-of-warsaw.ea:papers-on:,
  URL =          "ftp://ftp.ii.pw.edu.pl/pub/Reports",
  title =        "Papers on Rough Sets Theory and Information Systems",
  author =       "{a team working in University of Warsaw and Warsaw
                 University of Technology}",
  annote =       "The main field of our work is Rough Sets Theory and
                 Information Systems. Roughly speaking, our work is to
                 find dependencies in experimental datasets to simulate
                 decision processes with high quality.",
}

RITIO - Rule Induction Two In One, David Urpani and Xindong Wu and Jim Sykes
@inproceedings{urpani.ea:ritio-rule:96,
  author =       {David Urpani and Xindong Wu and Jim Sykes},
  title =        {{RITIO} - Rule Induction Two In One},
  crossref =     {simoudis.ea:proceedings-second:96},
  pages =        {339},
}

Incremental Induction of Decision Trees, P. E. Utgoff
@Article{utgoff:incremental-induction:89,
  author =       "P. E. Utgoff",
  title =        "Incremental Induction of Decision Trees",
  journal =      "Machine Learning",
  year =         "1989",
  volume =       "4",
  pages =        "161--186",
  keywords =     "ID3, ID5R",
}

Shift of Bias for Inductive Concept Learning, Paul E. Utgoff
@incollection{utgoff:shift-bias:86,
  title =        {Shift of Bias for Inductive Concept Learning},
  author =       {Paul E. Utgoff},
  pages =        {107--148},
  crossref =     {michalski.ea:machine-learning:86},
}

A Theory of the Learnable, Leslie G. Valiant
@article{valiant:theory-learnable:84,
  title =        {A Theory of the Learnable},
  author =       {Leslie G. Valiant},
  journal =      {Communications of the ACM},
  year =         {1984},
  month =        nov,
  volume =       {27},
  number =       {11},
  pages =        {1134--1142},
  issn =         {0001-0782},
  comment =      {Defines `learnability' wrt EXAMPLES and ORACLE using
                 arbitrary probability measure on event space. Shows
                 k-CNF learnable from examples only.},
  abstract =     {Humans appear to be able to learn new concepts without
                 needing to be programmed explicitly in any conventional
                 sense. In this paper we regard learning as the
                 phenomenon of knowledge acquisition in the absence of
                 specific programming. We give a precise methodology for
                 studying this phenomenon from a computational
                 viewpoint. It consists of choosing an appropriate
                 information gathering mechanism, the learning protocol,
                 and exploring the class of concepts that can be learned
                 using it in a reasonable (polynomial) number of steps.
                 Although inherent algorithmic complexity appears to set
                 serious limits to the range of concepts that can be
                 learned, we show that there are some important
                 nontrivial classes of propositional concepts that can
                 be learned in a realistic sense.},
}

Interpretation and knowledge discovery from the multilayer perceptron network - opening the black-box, M. L. Vaughn
@Article{vaughn:interpretation-multilayer:96,
  author =       "M. L. Vaughn",
  address =      "Cranfield Univ, Comp Informat Syst Management Grp,
                 Rmcs, Swindon Sn6 8La, Wilts, England",
  title =        "Interpretation and knowledge discovery from the
                 multilayer perceptron network - opening the black-box",
  journal =      "Neural Computing \& Applications",
  year =         "1996",
  volume =       "4",
  number =       "2",
  pages =        "72--82",
  abstract =     "This paper interprets the outputs from the multilayer
                 perceptron (MLP) network by finding the input data
                 features at the input layer of the network which
                 activate the hidden layer feature detectors. This leads
                 directly to the deduction of the significant data
                 inputs, the inputs that the network actually uses to
                 perform the input/output mapping for a classification
                 task, and the discovery of the most significant of
                 these data inputs. The analysis presents a method for
                 providing explanations for the network outputs and for
                 representing the knowledge learned by the network in
                 the form of significant input data relationships.
                 During network development the explanation facilities
                 and data relationships can be used for network
                 validation and verification, and after development, for
                 rule induction and data mining where this method
                 provides a potential tool for knowledge discovery in
                 databases (KDD).",
  keywords =     "DATA MINING, EXPLANATION FACILITIES, INTERPRETATION,
                 KNOWLEDGE DISCOVERY, RULE INDUCTION, VALIDATION AND
                 VERIFICATION",
}

Knowledge discovery from databases: an introductory review, B. Vickery
@Article{vickery:introductory-review:97,
  author =       "B. Vickery",
  address =      "Univ Coll London, Mortimer St, London Wc1E 6Bt,
                 England",
  title =        "Knowledge discovery from databases: an introductory
                 review",
  journal =      "J. Of Documentation",
  year =         "1997",
  volume =       "53",
  number =       "2",
  pages =        "107--122",
  abstract =     "The paper aims to provide a non-technical introduction
                 to the new procedures being used to extract knowledge
                 from databases. The reasons for developing knowledge
                 discovery methods are discussed - primarily, the
                 current production of very large databases that may
                 include many data relations not explicit in the
                 database structure. The background in machine learning
                 is indicated. The methods used are described for such
                 techniques as classification (sorting data into
                 predefined classes), clustering (developing ab initio a
                 data classification) and the detection of deviations
                 from pre-established norms. Examples of the
                 applications of these methods are given. The paper
                 concludes with some brief thoughts about the potential
                 use of knowledge discovery in the information field.",
  keywords =     "KDD, introduction, review",
}

Proceedings of the 22nd International Conference on Very Large Data Bases, T. M. Vijayaraman and A. Buchmann and C. Mohan and N. L. Sarda (Eds)
@proceedings{vijayaraman.ea:proceedings-22nd:96,
  title =        {Proceedings of the 22nd International Conference on
                 Very Large Data Bases},
  editor =       {T. M. Vijayaraman and A. Buchmann and C. Mohan and N.
                 L. Sarda},
  year =         {1996},
  address =      {San Francisco},
  isbn =         {1-55860-382-4},
  descriptor =   {Data Mining, Anfragenbearbeitung, Raeumlicher
                 Zugriffspfad, Datenbank, VLDB},
}

How feasible is automated discovery, Michael G. Walker
@article{walker:how-feasible:87,
  title =        {How feasible is automated discovery},
  author =       {Michael G. Walker},
  journal =      {IEEE Expert},
  volume =       {Spring 1987},
  pages =        {69--82},
  year =         {1987},
  annote =       {Looks at Meta-dendral, RX (radix), Bacon, Prospector,
                 AM. Compares them on Domain Knowledge, Search method,
                 Search Representation, Data Driven vs Model driven
                 discovery, Hypothesis Testing, Signal to Noise Ratio},
}

Automated Discovery of Active Motifs in Multiple RNA Secondary Structures, Jason T. L. Wang and Bruce A. Shapiro and Dennis Shasha and Kaizhong Zhang and Chia-Yo Chang
@inproceedings{wang.ea:automated-active:96,
  author =       {Jason T. L. Wang and Bruce A. Shapiro and Dennis
                 Shasha and Kaizhong Zhang and Chia-Yo Chang},
  title =        {Automated Discovery of Active Motifs in Multiple {RNA}
                 Secondary Structures},
  crossref =     {simoudis.ea:proceedings-second:96},
  pages =        {70},
}

Automated Discovery of Active Motifs in Three Dimensional Molecules, Xiong Wang and Jason T. L. Wang and Dennis Shasha and Bruce Shapiro and Sitaram Dikshitulu and Isidore Rigoutsos and Kaizhong Zhang
@inproceedings{wang.ea:automated-active:97,
  author =       {Xiong Wang and Jason T. L. Wang and Dennis Shasha and
                 Bruce Shapiro and Sitaram Dikshitulu and Isidore
                 Rigoutsos and Kaizhong Zhang},
  title =        {Automated Discovery of Active Motifs in Three
                 Dimensional Molecules},
  crossref =     {heckerman.ea:proceedings-third:97},
  pages =        {89},
}

Combinatorial Pattern Discovery for Scientific Data: Some Preliminary Results, J. Tsong-Li Wang and Gung-Wei Chirn and T. G. Marr and B. Shapiro and D. Shasha and K. Zhang
@article{wang.ea:combinatorial-pattern:94,
  title =        {Combinatorial Pattern Discovery for Scientific Data:
                 Some Preliminary Results},
  author =       {J. Tsong-Li Wang and Gung-Wei Chirn and T. G. Marr and
                 B. Shapiro and D. Shasha and K. Zhang},
  journal =      {SIGMOD Record (ACM Special Interest Group on
                 Management of Data)},
  year =         {1994},
  month =        jun,
  volume =       {23},
  number =       {2},
  pages =        {115--125},
  issn =         {0163-5808},
  affiliation =  {Dept. of Comput. and Inf. Sci., New Jersey Inst. of
                 Technol., Newark, NJ, USA},
  classification = {C7330 (Biology and medicine); C1250 (Pattern
                 recognition); C1180 (Optimisation techniques); C1160
                 (Combinatorial mathematics)},
  keywords =     {Combinatorial pattern discovery; Scientific data;
                 Natural entities; Distance metric; Protein databases;
                 String edit distance; Common externally observable
                 properties; Structural description; Variable-length
                 don't cares; String matching algorithms; Discovery
                 heuristics; Optimization heuristics; Protein
                 classification; Data mining},
  thesaurus =    {Biology computing; Combinatorial mathematics; Natural
                 sciences computing; Optimisation; Pattern recognition;
                 Proteins},
  xxcrossref =   {Anonymous:1994:ASI},
}

Discovering Active Motifs in Sets of Related Protein Sequences and Using Them for Classification, J. T. L. Wang and T. G. Marr and D. Shasha and B. A. Shapiro and G.-W. Chirn
Available as
hypertext.
@Article{wang.ea:discovering-active:94,
  author =       "J. T. L. Wang and T. G. Marr and D. Shasha and B. A.
                 Shapiro and G.-W. Chirn",
  journal =      "Nucleic Acids Research",
  title =        "Discovering Active Motifs in Sets of Related Protein
                 Sequences and Using Them for Classification",
  year =         "1994",
  abstract-url = "http://hertz.njit.edu/~jason/nar94.html",
  URL =          "http://hertz.njit.edu/~jason/nar94.html",
  keywords =     "Data mining, combinatorial pattern discovery,
                 proteins, biochemistry",
  month =        aug,
  number =       "14",
  pages =        "2769--2775",
  volume =       "22",
}

Representing Discovered Patterns Using Attributed Hypergraph, Yang Wang and Andrew K. C. Wong
@inproceedings{wang.ea:representing-discovered:96,
  author =       {Yang Wang and Andrew K. C. Wong},
  title =        {Representing Discovered Patterns Using Attributed
                 Hypergraph},
  crossref =     {simoudis.ea:proceedings-second:96},
  pages =        {283},
}

Schema Discovery for Semistructured Data, Ke Wang and Huiqing Liu
@InProceedings{wang.ea:schema-semistructured:97,
  author =       "Ke Wang and Huiqing Liu",
  title =        "Schema Discovery for Semistructured Data",
  crossref =     "heckerman.ea:proceedings-third:97",
  pages =        "271",
}

Selecting Features by Vertical Compactness of Data, Ke Wang and Suman Sundaresh
@InProceedings{wang.ea:selecting-features:97,
  author =       "Ke Wang and Suman Sundaresh",
  title =        "Selecting Features by Vertical Compactness of Data",
  crossref =     "heckerman.ea:proceedings-third:97",
  pages =        "275",
}

Optimized Rule Induction, Sholom M. Weiss and Nitin Indurkhya
@Article{weiss.ea:optimized-rule:93,
  author =       "Sholom M. Weiss and Nitin Indurkhya",
  title =        "Optimized Rule Induction",
  journal =      "IEEE Expert",
  volume =       "8",
  number =       "6",
  pages =        "61--69",
  month =        dec,
  year =         "1993",
  keywords =     "Swap-1, decision rules, comparison.",
  annote =       "Discusses the Swap-1 algorithm for learning decision
                 rules. It is tested on 4 real world datasets - Nettalk,
                 Heart, DNA and Rheum. Comparison with published info on
                 NN, Linear Discriminants and decision trees applied to
                 same problems. Possible extension through using a GA?
                 33 references.",
}

Rule-based Machine Learning Methods for Functional Prediction, S. M. Weiss and N. Indurkhya
Available as
hypertext.
@Article{weiss.ea:rule-based-machine:95,
  author =       "S. M. Weiss and N. Indurkhya",
  title =        "Rule-based Machine Learning Methods for Functional
                 Prediction",
  journal =      "Journal of Artificial Intelligence Research",
  volume =       "3",
  pages =        "383--403",
  year =         "1995",
  abstract =     "We describe a machine learning method for predicting
                 the value of a real-valued function, given the values
                 of multiple input variables. The method induces
                 solutions from samples in the form of ordered
                 disjunctive normal form (DNF) decision rules. A central
                 objective of the method and representation is the
                 induction of compact, easily interpretable solutions.
                 This rule-based decision model can be extended to
                 search efficiently for similar cases prior to
                 approximating function values. Experimental results on
                 real-world data demonstrate that the new techniques are
                 competitive with existing machine learning and
                 statistical methods and can sometimes yield superior
                 regression performance.",
  URL =          "http://www.cs.washington.edu/research/jair/table-of-contents-vol3.html",
}

Acquisition of Knowledge from Data, Gio C. M. Wiederhold and Michael G. Walker and Robert L. Blum and Stephen M. Downs
@InProceedings{wiederhold.ea:acquisition:86,
  author =       "Gio C. M. Wiederhold and Michael G. Walker and Robert
                 L. Blum and Stephen M. Downs",
  title =        "Acquisition of Knowledge from Data",
  booktitle =    "{ACM SIGART} International Symposium on Methodologies
                 for Intelligent Systems",
  year =         "1986",
  address =      "Knoxville, Tennessee",
  pages =        "74--84",
}

A Bi-directional ILP Algorithm, M. Wiese
@InProceedings{wiese:bi-directional-ilp:96,
  author =       "M. Wiese",
  title =        "A Bi-directional {ILP} Algorithm",
  booktitle =    "Proceedings of the MLnet Familiarization Workshop on
                 Data Mining with Inductive Logic Programming",
  pages =        "61--72",
  year =         "1996",
}

Detecting Early Indicator Cars in an Automotive Database: A Multi-Strategy Approach, Ruediger Wirth and Thomas P. Reinartz
@InProceedings{wirth.ea:detecting-early:96,
  author =       "Ruediger Wirth and Thomas P. Reinartz",
  title =        "Detecting Early Indicator Cars in an Automotive
                 Database: {A} Multi-Strategy Approach",
  crossref =     "simoudis.ea:proceedings-second:96",
  pages =        "76",
}

Computing as compression - an overview of the sp theory and system, J. G. Wolff
@Article{wolff:computing-as:95,
  author =       "J. G. Wolff",
  address =      "Univ Coll N Wales, Sch Electr Engn \& Comp Syst, Dean
                 St, Bangor Ll57 1Ut, Gwynedd, Wales",
  title =        "Computing as compression - an overview of the {SP}
                 theory and system",
  journal =      "New Generation Computing",
  year =         "1995",
  volume =       "13",
  number =       "2",
  pages =        "187--214",
  abstract =     "This article is an overview of a programme of research
                 based on the conjecture that all kinds of computing and
                 formal reasoning may usefully be understood as
                 information compression by pattern matching,
                 unification and metrics-guided search. The research
                 aims to develop this idea into a theory of computing to
                 integrate and simplify diverse concepts in the field.
                 The research also aims to develop a 'new generation'
                 computing system, based on the theory, to integrate and
                 simplify diverse kinds of computing and to achieve more
                 flexibility and 'intelligence' than conventional
                 computers. Software simulations of the proposed new
                 system provide a concrete expression of the developing
                 theory and a test-bed for the ideas. The background to
                 the research is briefly reviewed including evidence
                 that information compression is a significant element
                 in biological information processing systems. Concepts
                 of information and redundancy are described as a basis
                 for describing how information compression may be
                 achieved by the comparison or matching of patterns, the
                 merging or unification of patterns which are the same,
                 together with metrics-guided search (e.g., 'hill
                 climbing', 'beam search') to maximise compression for a
                 given computational effort. The main elements of the SP
                 theory and of the proposed SP system are described with
                 a summary of developments to date. Some of the kinds of
                 computing which may be interpreted as information
                 compression are briefly reviewed. These include: the
                 'low level' workings of conventional computers;
                 information retrieval, pattern recognition and
                 de-referencing of identifiers; unsupervised inductive
                 learning (grammatical inference, data mining, automatic
                 organisation of software and of knowledge bases); the
                 execution of mathematical or computing functions;
                 deductive and probabilistic inference; parsing and
                 natural language processing; planning and problem
                 solving. Areas of uncertainty where further work is
                 needed are indicated at appropriate points throughout
                 the article.",
  keywords =     "KOLMOGOROV, COMPLEXITY, MODEL, INFORMATION
                 COMPRESSION, THEORY OF COMPUTING, LEARNING, INFORMATION
                 RETRIEVAL, PATTERN RECOGNITION, DEDUCTION, ABDUCTION",
}

Extensibility in data mining systems, Stefan Wrobel and Dietrich Wettschereck and Edgar Sommer and Werner Emde
Available as
compressed postscript.
@InProceedings{wrobel.ea:extensibility-systems:96,
  author =       "Stefan Wrobel and Dietrich Wettschereck and Edgar
                 Sommer and Werner Emde",
  title =        "Extensibility in data mining systems",
  editor =       "Evangelos Simoudis and Jia Wei Han and Usama Fayyad",
  booktitle =    "Proc. 2nd International Conference On Knowledge
                 Discovery and Data Mining",
  pages =        "214--219",
  publisher =    "AAAI Press",
  address =      "Menlo Park, CA, USA",
  month =        aug,
  year =         "1996",
  URL =          "ftp://ftp.gmd.de/ml-archive/GMD/papers/ML75.ps.gz",
}

The ILP description learning problem: Towards a general model-level definition of data mining in ILP, Stefan Wrobel and Saso Dzeroski
Available as
compressed postscript.
@InProceedings{wrobel.ea:ilp-description:95,
  author =       "Stefan Wrobel and Saso Dzeroski",
  title =        "The {ILP} description learning problem: Towards a
                 general model-level definition of data mining in
                 {ILP}",
  editor =       "K. Morik and J. Herrmann",
  booktitle =    "Proc. Fachgruppentreffen Maschinelles Lernen
                 (FGML-95)",
  publisher =    "Univ. Dortmund",
  address =      "44221 Dortmund",
  year =         "1995",
  note =         "Research Report 580",
  URL =          "ftp://ftp.gmd.de/ml-archive/GMD/papers/ML68.ps.gz",
}

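User Interactivity in Very Large Scale Data Mining, Stefan Wrobel and Dietrich Wettschereck and A. Inkeri Verkamo and Arno Siebes and Heikki Mannila and Fred Kwakkel and Willi Kl{\"o}sgen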
Available as
compressed postscript.
@InProceedings{wrobel.ea:user-interactivity:96,
  author =       "Stefan Wrobel and Dietrich Wettschereck and A. Inkeri
                 Verkamo and Arno Siebes and Heikki Mannila and Fred
                 Kwakkel and Willi Kl{\"o}sgen",
  title =        "User Interactivity in Very Large Scale Data Mining",
  editor =       "W. Dilger and M. Schlosser and J. Zeidler and A.
                 Ittner",
  booktitle =    "Proc. FGML-96 (Annual Meeting of the GI Special
                 Interest Group Machine Learning)",
  pages =        "125--130",
  publisher =    "TU Chemnitz-Zwickau",
  address =      "09111 Chemnitz",
  month =        aug,
  year =         "1996",
  note =         "Computer Science Technical Report No. CSR-96-06.",
  URL =          "ftp://ftp.gmd.de/ml-archive/GMD/papers/ML74.ps.gz",
}

Extensibility in Data Mining Systems, Stefan Wrobel and Dietrich Wettschereck and Edgar Sommer and Werner Emde
@InProceedings{wrobel:extensibility-systems:96,
  author =       "Stefan Wrobel and Dietrich Wettschereck and Edgar
                 Sommer and Werner Emde",
  title =        "Extensibility in Data Mining Systems",
  pages =        "214--219",
  crossref =     "simoudis.ea:proceedings-second:96",
}

A graphical user-interface for knowledge discovery in databases, X. Wu and N. Cercone
@Article{wu.ea:graphical-user-interface:96,
  author =       "X. Wu and N. Cercone",
  address =      "Hiroshima Univ, Fac Engn, Dept Elect Engn, Ai
                 Architecture Lab, 1-4-1 Kagamiyama, Higashihiroshima
                 739, Japan Univ Regina, Regina, Sk S4S 0A2, Canada",
  title =        "A graphical user-interface for knowledge discovery in
                 databases",
  journal =      "Engineering Applications of Artificial Intelligence",
  year =         "1996",
  volume =       "9",
  number =       "6",
  pages =        "691--700",
  abstract =     "This paper describes a graphical user-interface for
                 database-oriented knowledge discovery systems, DBLEARN,
                 which has been developed for extracting knowledge rules
                 from relational databases. The interface, designed
                 using a query-by-example approach, provides a
                 graphical means of specifying knowledge-discovery
                 tasks. The interface supplies a graphical browsing
                 facility to help users to perceive the nature of the
                 target database structure. In order to guide users'
                 task specification, a cooperative, menu-based guidance
                 facility has been integrated into the interface. The
                 interface also supplies a graphical interactive
                 adjusting facility for helping users to refine the task
                 specification to improve the quality of learned
                 knowledge rules. Copyright (C) 1996 Elsevier Science
                 Ltd",
  keywords =     "LANGUAGE, EXAMPLE, graphical user-interfaces,
                 knowledge discovery systems, database mining, database
                 query processing, AI applications, visualisation",
}

Integration of Heuristic and Bayesian Approaches in a Pattern-Classification System, Q. Wu and P. Suetens and A. Oosterlinck
@InCollection{wu.ea:integration-heuristic:91,
  author =       "Q. Wu and P. Suetens and A. Oosterlinck",
  title =        "Integration of Heuristic and Bayesian Approaches in a
                 Pattern-Classification System",
  crossref =     "piatetsky-shapiro.ea:knowledge-discovery:91",
  editor =       "Gregory Piatetsky-Shapiro and William J. Frawley",
  booktitle =    "Knowledge Discovery in Databases",
  pages =        "249--260",
  publisher =    "AAAI Press / The MIT Press",
  address =      "Menlo Park, California",
  edition =      "1st",
  year =         "1991",
}

Knowledge Discovery in Databases, B. Wuethrich
Available as
hypertext.
@TechReport{wuethrich:knowledge-discovery:,
  author =       "B. Wuethrich",
  title =        "Knowledge Discovery in Databases",
  institution =  "The Hong Kong University of Science and Technology,
                 Department of Computer Science",
  URL =          "http://www.cs.ust.hk/faculty/beat/bio.html",
  abstract =     "[FROM DRAFT - anp] This is a draft of a manuscript of
                 a postgraduate course taught at the Hong Kong
                 University of Science and Technology in Spring 94. The
                 course gives an introduction into the young and
                 fascinating field of knowledge discovery in databases.
                 The manuscript is suited for beginners who can leave
                 out the more advanced sections, as well as people who
                 would like to do research in this area. This manuscript
                 is partly incomplete.

                 Table of Contents [edited - Andy]

                 1. Introduction 2. Rule Languages 3. Uncertainty 4.
                 Time 5. Learning Propositional Rules and Decision Trees
                 6. Learning Datalog Rules 7. Learning Probabilistic
                 Knowledge

                 Dr. Beat Wuethrich The Hong Kong University of Science
                 and Technology CS Dept (room 3512) Clear Water Bay
                 Kowloon, Hong Kong email: beat(at)cs.ust.hk",
  annote =       "The link above has report divided into sections, the
                 Full report in one file is also available at
                 ftp://ftp.cs.ust.hk/pub/techreport/95/tr95-04.ps.gz",
}

Probabilistic knowledge bases, B. Wuthrich
@Article{wuthrich:probabilistic-bases:95,
  author =       "B. Wuthrich",
  address =      "Hong Kong Univ Sci \& Technol, Kowloon, Hong Kong",
  title =        "Probabilistic knowledge bases",
  journal =      "IEEE Transactions on Knowledge and Data Engineering",
  year =         "1995",
  volume =       "7",
  number =       "5",
  pages =        "691--698",
  abstract =     "We define a new fixpoint semantics for rule-based
                 reasoning in the presence of weighted information. The
                 semantics is illustrated on a real-world application
                 requiring such reasoning. Optimizations and
                 approximations of the semantics are shown so as to make
                 the semantics amenable to very large scale real-world
                 applications. We finally prove that the semantics is
                 probabilistic and reduces to the usual fixpoint
                 semantics of stratified Datalog if all information is
                 certain. We implemented various knowledge discovery
                 systems which automatically generate such probabilistic
                 decision rules. In collaboration with a bank in Hong
                 Kong we use one such system to forecast currency
                 exchange rates.",
  keywords =     "LOGIC, AXIOMATIC PROBABILITY THEORY, DATA MINING,
                 INCOMPLETE INFORMATION, KNOWLEDGE DISCOVERY IN
                 DATABASES, QUERY OPTIMIZATION AND APPROXIMATION,
                 STRATIFIED DATALOG",
}

Knowledge Discovery in Integrated Call Centers: A Framework for Effective Customer-Driven Marketing, Paul Xia
@InProceedings{xia:integrated-call:97,
  author =       "Paul Xia",
  title =        "Knowledge Discovery in Integrated Call Centers: {A}
                 Framework for Effective Customer-Driven Marketing",
  crossref =     "heckerman.ea:proceedings-third:97",
  pages =        "279",
}

2nd international workshop on rough sets and knowledge discovery - banff, canada, 10-15 october 1993, R. Yasdi
@Article{yasdi:2nd-international:94,
  author =       "R. Yasdi",
  address =      "Hsch Bremerhaven, Karlstadt 8, D-27568 Bremerhaven,
                 Germany",
  title =        "2nd international workshop on rough sets and knowledge
                 discovery - {Banff}, {Canada}, 10--15 {October} 1993",
  journal =      "AI Communications",
  year =         "1994",
  volume =       "7",
  number =       "2",
  pages =        "128--129",
}

Learning Classification Rules from Database in the Context of Knowledge Acquisition and Representation, Ramin Yasdi
@Article{yasdi:learning-classification:91,
  author =       "Ramin Yasdi",
  title =        "Learning Classification Rules from Database in the
                 Context of Knowledge Acquisition and Representation",
  journal =      "IEEE Transactions on Knowledge and Data Engineering",
  volume =       "3",
  number =       "3",
  pages =        "293--306",
  month =        sep,
  year =         "1991",
}

Computing Optimized Rectilinear Regions for Association Rules, Kunikazu Yoda and Takeshi Fukuda and Yasuhiko Morimoto and Shinichi Morishita and Takeshi Tokuyama
@InProceedings{yoda.ea:computing-optimized:97,
  author =       "Kunikazu Yoda and Takeshi Fukuda and Yasuhiko Morimoto
                 and Shinichi Morishita and Takeshi Tokuyama",
  title =        "Computing Optimized Rectilinear Regions for
                 Association Rules",
  crossref =     "heckerman.ea:proceedings-third:97",
  pages =        "96",
}

A Framework for Knowledge Discovery and Evolution in Databases (78K), Jong P. Yoon and Larry Kerschberg
Available as
compressed postscript.
@TechReport{yoon.ea:framework-evolution:,
  author =       "Jong P. Yoon and Larry Kerschberg",
  title =        "A Framework for Knowledge Discovery and Evolution in
                 Databases",
  institution =  "George Mason University, ISSE",
  number =       "ISSE-TR-93-109",
  note =         "George Mason U, ISSE. July 03, 1994. Compressed
                 PostScript, 78{K}.",
  URL =          "ftp://isse.gmu.edu/pub/techrep/by_index/ISSE-TR-93-109.ps.Z",
}

A framework for knowledge discovery and evolution in databases, J. P. Yoon and L. Kerschberg
@Article{yoon.ea:framework-evolution:93,
  author =       "J. P. Yoon and L. Kerschberg",
  address =      "George Mason Univ, Sch Informat Technol \& Engn, Ctr
                 Artificial Intelligence, Fairfax, Va, 22030",
  title =        "A framework for knowledge discovery and evolution in
                 databases",
  journal =      "IEEE Transactions on Knowledge and Data Engineering",
  year =         "1993",
  month =        dec,
  volume =       "5",
  number =       "6",
  pages =        "973--979",
  abstract =     "Although knowledge discovery is increasingly important
                 in databases, discovered knowledge is not always useful
                 to users. It is mainly because the discovered knowledge
                 does not fit the user's interests, or it may be
                 redundant or inconsistent with a priori knowledge.
                 Knowledge discovery in databases depends critically on
                 how well a database is characterized and how
                 consistently the existing and discovered knowledge is
                 evolved. This paper describes a novel concept for
                 knowledge discovery and evolution in databases. The key
                 issues of this work include: using a database query to
                 discover new rules; using not only positive examples
                 (answer to a query) but also negative examples to
                 discover new rules; harmonizing existing rules with the
                 new rules. The main contribution of this paper is the
                 development of a new tool for 1) characterizing the
                 exceptions in databases and 2) evolving knowledge as a
                 database evolves.",
  keywords =     "ACTIVE DATABASE EVOLUTION, DATABASE MINING, EXPERTISE
                 TRANSFER, KNOWLEDGE DISCOVERY, KNOWLEDGE REFINEMENT",
}

Evaluation of Sampling for Data Mining of Association Rules, Mohammed Javeed Zaki and Srinivasan Parthasarathy and Wei Li and Mitsunori Ogihara
Available as
compressed postscript.
@TechReport{zaki.ea:evaluation-sampling:96,
  author =       "Mohammed Javeed Zaki and Srinivasan Parthasarathy and
                 Wei Li and Mitsunori Ogihara",
  title =        "Evaluation of Sampling for Data Mining of Association
                 Rules",
  institution =  "University of Rochester, Computer Science Department",
  number =       "TR 617",
  month =        may,
  year =         "1996",
  keywords =     "data mining; association rules; random sampling;
                 Chernoff bounds",
  URL =          "ftp://ftp.cs.rochester.edu/pub/papers/systems/96.tr617.Sampling_for_data_mining_of_association_rules.ps.gz",
  abstract =     "Data mining is an emerging research area, whose goal
                 is to extract significant patterns or interesting rules
                 from large databases. High-level inference from large
                 volumes of routine business data can provide valuable
                 information to businesses, such as customer buying
                 patterns, shelving criterion in supermarkets, and stock
                 trends. However, many algorithms proposed for data
                 mining of association rules make repeated passes over
                 the database to determine the commonly occurring {\em
                 itemsets} (or set of items). For large databases, the
                 I/O overhead in scanning the database can be extremely
                 high. In this paper we show that random sampling of
                 transactions in the database is an effective method for
                 finding association rules. Sampling can speed up the
                 mining process by more than an order of magnitude by
                 reducing I/O costs and drastically shrinking the number
                 of transactions to be considered. We may also be able
                 to make the sampled database resident in main-memory.
                 Furthermore, we show that sampling can accurately
                 represent the data patterns in the database with high
                 confidence. We experimentally evaluate the
                 effectiveness of sampling on three databases.",
}

New Algorithms for Fast Discovery of Association Rules, M. J. Zaki and S. Parthasarathy and M. Ogihara and W. Li
@InProceedings{zaki.ea:new-algorithms:97,
  author =       "M. J. Zaki and S. Parthasarathy and M. Ogihara and W.
                 Li",
  title =        "New Algorithms for Fast Discovery of Association
                 Rules",
  crossref =     "heckerman.ea:proceedings-third:97",
  pages =        "283",
}

Parallel Data Mining for Association Rules on Shared-memory Multi-processors, M. J. Zaki and M. Ogihara and S. Parthasarathy and W. Li
@InProceedings{zaki.ea:parallel-association:96a,
  author =       "M. J. Zaki and M. Ogihara and S. Parthasarathy and W.
                 Li",
  title =        "Parallel Data Mining for Association Rules on
                 Shared-memory Multi-processors",
  booktitle =    "CD-ROM Proceedings of Supercomputing'96",
  publisher =    "IEEE",
  address =      "Pittsburgh, PA",
  month =        nov,
  year =         "1996",
  key_modifier = "a",
}

Parallel Data Mining for Association Rules on Shared-Memory Multiprocessors, Mohammed Javeed Zaki and Mitsunori Ogihara and Srinivasan Parthasarathy and Wei Li
Available as
compressed postscript.
@TechReport{zaki.ea:parallel-association:96b,
  key_modifier = "b",
  author =       "Mohammed Javeed Zaki and Mitsunori Ogihara and
                 Srinivasan Parthasarathy and Wei Li",
  title =        "Parallel Data Mining for Association Rules on
                 Shared-Memory Multiprocessors",
  institution =  "University of Rochester, Computer Science Department",
  number =       "TR 618",
  month =        may,
  pages =        "22",
  year =         "1996",
  keywords =     "data mining; association rules; load balancing; hash
                 tree balancing; hashing; shared-memory multiprocessor",
  URL =          "ftp://ftp.cs.rochester.edu/pub/papers/systems/96.tr618.Parallel_data_mining_for_association_rules.ps.gz",
  abstract =     "Data mining is an emerging research area, whose goal
                 is to extract significant patterns or interesting rules
                 from large databases. High-level inference from large
                 volumes of routine business data can provide valuable
                 information to businesses, such as customer buying
                 patterns, shelving criterion in supermarkets, and stock
                 trends. Many algorithms have been proposed for data
                 mining of association rules. However, research so far
                 has mainly focused on sequential algorithms. In
                 this paper we present parallel algorithms for data
                 mining of association rules, and study the degree of
                 parallelism, synchronization, and data locality issues
                 on the SGI Power Challenge shared-memory
                 multi-processor. We further present a set of
                 optimizations for the sequential and parallel
                 algorithms. Experiments show that a significant
                 improvement of performance is achieved using our
                 proposed optimizations. We also achieved good speed-up
                 for the parallel algorithm, but we observe a need for
                 parallel I/O techniques for further performance
                 gains.",
}

Fast and Intuitive Clustering of Web Documents, Oren Zamir and Oren Etzioni and Omid Madani and Richard M. Karp
@InProceedings{zamir.ea:fast-intuitive:97,
  author =       "Oren Zamir and Oren Etzioni and Omid Madani and
                 Richard M. Karp",
  title =        "Fast and Intuitive Clustering of Web Documents",
  crossref =     "heckerman.ea:proceedings-third:97",
  pages =        "287",
}

Discovering concept clusters by decomposing databases, N. Zhong and S. Ohsuga
@Article{zhong.ea:discovering-concept:94,
  author =       "N. Zhong and S. Ohsuga",
  address =      "Univ Tokyo, Adv Sci \& Technol Res Ctr, 4-6-1 Komaba,
                 Meguro Ku, Tokyo 153, Japan",
  title =        "Discovering concept clusters by decomposing
                 databases",
  journal =      "Data \& Knowledge Engineering",
  year =         "1994",
  volume =       "12",
  number =       "2",
  pages =        "223--244",
  abstract =     "This paper introduces an approach of discovering
                 concept clusters by decomposing databases. This
                 approach is the fundamental one for developing DBI
                 which is one of sub-systems of the GLS discovery
                 system implemented by us. A key feature of this
                 approach is the formation of concept clusters or
                 sub-databases through analysis and deletion of noisy
                 data in decomposing a database. Its development is
                 based on the concept of Simon and Ando's near-complete
                 decomposability that has been most explicitly used in
                 economic theory. In this approach, the process of
                 discovering concept clusters from databases is a
                 process based on incipient hypothesis generation and
                 refinement, and many kinds of learning methods, in
                 which the methods of data-driven and knowledge-driven
                 are included, are cooperatively used in multiple
                 learning phases, so that a more robust, general
                 discovery system can be developed.",
  keywords =     "KNOWLEDGE DISCOVERY, KNOWLEDGE DISCOVERY IN DATABASES,
                 CONCEPTUAL CLUSTERING, NEAR-COMPLETE DECOMPOSABILITY,
                 MULTIPLE LEARNING PHASES, INTEGRATION",
}

A hierarchical model learning approach for refining and managing concept clusters discovered from databases, N. Zhong and S. Ohsuga
@Article{zhong.ea:hierarchical-model:96,
  author =       "N. Zhong and S. Ohsuga",
  address =      "Yamaguchi Univ, Fac Engn, Dept Comp Sci \& Syst Engn,
                 2557 Tokiwadai, Ube, Yamaguchi 755, Japan Waseda Univ,
                 Sch Sci \& Engn, Dept Informat \& Comp Sci, Shinjuku
                 Ku, Tokyo 169, Japan",
  title =        "A hierarchical model learning approach for refining
                 and managing concept clusters discovered from
                 databases",
  journal =      "Data \& Knowledge Engineering",
  year =         "1996",
  volume =       "20",
  number =       "2",
  pages =        "227--252",
  abstract =     "The contents of most databases are ever-changing, and
                 erroneous data can be a significant problem in
                 real-world databases. Therefore, the process of
                 discovering knowledge from databases is a process based
                 on incipient hypothesis generation/evaluation and
                 refinement/management. Although many systems for
                 knowledge discovery in databases have been proposed,
                 most systems have not addressed the capabilities of
                 refining/managing the discovered knowledge. This paper
                 describes a hierarchical model learning approach for
                 refining/managing concept clusters discovered from
                 databases. This approach is the basic one for
                 developing HML (Hierarchical Model Learning), which is
                 one sub-system of our GLS (Global Learning Scheme)
                 discovery system and can be cooperatively used with
                 other sub-systems of GLS such as DBI (Decomposition
                 Based Induction). By means of HML, concept clusters
                 discovered from a database by DBI can be represented as
                 the Multi-Layer Logic formulae with hierarchical
                 models in a knowledge-base and can be easily
                 refined/managed according to data change in a database
                 and/or domain knowledge. HML is based on the model
                 representation of Multi-Layer Logic (MLL). Its key
                 feature is the quantitative evaluation for selecting
                 the best representation of the MLL formulae by using
                 cooperatively a criterion based on information theory
                 and domain knowledge. Experience with a prototype of
                 HML implemented by the knowledge-based system KAUS is
                 discussed.",
  keywords =     "KNOWLEDGE DISCOVERY, INDUCTION, SYSTEMS, RULES,
                 KNOWLEDGE DISCOVERY IN DATABASES, MULTILAYER LOGIC,
                 MACHINE LEARNING, INFORMATION THEORY, HIERARCHICAL
                 MODELING, REFINEMENT, MANAGEMENT",
}

KDD Process Planning, Ning Zhong and Chunnian Liu and Yoshitsugu Kakemoto and Setsuo Ohsuga
@InProceedings{zhong.ea:kdd-process:97,
  author =       "Ning Zhong and Chunnian Liu and Yoshitsugu Kakemoto
                 and Setsuo Ohsuga",
  title =        "{KDD} Process Planning",
  crossref =     "heckerman.ea:proceedings-third:97",
  pages =        "291",
}

System for managing and refining structural characteristics discovered from databases, N. Zhong and S. Ohsuga
% Journal article. The issue within the volume is stored in `number`, the
% field that standard styles read.
% NOTE(review): `address` holds the authors' affiliations here, not a
% publisher location -- confirm this is the intended convention.
@Article{zhong.ea:system-managing:96,
  author =       "N. Zhong and S. Ohsuga",
  address =      "Univ Tokyo, Adv Sci \& Technol Res Ctr, Meguro Ku,
                 4-6-1 Komaba, Tokyo 153, Japan Waseda Univ, Dept
                 Informat \& Comp Sci, Shinjuku Ku, Tokyo 169, Japan",
  title =        "System for managing and refining structural
                 characteristics discovered from databases",
  journal =      "Knowledge-Based Systems",
  year =         "1996",
  volume =       "9",
  number =       "4",
  pages =        "267--279",
  abstract =     "Systems that allow automatic knowledge discovery from
                 databases will play an increasingly important role in
                 building/sharing large scale knowledge bases. Although
                 many systems for knowledge discovery in databases have
                 been proposed, few of them have addressed the
                 capabilities of managing and refining the discovered
                 knowledge. In particular, the contents of most
                 databases are ever changing and erroneous data can be a
                 significant problem in real-world databases. Hence, the
                 process of discovering knowledge from databases is a
                 process based on incipient hypothesis
                 generation/evaluation and refinement/management. The
                 paper describes a system named IIBR (Inheritance
                 Inference Based Refinement) for managing and refining
                 structural characteristics discovered from databases.
                 Structural characteristics are a kind of important
                 regularity hidden in databases, and are denoted by
                 regression models for describing three kinds of
                 functional relations: the exact, strong and weak ones.
                 IIBR is one subsystem of the authors' GLS (Global
                 Learning Scheme) discovery system, and can be
                 cooperatively used with other subsystems of GLS such as
                 KOSI (Knowledge Oriented Statistic Inference). By means
                 of IIBR, the structural characteristics discovered by
                 KOSI can be added to a knowledge base as the deductive
                 rules and the sets of data for showing their errors,
                 and can be easily managed and refined according to data
                 change in a database. IIBR is based on inheritance
                 inference and error analysis, as well as the model
                 representation of knowledge, multiple worlds/levels,
                 and metareasoning in the knowledge-based system KAUS.
                 Experience with a prototype of IIBR implemented by KAUS
                 is discussed.",
  keywords =     "KNOWLEDGE DISCOVERY, KNOWLEDGE DISCOVERY IN DATABASES,
                 INHERITANCE INFERENCE, ERROR ANALYSIS, DATA CHANGE,
                 KNOWLEDGE REPRESENTATION",
}

A method for computing all maximally general rules in attribute-value systems, W. Ziarko and N. Shan
% Journal article. The issue within the volume is stored in `number`, the
% field that standard styles read.
% NOTE(review): `address` holds the authors' affiliation here, not a
% publisher location -- confirm this is the intended convention.
@Article{ziarko.ea:method-computing:96,
  author =       "W. Ziarko and N. Shan",
  address =      "Univ Regina, Dept Comp Sci, Regina, Sk S4S 0A2,
                 Canada",
  title =        "A method for computing all maximally general rules in
                 attribute-value systems",
  journal =      "Computational Intelligence",
  year =         "1996",
  volume =       "12",
  number =       "2",
  pages =        "223--234",
  abstract =     "A method for finding all deterministic and maximally
                 general rules for a target classification is explained
                 in detail and illustrated with examples: Maximally
                 general rules are rules with minimal numbers of
                 conditions. The method has been developed within the
                 context of the rough sets model and is based on the
                 concepts of a decision matrix and a decision function.
                 The problem of finding all the rules is reduced to the
                 problem of computing prime implicants of a group of
                 associated Boolean expressions. The method is
                 particularly applicable to identifying all potentially
                 interesting deterministic rules in a knowledge
                 discovery system but can also be used to produce
                 possible rules or nondeterministic rules with decision
                 probabilities, by adapting the method to the
                 definitions of the variable precision rough sets
                 model.",
  keywords =     "KNOWLEDGE DISCOVERY, MACHINE LEARNING, ROUGH SETS,
                 RULES",
}

Introduction to the special issue on rough sets and knowledge discovery, W. Ziarko
% Journal article. The issue within the volume is stored in `number`, the
% field that standard styles read.
% NOTE(review): `address` holds the author's affiliation here, not a
% publisher location -- confirm this is the intended convention.
@Article{ziarko:introduction-to:95,
  author =       "W. Ziarko",
  address =      "Univ Regina, Dept Comp Sci, Regina, Sk S4S 0A2,
                 Canada",
  title =        "Introduction to the special issue on rough sets and
                 knowledge discovery",
  journal =      "Computational Intelligence",
  year =         "1995",
  volume =       "11",
  number =       "2",
  pages =        "223--226",
}

Some privacy issues in knowledge discovery - OECD personal privacy guidelines - response, W. Ziarko
% Single-page journal item. The issue is stored in `number`, the page is
% given once rather than as a degenerate range, and the acronyms are
% capitalised (brace-protected in the title so styles cannot downcase them).
% NOTE(review): `address` holds the author's affiliation here, not a
% publisher location -- confirm this is the intended convention.
@Article{ziarko:some-privacy:95,
  author =       "W. Ziarko",
  address =      "Univ Regina, Dept Comp Sci, Regina, Sk S4S 0A2,
                 Canada",
  title =        "Some privacy issues in knowledge discovery - {OECD}
                 personal privacy guidelines - response",
  journal =      "{IEEE} Expert-Intelligent Systems \& Their
                 Applications",
  year =         "1995",
  volume =       "10",
  number =       "2",
  pages =        "59",
  keywords =     "ethics, ethical",
}

Optimal Multiple Intervals Discretization of Continuous Attributes for Supervised Learning, D. A. Zighed and R. Rakotomalala and F. Feschet
% Conference paper; proceedings details are inherited from the crossref parent.
@inproceedings{zighed.ea:optimal-multiple:97,
  author   = {Zighed, D. A. and Rakotomalala, R. and Feschet, F.},
  title    = {Optimal Multiple Intervals Discretization of Continuous
              Attributes for Supervised Learning},
  pages    = {295},
  crossref = {heckerman.ea:proceedings-third:97},
}

A Dataset Decomposition Approach to Data Mining and Machine Discovery, Blaz Zupan and Marko Bohanec and Ivan Bratko and Bojan Cestnik
% Conference paper; proceedings details are inherited from the crossref parent.
@inproceedings{zupan.ea:dataset-decomposition:97,
  author   = {Zupan, Blaz and Bohanec, Marko and Bratko, Ivan and
              Cestnik, Bojan},
  title    = {A Dataset Decomposition Approach to Data Mining and Machine
              Discovery},
  pages    = {299},
  crossref = {heckerman.ea:proceedings-third:97},
}

Automated Pattern Mining with a Scale Dimension, Jan M. Zytkow and Robert Zembowicz
% Conference paper; proceedings details are inherited from the crossref parent.
@inproceedings{zytkow.ea:automated-pattern:96,
  author   = {Zytkow, Jan M. and Zembowicz, Robert},
  title    = {Automated Pattern Mining with a Scale Dimension},
  pages    = {158},
  crossref = {simoudis.ea:proceedings-second:96},
}

Interactive mining for regularities in Databases, Jan M. Zytkow and John Baker
% Chapter in an edited collection; `address` is the publisher's location.
@incollection{zytkow.ea:interactive-regularities:91,
  author    = {Zytkow, Jan M. and Baker, John},
  title     = {Interactive mining for regularities in Databases},
  editor    = {Piatetsky-Shapiro, Gregory and Frawley, William J.},
  booktitle = {Knowledge Discovery in Databases},
  pages     = {31--53},
  publisher = {{AAAI Press}},
  address   = {Menlo Park, California},
  year      = {1991},
}

Mining patterns at each scale in massive data, J. Zytkow and R. Zembowicz
% Only the first page is known, so `pages` holds the start page alone; an
% unknown end page must not be written as a literal placeholder, which
% styles would print verbatim.
% NOTE(review): Lecture Notes in Computer Science is a book series; this
% record may belong as a proceedings paper with `series` and `booktitle` --
% confirm the containing volume's title before converting.
@Article{zytkow.ea:patterns-at:96,
  author =       "J. Zytkow and R. Zembowicz",
  title =        "Mining patterns at each scale in massive data",
  journal =      "Lecture Notes in Computer Science",
  volume =       "1079",
  pages =        "139",
  year =         "1996",
  ISSN =         "0302-9743",
}

Combining many searches in the FAHRENHEIT discovery system, Jan M. Zytkow
% Workshop paper. `booktitle` is entered in title case because standard
% styles print it as written and cannot restore lost capitals.
@InProceedings{zytkow:combining-many:87,
  author =       "Jan M. Zytkow",
  title =        "Combining many searches in the {FAHRENHEIT} discovery
                 system",
  booktitle =    "Proceedings of the Fourth International Workshop on
                 Machine Learning",
  year =         "1987",
  address =      "San Mateo, California",
  publisher =    "Morgan Kaufmann",
  pages =        "281--287",
}

Knowledge = Concepts: A Harmful Equation, Jan M. Zytkow
% Conference paper; proceedings details are inherited from the crossref parent.
@inproceedings{zytkow:concepts-harmful:97,
  author   = {Zytkow, Jan M.},
  title    = {Knowledge = Concepts: {A} Harmful Equation},
  pages    = {104},
  crossref = {heckerman.ea:proceedings-third:97},
}