% Data Mining Bibliographies Copyright Information % % The author reserves the % % Copyright (C) 1997 Andy Pryke. All rights reserved. % % for the compilation of this KDD bibliography collection. % % If you find the bibliography collection useful for your work, I would % be happy if you acknowledge it and me. You could also send me a % postcard if you wish (address below). % % I usually give my consent that the collection may be copied and % distributed with the following conditions: % % 1) It may be used only for research or educational purposes % % and % % 2) Any copy must be accompanied by a reference to the original % collection and its author. % % and % % 3) This information must always accompany every copy of a bibliography. % % I reserve the right to revoke the above permission at any time. % % Any other use must be negotiated in advance. % % Any commercial use of the bibliographies is strictly prohibited. In % particular, the whole or derived bibliographies may not be sold for % profit or included in commercial documents (e.g., published on CD-ROM, % floppy disks, books, magazines, or other print form) without the prior % written permission of the copyright holder. % % Please contact the author if the intended usage is not covered by the % above statement. % % Abstracts of publications published by the ACM and the IEEE are also % subject to the respective "interim" or "provisional" copyright % policies: % % ACM copyright policy (http://www.acm.org/pubs/copyright_policy/) % IEEE copyright policy (http://www.ieee.org/copyright/policies.htm) % % This copyright notice is derived from one by Alf-Christian Achilles % for his (massive) Computer Science Bibliography Collection at % (http://liinwww.ira.uka.de/bibliography/index.html). % % -------------------------------------------------------------------- % % My address: % % My postal address is: % % Andy Pryke, % Department of Computer Science, % The University of Birmingham, % Edgbaston, % Birmingham. 
% B15 2TT % % Fax : 0121 414 4281 % Phone: 0121 414 3736 % Email: A.N.Pryke(at)cs.bham.ac.uk % Web: http://www.cs.bham.ac.uk/~anp/ %,
% A journal special issue cited as a whole: there is no author or title,
% so the `key` field supplies the label/sorting fallback.
@article{machine_learning_journal_special:93,
  key = {Machine_Learning_Journal_Special:93},
  journal = {Machine Learning Journal},
  volume = {5},
  number = {6},
  month = dec,
  year = {1993},
  note = {Special issue on Learning and Discovery in Databases},
}
Improved Methods for Finding Association Rules,
% Technical report with no author, institution or year recorded, so
% standard styles will warn about missing required fields.
% The percent sign in the abstract is escaped because BibTeX copies field
% text verbatim and TeX would otherwise treat it as a comment start.
@techreport{no_author:improved-methods:,
  title = {Improved Methods for Finding Association Rules},
  url = {ftp://ftp.cs.helsinki.fi/pub/Reports/by_Project/PMDM/Improved_Methods_for_Finding_Association_Rules.ps.gz},
  abstract = {Association rules are statements of the form for 90 \%
    of the rows of the relation, if the row has value 1 in
    the columns in set W , then it has 1 also in column B .
    Agrawal, Imielinski, and Swami introduced the problem
    of mining association rules from large collections of
    data, and gave a method based on successive passes over
    the database. We give an improved algorithm for the
    problem. The method is based on careful combinatorial
    analysis of the information obtained in previous
    passes; this makes it possible to eliminate unnecessary
    candidate rules. Experiments on a university course
    enrollment database indicate that the method
    outperforms the previous one by a factor of 5. We also
    give simple information-theoretic lower bounds for the
    problem of finding association rules, and show that
    sampling is in general a very efficient way of finding
    such rules. Computing Reviews Categories and Subject
    Descriptors: H.3.3[Information Systems]: Information
    Storage and Retrieval - Information Search and
    Retrieval I.2.6 [Computing Methodologies]: Artificial
    Intelligence - Learning I.2.8 [Computing
    Methodologies]: Artificial Intelligence - Problem
    Solving, Control Methods, and Search General Terms:
    Databases, machine learning, artificial intelligence.
    Additional Key Words and Phrases: Database mining,
    knowledge discovery in databases, association rules,
    covering sets.},
}
Learning Decision Trees for Mapping the Local Environment in Mobile Robot Navigation,
% Technical report with no author, institution or year recorded.
@techreport{no_author:learning-decision:,
  title = {Learning Decision Trees for Mapping the Local
    Environment in Mobile Robot Navigation},
  url = {ftp://ftp.cs.helsinki.fi/pub/Reports/by_Project/PMDM/Learning_Decision_Trees_for_Mapping_the_Local_Environment_in_Mobile_Robot_Navigation.ps.gz},
  abstract = {This paper describes the use of the C4.5 decision tree
    learning algorithm in the design of a classifier for a
    new approach to the mapping of a mobile robot's local
    environment. The decision tree uses the features from
    the echoes of an ultrasonic array mounted on the robot
    to classify the contours of its local environment. The
    contours are classified into a finite number of two
    dimensional shapes to form a primitive map which is to
    be used for navigation. The nature of the problem,
    noise and the practical timing constraints,
    distinguishes it from those typically used in machine
    learning applications and highlights some of the
    advantages of decision tree learning in robotic
    applications.},
}
Overheads for the AI'94 Tutorial on Intelligent Learning Database Systems,
% Tutorial overheads available online; no author or year recorded.
@misc{no_author:overheads-ai94:,
  title = {Overheads for the {AI}'94 Tutorial on Intelligent
    Learning Database Systems},
  url = {ftp://coral.cs.jcu.edu.au/pub/research/HCV/KDD.ps},
  abstract = {This full-day tutorial presents and discusses
    techniques for the following 3 interconnected phases in
    constructing intelligent learning database systems: (1)
    Translation of standard database information into a
    form suitable for use by a rule-based system; (2) Using
    machine learning techniques to produce rule bases from
    databases; and (3) Interpreting the rules produced to
    solve users' problems and/or reduce data spaces. It
    suits a wide audience including postgraduate students
    and industrial people from databases, expert systems,
    and machine learning.},
  annote = {Comments and suggestions for improvements are
    solicited! Comments to Xindong Wu
    (xindong(at)INSECT.SD.MONASH.EDU.AU),},
}
State Of The Art,
% Magazine feature recorded without a named author.
@article{no_author:state-art:95,
  title = {State Of The Art},
  journal = {Byte},
  month = oct,
  year = {1995},
  url = {http://www.byte.com/art/9510/sec8/sec8.htm},
  annote = {A number of articles, good introduction to data
    mining},
}
Discovery of Actionable Patterns in Databases: The Action Hierarchy Approach, Gediminas Adomavicius and Alexander Tuzhilin
% Proceedings details are inherited from the cross-referenced entry.
@inproceedings{adomavicius.ea:actionable-patterns:97,
  author = {Gediminas Adomavicius and Alexander Tuzhilin},
  title = {Discovery of Actionable Patterns in Databases: The
    Action Hierarchy Approach},
  pages = {111},
  crossref = {heckerman.ea:proceedings-third:97},
}
Mining Association Rules between Sets of Items in Large Databases, Rakesh Agrawal and Tomasz Imielinski and Arun N. Swami
% Conference version of the association-rules paper; `key_modifier`
% records the a/b suffix used in the citation key.
@inproceedings{agrawal.ea:association-rules:93a,
  author = {Rakesh Agrawal and Tomasz Imielinski and Arun N.
    Swami},
  title = {Mining Association Rules between Sets of Items in
    Large Databases},
  editor = {Peter Buneman and Sushil Jajodia},
  booktitle = {Proceedings of the 1993 {ACM} {SIGMOD} International
    Conference on Management of Data},
  address = {Washington, D.C.},
  month = "26--28~" # may,
  year = {1993},
  pages = {207--216},
  url = {http://www.almaden.ibm.com/cs/people/ragrawal/papers/sigmod93.ps},
  key_modifier = {a},
  abstract = {We are given a large database of customer
    transactions. Each transaction consists of items
    purchased by a customer in a visit. We present an
    efficient algorithm that generates all significant
    association rules between items in the database. The
    algorithm incorporates buffer management and novel
    estimation and pruning techniques. We also present
    results of applying this algorithm to sales data
    obtained from a large retailing company, which shows
    the effectiveness of the algorithm.},
}
Mining association rules between sets of items in large databases, Rakesh Agrawal and Tomasz Imielinski and Arun Swami
% Journal (SIGMOD Record) version of the association-rules paper. The
% indexing fields after the abstract are not standard BibTeX fields and
% are ignored by the standard styles.
@article{agrawal.ea:association-rules:93b,
  author = {Rakesh Agrawal and Tomasz Imielinski and Arun Swami},
  title = {Mining association rules between sets of items in
    large databases},
  journal = {SIGMOD Record (ACM Special Interest Group on
    Management of Data)},
  volume = {22},
  number = {2},
  pages = {207--216},
  month = jun,
  year = {1993},
  isbn = {0-89791-592-5},
  issn = {0163-5808},
  key_modifier = {b},
  abstract = {We are given a large database of customer
    transactions. Each transaction consists of items
    purchased by a customer in a visit. We present an
    efficient algorithm that generates all significant
    association rules between items in the database. The
    algorithm incorporates buffer management and novel
    estimation and pruning techniques. We also present
    results of applying this algorithm to sales data
    obtained from a large retailing company, which shows
    the effectiveness of the algorithm.},
  affiliation = {IBM Almaden Research Cent},
  affiliationaddress = {San Jose, CA, USA},
  classification = {723.3; 921.6; 911.4; 723.2; 722.1; 922.1; C6160Z
    (Other DBMS); C6130 (Data handling techniques); C6170
    (Expert systems); C6120 (File organisation); C7170
    (Marketing)},
  conference = {Proceedings of the 1993 ACM SIGMOD International
    Conference on Management of Data},
  conferenceyear = {1993},
  keywords = {Database systems; Algorithms; Marketing; Data
    handling; Data storage equipment; Probability;
    Estimation; Query languages; Large scale systems;
    Associative processing; Administrative data processing;
    Large databases; Mining association rules; Pruning
    technique; Basket data, Large database; Customer
    transactions; Efficient algorithm; Association rules;
    Buffer management; Novel estimation; Pruning
    techniques; Sales data; Large retailing company},
  meetingaddress = {Washington, DC, USA},
  meetingdate = {May 26--28 1993},
  meetingdate2 = {05/26--28/93},
  publisherinfo = {Fort Collins Computer Center},
  sponsor = {ACM, SIGMOD; Minerals, Metals \& Materials Society},
  thesaurus = {Knowledge based systems; Marketing data processing;
    Storage management; Transaction processing; Very large
    databases},
  xxcrossref = {Anonymous:1993:SAS},
}
Database mining - a performance perspective, R. Agrawal and T. Imielinski and A. Swami
% The issue number is stored in the standard `number` field so that
% styles print it. The authors' postal affiliation is kept in
% `affiliation`, because `address` denotes the publisher's location.
@article{agrawal.ea:database-performance:93,
  author = {R. Agrawal and T. Imielinski and A. Swami},
  title = {Database mining - a performance perspective},
  journal = {IEEE Transactions on Knowledge and Data Engineering},
  volume = {5},
  number = {6},
  pages = {914--925},
  year = {1993},
  affiliation = {IBM Corp, Almaden Res Ctr, 650 Harry Rd, San Jose, CA,
    95120},
  abstract = {We present our perspective of database mining as the
    confluence of machine learning techniques and the
    performance emphasis of database technology. We
    describe three classes of database mining problems
    involving classification, associations, and sequences,
    and argue that these problems can be uniformly viewed
    as requiring discovery of rules embedded in massive
    data. We describe a model and some basic operations for
    the process of rule discovery. We show how the database
    mining problems we consider map to this model and how
    they can be solved by using the basic operations we
    propose. We give an example of an algorithm for
    classification obtained by combining the basic rule
    discovery operations. This algorithm not only is
    efficient in discovering classification rules but also
    has accuracy comparable to ID3, one of the current best
    classifiers.},
  annote = {Identification and unification of 3 classes of data
    mining problem, Classification, Association and
    Sequences. They then go on to propose a unifying
    framework for these three problems, and five basic
    operators for rule discovery. These are then used to
    construct an algorithm CDP (Classifier with Dynamic
    Pruning) which out performs ID3 in classifier accuracy
    and efficiency on a test problem.},
  keywords = {ASSOCIATIONS, CLASSIFICATION, DATABASE MINING,
    DECISION TREES, KNOWLEDGE DISCOVERY, SEQUENCES},
}
Developing Tightly-Coupled Data Mining Applications on a Relational Database System, Rakesh Agrawal and Kyuseok Shim
% Proceedings details are inherited from the cross-referenced entry.
@inproceedings{agrawal.ea:developing-tightly-coupled:96,
  author = {Rakesh Agrawal and Kyuseok Shim},
  title = {Developing Tightly-Coupled Data Mining Applications on
    a Relational Database System},
  pages = {287},
  crossref = {simoudis.ea:proceedings-second:96},
}
Fast Algorithms for Mining Association Rules in Large Databases, R. Agrawal and R. Srikant
% VLDB'94 paper. The first editor's name is given as "Jorge B. Bocca",
% the form used in the published proceedings.
@inproceedings{agrawal.ea:fast-algorithms:94,
  author = {R. Agrawal and R. Srikant},
  title = {Fast Algorithms for Mining Association Rules in Large
    Databases},
  editor = {Jorge B. Bocca and Matthias Jarke and Carlo Zaniolo},
  booktitle = {20th International Conference on Very Large Data
    Bases, September 12--15, 1994, Santiago, Chile
    proceedings},
  publisher = {Morgan Kaufmann Publishers},
  address = {Los Altos, CA 94022, USA},
  pages = {487--499},
  year = {1994},
  annote = {Also known as VLDB'94},
  keywords = {very large data bases; VLDB},
}
Parallel mining of association rules, R. Agrawal and J. C. Shafer
% The issue number is stored in the standard `number` field so that
% styles print it. The authors' postal affiliation is kept in
% `affiliation`, because `address` denotes the publisher's location.
@article{agrawal.ea:parallel-association:96,
  author = {R. Agrawal and J. C. Shafer},
  title = {Parallel mining of association rules},
  journal = {IEEE Transactions on Knowledge and Data Engineering},
  volume = {8},
  number = {6},
  pages = {962--969},
  year = {1996},
  affiliation = {IBM Corp, Almaden Res Ctr, 650 Harry Rd, San Jose, CA,
    95120},
  abstract = {We consider the problem of mining association rules on
    a shared-nothing multiprocessor. We present three
    algorithms that explore a spectrum of trade-offs
    between computation, communication, memory usage,
    synchronization, and the use of problem-specific
    information. The best algorithm exhibits near perfect
    scaleup behavior, yet requires only minimal overhead
    compared to the current best serial algorithm.},
  keywords = {data mining, association rules, parallel algorithms},
}
Quest: A Project on Database Mining, R. Agrawal and M. Carey and C. Faloutsos and S. Ghosh and A. Houtsma and T. Imielinski and B. Iyer and A. Mahboob and H. Miranda and R. Srikant and A. Swami
% SIGMOD Record version of the one-page Quest project description.
% The third author's surname is spelt "Faloutsos", matching the
% conference-version entry agrawal.ea:quest-project:94b.
@article{agrawal.ea:quest-project:94a,
  author = {R. Agrawal and M. Carey and C. Faloutsos and S. Ghosh
    and A. Houtsma and T. Imielinski and B. Iyer and A.
    Mahboob and H. Miranda and R. Srikant and A. Swami},
  title = {{Quest}: {A} Project on Database Mining},
  journal = {SIGMOD Record (ACM Special Interest Group on
    Management of Data)},
  volume = {23},
  number = {2},
  pages = {514},
  month = jun,
  year = {1994},
  issn = {0163-5808},
  key_modifier = {a},
  affiliation = {IBM Almaden Res. Center, San Jose, CA, USA},
  classification = {C6160 (Database management systems (DBMS))},
  keywords = {Quest project; Database mining; Tertiary storage; Data
    model construction; Data model verification},
  thesaurus = {Very large databases},
  xxcrossref = {Anonymous:1994:ASI},
}
Quest: A Project on Database Mining, Rakesh Agrawal and Michael J. Carey and Christos Faloutsos and Sakti P. Ghosh and Maurice A. W. Houtsma and Tomasz Imielinski and Balakrishna R. Iyer and A. Mahboob and H. Miranda and Ramakrishnan Srikant and Arun N. Swami
% Conference-proceedings version of the Quest project description.
@inproceedings{agrawal.ea:quest-project:94b,
  author = {Rakesh Agrawal and Michael J. Carey and Christos
    Faloutsos and Sakti P. Ghosh and Maurice A. W. Houtsma
    and Tomasz Imielinski and Balakrishna R. Iyer and A.
    Mahboob and H. Miranda and Ramakrishnan Srikant and
    Arun N. Swami},
  title = {Quest: {A} Project on Database Mining},
  editor = {Richard T. Snodgrass and Marianne Winslett},
  booktitle = {Proceedings of the 1994 {ACM} {SIGMOD} International
    Conference on Management of Data},
  address = {Minneapolis, Minnesota},
  month = "24--27~" # may,
  year = {1994},
  pages = {514},
  key_modifier = {b},
}
The Quest Data Mining System, Rakesh Agrawal and Manish Mehta and John Shafer and Ramakrishnan Srikant and Andreas Arning and Toni Bollinger
% Proceedings details are inherited from the cross-referenced entry.
@inproceedings{agrawal.ea:quest-system:96,
  author = {Rakesh Agrawal and Manish Mehta and John Shafer and
    Ramakrishnan Srikant and Andreas Arning and Toni
    Bollinger},
  title = {The Quest Data Mining System},
  pages = {244},
  crossref = {simoudis.ea:proceedings-second:96},
}
Mining Sequential Patterns, R. Agrawal and R. Srikant
% ICDE'95 paper introducing the AprioriSome and AprioriAll algorithms
% named in the abstract.
@inproceedings{agrawal.ea:sequential-patterns:95,
  author = {R. Agrawal and R. Srikant},
  title = {Mining Sequential Patterns},
  booktitle = {International Conference on Data Engineering},
  organization = {IEEE},
  year = {1995},
  pages = {3--14},
  abstract = {We are given a large database of customer
    transactions, where each transaction consists of
    customer-id, transaction time, and the items bought in
    the transaction. We introduce the problem of mining
    sequential patterns over such databases. We present
    three algorithms to solve this problem, and empirically
    evaluate their performance using synthetic data. Two of
    the proposed algorithms, AprioriSome and AprioriAll,
    have comparable performance, albeit AprioriSome
    performs a little better when the minimum number of
    customers that must support a sequential pattern is
    low. Scale-up experiments show that both AprioriSome
    and AprioriAll scale linearly with the number of
    customer transactions. They also have excellent
    scale-up properties with respect to the number of
    transactions per customer and the number of items in a
    transaction.},
}
Data Mining, Rakesh Agrawal
@inproceedings{agrawal:data-mining:94,
  author = {Rakesh Agrawal},
  title = {Data Mining},
  booktitle = {Proceedings of the 13th Symposium on Principles of
    Database Systems},
  publisher = {ACM Press},
  address = {New York, NY, USA},
  month = may,
  year = {1994},
  pages = {75--76},
}
Tutorial: Data Mining, R. Agrawal
% NOTE(review): same author, year and page range as the entry
% agrawal:data-mining:94 -- possibly the same paper under a second key;
% confirm before merging.
@inproceedings{agrawal:tutorial:94,
  author = {R. Agrawal},
  title = {Tutorial: Data Mining},
  editor = {{ACM}},
  booktitle = {13th Symposium --- 1994 May: Minneapolis; {MN}},
  series = {PROCEEDINGS OF THE ACM SIGACT SIGMOD SIGART SYMPOSIUM
    ON PRINCIPLES OF DATABASE SYSTEMS 1994},
  volume = {13},
  publisher = {ACM Press},
  address = {New York, NY 10036, USA},
  pages = {75--76},
  year = {1994},
  keywords = {database systems; ACM; SIGACT; SIGMOD; SIGART;
    computability; theory},
}
Machine Learning tutorial (Slides and Anotated Bibliography), David Aha
% Online tutorial slides; no year recorded.
@misc{aha:machine-learning:,
  author = {David Aha},
  title = {Machine Learning tutorial (Slides and Anotated
    Bibliography)},
  url = {http://www.aic.nrl.navy.mil/~aha/slides.html},
  annote = {David Aha presented the Machine Learning tutorial at
    AI \& Stats 1995. He's kindly put his slides online},
}
Temporal aspects in data mining, Salem Al-naemi
% `year` holds a plain four-digit year (matching the ":92" citation key)
% so styles can sort and label on it; the source's original "1992/3"
% dating is preserved in `note`.
@techreport{al-naemi:temporal-aspects:92,
  author = {Salem Al-naemi},
  title = {Temporal aspects in data mining},
  institution = {Computer Science Department, University of
    Birmingham},
  year = {1992},
  note = {Dated 1992/3},
  annote = {Sections on RdB's, other temporal models and time
    series},
}
Mine for Gold with Parallel Systems, Michael Alexander
% Trade-magazine article; the closing page is unknown ("65--??").
@article{alexander:mine-gold:94,
  author = {Michael Alexander},
  title = {Mine for Gold with Parallel Systems},
  journal = {Datamation},
  volume = {40},
  number = {22},
  pages = {65--??},
  day = {15},
  month = nov,
  year = {1994},
  issn = {0011-6963},
  abstract = {Parallel computing technology has become more
    accessible to IS shops with the release of parallelized
    versions of popular RDBMSs. With such off-the-shelf
    tools, your company can gain competitive advantage
    through techniques like data mining that allow you to
    more finely analyze and project demand for your
    products. But if you're going to need the power of
    massively parallel systems, off-the-shelf solutions are
    still a few years away.},
}
Partial Classification Using Association Rules, Kamal Ali and Stefanos Manganaris and Ramakrishnan Srikant
% Proceedings details are inherited from the cross-referenced entry.
@inproceedings{ali.ea:partial-classification:97,
  author = {Kamal Ali and Stefanos Manganaris and Ramakrishnan
    Srikant},
  title = {Partial Classification Using Association Rules},
  pages = {115},
  crossref = {heckerman.ea:proceedings-third:97},
}
Charter, Robert B. Allen
% Journal charter statement; the abstract reproduces the list of TOIS
% themes.
@article{allen:charter:95,
  author = {Robert B. Allen},
  title = {Charter},
  journal = {ACM Transactions on Information Systems},
  volume = {13},
  number = {3},
  pages = {235},
  year = {1995},
  copyright = {(c) Copyright 1995 Association for Computing
    Machinery},
  abstract = {The ACM Transactions on Information Systems (TOIS)
    considers the design, performance, and evaluation of
    computer systems that facilitate the presentation of
    information in a variety of media, as well as
    underlying technologies that support these systems. The
    major themes of TOIS and those topics which distinguish
    it from other ACM Transactions include: - Information
    Retrieval and Information Filtering: Algorithms and
    inference mechanisms for search, retrieval, and
    presentation of information and models of user
    information preferences. - Information Interfaces:
    Hypertext and hypermedia interfaces, information
    visualization, multimedia presentation, and task and
    user models for information systems. - Natural Language
    Processing: Computational linguistics and models of
    natural language (including content, syntax, semantics,
    and dialogue) relevant to information systems. -
    Knowledge and Information Representation:
    Representation issues for supporting information
    systems including semantic and object-oriented
    databases, knowledge bases, and hypertext/hypermedia
    document models. - Multimedia Information Systems:
    Semantics, search, and presentation of media including
    audio, image, video, and virtual reality. - Networked
    Information Systems: Interfaces and indexing, resource
    discovery, and visualization. - Organizational
    Interfaces and Social Impact of Information Systems:
    Electronic mail; decision and negotiation support
    systems; the effects of information system use on
    groups, organizations, and communities; social
    constraints imposed on information systems such as
    legal and privacy concerns. - Design and Evaluation of
    Information Systems: Design principles for information
    systems, methodologies for evaluating information
    systems, and programming languages relevant to
    information systems. - Information System Applications:
    Electronic books, documents, journals, movies, and
    libraries; authoring systems; office information
    systems; geographic information systems; and
    intelligent tutoring systems.},
}
Knowledge discovery in biomedical databases - a machine induction approach, H. Alnahi and S. Alshawi
% The (double) issue number is stored in the standard `number` field as
% an en-dash range. The authors' postal affiliation is kept in
% `affiliation`, because `address` denotes the publisher's location.
@article{alnahi.ea:biomedical-machine:93,
  author = {H. Alnahi and S. Alshawi},
  title = {Knowledge discovery in biomedical databases - a
    machine induction approach},
  journal = {Computer Methods and Programs in Biomedicine},
  volume = {39},
  number = {3--4},
  pages = {343--349},
  year = {1993},
  affiliation = {Brunel Univ, Dept Comp Sci, Uxbridge UB8 3PH, Middx,
    England},
  abstract = {The increase in the number and size of available
    databases by far exceeds the growth of the
    corresponding knowledge. Furthermore, many databases
    contain information which is not possessed by an
    existing human expert. This creates both a need and an
    opportunity for extracting knowledge from databases. An
    unsolved problem in molecular biology is the problem of
    predicting a protein's secondary structure from its
    primary structure. Inductive machine learning is a
    search for a plausible general description which can
    explain the given input data, and is useful for
    predicting new data. In this paper we present a
    statistical inductive algorithm which can be used to
    produce new rules for predicting multiple protein
    secondary structures from protein primary structure
    databases.},
  keywords = {SECONDARY STRUCTURE, PREDICTION, SEQUENCE, MACHINE
    LEARNING, INDUCTION, DATABASES, KNOWLEDGE, RULES,
    PROTEIN PRIMARY SECONDARY STRUCTURES, AMINO ACID
    RESIDUES},
}
Discovering rules for water demand prediction: an enhanced rough-set approach (reprinted from proceedings of the international joint conference on artificial intelligence), A. J. An and N. Shan and C. Chan and N. Cercone and W. Ziarko
% The issue number is stored in the standard `number` field so that
% styles print it. The authors' postal affiliation is kept in
% `affiliation`, because `address` denotes the publisher's location.
@article{an.ea:discovering-rules:96,
  author = {A. J. An and N. Shan and C. Chan and N. Cercone and W.
    Ziarko},
  title = {Discovering rules for water demand prediction: an
    enhanced rough-set approach (reprinted from proceedings
    of the international joint conference on artificial
    intelligence)},
  journal = {Engineering Applications of Artificial Intelligence},
  volume = {9},
  number = {6},
  pages = {645--653},
  year = {1996},
  affiliation = {Univ Regina, Dept Comp Sci, Regina, Sk S4S 0A2,
    Canada},
  abstract = {Prediction of consumer demands is a pre-requisite for
    optimal control of water distribution systems because
    minimum-cost pumping schedules can be computed if water
    demands are accurately estimated. This paper presents an
    enhanced rough-sets method for generating prediction
    rules from a set of observed data. The proposed method
    extends upon the standard rough set model by making use
    of the statistical information inherent in the data to
    handle incomplete and ambiguous training samples. It
    also discusses some experimental results from using
    this method for discovering knowledge on water demand
    prediction. Copyright (C) 1996 IJCAI Inc.},
  keywords = {water demand prediction, knowledge discovery, rough
    sets},
}
Edm - a general framework for data mining based on evidence theory, S. S. Anand and D. A. Bell and J. G. Hughes
% The issue number is stored in the standard `number` field so that
% styles print it. The authors' postal affiliation is kept in
% `affiliation`, because `address` denotes the publisher's location.
% The acronym EDM (expanded in the abstract as Evidence-based Data
% Mining) is brace-protected in the title so styles keep its case.
@article{anand.ea:edm-general:96,
  author = {S. S. Anand and D. A. Bell and J. G. Hughes},
  title = {{EDM} - a general framework for data mining based on
    evidence theory},
  journal = {Data \& Knowledge Engineering},
  volume = {18},
  number = {3},
  pages = {189--223},
  year = {1996},
  affiliation = {Univ Ulster, Fac Informat, Sch Informat \& Software
    Engn, Jordanstown, North Ireland},
  abstract = {Data Mining or Knowledge Discovery in Databases
    [1,15,23] is currently one of the most exciting and
    challenging areas where database techniques are coupled
    with techniques from Artificial Intelligence and
    mathematical sub-disciplines to great potential
    advantage. It has been defined as the non-trivial
    extraction of implicit, previously unknown and
    potentially useful information from data. A lot of
    research effort is being directed towards building
    tools for discovering interesting patterns which are
    hidden below the surface in databases. However, most of
    the work being done in this field has been
    problem-specific and no general framework has yet been
    proposed for Data Mining. In this paper we seek to
    remedy this by proposing, EDM - Evidence-based Data
    Mining - a general framework for Data Mining based on
    Evidence Theory. Having a general framework for Data
    Mining offers a number of advantages. It provides a
    common method for representing knowledge which allows
    prior knowledge from the user or knowledge discovered
    by another discovery process to be incorporated into
    the discovery process. A common knowledge
    representation also supports the discovery of
    meta-knowledge from knowledge discovered by different Data
    Mining techniques. Furthermore, a general framework can
    provide facilities that are common to most discovery
    processes, e.g. incorporating domain knowledge and
    dealing with missing values. The framework presented in
    this paper has the following additional advantages. The
    framework is inherently parallel. Thus, algorithms
    developed within this framework will also be parallel
    and will therefore be expected to be efficient for
    large data sets - a necessity as most commercial data
    sets, relational or otherwise, are very large. This is
    compounded by the fact that the algorithms are complex.
    Also, the parallelism within the framework allows its
    use in parallel, distributed and heterogeneous
    databases. The framework is easily updated and new
    discovery methods can be readily incorporated within
    the framework, making it 'general' in the functional
    sense in addition to the representational sense
    considered above. The framework provides an intuitive
    way of dealing with missing data during the discovery
    process using the concept of Ignorance borrowed from
    Evidence Theory. The framework consists of a method for
    representing data and knowledge, and methods for data
    manipulation or knowledge discovery(1). We suggest an
    extension of the conventional definition of mass
    functions in Evidence Theory for use in Data Mining, as
    a means to represent evidence of the existence of rules
    in the database. The discovery process within EDM
    consists of a series of operations on the mass
    functions. Each operation is carried out by an EDM
    operator. We provide a classification for the EDM
    operators based on the discovery functions performed by
    them and discuss aspects of the induction, domain and
    combination operator classes. The application of EDM to
    two separate Data Mining tasks is also addressed,
    highlighting the advantages of using a general
    framework for Data Mining in general and, in
    particular, using one that is based on Evidence
    Theory.},
  keywords = {DATA MINING, KNOWLEDGE DISCOVERY IN DATABASES,
    UNCERTAINTY HANDLING, EVIDENCE THEORY, PARALLEL
    DISCOVERY},
}
A High-Performance Data Mining Server, S. S. Anand and D. A. Bell and J. G. Hughes and C. M. Shapcott
% NOTE(review): Lecture Notes in Computer Science is a book series, so
% this may be better modelled as a paper in a proceedings volume --
% confirm. The closing page is unknown ("907--??").
@article{anand.ea:high-performance-server:96,
  author = {S. S. Anand and D. A. Bell and J. G. Hughes and C. M.
    Shapcott},
  title = {A High-Performance Data Mining Server},
  journal = {Lecture Notes in Computer Science},
  volume = {1067},
  pages = {907--??},
  year = {1996},
  issn = {0302-9743},
}
Data mining in parallel, S. S. Anand and C. Shapcott and D. Bell and J. Hughes
% The `isbn` field holds only the number: styles add their own "ISBN"
% label, and the check digit is written as upper-case X.
@inproceedings{anand.ea:parallel:95,
  author = {S. S. Anand and C. Shapcott and D. Bell and J.
    Hughes},
  title = {Data mining in parallel},
  booktitle = {Proceedings of WoTUG-18: Transputer and occam
    Developments},
  series = {Transputer and Occam Engineering},
  volume = {44},
  publisher = {IOS Press},
  address = {Amsterdam},
  month = apr,
  year = {1995},
  pages = {113--124},
  isbn = {90-5199-222-X},
}
Getting to grips with arrears: `data mining' systems at the Leeds, anonymous
% Unsigned article; the author is recorded literally as "anonymous".
@article{anonymous:getting-to:94,
  author = {anonymous},
  title = {Getting to grips with arrears: `data mining' systems
    at the {L}eeds},
  journal = {Expert Systems},
  volume = {11},
  number = {2},
  pages = {122--124},
  month = may,
  year = {1994},
  keywords = {Applications, Data mining, kdd, Attar Software, Xpert
    Rule Analyser},
}
Data Mining: Intelligent Technology Gets down to Business, anonymous
% Unsigned article in a double (November/December) issue; the month is
% built by concatenating the two month macros.
@article{anonymous:intelligent-technology:93,
  author = {anonymous},
  title = {Data Mining: Intelligent Technology Gets down to
    Business},
  journal = {PC AI},
  month = nov # " - " # dec,
  year = {1993},
}
Lessons in Data Mining, Anonymous
% Unsigned magazine article; the closing page is unknown ("40--??").
@article{anonymous:lessons:97,
  author = {Anonymous},
  title = {Lessons in Data Mining},
  journal = {Byte Magazine},
  volume = {22},
  number = {2},
  pages = {40--??},
  month = feb,
  year = {1997},
  issn = {0360-5280},
}
SIGMOD '93. 1993 ACM SIGMOD. International Conference on Management of Data, Anonymous (Ed)
% Whole-proceedings record for SIGMOD '93. `booktitle` duplicates
% `title` so that entries cross-referencing this one under classic
% BibTeX can inherit it. The conf* and indexing fields are non-standard.
@proceedings{anonymous:sigmod-93:93,
  editor = {Anonymous},
  title = {{SIGMOD} '93. 1993 {ACM} {SIGMOD}. International
    Conference on Management of Data},
  booktitle = {SIGMOD '93. 1993 ACM SIGMOD. International Conference
    on Management of Data},
  series = {SIGMOD Record (ACM Special Interest Group on
    Management of Data)},
  volume = {22(2)},
  publisher = {ACM Press},
  address = {New York, NY 10036, USA},
  month = jun,
  year = {1993},
  issn = {0163-5808},
  classification = {C6160 (Database management systems (DBMS)); C4250
    (Database theory); C7250 (Information storage and
    retrieval); C6170 (Expert systems); C6120 (File
    organisation); C6140D (High level languages); C6130
    (Data handling techniques); C6150G (Diagnostic,
    testing, debugging and evaluating systems)},
  confdate = {26--28 May 1993},
  conflocation = {Washington, DC, USA},
  confsponsor = {ACM},
  keywords = {Benchmark programs; Database rules; Integrity; Join
    processing; Object-oriented databases; Memory-based
    implementations; DBMS implementation issues; Recovery;
    Knowledge discovery; Temporal reasoning; Data
    compression; Query optimisation; Secondary storage
    techniques; Search structures; Query languages;
    Interfaces; Intelligent/deductive DBMSs;
    Relational/parallel DBMS processing; Transaction
    management; Object/scientific DBMSs; Interoperability},
  thesaurus = {Data compression; Database management systems;
    Database theory; Inference mechanisms; Knowledge based
    systems; Program testing; Query languages; Query
    processing; Storage management; System recovery;
    Transaction processing},
}
Supercomputers Knock At IS Doors, Anonymous
@Article{anonymous:supercomputers-knock-at-is-doors:92,
  author   = {Anonymous},
  title    = {Supercomputers Knock At {IS} Doors},
  journal  = {Datamation},
  volume   = {38},
  number   = {24},
  pages    = {79--??},
  day      = {01},
  month    = dec,
  year     = {1992},
  ISSN     = {0011-6963},
  abstract = {Cost-effective massively parallel designs gain
              converts for data mining and OLTP applications among
              leading edge users and traditional systems suppliers.},
}
Computational learning theory: an introduction, Martin Anthony and Norman Biggs
@Book{anthony.ea:computational-learning:92,
  author    = {Martin Anthony and Norman Biggs},
  title     = {Computational learning theory: an introduction},
  series    = {Cambridge Tracts in Theoretical Computer Science},
  volume    = {30},
  publisher = {Cambridge University Press},
  year      = {1992},
}
Knowledge Mining by Imprecise Querying: A Classification-based System, T. M. Anwar and H. W. Beck and S. B. Navathe
@InProceedings{anwar.ea:by-imprecise:92,
  author    = {T. M. Anwar and H. W. Beck and S. B. Navathe},
  title     = {Knowledge Mining by Imprecise Querying: {A}
               Classification-based System},
  booktitle = {Proceedings of the International Conference on Data
               Engineering},
  pages     = {622--630},
  address   = {Tempe, AZ},
  year      = {1992},
  month     = feb,
  abstract  = {Knowledge mining is the process of discovering new
               knowledge that is hitherto unknown. Users with a lack
               of knowledge of database schemas engage in the process
               of knowledge mining by posing imprecise queries. An
               approach to knowledge mining by imprecise querying is
               presented that utilizes conceptual clustering
               techniques. In contrast to numeric or fuzzy set
               approaches which ultimately rely on some distance
               metric and threshold to processing such queries,
               conceptual clustering retrieves instances which are
               structurally, semantically, and pragmatically similar
               to the query even though they may not match the
               requirements exactly. The query processor has both a
               deductive and inductive component. The deductive
               component finds precise matches in the traditional
               sense, and the inductive component identifies ways in
               which imprecise matches may be considered similar.
               Ranking on similarity is done using the database
               taxonomy, by which similar instances become members of
               the same class. Relative similarity is determined by
               depth in the taxonomy. The conceptual clustering
               algorithm, its use in query processing and an example
               are presented.},
}
Sales surge as mainframes find a role in client\slash server, E. L. Appleton
@Article{appleton:sales-surge:95,
  author         = {E. L. Appleton},
  title          = {Sales surge as mainframes find a role in client\slash
                    server},
  journal        = {Datamation},
  year           = {1995},
  month          = jun,
  volume         = {41},
  number         = {10},
  pages          = {48},
  ISSN           = {0011-6963},
  language       = {English},
  pubcountry     = {USA},
  classification = {D5010 (Computers and work stations); D5020 (Computer
                    networks and intercomputer communications)},
  keywords       = {Mainframes; Client/server; Demand; Economy;
                    Large-system market; Vendors; IBM Parallel Sysplex;
                    UNIX server; NT server; Pyramid; HP T-500; Data mining;
                    Parallelism; IBM Power Parallel; Amdahl ECL mainframe},
  thesaurus      = {Client-server systems; DP industry; Mainframes},
}
Predicting defects in Disk Drive Manufacturing: a case study in High-Dimensional Classification, Chidanand Apt\'e and Sholom Weiss and Gordon Grout
@InProceedings{apte.ea:predicting-defects:93,
  author    = {Chidanand Apt{\'e} and Sholom Weiss and Gordon Grout},
  title     = {Predicting defects in Disk Drive Manufacturing: a case
               study in High-Dimensional Classification},
  booktitle = {Proceedings of the 9th Conference on Artificial
               Intelligence for Applications},
  pages     = {212--218},
  address   = {Orlando, Florida},
  year      = {1993},
}
A Linear Method for Deviation Detection in Large Databases, Andreas Arning and Rakesh Agrawal and Prabhakar Raghavan
@InProceedings{arning.ea:linear-method:96,
  author   = {Andreas Arning and Rakesh Agrawal and Prabhakar
              Raghavan},
  title    = {A Linear Method for Deviation Detection in Large
              Databases},
  pages    = {164},
  crossref = {simoudis.ea:proceedings-second:96},
}
Exploiting Background Knowledge in Automated Discovery, John M. Aronis and Foster J. Provost and Bruce G. Buchanan
@InProceedings{aronis.ea:exploiting-background:96,
  author   = {John M. Aronis and Foster J. Provost and Bruce G.
              Buchanan},
  title    = {Exploiting Background Knowledge in Automated
              Discovery},
  pages    = {355},
  crossref = {simoudis.ea:proceedings-second:96},
}
Increasing the Efficiency of Data Mining Algorithms with Breadth-First Marker Propagation, John M. Aronis and Foster J. Provost
@InProceedings{aronis.ea:increasing-efficiency:97,
  author   = {John M. Aronis and Foster J. Provost},
  title    = {Increasing the Efficiency of Data Mining Algorithms
              with Breadth-First Marker Propagation},
  pages    = {119},
  crossref = {heckerman.ea:proceedings-third:97},
}
Data mining for lead identification and explosion, S. Ash and S. Gothe
@Article{ash.ea:lead-identification:97,
  author  = {S. Ash and S. Gothe},
  address = {Tripos Inc, St Louis, MO, 63144},
  title   = {Data mining for lead identification and explosion},
  journal = {Abstracts of Papers of the American Chemical Society},
  year    = {1997},
  volume  = {213},
  number  = {Pt1},
  pages   = {57--CINF},
}
Managing Complexity in Large Data Bases Using Self-Organizing Maps, Barbro Back and Mikko Irjala and Kaisa Sere and Hannu Vanharanta
@TechReport{back.ea:managing-complexity:96,
  author      = {Barbro Back and Mikko Irjala and Kaisa Sere and Hannu
                 Vanharanta},
  title       = {Managing Complexity in Large Data Bases Using
                 Self-Organizing Maps},
  institution = {TUCS - Turku Centre for Computer Science},
  number      = {TUCS-TR-48},
  month       = oct # " 23",
  year        = {1996},
  keywords    = {neural networks, self-organizing maps, data bases,
                 benchmarking},
  URL         = {http://www.tucs.abo.fi/publications/techreports/TR48.html},
  abstract    = {The amount of financial information in today's
                 sophisticated large data bases is huge and makes
                 comparisons between company performance - especially
                 over time - difficult or at least very time consuming.
                 The aim of this paper is to investigate whether neural
                 networks in the form of self-organizing maps can be
                 used to manage the complexity in large data bases. We
                 structure and analyze accounting numbers in a large
                 data base over several time periods. By using self
                 organizing maps, we overcome the problems associated
                 with finding the appropriate underlying distribution
                 and the functional form of the underlying data in the
                 structuring task that is often encountered, for
                 example, when using cluster analysis. The method chosen
                 also offers a way of visualizing the results. The data
                 base in this study consists of annual reports of more
                 than 120 world wide forest companies with data from a
                 five year time period. This paper is an extended
                 version of our paper Data Mining Accounting Numbers Using
                 Self Organising Maps presented at Finnish Artificial
                 Intelligence Conference in Vasa 20-23 August 1996.},
}
ReDuce: Automatic Structuring and Compression in Relational Databases, B. Bain and C. Sammut and A. Sharma and J. Shepherd
@InProceedings{bain.ea:reduce-automatic:96,
  author    = {B. Bain and C. Sammut and A. Sharma and J. Shepherd},
  title     = {{ReDuce}: {Automatic} Structuring and Compression in
               Relational Databases},
  booktitle = {Proceedings of the MLnet Familiarization Workshop on
               Data Mining with Inductive Logic Programming},
  pages     = {41--52},
  year      = {1996},
}
Knowledge from data using fuzzy methods, J. F. Baldwin
@Article{baldwin:using-fuzzy:96,
  author   = {J. F. Baldwin},
  address  = {Univ Bristol, Dept Engn Math, Bristol, Avon, England},
  title    = {Knowledge from data using fuzzy methods},
  journal  = {Pattern Recognition Letters},
  year     = {1996},
  volume   = {17},
  number   = {6},
  pages    = {593--600},
  abstract = {The basic concept of a data browser is explained and
              some methods are described which are suitable for
              extracting knowledge from data as an induction process.
              The data browser gives data mining capabilities but
              also provides a stage for computers and users to act
              out their parts in this knowledge discovery process.},
}
From molecules to models to data mining, N. Basta
@Article{basta:molecules-to:96,
  author  = {N. Basta},
  address = {US Dept Def, Off Infosec Comp Sci, Ft George G Meade,
             MD, 20755},
  title   = {From molecules to models to data mining},
  journal = {Chemical Engineering},
  year    = {1996},
  volume  = {103},
  number  = {2},
  pages   = {5},
}
Brute-Force Mining of High-Confidence Classification Rules, Roberto J. Bayardo Jr.
@InProceedings{bayardo:brute-force-high-confidence:97,
  author   = {Bayardo, Jr., Roberto J.},
  title    = {Brute-Force Mining of High-Confidence Classification
              Rules},
  pages    = {123},
  crossref = {heckerman.ea:proceedings-third:97},
}
Discovery and Maintenance of Functional Dependencies by Independencies, S. Bell
@InProceedings{bell:maintenance-functional:95,
  author    = {S. Bell},
  title     = {Discovery and Maintenance of Functional Dependencies
               by Independencies},
  booktitle = {Proceedings of the Workshop on Knowledge Discovery in
               Databases},
  publisher = {AAAI Press},
  year      = {1995},
  pages     = {27--32},
}
From data properties to evidence, D. A. Bell
@Article{bell:properties-to:93,
  author   = {D. A. Bell},
  address  = {Univ Ulster, Dept Informat Sci, Jordanstown BT37 0QB,
              Antrim, North Ireland},
  title    = {From data properties to evidence},
  journal  = {IEEE Transactions on Knowledge and Data Engineering},
  year     = {1993},
  volume   = {5},
  number   = {6},
  pages    = {965--969},
  abstract = {Information and knowledge in computerized information
              systems are often characterized by uncertainty. The
              facts needed for some realistic applications are
              unavailable or are crudely estimated or judged. This
              problem manifests itself frequently in information
              systems centered on databases. We describe here an
              exploration of an aspect of the problem of handling
              uncertain evidence on which reasoning is to be based.
              We focus upon the problem of making decisions among
              propositions based on both uncertain data items (in
              contrast to data in conventional databases) and
              arguments which are not certain. The primary knowledge
              discovery issue we address is a classification problem
              - which classification does the available evidence
              support? The method investigated here seeks to exploit
              information available from conventional database
              systems - namely, the integrity assertions or data
              dependency information contained in the database. This
              information, e.g., from functional dependencies and a
              form of multivalued dependencies, allows us to rank
              arguments in terms of their strengths. Hence, as a step
              in the process of discovering classification knowledge,
              using a database as a secondary knowledge discovery
              exercise, we explicate latent knowledge pertinent to
              arguments of relevance to the purpose at hand. This is
              called evidence. Information is requested via user
              prompts from an evidential reasoner. It is fed as
              evidence to the reasoner. An object-oriented structure
              for managing evidence is used to model the conclusion
              space and to reflect the evidence structure. The
              implementation of the evidence structure and an example
              of its use are outlined.},
  keywords = {CLASSIFICATION, DATA DEPENDENCIES, DATABASE, EVIDENCE
              BASE, EVIDENTIAL REASONING, INTEGRITY CONSTRAINTS},
}
Value-added databases: knowledge discovery and evidential reasoning., D. Bell
@InProceedings{bell:value-added-evidential:94,
  author    = {D. Bell},
  title     = {Value-added databases: knowledge discovery and
               evidential reasoning},
  booktitle = {Proceedings of the International Workshop on Advances
               in Databases and Information Systems - {ADBIS'94}},
  address   = {Moscow},
  year      = {1994},
  month     = may # " 23--26",
  pages     = {2--9},
  abstract  = {Results of research into methods of managing evidence
               can be coupled with the power and capacity of data
               management systems to give a potent approach to
               discovering interesting but hidden patterns in large
               collections of data. We present some pertinent results
               from evidence theory and its applications, and suggest
               an approach to the exploitation of these results in the
               discovery of knowledge which is held in databases. In
               this sense we {\em add value} to databases, which
               presumably already justify their existence, and hence
               further increase the attractiveness of very large
               database systems.},
}
An Examination of Inductive Learning Algorithms for the Classification of Sleep Signals, John A. Bentrup and Sylvian R. Ray
@TechReport{bentrup.ea:examination-inductive:93,
  author      = {John A. Bentrup and Sylvian R. Ray},
  title       = {An Examination of Inductive Learning Algorithms for
                 the Classification of Sleep Signals},
  institution = {Department of Computer Science, University of Illinois
                 at Urbana-Champaign},
  type        = {Report},
  number      = {UIUCDCS-R-93-1792},
  address     = {1304 Springfield Avenue, Urbana, Il 61801},
  month       = feb,
  year        = {1993},
  URL         = {ftp://a.cs.uiuc.edu/pub/TechReports/UIUCDCS-R-93-1792.ps.Z},
  note        = {Modified version to appear in Proceedings of the 30th
                 Annual Rocky Mountain Bioengineering Symposium (April
                 1993).},
  annote      = {Nine inductive learning algorithms are tested on sleep
                 signals of 161 subjects. Algorithms are ID3, C4, CART,
                 MDL, AIMS, Bayes, PLS(K), PRG, Nearest Neighbour and
                 COBWEB. Nice table summarising algorithms.},
}
Integrated Learning in a Real Domain, F. Bergadano and A. Giordana and L. Saitta
@InCollection{bergadano.ea:integrated-learning:91,
  author    = {F. Bergadano and A. Giordana and L. Saitta},
  title     = {Integrated Learning in a Real Domain},
  pages     = {277--288},
  editor    = {Gregory Piatetsky-Shapiro and William J. Frawley},
  booktitle = {Knowledge Discovery in Databases},
  edition   = {1st},
  publisher = {AAAI Press / The MIT Press},
  address   = {Menlo Park, California},
  year      = {1991},
}
Applying Data Mining and Machine Learning Techniques to Submarine Intelligence Analysis, Ulla Bergsten and Johan Schubert and Per Svensson
@InProceedings{bergsten.ea:applying-machine:97,
  author   = {Ulla Bergsten and Johan Schubert and Per Svensson},
  title    = {Applying Data Mining and Machine Learning Techniques
              to Submarine Intelligence Analysis},
  pages    = {127},
  crossref = {heckerman.ea:proceedings-third:97},
}
Hot Topics: Customizing information. 2. How successful are we so far?, D. Berleant and H. Berghel
@Article{berleant.ea:hot-topics:94,
  author         = {D. Berleant and H. Berghel},
  title          = {Hot Topics: Customizing information. 2. {How}
                    successful are we so far?},
  journal        = {Computer},
  year           = {1994},
  month          = oct,
  volume         = {27},
  number         = {10},
  pages          = {76--78},
  ISSN           = {0018-9162},
  affiliation    = {Dept. of Comput. Syst. Eng., Arkansas Univ.,
                    Fayetteville, AR, USA},
  classification = {C6130D (Document processing techniques); C7210
                    (Information services and centres); C7250N (Front end
                    systems for online searching)},
  keywords       = {Advanced information customization; Browsing; Data
                    interchange; Digital library; Document customization;
                    Filtering; Hypermedia; Hypertext; Information analysis;
                    Information extraction; Information retrieval;
                    Information science; Information-customizing
                    interfaces; Interactivity; Knowledge discovery;
                    Nonprescriptive structuring},
  thesaurus      = {Document handling; Full-text databases; Hypermedia;
                    Information retrieval; Online front-ends},
}
Enactment in Information Farming, Mark Bernstein
@InProceedings{bernstein:enactment-information:93,
  author    = {Mark Bernstein},
  title     = {Enactment in Information Farming},
  booktitle = {Proceedings of ACM Hypertext'93},
  series    = {Technical Briefings},
  year      = {1993},
  pages     = {242--249},
  copyright = {(c) Copyright 1993 Association for Computing
               Machinery},
  keywords  = {Design, Rhetoric, Enactment, Collaboration,
               Information farming},
  abstract  = {Information farming views the cultivation of
               information as a continuing, collaborative activity
               performed by groups of people working together to
               achieve changing individual and common goals. Failure
               to differentiate information farming from related but
               distinct activities like information mining and data
               factories has been a fruitful source of
               misunderstanding and discord in the hypertext
               literature and in the design of hypertext environments.
               Dramatic enactment and visual salience -- not recall,
               precision, or usability -- assume primary roles in
               design for information gardening. In this technical
               briefing, we examine how enactment contribute to the
               success and failure of a variety of Hypergate and
               Storyspace features.},
}
Computational Methods for Intelligent Information Access, Michael W. Berry and Susan T. Dumais and Todd A. Letsche
@InProceedings{berry.ea:computational-methods:95,
  author    = {Michael W. Berry and Susan T. Dumais and Todd A.
               Letsche},
  title     = {Computational Methods for Intelligent Information
               Access},
  booktitle = {Proceedings of Supercomputing'95},
  publisher = {ACM/IEEE},
  address   = {San Diego, CA},
  month     = dec,
  year      = {1995},
  keywords  = {data mining, indexing, information, latent, matrices,
               retrieval, semantic, singular value decomposition
               (SVD), sparse, updating},
  abstract  = {ps/PDF on the CD with MPEG.},
}
Testing Complex Temporal Relationships Involving Multiple Granularities and Its Application to Data Mining, C. Bettini and X. Sean Wang and S. Jajodia
@InProceedings{bettini.ea:testing-complex:96,
  author    = {C. Bettini and X. Sean Wang and S. Jajodia},
  title     = {Testing Complex Temporal Relationships Involving
               Multiple Granularities and Its Application to Data
               Mining},
  editor    = {{ACM}},
  booktitle = {Proceedings of the Fifteenth {ACM}
               {SIGACT}-{SIGMOD}-{SIGART} Symposium on Principles of
               Database Systems, {PODS} 1996, Montr{\'e}al, Canada,
               June 3--5, 1996},
  volume    = {15},
  publisher = {ACM Press},
  address   = {New York, NY 10036, USA},
  year      = {1996},
  series    = {Proceedings of the ACM SIGACT SIGMOD SIGART Symposium
               on Principles of Database Systems},
  pages     = {68--78},
  annote    = {Held in conjunction with the 1996 ACM SIGMOD
               international conference on management of data. Also
               known as PODS 1996},
  keywords  = {database systems; PODS; ACM; SIGMOD; SIGART; SIGACT},
}
Time-dependent concepts: representation and reasoning using temporal description logics, C. Bettini
@Article{bettini:time-dependent-concepts:97,
  author   = {C. Bettini},
  address  = {Univ Milan, Dipartimento Sci Informaz, I-20122 Milan,
              Italy},
  title    = {Time-dependent concepts: representation and reasoning
              using temporal description logics},
  journal  = {Data \& Knowledge Engineering},
  year     = {1997},
  volume   = {22},
  number   = {1},
  pages    = {1--38},
  abstract = {A time-dependent concept is a conceptual entity that
              is defined in terms of temporal relationships with
              other entities. For example, the concept of an action
              is defined in terms of a set of temporal relationships
              among states of a system. The concept of ''widow'', in
              natural language, is defined in terms of events that
              have occurred in the past. Time-dependent concepts
              appear in several application areas, from natural
              language to diagnosis, from planning to data mining. An
              interesting issue in knowledge representation is how to
              formally represent and reason with these concepts. In
              this paper, we represent a family of formal
              representation languages obtained as an interval-based
              temporal extension of description logics. We illustrate
              the expressiveness of these formalisms in representing
              time-dependent concepts with respect to standard
              description logics and other extensions. We give some
              complexity results for reasoning problems and we
              propose approximate algorithms to compute subsumption
              among time-dependent concepts.},
  keywords = {INTERVALS, temporal knowledge, temporal reasoning,
              description logics, taxonomies, subsumption algorithms,
              temporal objects},
}
Advanced Scout: Data Mining and Knowledge Discovery in NBA data, Inderpal Bhandari and Ed Colet and Jennifer Parker and Zachary Pines and Rajiv Pratap and Krishnakumar Ramanujam
@Article{bhandari.ea:advanced-scout:97,
  author  = {Inderpal Bhandari and Ed Colet and Jennifer Parker and
             Zachary Pines and Rajiv Pratap and Krishnakumar
             Ramanujam},
  title   = {Advanced Scout: Data Mining and Knowledge Discovery in
             {NBA} data},
  journal = {Data Mining and Knowledge Discovery},
  volume  = {1},
  number  = {1},
  year    = {1997},
  annote  = {Advanced Scout is a PC-based data mining application
             used by National Basketball Association (NBA) coaching
             staffs to discover interesting patterns in basketball
             game data. We describe Advanced Scout software from the
             perspective of data mining and knowledge discovery.
             This paper highlights the pre-processing of raw data
             that the program performs, describes the data mining
             aspects of the software and how the interpretation of
             patterns supports the process of knowledge discovery.
             The underlying technique of attribute focusing as the
             basis of the algorithm is also described. The process
             of pattern interpretation is facilitated by allowing
             the user to relate patterns to video tape.},
}
A case-study of software process improvement during development, I. Bhandari and M. Halliday and E. Tarver and D. Brown and J. Chaar and R. Chillarege
@Article{bhandari.ea:case-study-software:93,
  author   = {I. Bhandari and M. Halliday and E. Tarver and D. Brown
              and J. Chaar and R. Chillarege},
  address  = {IBM Corp, Thomas J Watson Res Ctr, Yorktown Hts, NY,
              10598; IBM Corp, Mid Hudson Valley Programming Lab,
              Wappingers Falls, NY, 12590},
  title    = {A case-study of software process improvement during
              development},
  journal  = {IEEE Transactions on Software Engineering},
  year     = {1993},
  volume   = {19},
  number   = {12},
  pages    = {1157--1170},
  abstract = {We present a case study of the use of a software
              process improvement method which is based on the
              analysis of defect data. The first step of the method
              is the classification of software defects using
              attributes which relate defects to specific process
              activities. Such classification captures the semantics
              of the defects in a fashion which is useful for process
              correction. The second step utilizes a machine-assisted
              approach to data exploration which allows a
              project team to discover such knowledge from defect
              data as is useful for process correction. We show that
              such analysis of defect data can readily lead a project
              team to improve their process during development.},
  keywords = {CYCLE, DATA EXPLORATION, DEFECT-BASED PROCESS
              IMPROVEMENT, IN-PROCESS METRICS, KNOWLEDGE DISCOVERY},
}
Attribute focusing - machine-assisted knowledge discovery applied to software production process-control, I. Bhandari
@Article{bhandari:attribute-focusing:94,
  author   = {I. Bhandari},
  address  = {IBM Corp, Thomas J Watson Res Ctr, Yorktown Hts, NY,
              10598},
  title    = {Attribute focusing - machine-assisted knowledge
              discovery applied to software production
              process-control},
  journal  = {Knowledge Acquisition},
  year     = {1994},
  volume   = {6},
  number   = {3},
  pages    = {271--294},
  abstract = {How can people who are not trained in data analysis
              discover knowledge from a database of attribute-valued
              data? I address this question by presenting a
              man-machine approach to knowledge discovery called
              Attribute Focusing and its application to software
              production process control. Attribute Focusing utilizes
              an automatic filter to focus attention on that small
              part of a large amount of data which is interesting. A
              person studies that part in a manner which leads him to
              discover knowledge about the physical situation to
              which the data pertain. Specifically, the paper
              describes: 1. A model of interestingness of data based
              on the magnitude of data values, the association of
              data values and basic knowledge of the limits of human
              processing. 2. The use of that model of interestingness
              by people to discover knowledge. 3. The application of
              the Attribute Focusing approach to diagnose and correct
              the software production process. Based on the results
              that have been observed, the paper concludes that
              man-machine approaches to knowledge discovery should be
              emphasized much more than has been in the past, and
              that Attribute Focusing is a powerful, practical
              approach to such discovery.},
}
Data mining, N. Bissantz and J. Hagedorn
@Article{bissantz.ea:data-mining:93,
  author  = {N. Bissantz and J. Hagedorn},
  title   = {Data mining},
  journal = {Wirtschaftsinformatik},
  year    = {1993},
  volume  = {35},
  number  = {5},
  pages   = {481--487},
}
Relational knowledge discovery in databases, H. Blockeel and L. De Raedt
@InProceedings{blockeel.ea:relational:96,
  author    = {H. Blockeel and De Raedt, L.},
  title     = {Relational knowledge discovery in databases},
  booktitle = {Proceedings of the 6th International Workshop on
               Inductive Logic Programming},
  editor    = {S. Muggleton},
  publisher = {Stockholm University, Royal Institute of Technology},
  pages     = {1--13},
  year      = {1996},
}
Discovery, Confirmation and Incorporation of Causal Relationships from a Large Time-Oriented Clinical Database: The RX Project, Robert L. Blum
@Article{blum:confirmation-incorporation:82,
  author  = {Robert L. Blum},
  title   = {Discovery, Confirmation and Incorporation of Causal
             Relationships from a Large Time-Oriented Clinical
             Database: The {RX} Project},
  journal = {Computers and Biomedical Research},
  year    = {1982},
  volume  = {15},
  pages   = {164--187},
}
Discovery and Representation of Causal Relationships from a Large Time-Oriented Clinical Database: The RX Project, Robert L. Blum
@Book{blum:representation-causal:82,
  author    = {Robert L. Blum},
  title     = {Discovery and Representation of Causal Relationships
               from a Large Time-Oriented Clinical Database: The {RX}
               Project},
  year      = {1982},
  publisher = {Springer-Verlag},
  series    = {Lecture Notes in Medical Informatics},
  volume    = {19},
}
Occam's Razor, Anselm Blumer and Andrzej Ehrenfeucht and David Haussler and Manfred K. Warmuth
@Article{blumer.ea:occams-razor:87,
  author  = {Anselm Blumer and Andrzej Ehrenfeucht and David
             Haussler and Manfred K. Warmuth},
  title   = {Occam's Razor},
  journal = {Information Processing Letters},
  volume  = {24},
  pages   = {377--380},
  year    = {1987},
}
Process-Based Database Support for the Early Indicator Method, Christoph Breitner and J\"org Schl\"osser and R\"udiger Wirth
@InProceedings{breitner.ea:process-based-database:97,
  author   = {Christoph Breitner and J{\"{o}}rg Schl{\"{o}}sser and
              R{\"{u}}diger Wirth},
  title    = {Process-Based Database Support for the Early Indicator
              Method},
  pages    = {131},
  crossref = {heckerman.ea:proceedings-third:97},
}
SAMIA: a bottom-up learning method using a simulated annealing algorithm, Pierre Br\'ezellec and Henri Soldano
@InProceedings{brezellec.ea:samia-bottom-up:93,
  author    = {Pierre Br{\'e}zellec and Henri Soldano},
  title     = {{SAMIA}: a bottom-up learning method using a simulated
               annealing algorithm},
  booktitle = {Proceedings of the European conference on Machine
               Learning},
  series    = {Lecture Notes in Artificial Intelligence},
  pages     = {297--309},
  publisher = {Springer-Verlag},
  year      = {1993},
}
Direct Access of an ILP Algorithm to a Database Management System, P. Brockhausen and K. Morik
@InProceedings{brockhausen.ea:direct-access:96,
  author    = {P. Brockhausen and K. Morik},
  title     = {Direct Access of an {ILP} Algorithm to a Database
               Management System},
  booktitle = {Proceedings of the MLnet Familiarization Workshop on
               Data Mining with Inductive Logic Programming},
  pages     = {95--110},
  year      = {1996},
}
Applying classification algorithms in practice (preprint), C. E. Brodley and P. Smyth
@Article{brodley.ea:applying-classification:,
  author  = {C. E. Brodley and P. Smyth},
  title   = {Applying classification algorithms in practice
             (preprint)},
  journal = {Statistics and Computing},
  note    = {To appear},
  URL     = {http://yake.ecn.purdue.edu/~brodley/my-papers/publications.html},
}
Distributed Information Management in the National HPCC Software Exchange, Shirley Browne and Jack Dongarra and Geoffrey C. Fox and Ken Hawick and Ken Kennedy and Rick Stevens and Robert Olson and Tom Rowan
@InProceedings{browne.ea:distributed-information:95,
  author    = {Shirley Browne and Jack Dongarra and Geoffrey C. Fox
               and Ken Hawick and Ken Kennedy and Rick Stevens and
               Robert Olson and Tom Rowan},
  title     = {Distributed Information Management in the National
               {HPCC} Software Exchange},
  booktitle = {Proceedings of Supercomputing'95},
  publisher = {ACM/IEEE},
  address   = {San Diego, CA},
  month     = dec,
  year      = {1995},
  keywords  = {data mining, information management, information
               retrieval, HPCC, high performance computing, software
               repository},
  abstract  = {Simple html document on CD.},
}
MineSet: An Integrated System for Data Mining, Cliff Brunk and James Kelly and Ron Kohavi
@InProceedings{brunk.ea:mineset-integrated:97,
  author   = {Cliff Brunk and James Kelly and Ron Kohavi},
  title    = {{MineSet}: An Integrated System for Data Mining},
  pages    = {135},
  crossref = {heckerman.ea:proceedings-third:97},
}
A guide to the literature on learning probabilistic networks from data, W. Buntine
@Article{buntine:guide-to:96,
  author   = {W. Buntine},
  address  = {Thinkbank, 1678 Shattuck Ave, Suite 320, Berkeley, CA,
              94709},
  title    = {A guide to the literature on learning probabilistic
              networks from data},
  journal  = {IEEE Transactions on Knowledge and Data Engineering},
  year     = {1996},
  volume   = {8},
  number   = {2},
  pages    = {195--210},
  abstract = {This literature review discusses different methods
              under the general rubric of learning Bayesian networks
              from data, and includes some overlapping work on more
              general probabilistic networks. Connections are drawn
              between the statistical, neural network, and
              uncertainty communities, and between the different
              methodological communities, such as Bayesian,
              description length, and classical statistics. Basic
              concepts for learning and Bayesian networks are
              introduced and methods are then reviewed. Methods are
              discussed for learning parameters of a probabilistic
              network, for learning the structure, and for learning
              hidden variables. The presentation avoids formal
              definitions and theorems, as these are plentiful in the
              literature, and instead illustrates key concepts with
              simplified examples.},
  keywords = {EXPERT-SYSTEMS, BAYESIAN NETWORKS, GRAPHICAL MODELS,
              INDEPENDENCE, COMPLEXITY, HIDDEN VARIABLES, LEARNING,
              LEARNING STRUCTURE, PROBABILISTIC NETWORKS, KNOWLEDGE
              DISCOVERY},
}
Attribute-Oriented Induction in Relational Databases, Yandong Cai and Nick Cercone and Jiawei Han
@InCollection{cai.ea:attribute-oriented-induction:91,
  author    = {Yandong Cai and Nick Cercone and Jiawei Han},
  title     = {Attribute-Oriented Induction in Relational Databases},
  pages     = {213--228},
  editor    = {Gregory Piatetsky-Shapiro and William J. Frawley},
  booktitle = {Knowledge Discovery in Databases},
  publisher = {AAAI Press / The MIT Press},
  address   = {Menlo Park, California},
  edition   = {1st},
  year      = {1991},
}
An overview of machine learning, Jaime G. Carbonell and Ryszard S. Michalski and Tom M. Mitchell
@InCollection{carbonell.ea:overview-machine:83,
  title    = {An overview of machine learning},
  author   = {Jaime G. Carbonell and Ryszard S. Michalski and Tom M.
              Mitchell},
  crossref = {michalski.ea:machine-learning:83},
  pages    = {3--24},
}
Assessing Credit Card Applications Using Machine Learning, Chris Carter and Jason Catlett
@Article{carter.ea:assessing-credit:87,
  author  = {Chris Carter and Jason Catlett},
  title   = {Assessing Credit Card Applications Using Machine
             Learning},
  journal = {IEEE Expert},
  volume  = {2},
  number  = {3},
  pages   = {71--79},
  month   = {Fall},
  year    = {1987},
}
A fast, online generalization algorithm for knowledge discovery, C. L. Carter and H. J. Hamilton
@Article{carter.ea:fast-online:95,
  author   = {C. L. Carter and H. J. Hamilton},
  address  = {Univ Regina, Dept Comp Sci, Regina, Sk S4S 0A2,
              Canada},
  title    = {A fast, online generalization algorithm for knowledge
              discovery},
  journal  = {Applied Mathematics Letters},
  year     = {1995},
  volume   = {8},
  number   = {2},
  pages    = {5--11},
  abstract = {We present an O(n) algorithm for generalizing a
              database relation using concept hierarchies, where n is
              the number of tuples in the input relation. The
              algorithm is based on a variant of Han et al.'s
              attribute-oriented O(n log n) algorithm. Our algorithm
              is an on-line algorithm; fast performance is achieved
              because after encountering a tuple and generalizing it,
              the location of the appropriate counter to increment is
              calculated instead of searched for.},
  keywords = {KNOWLEDGE DISCOVERY, DATA MINING, DATABASES, CONCEPT
              HIERARCHIES, GENERALIZATION},
}
Megainduction: machine learning on very large databases, Jason Catlett
@PhdThesis{catlett:megainduction-machine:91,
  author = {Jason Catlett},
  title  = {Megainduction: machine learning on very large databases},
  year   = {1991},
  URL    = {http://www.research.att.com/orgs/ssr/people/catlett/phd.html},
}
IEEE Transactions on Knowledge and Data Engineering Special issue on Learning and Discovery in Databases, N. Cercone and M. Tsuchiya (guest editors) (Eds)
@Article{cercone.ea:ieee-transactions:93,
key = "cercone.ea:ieee-transactions:93",
title = "{IEEE} Transactions on Knowledge and Data Engineering
Special issue on Learning and Discovery in Databases",
journal = "IEEE Transactions on Knowledge and Data Engineering",
year = "1993",
volume = "5",
number = "6",
month = dec,
editor = "N. Cercone and M. Tsuchiya",
note = "Special issue on Learning and Discovery in Databases.
Guest editors: N. Cercone and M. Tsuchiya",
}
Proposal and Empirical Comparison of a Parallelizable Distance-Based Discretization Method, Jes\'us Cerquides and Ramon L\'opez de M\`antaras
@InProceedings{cerquides.ea:proposal-empirical:97,
title = "Proposal and Empirical Comparison of a Parallelizable
Distance-Based Discretization Method",
author = "Jes{\'u}s Cerquides and {L{\'o}pez de M{\`a}ntaras},
Ramon",
pages = "139",
crossref = "heckerman.ea:proceedings-third:97",
}
Experiments in Multistrategy Learning by Meta-Learning, Philip K. Chan and Salvatore J. Stolfo
@InProceedings{chan.ea:experiments-multistrategy:93,
author = "Philip K. Chan and Salvatore J. Stolfo",
title = "Experiments in Multistrategy Learning by
Meta-Learning",
booktitle = "Proceedings of the 2nd International Conference on
Information and Knowledge Management",
pages = "314--323",
address = "Washington, DC",
year = "1993",
}
Sharing Learned Models among Remote Database Partitions by Local Meta-Learning, Philip K. Chan and Salvatore J. Stolfo
@InProceedings{chan.ea:sharing-learned:96,
  author   = {Philip K. Chan and Salvatore J. Stolfo},
  title    = {Sharing Learned Models among Remote Database Partitions by Local Meta-Learning},
  pages    = {2},
  crossref = {simoudis.ea:proceedings-second:96},
}
Model uncertainty, data mining and statistical-inference, C. Chatfield
@Article{chatfield:model-uncertainty:95,
author = "C. Chatfield",
address = "Univ Bath, Sch Math Sci, Bath Ba2 7Ay, Avon, England",
title = "Model uncertainty, data mining and
statistical inference",
journal = "Journal of the Royal Statistical Society. Series A
(Statistics in Society)",
year = "1995",
volume = "158",
number = "3",
pages = "419--466",
abstract = "This paper takes a broad, pragmatic view of statistical
inference to include all aspects of model formulation.
The estimation of model parameters traditionally
assumes that a model has a prespecified known form and
takes no account of possible uncertainty regarding the
model structure. This implicitly assumes the existence
of a 'true' model, which many would regard as a
fiction. In practice model uncertainty is a fact of
life and likely to be more serious than other sources
of uncertainty which have received far more attention
from statisticians. This is true whether the model is
specified on subject-matter grounds or, as is
increasingly the case, when a model is formulated,
fitted and checked on the same data set in an
iterative, interactive way. Modern computing power
allows a large number of models to be considered and
data-dependent specification searches have become the
norm in many areas of statistics. The term data mining
may be used in this context when the analyst goes to
great lengths to obtain a good fit. This paper reviews
the effects of model uncertainty, such as too narrow
prediction intervals, and the non-trivial biases in
parameter estimates which can follow data-based
modelling. Ways of assessing and overcoming the effects
of model uncertainty are discussed, including the use
of simulation and resampling methods, a Bayesian model
averaging approach and collecting additional data
wherever possible. Perhaps the main aim of the paper is
to ensure that statisticians are aware of the problems
and start addressing the issues even if there is no
simple, general theoretical fix.",
keywords = "MOVING AVERAGE MODELS, BOOTSTRAP, VALIDATION,
PREDICTION, COMPLEXITY, SELECTION, CHOICE,
AUTOREGRESSIVE MODEL, BAYESIAN MODEL AVERAGING, DATA
MINING, FORECASTING, MODEL BUILDING, RESAMPLING,
STATISTICAL INFERENCE, SUBSET SELECTION",
}
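Large Scale Data Mining: Challenges and Responses, Jaturon Chattratichat and John Darlington and Moustafa Ghanem and Harald H\"uning and Yike Guo and Martin K\"ohler and Janjao Sutiwaraphun and Hing Wing To and Dan Yang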
@InProceedings{chattratichat.ea:large-scale:97,
title = "Large Scale Data Mining: Challenges and Responses",
author = "Jaturon Chattratichat and John Darlington and Moustafa
Ghanem and Harald H{\"{u}}ning and Yike Guo and Martin
K{\"{o}}hler and Janjao Sutiwaraphun and Hing Wing To
and Dan Yang",
pages = "143",
crossref = "heckerman.ea:proceedings-third:97",
}
Bayesian Classification (AUTOCLASS): Theory and Results, P. Cheeseman and J. Stutz
@InCollection{cheeseman.ea:bayesian-classification:95,
author = "P. Cheeseman and J. Stutz",
title = "Bayesian Classification ({AUTOCLASS}): Theory and
Results",
booktitle = "Advances in Knowledge Discovery and Data Mining",
editor = "U. M. Fayyad and G. Piatetsky-Shapiro and P. Smyth and
R. Uthurusamy",
publisher = "AAAI Press / The MIT Press",
year = "1995",
}
Efficient Data Mining for Path Traversal Patterns in Distributed Systems, M. S. Chen and J. S. Park and P. S. Yu
@InProceedings{chen.ea:efficient-path:96,
author = "M. S. Chen and J. S. Park and P. S. Yu",
title = "Efficient Data Mining for Path Traversal Patterns in
Distributed Systems",
booktitle = "16th International Conference on Distributed Computing
Systems (16th {ICDCS}'96)",
pages = "385--393",
publisher = "IEEE",
address = "Hong Kong",
month = may,
year = "1996",
keywords = "Distributed Objects",
note = "IBM T. J. Watson Research Center, USA",
annote = "The end page was marked as uncertain (393?) in the
source record and should be verified.",
}
Data mining: an overview from a database perspective, Ming-Syan Chen and Jiawei Han and Philip S. Yu
@Article{chen.ea:overview-database:96,
author = "Ming-Syan Chen and Jiawei Han and Philip S. Yu",
address = "Natl Taiwan Univ, Dept Elect Engn, Taipei 10764,
Taiwan Simon Fraser Univ, Sch Comp Sci, Burnaby, Bc V5A
1S6, Canada Ibm Corp, Thomas J Watson Res Ctr, Yorktown
Hts, Ny, 10598",
title = "Data mining: an overview from a database perspective",
journal = "IEEE Transactions on Knowledge and Data Engineering",
year = "1996",
month = dec,
volume = "8",
number = "6",
pages = "866--883",
abstract = "Mining information and knowledge from large databases
has been recognized by many researchers as a key
research topic in database systems and machine
learning, and by many industrial companies as an
important area with an opportunity of major revenues.
Researchers in many different fields have shown great
interest in data mining. Several emerging applications
in information providing services, such as data
warehousing and on-line services over the Internet,
also call for various data mining techniques to better
understand user behavior, to improve the service
provided, and to increase the business opportunities.
In response to such a demand, this article is to
provide a survey, from a database researcher's point of
view, on the data mining techniques developed recently.
A classification of the available data mining
techniques is provided, and a comparative study of such
techniques is presented.",
keywords = "data mining, knowledge discovery, association rules,
classification, data clustering, pattern matching
algorithms, data generalization and characterization,
data cubes, multiple-dimensional databases",
}
A parallel computing approach to creating engineering concept spaces for semantic retrieval - the illinois digital library initiative project, H. C. Chen and B. Schatz and T. Ng and J. Martinez and A. Kirchhoff and C. T. Lin
@Article{chen.ea:parallel-computing:96,
author = "H. C. Chen and B. Schatz and T. Ng and J. Martinez and
A. Kirchhoff and C. T. Lin",
address = "Univ Arizona, Karl Eller Grad Sch Management, Mis
Dept, Mcclelland Hall, Tucson, Az, 85721 Univ Illinois,
Natl Ctr Supercomp Applicat, Beckman Inst, Urbana, Il,
61801 Univ Arizona, Sci \& Engn Lib, Tucson, Az, 85712
Univ Arizona, Dept Lib \& Informat Studies, Tucson, Az,
85712",
title = "A parallel computing approach to creating engineering
concept spaces for semantic retrieval - the {Illinois}
{Digital Library Initiative} project",
journal = "IEEE Transactions on Pattern Analysis and Machine
Intelligence",
year = "1996",
volume = "18",
number = "8",
pages = "771--782",
abstract = "This research presents preliminary results generated
from the semantic retrieval research component of the
Illinois Digital Library Initiative (DLI) project.
Using a variation of the automatic thesaurus generation
techniques, to which we refer as the concept space
approach, we aimed to create graphs of domain-specific
concepts (terms) and their weighted co-occurrence
relationships for all major engineering domains.
Merging these concept spaces and providing traversal
paths across different concept spaces could potentially
help alleviate the vocabulary (difference) problem
evident in large-scale information retrieval. We have
experimented previously with such a technique for a
smaller molecular biology domain (Worm Community
System, with 10+ MBs of document collection) with
encouraging results. In order to address the
scalability issue related to large-scale information
retrieval and analysis for the current Illinois DLI
project, we recently conducted experiments using the
concept space approach on parallel supercomputers. Our
test collection included 2+ GBs of computer science and
electrical engineering abstracts extracted from the
INSPEC database. The concept space approach called for
extensive textual and statistical analysis (a form of
knowledge discovery) based on automatic indexing and
cooccurrence analysis algorithms, both previously
tested in the biology domain. Initial testing results
using a 512-node CM-5 and a 16-processor SGI Power
Challenge were promising. Power Challenge was later
selected to create a comprehensive computer engineering
concept space of about 270,000 terms and 4,000,000+
links using 24.5 hours of CPU time. Our system
evaluation involving 12 knowledgeable subjects revealed
that the automatically-created computer engineering
concept space generated significantly higher concept
recall than the human-generated INSPEC computer
engineering thesaurus. However, the INSPEC was more
precise than the automatic concept space. Our current
work mainly involves creating concept spaces for other
major engineering domains and developing robust graph
matching and traversal algorithms for cross-domain,
concept-based retrieval. Future work also will include
generating individualized concept spaces for assisting
user-specific concept-based information retrieval.",
keywords = "INFORMATION-RETRIEVAL, DOCUMENT-RETRIEVAL, CONNECTION
MACHINE, NEURAL NETWORKS, SYSTEMS, SEARCH, PERFORMANCE,
DATABASES, DESIGN, MODEL, SEMANTIC RETRIEVAL, CONCEPT
SPACE, CONCEPT ASSOCIATION, PARALLEL COMPUTING, DIGITAL
LIBRARY",
}
Semantics-Based Information Management and Retrieval: A Knowledge Discovery Approach, H. Chen and K. Lynch
@Article{chen.ea:semantics-based-information:92,
author = "H. Chen and K. Lynch",
title = "Semantics-Based Information Management and Retrieval:
{A} Knowledge Discovery Approach",
journal = "IEEE Transactions on Systems, Man, and Cybernetics",
publisher = "IEEE",
year = "1992",
note = "Forthcoming",
abstract = "We report results of a study that involved the
creation of knowledge bases from large, operational
textual databases. Two East-bloc computing knowledge
bases, both based on semantic network structure, were
created automatically using two statistical algorithms.
With the help of four East-bloc computing experts, we
evaluated the two knowledge bases in detail in a
concept-association experiment based on recall and
recognition tests. In our experiment, one of the
knowledge bases that exhibited the asymmetric link
property out-performed all four experts in recalling
relevant concepts in East-bloc computing. The knowledge
base, which contained about 20,000 concepts (nodes) and
280,000 weighted relationships (links), was
incorporated as a thesaurus-like component into an
intelligent retrieval system. The system allowed users
to perform semantics-based information management and
information retrieval via interactive, conceptual
relevance feedback. Current research efforts include
development of a meta knowledge base and design of
semantic network and neural network based inferencing
algorithms.",
}
Growing Simpler Decision Trees to Facilitate Knowledge Discovery, Kevin J. Cherkauer and Jude W. Shavlik
@InProceedings{cherkauer.ea:growing-simpler:96,
  author   = {Kevin J. Cherkauer and Jude W. Shavlik},
  title    = {Growing Simpler Decision Trees to Facilitate Knowledge Discovery},
  pages    = {315},
  crossref = {simoudis.ea:proceedings-second:96},
}
Efficient mining of association rules in distributed databases, D. W. Cheung and V. T. Ng and A. W. Fu and Y. J. Fu
@Article{cheung.ea:efficient-association:96,
author = "D. W. Cheung and V. T. Ng and A. W. Fu and Y. J. Fu",
address = "Univ Hong Kong, Dept Comp Sci, Hong Kong, Hong Kong
Hong Kong Polytech Univ, Dept Comp, Hong Kong, Hong
Kong Chinese Univ Hong Kong, Dept Comp Sci \& Engn,
Hong Kong, Hong Kong Simon Fraser Univ, Sch Comp Sci,
Burnaby, Bc V5A 1S6, Canada",
title = "Efficient mining of association rules in distributed
databases",
journal = "IEEE Transactions on Knowledge and Data Engineering",
year = "1996",
month = dec,
volume = "8",
number = "6",
pages = "911--922",
abstract = "Many sequential algorithms have been proposed for
mining of association rules. However, very little work
has been done in mining association rules in
distributed databases. A direct application of
sequential algorithms to distributed databases is not
effective, because it requires a large amount of
communication overhead. In this study, an efficient
algorithm, DMA, is proposed. It generates a small
number of candidate sets and requires only O(n)
messages for support count exchange for each candidate
set, where n is the number of sites in a distributed
database. The algorithm has been implemented on an
experimental test bed and its performance is studied.
The results show that DMA has superior performance when
comparing with the direct application of a popular
sequential algorithm in distributed databases.",
keywords = "data mining, knowledge discovery, distributed data
mining, association rule, distributed database,
distributed algorithm, partitioned database",
}
Maintenance of Discovered Knowledge: A Case in Multi-Level Association Rules, David W. Cheung and Vincent T. Ng and Benjamin W. Tam
@InProceedings{cheung.ea:maintenance-discovered:96,
  author   = {David W. Cheung and Vincent T. Ng and Benjamin W. Tam},
  title    = {Maintenance of Discovered Knowledge: {A} Case in Multi-Level Association Rules},
  pages    = {307},
  crossref = {simoudis.ea:proceedings-second:96},
}
Knowledge discovery in databases: a rule-based attribute-oriented approach, D. W.-L. Cheung and A. W.-C. Fu and J. Han
@InProceedings{cheung.ea:rule-based-attribute-oriented:94a,
key_modifier = "a",
author = "D. W.-L. Cheung and A. W.-C. Fu and J. Han",
title = "Knowledge discovery in databases: a rule-based
attribute-oriented approach",
pages = "164--173",
editor = "Zbigniew W. Ra{\'s} and Maria Zemankova",
booktitle = "Proceedings of the 8th International Symposium on
Methodologies for Intelligent Systems",
month = oct,
series = "LNAI",
volume = "869",
publisher = "Springer",
address = "Berlin",
year = "1994",
}
Knowledge discovery in databases: a rule-based attribute-oriented approach, D. W.-L. Cheung and A. W.-C. Fu and J. Han
@Article{cheung.ea:rule-based-attribute-oriented:94b,
key_modifier = "b",
author = "D. W.-L. Cheung and A. W.-C. Fu and J. Han",
title = "Knowledge discovery in databases: a rule-based
attribute-oriented approach",
journal = "Lecture Notes in Computer Science",
volume = "869",
pages = "164--173",
year = "1994",
ISSN = "0302-9743",
}
Using Artificial Intelligence Planning to Automate Science Data Analysis for Large Image Databases, Steve Chien and Forest Fisher and Helen Mortensen and Edisanter Lo and Ronald Greeley
@InProceedings{chien.ea:using-artificial:97,
title = "Using Artificial Intelligence Planning to Automate
Science Data Analysis for Large Image Databases",
author = "Steve Chien and Forest Fisher and Helen Mortensen
and Edisanter Lo and Ronald Greeley",
pages = "147",
crossref = "heckerman.ea:proceedings-third:97",
}
A framework for query optimization to support data mining, R. Sunil Choenni and Arno P. J. M. Siebes
@TechReport{choenni.ea:framework-query:96,
author = "R. Sunil Choenni and Arno P. J. M. Siebes",
title = "A framework for query optimization to support data
mining",
institution = "Centrum voor Wiskunde en Informatica (CWI)",
number = "CS-R9637",
ISSN = "0169-118X",
month = oct # " 31",
year = "1996",
keywords = "data mining systems, search strategies, query
optimization, physical database design.",
URL = "ftp://ftp.cwi.nl/pub/CWIreports/AA/CS-R9637.ps.Z",
abstract = "In order to extract knowledge from databases, data
mining algorithms heavily query the databases.
Inefficient processing of these queries will inevitably
have its impact on the performance of these algorithms,
making them less valuable. In this paper, we describe
an optimization framework for an efficient processing
of queries generated by different data mining
algorithms. In this framework, we show how to take
advantage of the physical organization of the database,
the operators and the control structures used in an
algorithm. Finally, we discuss how our framework fits
into conventional query optimization frameworks.",
note = "AA (Department of Algorithmics and Architecture)",
annote = "originally contained the following fields and values -
booktitle, 105 note, CS-R9637",
}
On multi-query optimization, R. (Sunil) Choenni and Martin L. Kersten and Johan F. P. van den Akker and Amani Saad
@TechReport{choenni.ea:on-multi-query:96,
author = "R. Sunil Choenni and Martin L. Kersten and Johan F.
P. van den Akker and Amani Saad",
title = "On multi-query optimization",
pages = "19",
institution = "Centrum voor Wiskunde en Informatica (CWI)",
number = "CS-R9638",
ISSN = "0169-118X",
month = oct # " 31",
year = "1996",
keywords = "multi-query optimization, architectures, exploiting
interdependencies between queries.",
URL = "ftp://ftp.cwi.nl/pub/CWIreports/AA/CS-R9638.ps.Z",
abstract = "In some key database applications, such as data
mining, a sequence of interdependent queries may be
posed simultaneously to the DBMS. The optimization of
such sequences is called multi-query optimization, and
it attempts to exploit these dependencies in the
derivation of a query evaluation plan (qep). Although
it has been observed and demonstrated by several
researchers that exploitation of dependencies speed up
the query processing, limited research has been
reported how to benefit from multi-query optimization,
taking the capabilities of existing query optimizers
into account. This is exactly the topic of this paper.
Since existing optimizers are able to optimize queries
in which a restricted number of basic operations
appears, e.g., number of joins is limited to 10, and
the optimization of a query is relatively expensive, we
attempt to profit from multi query optimization under
the condition that queries are passed only once and
separately to the optimizer. We propose a two-step
optimization procedure. In the first step, we
determine, on the basis of the dependencies between
queries, in which order they should be specified and
what results should be stored. In the second step, each
query is passed separately to an optimizer.",
note = "AA (Department of Algorithmics and Architecture)",
annote = "originally contained the following fields and values -
note, CS-R9638, booktitle, 143",
}
Using a Hybrid Neural/Expert System for Data Base Mining in Market Survey Data, Victor Ciesielski and Gregory Palstra
@InProceedings{ciesielski.ea:using-hybrid:96,
  author   = {Victor Ciesielski and Gregory Palstra},
  title    = {Using a Hybrid Neural/Expert System for Data Base Mining in Market Survey Data},
  pages    = {38},
  crossref = {simoudis.ea:proceedings-second:96},
}
Classification Problem Solving, W. J. Clancey
@InProceedings{clancey:classification-problem:84,
  author    = {W. J. Clancey},
  title     = {Classification Problem Solving},
  editor    = {R. J. Brachman},
  booktitle = {Proceedings of the National Conference on Artificial Intelligence},
  publisher = {William Kaufmann},
  address   = {Austin, Texas},
  pages     = {49--55},
  month     = aug,
  year      = {1984},
}
The CN2 Induction Algorithm, Peter Clark and Tim Niblett
@Article{clark.ea:cn2-induction:89,
  author  = {Peter Clark and Tim Niblett},
  title   = {The {CN2} Induction Algorithm},
  journal = {Machine Learning},
  volume  = {3},
  pages   = {261--283},
  year    = {1989},
}
Knowledge Representation in Machine Learning, Peter Clark
@InCollection{clark:representation-machine:89,
  author    = {Peter Clark},
  title     = {Knowledge Representation in Machine Learning},
  editor    = {Yves Kodratoff and Alan Hutchinson},
  booktitle = {Machine and Human Learning, advances in European Research},
  publisher = {Michael Horwood},
  address   = {London},
  pages     = {35--49},
  year      = {1989},
}
Security and Privacy Implications of Data Mining, Chris Clifton and Don Marks
@InProceedings{clifton.ea:security-privacy:96,
  author        = {Chris Clifton and Don Marks},
  title         = {Security and Privacy Implications of Data Mining},
  booktitle     = {Workshop on Data Mining and Knowledge Discovery},
  organization  = {ACM SIGMOD},
  publisher     = {University of British Columbia Department of Computer Science},
  address       = {Montreal, Canada},
  number        = {96-08},
  pages         = {15--19},
  month         = jun # " 2",
  year          = {1996},
  URL           = {ftp://ftp.fas.sfu.ca/pub/cs/han/dmkd96/p15.ps},
  contributedby = {clifton(at)mitre.org},
}
Overfitting Explained, P. R. Cohen and D. Jensen
@InProceedings{cohen.ea:overfitting-explained:97,
author = "P. R. Cohen and D. Jensen",
title = "Overfitting Explained",
booktitle = "Preliminary Papers of the Sixth International Workshop
on Artificial Intelligence and Statistics",
year = "1997",
month = jan,
pages = "115--122",
abstract = "Overfitting arises when model components are evaluated
against the wrong reference distribution. Most modeling
algorithms iteratively find the best of several
components and then test whether this component is good
enough to add to the model. We show that for
independently distributed random variables, the
reference distribution for any one variable
underestimates the reference distribution for the
highest-valued variable; thus variate values will
appear significant when they are not, and model
components will be added when they should not be added.
We relate this problem to the well-known statistical
theory of multiple comparisons or simultaneous
inference.",
abstract_url = "http://eksl-www.cs.umass.edu/~jensen/papers/ais97b.html",
URL = "http://www-eksl.cs.umass.edu/papers/cohen-ais96b.ps",
}
The Role of Knowledge Mining in the Development and Evolution of New Applications, David Cohen and L. Berke and P. Bloom and D. Cohen and D. Tsur
@InProceedings{cohen.ea:role-development:94,
  author    = {David Cohen and L. Berke and P. Bloom and D. Cohen and D. Tsur},
  title     = {The Role of Knowledge Mining in the Development and Evolution of New Applications},
  editor    = {Ahmed K. Elmagarmid and Erich Neuhold},
  booktitle = {Proceedings of the 10th International Conference on Data Engineering},
  publisher = {IEEE Computer Society Press},
  address   = {Houston, TX},
  pages     = {166--167},
  month     = feb,
  year      = {1994},
}
Knowledge in context: a strategy for expert system maintenance, P. Compton and R. Jansen
@InProceedings{compton.ea:context-strategy:88,
  author    = {P. Compton and R. Jansen},
  title     = {Knowledge in context: a strategy for expert system maintenance},
  booktitle = {Proceedings of the 2nd {A}ustralian Joint Artificial Intelligence conference},
  series    = {Lecture Notes in Artificial Intelligence},
  volume    = {406},
  publisher = {Springer},
  address   = {Adelaide},
  pages     = {292--306},
  year      = {1988},
}
Knowledge discovery in molecular databases, D. Conklin and S. Fortier and J. Glasgow
@Article{conklin.ea:molecular:93,
author = "D. Conklin and S. Fortier and J. Glasgow",
address = "Queens Univ, Dept Comp \& Informat Sci, Kingston K7L
3N6, On, Canada Queens Univ, Dept Chem, Kingston K7L
3N6, On, Canada",
title = "Knowledge discovery in molecular databases",
journal = "IEEE Transactions on Knowledge and Data Engineering",
year = "1993",
volume = "5",
number = "6",
pages = "985--987",
abstract = "This paper describes an approach to knowledge
discovery in complex molecular databases. The machine
learning paradigm used is structured concept formation,
in which objects described in terms of components and
their interrelationships are clustered and organized in
a knowledge base. Symbolic images are used to represent
classes of structured objects. A discovered molecular
knowledge base is successfully used in the
interpretation of a high resolution electron density
map.",
keywords = "PROTEIN, CASE-BASED REASONING, CHEMICAL INFORMATION
RETRIEVAL, CONCEPTUAL CLUSTERING, DESCRIPTION LOGICS,
INDEXING, RELATIONAL MODELS, SCENE ANALYSIS, SPATIAL
CONCEPTS, SPATIAL REASONING, STRUCTURED CONCEPT
FORMATION",
}
Machine discovery of protein motifs, D. Conklin
@Article{conklin:machine-protein:95,
author = "D. Conklin",
address = "Zymogenet Inc, 1201 Eastlake Ave E, Seattle, Wa,
98102",
title = "Machine discovery of protein motifs",
journal = "Machine Learning",
year = "1995",
volume = "21",
number = "1--2",
pages = "125--150",
abstract = "The investigation of relations between protein
tertiary structure and amino acid sequence is a topic
of tremendous importance in molecular biology. The
automated discovery of recurrent patterns of structure
and sequence is an essential part of this
investigation. These patterns, known as protein motifs,
are abstractions of fragments drawn from proteins of
known sequence and tertiary structure. This paper has
two objectives. The first is to introduce and define
protein motifs, and provide a survey of previous
research on protein motif discovery. The second is to
present and apply a novel approach to protein motif
representation and discovery, which is based on a
spatial description logic and the symbolic machine
learning paradigm of structured concept formation. A
large database of protein fragments is processed using
this approach, and several interesting and significant
protein motifs are discovered.",
keywords = "SECONDARY STRUCTURE, SEQUENCE PATTERNS, PREDICTIVE
POWER, IDENTIFICATION, RECOGNITION, GENERATION,
DEFINITION, TEMPLATES, SETS, PROTEIN TERTIARY
STRUCTURE, MACHINE DISCOVERY, RELATIONAL LEARNING,
KNOWLEDGE REPRESENTATION, DESCRIPTION LOGICS,
INFORMATION RETRIEVAL, KNOWLEDGE DISCOVERY IN
DATABASES",
}
Scalable discovery of informative structural concepts using domain knowledge, D. J. Cook and L. B. Holder and S. Djoko
@Article{cook.ea:scalable-informative:96,
author = "D. J. Cook and L. B. Holder and S. Djoko",
address = "Univ Texas, Dept Comp Sci \& Engn, Arlington, Tx,
76019 Bell No Res, Sci Staff, Richardson, Tx",
title = "Scalable discovery of informative structural concepts
using domain knowledge",
journal = "IEEE Expert: Intelligent Systems \& Their
Applications",
year = "1996",
volume = "11",
number = "5",
pages = "59--68",
}
Substructure Discovery Using Minimum Description Length and Background Knowledge, D. J. Cook and L. B. Holder
@Article{cook.ea:substructure-using:94,
author = "D. J. Cook and L. B. Holder",
title = "Substructure Discovery Using Minimum Description
Length and Background Knowledge",
journal = "Journal of Artificial Intelligence Research",
year = "1994",
volume = "1",
pages = "231--255",
abstract = "The ability to identify interesting and repetitive
substructures is an essential component to discovering
knowledge in structural data. We describe a new version
of our SUBDUE substructure discovery system based on
the minimum description length principle. The SUBDUE
system discovers substructures that compress the
original data and represent structural concepts in the
data. By replacing previously-discovered substructures
in the data, multiple passes of SUBDUE produce a
hierarchical description of the structural regularities
in the data. SUBDUE uses a computationally-bounded
inexact graph match that identifies similar, but not
identical, instances of a substructure and finds an
approximate measure of closeness of two substructures
when under computational constraints. In addition to
the minimum description length principle, other
background knowledge can be used by SUBDUE to guide the
search towards more appropriate substructures.
Experiments in a variety of domains demonstrate
SUBDUE's ability to find substructures capable of
compressing the original data and to discover
structural concepts important to the domain.",
annote = "The SUBDUE system discovers substructures that
compress the original data and represent structural
concepts in the data. By replacing
previously-discovered substructures in the data,
multiple passes of SUBDUE produce a hierarchical
description of the structural regularities in the
data.",
URL = "gopher://P.GP.CS.CMU.EDU:70/00/volume1/cook94a.ps",
}
What has Mill to Say About Data Mining ?, Tremaine A. O. Cornish and Anthony D. Elliman
@InProceedings{cornish.ea:what-has:95,
author = "Tremaine A. O. Cornish and Anthony D. Elliman",
title = "What has {Mill} to Say About Data Mining?",
pages = "347--353",
booktitle = "Proceedings of the Eleventh Conference on Artificial
Intelligence for Applications",
month = feb,
publisher = "IEEE Computer Society Press",
address = "Los Alamitos",
year = "1995",
annote = "The conference day range was truncated in the source
record (20--2) and has been dropped from the month field;
restore it once verified.",
}
Historical perspectives on information-science, T. A. O. Cornish
@Article{cornish:historical-perspectives:96,
author = "T. A. O. Cornish",
address = "Brunel Univ, Dept Comp Sci \& Informat Syst, Uxbridge
Ub8 3Ph, Middx, England",
title = "Historical perspectives on information-science",
journal = "Systems Research and Information Science",
year = "1996",
volume = "7",
number = "2",
pages = "105--116",
abstract = "There is a general attitude in science and
particularly computer science, that if something is
more than five years old, then we have nothing to learn
from it. This paper seeks first to destroy the basis of
this myth with reference to areas of current research
which are still striving to live up to visions set many
years ago. Secondly to look at an area of research,
Knowledge Discovery in Databases and demonstrate that
it too has a great deal to learn from the distant past,
which has been all but overlooked.",
keywords = "KNOWLEDGE DISCOVERY, SYSTEMATIC, SCIENTIFIC, DATA
MINING, HISTORICAL, INFORMATION, SYSTEMS",
}
Data Mining of Multi-dimensional Remotely Sensed Images, Robert F. Cromp and William J. Campbell
@InProceedings{cromp.ea:multi-dimensional-remotely:93,
  author    = {Robert F. Cromp and William J. Campbell},
  title     = {Data Mining of Multi-dimensional Remotely Sensed Images},
  editor    = {Bharat Bhargava and Timothy Finin and Yelena Yesha},
  booktitle = {Proceedings of the 2nd International Conference on Information and Knowledge Management},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
  pages     = {471--480},
  month     = nov,
  year      = {1993},
}
Knowledge Discovery in Databases: Exploiting Knowledge-Level Redescription, J. Cupit and N. Shadbolt
@Article{cupit.ea:exploiting-knowledge-level:96a,
key_modifier = "a",
author = "J. Cupit and N. Shadbolt",
title = "Knowledge Discovery in Databases: Exploiting
Knowledge-Level Redescription",
journal = "Lecture Notes in Computer Science",
volume = "1076",
pages = "245--261",
year = "1996",
ISSN = "0302-9743",
}
Knowledge Discovery in Databases: Exploiting Knowledge-Level Redescription, James Cupit and Nigel Shadbolt
@InProceedings{cupit.ea:exploiting-knowledge-level:96b,
  key_modifier = {b},
  author       = {James Cupit and Nigel Shadbolt},
  title        = {Knowledge Discovery in Databases: Exploiting
    Knowledge-Level Redescription},
  editor       = {Nigel Shadbolt and Kieron O'Hara and Guus Schreiber},
  booktitle    = {Proceedings of the Ninth European Knowledge
    Acquisition Workshop ({EKAW}-96)},
  series       = {LNAI},
  volume       = {1076},
  pages        = {245--261},
  publisher    = {Springer},
  address      = {Berlin},
  month        = may # "~14--17",
  year         = {1996},
}
Mining Knowledge in Noisy Audio Data, Andrzej Czyzewski
@InProceedings{czyzewski:noisy-audio:96,
  author   = {Andrzej Czyzewski},
  title    = {Mining Knowledge in Noisy Audio Data},
  pages    = {220},
  crossref = {simoudis.ea:proceedings-second:96},
}
Distributed learning: An agent-based approach to data-mining, Winton Davies and Peter Edwards
@InProceedings{davies.ea:distributed-learning:95,
  author    = {Winton Davies and Peter Edwards},
  title     = {Distributed learning: {A}n agent-based approach to
    data-mining},
  editor    = {Diana Gordon},
  booktitle = {Working Notes of the ICML '95 Workshop on Agents that
    Learn from Other Agents},
  address   = {Tahoe City, CA},
  year      = {1995},
}
Knowledge discovery in an infrared database, B. J. Debska and B. Guzowskaswider
@Article{debska.ea:infrared-database:97,
  author   = {B. J. Debska and B. Guzowskaswider},
  title    = {Knowledge discovery in an infrared database},
  journal  = {Computers \& Chemistry},
  volume   = {21},
  number   = {1},
  pages    = {51--59},
  year     = {1997},
  address  = {Rzeszow Univ Technol, Dept Comp Chem, 6 Powstancow
    Warszawy Av, Pl-35041 Rzeszow, Poland},
  keywords = {SYSTEM, STRUCTURE IDENTIFICATION, SPECTROSCOPY
    METHODS, KNOWLEDGE DISCOVERY, RULE KNOWLEDGEBASE},
  abstract = {The paper describes a process of knowledge acquisition
    in the collection of infrared spectra (infrared
    database). In fact it is a strategy for the automated
    generation of correlation tables, i.e. correlations
    between specific molecular subunits (substructures,
    chemical groups) and their absorption frequencies. The
    data in the tables are subsequently converted
    automatically into rules that can be used to infer the
    existence of molecular substructures from the IR
    spectrum of an analysed compound. Copyright (C) 1996
    Elsevier Science Ltd},
}
Technology Overview: A Report on Data Mining, K. Decker and S. Focardi
% NOTE(review): institution inferred from the URL host (cscs.ch); confirm
% against the report itself.
@TechReport{decker.ea:technology-overview:94,
  author      = {K. Decker and S. Focardi},
  title       = {Technology Overview: {A} Report on Data Mining},
  institution = {Swiss Scientific Computing Center (CSCS)},
  month       = feb,
  year        = {1994},
  URL         = {http://www.cscs.ch/Official/PubTR95.html},
}
Mining Multivariate Time-Series Sensor Data to Discover Behavior Envelopes, Dennis DeCoste
@InProceedings{decoste:multivariate-time-series:97,
  author   = {Dennis DeCoste},
  title    = {Mining Multivariate Time-Series Sensor Data to
    Discover Behavior Envelopes},
  pages    = {151},
  crossref = {heckerman.ea:proceedings-third:97},
}
Data Mining --- There's gold in those hills of data, E. X. Dejesus
@Article{dejesus:theres-gold:95,
  author  = {E. X. Dejesus},
  title   = {Data Mining --- There's gold in those hills of data},
  journal = {Byte},
  volume  = {20},
  number  = {10},
  pages   = {81},
  year    = {1995},
  address = {Univ Bath, Sch Math Sci, Bath Ba2 7Ay, Avon, England},
}
Clausal discovery, L. De Raedt and L. Dehaspe
@Article{deraedt.ea:clausal:97,
  author   = {L. {De Raedt} and L. Dehaspe},
  title    = {Clausal discovery},
  journal  = {Machine Learning},
  volume   = {26},
  number   = {2--3},
  pages    = {99--146},
  year     = {1997},
  address  = {Katholieke Univ Leuven, Dept Comp Sci, Celestijnenlaan
    200A, B-3001 Heverlee, Belgium},
  keywords = {inductive logic programming, knowledge discovery in
    databases, data mining, learning, induction, semantics
    for induction, logic of induction, parallel learning},
  abstract = {The clausal discovery engine CLAUDIEN is presented.
    CLAUDIEN is an inductive logic programming engine that
    fits in the descriptive data mining paradigm. CLAUDIEN
    addresses characteristic induction from
    interpretations, a task which is related to existing
    formalisations of induction in logic. In characteristic
    induction from interpretations, the regularities are
    represented by clausal theories, and the data using
    Herbrand interpretations. Because CLAUDIEN uses clausal
    logic to represent hypotheses, the regularities induced
    typically involve multiple relations or predicates.
    CLAUDIEN also employs a novel declarative bias
    mechanism to define the set of clauses that may appear
    in a hypothesis.},
}
An Interactive Visualization Environment for Data Exploration, Mark Derthick and John Kolojejchick and Steven F. Roth
@InProceedings{derthick.ea:interactive-environment:97,
  author   = {Mark Derthick and John Kolojejchick and Steven F. Roth},
  title    = {An Interactive Visualization Environment for Data
    Exploration},
  pages    = {2},
  crossref = {heckerman.ea:proceedings-third:97},
}
Abstract-Driven Pattern Discovery in Databases, V. Dhar and A. Tuzhilin
@Article{dhar.ea:abstract-driven-pattern:93,
  author  = {V. Dhar and A. Tuzhilin},
  title   = {Abstract-Driven Pattern Discovery in Databases},
  journal = {IEEE Transactions on Knowledge and Data Engineering},
  volume  = {5},
  number  = {6},
  pages   = {926--938},
  month   = dec,
  year    = {1993},
}
A comparative review of selected methods for learning from examples, Thomas G. Dietterich and Ryszard S. Michalski
@InCollection{dietterich.ea:comparative-review:83,
  author   = {Thomas G. Dietterich and Ryszard S. Michalski},
  title    = {A comparative review of selected methods for learning
    from examples},
  pages    = {41--81},
  crossref = {michalski.ea:machine-learning:83},
}
A comparison of ID3 and backpropagation for English text-to-speech mapping. (Preprint), T. G. Dietterich and H. Hild and G. Bakiri
% NOTE(review): journal inferred from the "mlj-" prefix of the preprint
% file name; confirm and add volume/number/pages of the published version.
@Article{dietterich.ea:comparison-id3:95,
  author  = {T. G. Dietterich and H. Hild and G. Bakiri},
  title   = {A comparison of {ID3} and backpropagation for {English}
    text-to-speech mapping},
  journal = {Machine Learning},
  year    = {1995},
  note    = {Preprint},
  URL     = {ftp://ftp.cs.orst.edu/users/t/tgd/papers/mlj-nettalk.ps.gz},
}
Efficient Specific-to-General Rule Induction, Pedro Domingos
@InProceedings{domingos:efficient-specific-to-general:96,
  author   = {Pedro Domingos},
  title    = {Efficient Specific-to-General Rule Induction},
  pages    = {319},
  crossref = {simoudis.ea:proceedings-second:96},
}
Linear-Time Rule Induction, Pedro Domingos
@InProceedings{domingos:linear-time-rule:96,
  author   = {Pedro Domingos},
  title    = {Linear-Time Rule Induction},
  pages    = {96},
  crossref = {simoudis.ea:proceedings-second:96},
}
Why Does Bagging Work? A Bayesian Account and its Implications, Pedro Domingos
@InProceedings{domingos:why-does:97,
  author   = {Pedro Domingos},
  title    = {Why Does Bagging Work? {A} Bayesian Account and its
    Implications},
  pages    = {155},
  crossref = {heckerman.ea:proceedings-third:97},
}
Chemistry facing the phenomena of data mining, idea mining and knowledge recovery, H. Dou
@Article{dou:chemistry-facing:96,
  author   = {H. Dou},
  title    = {Chemistry facing the phenomena of data mining, idea
    mining and knowledge recovery},
  journal  = {Analusis},
  volume   = {24},
  number   = {2},
  pages    = {M8--M12},
  year     = {1996},
  address  = {Univ Aix Marseille 3, Crrm, Ctr St Jerome, F-13397
    Marseille 20, France},
  keywords = {LAW},
}
Use of artificial-intelligence techniques for the description of processes in ni/al multilayers, M. Drobnic and M. Mozetic and T. Mozetic and M. Gams
@Article{drobnic.ea:use-artificial-intelligence:96,
  author   = {M. Drobnic and M. Mozetic and T. Mozetic and M. Gams},
  title    = {Use of artificial-intelligence techniques for the
    description of processes in {Ni/Al} multilayers},
  journal  = {Surface \& Coatings Technology},
  volume   = {84},
  number   = {1--3},
  pages    = {491--494},
  year     = {1996},
  address  = {Jozef Stefan Inst, Jamova 39, Ljubljana 1001, Slovenia;
    Inst Surface Engn \& Optoelect, Ljubljana 1001,
    Slovenia; High Med Coll, Ljubljana 1001, Slovenia},
  keywords = {INTERFACE, MULTILAYER STRUCTURES, KNOWLEDGE
    DISCOVERY},
  abstract = {Knowledge discovery is a novel research area in the
    field of artificial intelligence. Its aim is to
    discover empirical laws that govern the behavior of
    complex systems using measurements of system variables.
    In this paper a brief description of the GOLDHORN
    knowledge discovery system is presented. GOLDHORN
    discovers differential equations and has features for
    handling noisy data, including some digital filters. In
    the present case, this method was used to describe
    analytically atomic migration in thin layers. A
    multilayer structure of nickel and aluminum was
    deposited on a copper substrate using the triode
    sputtering system and hollow cathode CVD plasma
    deposition. The composition of the elements in the
    deposited layers was determined by Auger electron
    spectroscopy (AES). The structure was then annealed for
    different times. After annealing, the samples were
    analyzed again. The AES data were then analyzed by the
    GOLDHORN software package in order to obtain an
    analytical description of atomic migration as a
    function of the relative concentration of elements in a
    layer. The analysis shows that the rate of migration of
    Al in Ni depends on the relative concentrations of the
    elements. Different phases appeared to be indicated via
    the changes in the slope of the curve. Our results show
    that knowledge discovery is a very useful tool for
    analyzing complex processes such as atomic migration in
    multilayer systems.},
}
Fast Committee Machines for Regression and Classification, Harris Drucker
@InProceedings{drucker:fast-committee:97,
  author   = {Harris Drucker},
  title    = {Fast Committee Machines for Regression and
    Classification},
  pages    = {159},
  crossref = {heckerman.ea:proceedings-third:97},
}
Cluster analysis: a survey, Benjamin S. Duran and Patrick L. Odell
@Book{duran.ea:cluster-analysis:74,
  author    = {Benjamin S. Duran and Patrick L. Odell},
  title     = {Cluster analysis: a survey},
  series    = {Lecture Notes in Economics and Mathematical Systems},
  volume    = {100},
  publisher = {Springer-Verlag},
  year      = {1974},
}
Discovering dynamics, S. D\v{z}eroski and L. Todorovski
@InProceedings{dzeroski.ea:discovering-dynamics:93,
  author    = {S. D\v{z}eroski and L. Todorovski},
  title     = {Discovering dynamics},
  booktitle = {Proceedings of the AAAI-93 Workshop on Knowledge
    Discovery in Databases},
  pages     = {125--137},
  publisher = {AAAI Press},
  year      = {1993},
}
Inductive logic programming and knowledge discovery in databases, S. D\v{z}eroski
@InCollection{dzeroski:inductive-logic:95,
  author    = {S. D\v{z}eroski},
  title     = {Inductive logic programming and knowledge discovery in
    databases},
  editor    = {U. Fayyad and G. Piatetsky-Shapiro and P. Smyth and R.
    Uthurusamy},
  booktitle = {Advances in Knowledge Discovery and Data Mining},
  pages     = {118--152},
  publisher = {The MIT Press},
  year      = {1995},
}
Interactive Data Visualization at AT\&T Bell Labs, Stephen G. Eick and Brian S. Johnson
@InProceedings{eick.ea:interactive-at:95,
  author    = {Stephen G. Eick and Brian S. Johnson},
  title     = {Interactive Data Visualization at {AT}\&{T} Bell
    Labs},
  booktitle = {Proceedings of ACM CHI'95 Conference on Human Factors
    in Computing Systems},
  series    = {Demonstrations: Visualization},
  volume    = {2},
  pages     = {17--18},
  year      = {1995},
  URL       = {http://www.acm.org/sigchi/chi95/proceedings/demos/bsj\_bdy.htm},
  copyright = {(c) Copyright 1995 Association for Computing
    Machinery},
  keywords  = {Visualization, Graphic interaction, Abstract data
    visualization, Database visualization, Data mining},
  abstract  = {Visualization is a key technology for understanding
    large bodies of data. Our approach to visualizing
    abstract, non-geometric data involves a
    reduced-representation overview, multiple linked views,
    filtering and focusing techniques to reduce visual
    clutter, color, and a highly-interactive user
    interface. The reduced representations allow users to
    see the entire data set in one view while still
    providing immediate access to relevant detail and
    answers to specific questions in the linked views. We
    have developed a software infrastructure embodying our
    design principles for producing novel, high-bandwidth
    visualizations of corporate datasets. Our approach to
    abstract data visualization is one the best off-ramps
    on the information superhighway.},
}
Essay: Anne Eisenberg --- Data mining and privacy invasion on the Net, Anne Eisenberg
@Article{eisenberg:essay-anne:96,
  author  = {Anne Eisenberg},
  title   = {Essay: Anne Eisenberg --- Data mining and privacy
    invasion on the Net},
  journal = {Scientific American},
  volume  = {274},
  number  = {3},
  pages   = {120--??},
  month   = mar,
  year    = {1996},
  ISSN    = {0036-8733},
}
In Defence of C4.5 Notes on Learning One-Level Decision Trees, Tapio Elomaa
@InProceedings{elomaa:defence-c4:,
  author    = {Tapio Elomaa},
  title     = {In Defence of {C4.5}: Notes on Learning One-Level
    Decision Trees},
  editor    = {W. Cohen and H. Hirsh},
  booktitle = {Machine Learning: Proceedings of the Eleventh
    International Conference},
  publisher = {Morgan Kaufmann},
  address   = {San Francisco, CA},
  month     = jul,
  year      = {1994},
  note      = {Conference held in New Brunswick, NJ},
  URL       = {ftp://ftp.cs.helsinki.fi/pub/Reports/by_Project/PMDM/In_Defence_of_C4.5__Notes_on_Learning_One-Level_Decision_Trees.ps.gz},
  abstract  = {We discuss the implications of Holte's recently
    published article, which demonstrated that on the most
    commonly used data very simple classification rules are
    almost as accurate as decision trees produced by
    Quinlan's C4.5. We consider, in particular, what is the
    significance of Holte's results for the future of
    top-down induction of decision trees. To an extent,
    Holte questioned the sense of further research on
    multilevel decision tree learning. We go in detail
    through all the parts of Holte's study. We try to put
    the results into perspective. We argue that the (in
    absolute terms) small difference in accuracy between 1R
    and C4.5 that was witnessed by Holte is still
    significant. We claim that C4.5 possesses additional
    accuracy-related advantages over 1R. In addition we
    discuss the representativeness of the databases used by
    Holte. We compare empirically the optimal accuracies of
    multilevel and one-level decision trees and observe
    some significant differences. We point out several
    deficiencies of limited-complexity classifiers.},
}
Data Mining -- Ein {\"U}berblick, Werner Emde and Dietrich Wettschereck and Stefan Wrobel
@Article{emde.ea:uberblick:96,
  author  = {Werner Emde and Dietrich Wettschereck and Stefan
    Wrobel},
  title   = {Data Mining -- Ein {\"U}berblick},
  journal = {Unix/Mail},
  year    = {1996},
  note    = {to appear},
}
A Guided Tour through the Data Mining Jungle, Robert Engels and Guido Lindner and Rudi Studer
@InProceedings{engels.ea:guided-tour:97,
  author   = {Robert Engels and Guido Lindner and Rudi Studer},
  title    = {A Guided Tour through the Data Mining Jungle},
  pages    = {163},
  crossref = {heckerman.ea:proceedings-third:97},
}
Planning Tasks for Knowledge Discovery in Databases; Performing Task-Oriented User-Guidance, Robert Engels
@InProceedings{engels:planning-tasks:96,
  author   = {Robert Engels},
  title    = {Planning Tasks for Knowledge Discovery in Databases;
    Performing Task-Oriented User-Guidance},
  pages    = {170},
  crossref = {simoudis.ea:proceedings-second:96},
}
Evaluation und Erweiterung eines Verfahrens zum Finden von Regelmaessigkeiten in relationalen Datenbanken, Stefan Escher
@TechReport{escher:evaluation-und:97,
  author      = {Stefan Escher},
  title       = {Evaluation und Erweiterung eines Verfahrens zum Finden
    von Regelmaessigkeiten in relationalen Datenbanken},
  type        = {Diplomarbeit},
  institution = {Universitaet Stuttgart, Fakultaet Informatik,
    Germany},
  number      = {DIP-1444},
  month       = jan # " 1",
  year        = {1997},
  URL         = {ftp://ftp.informatik.uni-stuttgart.de/pub/library/ncstrl.ustuttgart_fi/DIP-1444/DIP-1444.ps.gz},
  keywords    = {ILP, Data Mining, Knowledge Discovery in Databases},
  abstract    = {In den letzten Jahren wurden die Techniken zur
    Datenerhebung und Speicherung stark weiterentwickelt.
    Zum Beispiel fuehren Barcodes auf nahezu allen
    Produkten und die Automatisierung von Betriebsablaeufen
    zu immer groesseren Datenmengen, die interpretiert
    werden muessen. Das Problem liegt darin, dass eine
    grosse Menge von Information vorhanden ist, das darin
    enthaltene Wissen jedoch aufgrund der grossen
    Datenmenge nicht zugaenglich ist. Daraus ergibt sich
    die Notwendigkeit zur Entdeckung von Wissen in grossen
    Datenbanken (Knowledge Discovery in Databases, Data
    Mining). Grundlage des in dieser Diplomarbeit
    vorgestellten Verfahrens ist das angenaeherte
    nichtmonotone ILP (Inductive Logic Programming).
    Gefunden werden Hornformeln, wobei eine Menge von
    Rumpfliteralen vom Benutzer angegeben werden muss. Die
    Qualitaet von gefundenen Klauseln wird von den
    Messwerten Support und Confidence bestimmt.
    Hauptsaechlich beschaeftigt sich die Diplomarbeit mit
    der Erweiterung eines bestehenden ILP-Verfahrens um
    eine Komponente, die numerische Attribute behandeln
    kann.},
}
Refinement of Datalog Programs, F. Esposito and A. Laterza and D. Malerba and G. Semeraro
@InProceedings{esposito.ea:refinement-datalog:96,
  author    = {F. Esposito and A. Laterza and D. Malerba and G.
    Semeraro},
  title     = {Refinement of {D}atalog Programs},
  booktitle = {Proceedings of the MLnet Familiarization Workshop on
    Data Mining with Inductive Logic Programming},
  pages     = {73--94},
  year      = {1996},
}
A Density-Based Algorithm for Discovering Clusters in Large Spatial Databases with Noise, Martin Ester and Hans-Peter Kriegel and J{\"o}rg Sander and Xiaowei Xu
@InProceedings{ester.ea:density-based-algorithm:96,
  author   = {Martin Ester and Hans-Peter Kriegel and J{\"o}rg Sander
    and Xiaowei Xu},
  title    = {A Density-Based Algorithm for Discovering Clusters in
    Large Spatial Databases with Noise},
  pages    = {226},
  crossref = {simoudis.ea:proceedings-second:96},
}
Density-Connected Sets and their Application for Trend Detection in Spatial Databases, Martin Ester and Hans-Peter Kriegel and J{\"o}rg Sander and Xiaowei Xu
@InProceedings{ester.ea:density-connected-sets:97,
  author   = {Martin Ester and Hans-Peter Kriegel and J{\"{o}}rg
    Sander and Xiaowei Xu},
  title    = {Density-Connected Sets and their Application for Trend
    Detection in Spatial Databases},
  pages    = {10},
  crossref = {heckerman.ea:proceedings-third:97},
}
Knowledge discovery in large spatial databases - focusing techniques for efficient class identification, M. Ester and H. P. Kriegel and X. W. Xu
@Article{ester.ea:large-spatial:95,
  author   = {M. Ester and H. P. Kriegel and X. W. Xu},
  title    = {Knowledge discovery in large spatial databases -
    focusing techniques for efficient class
    identification},
  journal  = {Lecture Notes In Computer Science},
  volume   = {951},
  pages    = {67--82},
  year     = {1995},
  address  = {Univ Munich, Inst Comp Sci, Leopoldstr 11B, D-80802
    Munich, Germany},
  keywords = {PROTEIN},
  abstract = {Both, the number and the size of spatial databases are
    rapidly growing because of the large amount of data
    obtained from satellite images, X-ray crystallography
    or other scientific equipment. Therefore, automated
    knowledge discovery becomes more and more important in
    spatial databases. So far, most of the methods for
    knowledge discovery in databases (KDD) have been based
    on relational database systems. In this paper, we
    address the task of class identification in spatial
    databases using clustering techniques. We put special
    emphasis on the integration of the discovery methods
    with the DB interface, which is crucial for the
    efficiency of KDD on large databases. The key to this
    integration is the use of a well-known spatial access
    method, the R*-tree. The focusing component of a KDD
    system determines which parts of the database are
    relevant for the knowledge discovery task. We present
    several strategies for focusing: selecting
    representatives from a spatial database, focusing on
    the relevant clusters and retrieving all objects of a
    given cluster. We have applied the proposed techniques
    to real data from a large protein database used for
    predicting protein-protein docking. A performance
    evaluation on this database indicates that clustering
    on large spatial databases can be performed, both,
    efficiently and effectively.},
}
Discovering Functional and Inclusion Dependencies in Relational Databases, Martti Kantola et al.
@Article{etal:discovering-functional:92,
  author   = {Martti Kantola and others},
  title    = {Discovering Functional and Inclusion Dependencies in
    Relational Databases},
  pages    = {591--607},
  crossref = {ijis-special-issue:92},
}
Overcoming Process Delays with Decision Tree Induction, Bob Evans and Doug Fisher
@Article{evans.ea:overcoming-process:94,
  author   = {Bob Evans and Doug Fisher},
  title    = {Overcoming Process Delays with Decision Tree
    Induction},
  journal  = {IEEE Expert},
  pages    = {60--66},
  month    = feb,
  year     = {1994},
  keywords = {Knowledge Acquisition, Decision Trees, ID3},
}
CLARIT, David A. Evans
@InProceedings{evans:clarit:95,
  author    = {David A. Evans},
  title     = {{CLARIT}},
  booktitle = {Proceedings of the Eighteenth Annual International ACM
    SIGIR Conference on Research and Development in
    Information Retrieval},
  series    = {Systems Demonstrations: Abstracts},
  pages     = {360},
  year      = {1995},
  copyright = {(c) Copyright 1995 Association for Computing
    Machinery},
  abstract  = {The CLARIT system consists of a set of flexible tools
    for application in a wide range of information
    management problems. These tools integrate
    natural-language processing (NLP), automatic knowledge
    discovery, and traditional information retrieval
    techniques. An advanced functionality application for
    free-text database management is demonstrated,
    incorporating full NLP, a broad range of querying
    mechanisms, automatic or user controlled query
    expansion, document collection profiling, document
    summarization, automatic document classification, and
    integrated handling of scanned images. The application
    provides rapid analysis of potentially large queries
    over large-scale databases in monolithic or
    client/server processing modes.},
}
Constructing bayesian networks to predict uncollectible telecommunications accounts, K. J. Ezawa and S. W. Norton
@Article{ezawa.ea:constructing-bayesian:96,
  author   = {K. J. Ezawa and S. W. Norton},
  title    = {Constructing {Bayesian} networks to predict
    uncollectible telecommunications accounts},
  journal  = {IEEE Expert: Intelligent Systems \& Their
    Applications},
  volume   = {11},
  number   = {5},
  pages    = {45--51},
  year     = {1996},
  address  = {AT\&T Bell Labs, Consumer Lab, Tech Staff, 600 Mt Ave,
    Rm 7E-523, Murray Hill, NJ, 07974},
  keywords = {EXPERT-SYSTEMS},
}
Interaction Selection and Complexity Control for Learning in Binarized Domains, Gerald Fahner
@TechReport{fahner:interaction-selection:96,
  author      = {Gerald Fahner},
  title       = {Interaction Selection and Complexity Control for
    Learning in Binarized Domains},
  institution = {International Computer Science Institute},
  number      = {TR-96-001},
  address     = {Berkeley, CA},
  month       = may,
  year        = {1996},
  keywords    = {learning algorithms, feature selection,
    Walsh-functions, input-space representation, complexity
    measures, capacity control, model comparison},
  abstract    = {We empirically investigate the potential of a novel,
    greatly simplified classifier design for binarized
    data. The generic model allocates a sparse, \_digital\_
    hidden layer comprised of interaction nodes that
    compute PARITY of selected submasks of input bits,
    followed by a sigmoidal output node with adjustable
    weights. Model identification incorporates
    user-assigned complexity preferences. We discuss the
    situations: a) when the input space obeys a metrics b)
    when the inputs are discrete attributes. We propose a
    family of respective model priors that make search
    through the combinatorial space of multi-input
    interactions feasible. Model capacity and smoothness of
    the approximation are controlled by two complexity
    parameters. Model comparison over the parameter plane
    discovers models with excellent performance. In some
    cases interpretable structures are achieved. We point
    out the significance of our novel data mining tool for
    overcoming scaling problems, impacts on real-time
    systems, and possible contributions to the development
    of non-standard computing devices for inductive
    inference.},
}
Data Mining with Sparse and Simplified Interaction Selection, Gerald Fahner
@InProceedings{fahner:with-sparse:96,
  author   = {Gerald Fahner},
  title    = {Data Mining with Sparse and Simplified Interaction
    Selection},
  pages    = {359},
  crossref = {simoudis.ea:proceedings-second:96},
}
FastMap: A Fast Algorithm for Indexing, Data-Mining and Visualization of Traditional and Multimedia Datasets, Christos Faloutsos and King-Ip Lin
@InProceedings{faloutsos.ea:fastmap-fast:95a,
  key_modifier = {a},
  author       = {Christos Faloutsos and King-Ip Lin},
  title        = {{FastMap}: {A} Fast Algorithm for Indexing,
    Data-Mining and Visualization of Traditional and
    Multimedia Datasets},
  editor       = {Michael J. Carey and Donovan A. Schneider},
  booktitle    = {Proceedings of the 1995 {ACM} {SIGMOD} International
    Conference on Management of Data},
  address      = {San Jose, California},
  pages        = {163--174},
  month        = "22--25~" # may,
  year         = {1995},
}
FastMap: a fast algorithm for indexing, data-mining and visualization of traditional and multimedia datasets, C. Faloutsos and King-Ip Lin
@Article{faloutsos.ea:fastmap-fast:95b,
  key_modifier   = {b},
  author         = {C. Faloutsos and King-Ip Lin},
  title          = {{FastMap}: a fast algorithm for indexing, data-mining
    and visualization of traditional and multimedia
    datasets},
  journal        = {SIGMOD Record (ACM Special Interest Group on
    Management of Data)},
  volume         = {24},
  number         = {2},
  pages          = {163--174},
  month          = jun,
  year           = {1995},
  ISSN           = {0163-5808},
  affiliation    = {AT\&T Bell Labs., Murray Hill, NJ, USA},
  classification = {C6160 (Database management systems (DBMS)); C6170K
    (Knowledge engineering techniques); C7240 (Information
    analysis and indexing); C6160S (Spatial and pictorial
    databases); C4240 (Programming and algorithm theory)},
  keywords       = {FastMap; Fast algorithm; Indexing; Data-mining;
    Visualization; Multimedia datasets; Traditional
    datasets; Feature-extraction functions; Domain expert;
    Highly fine-tuned spatial access methods; Best-match
    query; K-dimensional space; Potential clusters; Pattern
    recognition},
  thesaurus      = {Feature extraction; Indexing; Knowledge acquisition;
    Multimedia computing; Pattern matching},
}
FastMap: A Fast Algorithm for Indexing, Data-Mining and Visualization of Traditional and Multimedia Datasets, Christos Faloutsos and King-Ip (David) Lin
@TechReport{faloutsos.ea:fastmap-fast:95c,
  key_modifier = {c},
  author       = {Christos Faloutsos and King-Ip (David) Lin},
  title        = {{FastMap}: {A} Fast Algorithm for Indexing, Data-Mining
    and Visualization of Traditional and Multimedia
    Datasets},
  institution  = {University of Maryland Institute for Advanced Computer
    Studies Dept. of Computer Science, Univ. of Maryland},
  number       = {CS-TR-3383},
  address      = {College Park, MD},
  month        = jan,
  year         = {1995},
  URL          = {ftp://ftp.cs.umd.edu/pub/papers/papers/3383/3383.ps.Z},
  abstract     = {A very promising idea for fast searching in
    traditional and multimedia databases is to map objects
    into points in k-d space, using k feature-extraction
    functions, provided by a domain expert [Jag91]. Thus,
    we can subsequently use highly fine-tuned spatial
    access methods (SAMs), to answer several types of
    queries, including the 'Query By Example' type (which
    translates to a range query); the 'all pairs' query
    (which translates to a spatial join [BKSS94]); the
    nearest-neighbor or best-match query, etc. \par
    However, designing feature extraction functions can be
    hard. It is relatively easier for a domain expert to
    assess the similarity/distance of two objects. Given
    only the distance information though, it is not obvious
    how to map objects into points. \par This is exactly
    the topic of this paper. We describe a fast algorithm
    to map objects into points in some k-dimensional space
    (k is user-defined), such that the dissimilarities are
    preserved. There are two benefits from this mapping:
    (a) efficient retrieval, in conjunction with a SAM, as
    discussed before and (b) visualization and data-mining:
    the objects can now be plotted as points in 2-d or 3-d
    space, revealing potential clusters, correlations among
    attributes and other regularities that data-mining is
    looking for. \par We introduce an older method from
    pattern recognition, namely, Multi-Dimensional Scaling
    (MDS) [Tor52]; although unsuitable for indexing, we use
    it as yardstick for our method. Then, we propose a much
    faster algorithm to solve the problem in hand, while in
    addition it allows for indexing. Experiments on real
    and synthetic data indeed show that the proposed
    algorithm is significantly faster than MDS, (being
    linear, as opposed to quadratic, on the database size
    N), while it manages to preserve distances and the
    overall structure of the data-set. \par (Also
    cross-referenced as UMIACS-TR-94-132)},
}
Combining Data Mining and Machine Learning for Effective User Profiling, Tom Fawcett and Foster Provost
@InProceedings{fawcett.ea:combining-machine:96,
  author   = {Tom Fawcett and Foster Provost},
  title    = {Combining Data Mining and Machine Learning for
    Effective User Profiling},
  pages    = {8},
  crossref = {simoudis.ea:proceedings-second:96},
}
Advances in Knowledge Discovery and Data Mining, U. M. Fayyad and G. Piatetsky-Shapiro and P. Smyth and R. Uthurusamy (Eds)
@Book{fayyad.ea:advances:96,
  editor     = {U. M. Fayyad and G. Piatetsky-Shapiro and P. Smyth and
    R. Uthurusamy},
  title      = {Advances in Knowledge Discovery and Data Mining},
  publisher  = {AAAI Press / The MIT Press},
  address    = {Menlo Park, CA},
  year       = {1996},
  ISBN       = {0-262-56097-6},
  descriptor = {Data Mining, Daten},
}
The Attribute Selection Problem in Decision Tree Generation, U. M. Fayyad and K. B. Irani
@InProceedings{fayyad.ea:attribute-selection:92,
  author    = {U. M. Fayyad and K. B. Irani},
  title     = {The Attribute Selection Problem in Decision Tree
    Generation},
  booktitle = {Proc.\ of AAAI-92},
  pages     = {104--110},
  year      = {1992},
}
Automated cataloging and analysis of sky survey image databases: the SKICAT system, Usama M. Fayyad and Nicholas Weir and S. Djorgovski
@InProceedings{fayyad.ea:automated-cataloging:93,
  author    = {Usama M. Fayyad and Nicholas Weir and S. Djorgovski},
  title     = {Automated cataloging and analysis of sky survey image
    databases: the {SKICAT} system},
  booktitle = {Proc. of the second Int. Conf. on Information and
    Knowledge Management},
  address   = {Washington DC},
  pages     = {527--536},
  year      = {1993},
}
Data Mining and Knowledge Discovery in Databases, Usama Fayyad and Ramasamy Uthurusamy
@Article{fayyad.ea:data-mining:96,
  author  = {Usama Fayyad and Ramasamy Uthurusamy},
  title   = {Data Mining and Knowledge Discovery in Databases},
  journal = {Communications of the ACM},
  volume  = {39},
  number  = {11},
  pages   = {24--27},
  month   = nov,
  year    = {1996},
  ISSN    = {0001-0782},
  address = {Microsoft Corp, Res, Redmond, Wa, 98052 Gm Corp,
    Knowledge \& Decis Support, Detroit, Mi, 48202},
}
From digitized images to online catalogs - data mining a sky survey, U. M. Fayyad and S. G. Djorgovski and N. Weir
@Article{fayyad.ea:digitized-images:96,
  author   = {U. M. Fayyad and S. G. Djorgovski and N. Weir},
  title    = {From digitized images to online catalogs - data mining
    a sky survey},
  journal  = {AI Magazine},
  volume   = {17},
  number   = {2},
  pages    = {51--66},
  year     = {1996},
  address  = {Microsoft Res, Redmond, Ca Caltech, Jpl, Machine
    Learning Syst Grp, Pasadena, Ca, 91125},
  abstract = {The value of scientific digital-image libraries seldom
    lies in the pixels of images. For large collections of
    images, such as those resulting from astronomy sky
    surveys, the typical useful product is an online
    database cataloging entries of interest. We focus on
    the automation of the cataloging effort of a major sky
    survey and the availability of digital libraries in
    general. The SKICAT system automates the reduction and
    analysis of the three terabytes worth of images,
    expected to contain on the order of 2 billion sky
    objects. For the primary scientific analysis of these
    data, it is necessary to detect, measure, and classify
    every sky object. SKICAT integrates techniques for
    image processing, classification learning, database
    management, and visualization. The learning algorithms
    are trained to classify the detected objects and can
    classify objects too faint for visual classification
    with an accuracy level exceeding 90 percent. This
    accuracy level increases the number of classified
    objects in the final catalog threefold relative to the
    best results from digitized photographic sky surveys to
    date. Hence, learning algorithms played a powerful and
    enabling role and solved a difficult, scientifically
    significant problem, enabling the consistent, accurate
    classification and the ease of access and analysis of
    an otherwise unfathomable data set.},
}
Mining Scientific Data, Usama Fayyad and David Haussler and Paul Stolorz
@Article{fayyad.ea:scientific:96,
  author  = {Usama Fayyad and David Haussler and Paul Stolorz},
  title   = {Mining Scientific Data},
  journal = {Communications of the ACM},
  year    = {1996},
  month   = nov,
  volume  = {39},
  number  = {11},
  pages   = {51--57},
  issn    = {0001-0782},
}
From data mining to knowledge discovery in databases, U. Fayyad and G. Piatetsky-Shapiro and P. Smyth
@Article{fayyad.ea:to:96,
author = "U. Fayyad and G. Piatetsky-Shapiro and P. Smyth",
address = "Univ Calif Irvine, Dept Comp \& Informat Sci, Irvine,
Ca, 92717 Gte Labs Inc, Knowledge Discovery Databases
Kdd Project, Tech Staff, Waltham, Ma, 02254",
title = "From data mining to knowledge discovery in databases",
journal = "AI Magazine",
year = "1996",
volume = "17",
number = "3",
pages = "37--54",
abstract = "Data mining and knowledge discovery in databases have
been attracting a significant amount of research,
industry, and media attention of late. What is all the
excitement about? This article provides an overview of
this emerging field, clarifying how data mining and
knowledge discovery in databases are related both to
each other and to related fields, such as machine
learning, statistics, and databases. The article
mentions particular real-world applications, specific
data-mining techniques, challenges involved in
real-world applications of knowledge discovery, and current
and future research directions in the field.",
keywords = "NEURAL NETWORKS",
}
Knowledge Discovery and Data Mining: Towards a Unifying Framework, Usama Fayyad and Gregory Piatetsky-Shapiro and Padhraic Smyth
@InProceedings{fayyad.ea:towards-unifying:96,
  author   = {Usama Fayyad and Gregory Piatetsky-Shapiro and Padhraic Smyth},
  title    = {Knowledge Discovery and Data Mining: Towards a Unifying Framework},
  pages    = {82},
  crossref = {simoudis.ea:proceedings-second:96},
}
Data Mining and Knowledge Discovery in Databases: Applications in Astronomy and Planetary Science (Invited Talk), Usama M. Fayyad
@InProceedings{fayyad:applications-astronomy:96a,
key_modifier = "a",
author = "Usama M. Fayyad",
title = "Data Mining and Knowledge Discovery in Databases:
Applications in Astronomy and Planetary Science
(Invited Talk)",
pages = "1590--1592",
booktitle = "Proceedings of the Thirteenth National Conference on
Artificial Intelligence and the Eighth Innovative
Applications of Artificial Intelligence Conference",
month = "4--8~" # aug,
publisher = "AAAI Press / MIT Press",
address = "Menlo Park",
year = "1996",
}
Data mining and knowledge discovery - making sense out of data, U. M. Fayyad
@Article{fayyad:making-sense:96b,
key_modifier = "b",
author = "U. M. Fayyad",
address = "Microsoft Res, 1 Microsoft Way 9-S, Redmond, Wa,
98052",
title = "Data mining and knowledge discovery - making sense out
of data",
journal = "IEEE Expert-Intelligent Systems \& Their
Applications",
year = "1996",
volume = "11",
number = "5",
pages = "20--25",
}
Learning from Biased Data Using Mixture Models, A. J. Feelders
@InProceedings{feelders:learning-biased:96,
  author   = {A. J. Feelders},
  title    = {Learning from Biased Data Using Mixture Models},
  pages    = {102},
  crossref = {simoudis.ea:proceedings-second:96},
}
Mining Associations in Text in the Presence of Background Knowledge, Ronen Feldman and Haym Hirsh
@InProceedings{feldman.ea:associations-text:96,
  author   = {Ronen Feldman and Haym Hirsh},
  title    = {Mining Associations in Text in the Presence of Background Knowledge},
  pages    = {343},
  crossref = {simoudis.ea:proceedings-second:96},
}
Maximal Association Rules: A New Tool for Mining for Keyword Co-Occurrences in Document Collections, Ronen Feldman and Yonatan Aumann and Amihood Amir and Willi Kloesgen and Amir Zilberstein
@InProceedings{feldman.ea:maximal-association:97,
title = "Maximal Association Rules: {A} New Tool for Mining for
Keyword Co-Occurrences in Document Collections",
author = "Ronen Feldman and Yonatan Aumann and Amihood Amir and
Willi Kl{\"{o}}sgen and Amir Zilberstein",
pages = "167",
crossref = "heckerman.ea:proceedings-third:97",
}
Visualization Techniques to Explore Data Mining Results for Document Collections, Ronen Feldman and Willi Kloesgen and Amir Zilberstein
@InProceedings{feldman.ea:techniques-to:97,
  author   = {Ronen Feldman and Willi Kl{\"{o}}sgen and Amir Zilberstein},
  title    = {Visualization Techniques to Explore Data Mining Results for
              Document Collections},
  pages    = {16},
  crossref = {heckerman.ea:proceedings-third:97},
}
Applying AI Clustering to Engineering Tasks., Doug Fisher and etal.
@Article{fisher.ea:applying-ai:93,
author = "Doug Fisher and others",
title = "Applying {AI} Clustering to Engineering Tasks",
journal = "IEEE Expert",
year = "1993",
pages = "51--60",
month = dec,
keywords = "COBWEB, Clustering, Applications, Time Series",
annote = "Application of COBWEB to fault diagnosis, Bridge
design and human gait analysis. 17 references",
}
Iterative Optimization and Simplification of Hierarchical Clusterings, Doug Fisher
@TechReport{fisher:iterative-optimization:,
URL = "http://cswww.vuse.vanderbilt.edu/~dfisher/tech-reports/tr-95-01.html",
title = "Iterative Optimization and Simplification of
Hierarchical Clusterings",
type = "Technical Report",
number = "CS-95-01",
institution = "Department of Computer Science, Vanderbilt University",
year = "1995",
author = "Doug Fisher",
abstract = "Clustering is often used for discovering structure in
data. Clustering systems differ in the objective
function used to evaluate clustering quality and the
control strategy used to search the space of
clusterings. Ideally, the search strategy should
consistently construct clusterings of high quality, but
be computationally inexpensive as well. In general, we
cannot have it both ways, but we can partition the
search so that a system inexpensively constructs a
`tentative' clustering for initial examination,
followed by iterative optimization, which continues to
search in background for improved clusterings. Given
this motivation, we evaluate an inexpensive `sorting'
strategy coupled with several control strategies for
iterative optimization, each of which repeatedly
modifies an initial clustering in search of a better
one. One of these optimization strategies, inspired by
work on macro-operator learning, appears to be novel in
the clustering literature. Once a clustering has been
constructed it is judged by analysts -- often according
to task-specific criteria. Several authors have
abstracted these criteria and posited a generic
performance task akin to pattern completion, where the
error rate over completed patterns is used to
`externally' judge clustering utility. Given this
performance task we adapt resampling-based pruning
strategies used by supervised learning systems to the
task of simplifying hierarchical clusterings, thus
promising to ease post-clustering analysis. Finally, we
propose a number of objective functions, based on
attribute-selection measures for decision-tree
induction, that might perform well on the error rate
and simplicity dimensions.",
keywords = "clustering, iterative optimization, cluster
validation, resampling, pruning, objective functions",
}
Iterative Optimization and Simplification of Hierarchical Clusterings, Doug Fisher
@Article{fisher:iterative-optimization:96,
URL = "http://cswww.vuse.vanderbilt.edu/~dfisher/jair-96/html-final/html-final.html",
title = "Iterative Optimization and Simplification of
Hierarchical Clusterings",
author = "Doug Fisher",
year = "1996",
journal = "Journal of Artificial Intelligence Research",
volume = "4",
pages = "147--180",
}
Inductive characterisation of database relations., P. A. Flach
@InProceedings{flach:inductive-characterisation:90,
URL = "ftp://ftp.gmd.de/MachineLearning/ILP/public/papers/flach-ITKreport23.ps.Z",
title = "Inductive characterisation of database relations",
author = "P. A. Flach",
editor = "Z. W. Ras and M. Zemankova and M. L. Emrich",
booktitle = "Proceedings of the International Symposium on
Methodologies for Intelligent Systems",
pages = "371--378",
publisher = "North-Holland",
address = "Amsterdam",
year = "1990",
note = "ITK Research Report No. 23",
}
10 hottest technologies in telecom, Patrick Flanagan
@Article{flanagan:10-hottest:96,
  author         = {Patrick Flanagan},
  title          = {10 hottest technologies in telecom},
  journal        = {Telecommunications (Americas Edition)},
  journalabr     = {Telecommunications Am Ed},
  year           = {1996},
  month          = may,
  volume         = {30},
  number         = {5},
  pages          = {6},
  issn           = {0278-4831},
  classification = {716.1; 722.3; 723.1.1; 901},
  keywords       = {Asynchronous transfer mode; Automated network
                    management; Cable modems; Computer networks; Computer
                    programming languages; Data mining; Electric relays;
                    Internet appliances; Intranet; Java programming
                    language; Local area networks; Modems; Personal
                    communication systems; Personal satellite phones;
                    Technology; Telecommunication; Telecommunication
                    systems; Telecommunication technology; Voice over frame
                    relay; Voice/data communication systems},
}
A Genetic Algorithm-Based Approach to Data Mining, Ian W. Flockhart and Nicholas J. Radcliffe
@InProceedings{flockhart.ea:genetic-algorithm-based:96,
  author   = {Ian W. Flockhart and Nicholas J. Radcliffe},
  title    = {A Genetic Algorithm-Based Approach to Data Mining},
  pages    = {299},
  crossref = {simoudis.ea:proceedings-second:96},
}
Inductive Learning for Expert Systems, Richard Forsyth
@InCollection{forsyth:inductive-learning:89,
author = "Richard Forsyth",
title = "Inductive Learning for Expert Systems",
booktitle = "Expert Systems Principles and Case Studies",
publisher = "Chapman and Hall",
address = "New York",
year = "1989",
}
Knowledge discovery in databases: an overview, W. J. Frawley and G. Piatetsky-Shapiro and C. J. Matheus
@InCollection{frawley.ea:overview:91,
author = "W. J. Frawley and G. Piatetsky-Shapiro and C. J.
Matheus",
title = "Knowledge discovery in databases: an overview",
editor = "G. Piatetsky-Shapiro and W. J. Frawley",
booktitle = "Knowledge Discovery in Databases",
pages = "1--27",
publisher = "AAAI Press/MIT Press",
address = "Menlo Park, CA/Cambridge, MA",
year = "1991",
}
Knowledge Discovery in Databases: An Overview., W. Frawley and G. Piatetsky-Shapiro and C. Matheus
@Article{frawley.ea:overview:92a,
key_modifier = "a",
author = "W. Frawley and G. Piatetsky-Shapiro and C. Matheus",
title = "Knowledge Discovery in Databases: An Overview",
journal = "AI Magazine",
year = "1992",
volume = "13",
number = "3",
pages = "213--228",
month = "Fall (Autumn)",
abstract = "After a decade of fundamental interdisciplinary
research in machine learning, the spadework in this
field has been done; the 1990s should see the
widespread exploitation of knowledge discovery as an
aid to assembling knowledge bases. The contributors to
the AAAI Press book Knowledge Discovery in Databases
were excited at the potential benefits of this
research. The editors hope that some of this excitement
will communicate itself to AI Magazine readers of this
article.",
note = "Reprint of the introductory chapter of {\em Knowledge
Discovery in Databases} collection, AAAI/MIT Press,
1991.",
annote = "Conflicting page numbers in another bibtex entry!",
}
Knowledge discovery in databases - an overview, W. J. Frawley and G. Piatetsky-Shapiro and C. J. Matheus
@Article{frawley.ea:overview:92b,
key_modifier = "b",
author = "W. J. Frawley and G. Piatetsky-Shapiro and C. J.
Matheus",
address = "Gte Labs Inc, Distributed Cooperating Learning Syst
Project, Waltham, Ma, 02254 Gte Labs Inc, Knowledge
Discovery Databases Project, Waltham, Ma, 02254",
title = "Knowledge discovery in databases - an overview",
journal = "AI Magazine",
year = "1992",
volume = "13",
number = "3",
pages = "57--70",
abstract = "After a decade of fundamental interdisciplinary
research in machine learning, the spadework in this
field has been done; the 1990s should see the
widespread exploitation of knowledge discovery as an
aid to assembling knowledge bases. The contributors to
the AAAI Press book Knowledge Discovery in Databases
were excited at the potential benefits of this
research. The editors hope that some of this excitement
will communicate itself to AI Magazine readers of this
article.",
annote = "Conflicting page numbers in another bibtex entry!",
}
Using function to encode domain and contextual knowledge in statistical induction, W. Frawley
@InCollection{frawley:using-function:91,
crossref = "piatetsky-shapiro.ea:knowledge-discovery:91",
editor = "Gregory Piatetsky-Shapiro and William J. Frawley",
booktitle = "Knowledge Discovery in Databases",
publisher = "AAAI Press / The MIT Press",
address = "Menlo Park, California",
edition = "First",
year = "1991",
author = "W. Frawley",
title = "Using function to encode domain and contextual
knowledge in statistical induction",
annote = "Details of the FBI system for decision tree
induction",
}
A data-parallel primitive for high-performance knowledge discovery in large databases, A. A. Freitas and S. H. Lavington
@TechReport{freitas.ea:data-parallel-primitive:95,
URL = "ftp://ftp.essex.ac.uk/pub/csc/technical-reports/CSM-242.ps.Z",
title = "A data-parallel primitive for high-performance
knowledge discovery in large databases",
author = "A. A. Freitas and S. H. Lavington",
type = "Internal Report",
number = "CSM-242",
institution = "University of Essex, UK",
month = may,
year = "1995",
abstract = "Efficiency is crucial in KDD (Knowledge Discovery in
Databases), due to the huge amount of data stored in
current databases. We argue that high efficiency in KDD
can be achieved by combining two approaches, namely
encapsulating KDD functionally within standard DBMS
operations and using parallel processing. Hence, KDD
tasks can be executed on a back-end SQL server, e.g. a
parallel DB machine. We propose a KDD primitive (a set
of basic operations) which underlies the candidate-rule
evaluation procedures of many KDD algorithms. We
compare and analyse the time required to carry out this
primitive on three different computational
architectures, viz. a conventional workstation and two
parallel DB machines. The main advantages of
encapsulating a KDD primitive in a parallel DB server
are automatic parallelization and the run-time speed
which can be achieved through parallel processing.",
}
Parallel Data Mining for Very Large Relational Databases, A. A. Freitas and S. H. Lavington
@Article{freitas.ea:parallel-very:96,
  author  = {A. A. Freitas and S. H. Lavington},
  title   = {Parallel Data Mining for Very Large Relational Databases},
  journal = {Lecture Notes in Computer Science},
  year    = {1996},
  volume  = {1067},
  pages   = {158--??},
  issn    = {0302-9743},
}
Speeding up Knowledge Discovery in Large Relational Databases by Means of a New Discretization Algorithm, A. A. Freitas and S. H. Lavington
@Article{freitas.ea:speeding-up:96,
  author  = {A. A. Freitas and S. H. Lavington},
  title   = {Speeding up Knowledge Discovery in Large Relational Databases
             by Means of a New Discretization Algorithm},
  journal = {Lecture Notes in Computer Science},
  year    = {1996},
  volume  = {1094},
  pages   = {124--??},
  issn    = {0302-9743},
}
Lazy Decision Trees, Jerome Friedman and Ron Kohavi and Yeogirl Yun
@InProceedings{friedman.ea:lazy-decision:96,
  author        = {Jerome Friedman and Ron Kohavi and Yeogirl Yun},
  title         = {Lazy Decision Trees},
  booktitle     = {Proceedings of the Thirteenth National Conference on
                   Artificial Intelligence},
  publisher     = {AAAI Press and the MIT Press},
  year          = {1996},
  month         = aug,
  pages         = {717--724},
  url           = {http://robotics.stanford.edu/users/ronnyk},
  contributedby = {Ronny Kohavi, ronnyk(at)sgi.com},
}
Mining Optimized Association Rules for Numeric Attributes, T. Fukuda and Y. Morimoto and S. Morishita and T. Tokuyama
@InProceedings{fukuda.ea:optimized-association:96,
  author    = {T. Fukuda and Y. Morimoto and S. Morishita and T. Tokuyama},
  title     = {Mining Optimized Association Rules for Numeric Attributes},
  editor    = {{ACM}},
  booktitle = {Proceedings of the Fifteenth {ACM}
               {SIGACT}-{SIGMOD}-{SIGART} Symposium on Principles of
               Database Systems, {PODS} 1996, Montr{\'e}al, Canada,
               June 3--5, 1996},
  series    = {Proceedings of the ACM SIGACT SIGMOD SIGART Symposium
               on Principles of Database Systems},
  volume    = {15},
  publisher = {ACM Press},
  address   = {New York, NY 10036, USA},
  year      = {1996},
  pages     = {182--191},
  keywords  = {database systems; PODS; ACM; SIGMOD; SIGART; SIGACT},
  annote    = {Held in conjunction with the 1996 ACM SIGMOD
               international conference on management of data. Also
               known as PODS 1996},
}
Data Mining using Two-dimensional Optimized Association Rules: Scheme, Algorithms, and Visualization, Takeshi Fukuda and Yasuhiko Morimoto and Shinichi Morishita and Takeshi Tokuyama
@InProceedings{fukuda.ea:using-two-dimensional:96,
  author    = {Takeshi Fukuda and Yasuhiko Morimoto and Shinichi
               Morishita and Takeshi Tokuyama},
  title     = {Data Mining using Two-dimensional Optimized
               Association Rules: Scheme, Algorithms, and
               Visualization},
  editor    = {H. V. Jagadish and Inderpal Singh Mumick},
  booktitle = {Proceedings of the 1996 {ACM} {SIGMOD} International
               Conference on Management of Data},
  address   = {Montreal, Quebec, Canada},
  year      = {1996},
  month     = "4--6~" # jun,
  pages     = {13--23},
}
Local Induction of Decision Trees: Towards Interactive Data Mining, Truxton Fulton and Steven Salzberg and Simon Kasif and David Waltz
@InProceedings{fulton.ea:local-induction:96,
  author   = {Truxton Fulton and Steven Salzberg and Simon Kasif and
              David Waltz},
  title    = {Local Induction of Decision Trees: Towards Interactive
              Data Mining},
  pages    = {14},
  crossref = {simoudis.ea:proceedings-second:96},
}
Knowledge discovery in international conflict databases, J. Furnkranz and J. Petrak and R. Trappl
@Article{furnkranz.ea:international-conflict:97,
author = "J. F{\"{u}}rnkranz and J. Petrak and R. Trappl",
address = "Austrian Res Inst Artificial Intelligence,
Schottengasse 3, a-1010 Vienna, Austria Austrian Res
Inst Artificial Intelligence, a-1010 Vienna, Austria",
title = "Knowledge discovery in international conflict
databases",
journal = "Applied Artificial Intelligence",
year = "1997",
volume = "11",
number = "2",
pages = "91--118",
abstract = "Artificial intelligence (AI) is heavily supported by
military institutions, while practically no effort goes
into the investigation of possible contributions of AI
to the avoidance and termination of crises and wars.
This article takes a first step in this direction by
investigating the use of machine learning techniques
for discovering knowledge in international conflict and
conflict management databases. We have applied
similarity-based case retrieval to the KOSIMO database
of international conflicts. Furthermore, we present
results of analyzing the CONFMAN database of successful
and unsuccessful conflict management attempts with an
inductive decision tree learning algorithm. The latter
approach seems to be particularly promising, as
conflict management events apparently are more
repetitive and thus better suited for machine-aided
analysis.",
keywords = "MEDIATION",
}
A Comparison of Pruning Methods for Relational Concept Learning, J. Furnkranz
@InProceedings{furnkranz:comparison-pruning:94,
  author    = {J. F{\"{u}}rnkranz},
  title     = {A Comparison of Pruning Methods for Relational Concept
               Learning},
  booktitle = {Proceedings of the AAAI-94 Workshop on Knowledge
               Discovery in Databases},
  year      = {1994},
}
Induction of meta-knowledge about knowledge discovery, B. R. Gaines and P. Compton
@Article{gaines.ea:induction-meta-knowledge:93,
author = "B. R. Gaines and P. Compton",
address = "Univ Calgary, Inst Knowledge Sci, Calgary T2N 1N4, Ab,
Canada Univ New S Wales, Dept Comp Sci, Sydney, Nsw
2033, Australia",
title = "Induction of meta-knowledge about knowledge
discovery",
journal = "IEEE Transactions on Knowledge and Data Engineering",
year = "1993",
volume = "5",
number = "6",
pages = "990--992",
abstract = "A study is reported of the use of ripple-down rule
induction to develop a meta-model of ten years of
clinical data captured as part of the development of an
expert system for thyroid diagnosis. The study shows
how the suitability for inductive knowledge discovery
of such real-world data can be characterized in terms
of its stationarity, and how the best error rates
achievable and the amount of data necessary to achieve
them, can be estimated.",
keywords = "GARVAN THYROID DATABASE, INDUCT, INDUCTION, KNOWLEDGE
DISCOVERY, MACHINE LEARNING, MEDICAL DIAGNOSIS,
METAMODELING, META-KNOWLEDGE, RIPPLE-DOWN RULES, RULES
WITH EXCEPTIONS",
}
Improving Scalability in a Scientific Discovery System by Exploiting Parallelism, Gehad Galal and Diane J. Cook and Lawrence B. Holder
@InProceedings{galal.ea:improving-scalability:97,
  author   = {Gehad Galal and Diane J. Cook and Lawrence B. Holder},
  title    = {Improving Scalability in a Scientific Discovery System
              by Exploiting Parallelism},
  pages    = {171},
  crossref = {heckerman.ea:proceedings-third:97},
}
Connectionist Expert Systems, Stephen I. Gallant
@Article{gallant:connectionist-expert:88,
author = "Stephen I. Gallant",
title = "Connectionist Expert Systems",
journal = "Communications of the ACM",
year = "1988",
volume = "31",
number = "2",
pages = "153--168",
}
Mining Entity-Identification Rules for Database Integration, M. Ganesh and Jaideep Srivastava and Travis Richardson
@InProceedings{ganesh.ea:entity-identification-rules:96,
  author   = {M. Ganesh and Jaideep Srivastava and Travis Richardson},
  title    = {Mining Entity-Identification Rules for Database
              Integration},
  pages    = {291},
  crossref = {simoudis.ea:proceedings-second:96},
}
Discovering interesting statements from a database, F. Gebhardt
@Article{gebhardt:discovering-interesting:94,
author = "F. Gebhardt",
address = "Gesell Math \& Datenverarbeitung Gmbh, Schloss
Birlinghoven, Postfach 1316, D-53731 St Augustin,
Germany",
title = "Discovering interesting statements from a database",
journal = "Applied Stochastic Models And Data Analysis",
year = "1994",
volume = "10",
number = "1",
pages = "1--14",
abstract = "Knowledge discovery aims at extracting new knowledge
from potentially large databases; this may be in the
form of interesting statements about the data. Two
interrelated classes of problem arise that are treated
here: to put the subjective notion of `interesting'
into concrete terms and to deal with large numbers of
statements that are related to one another (one
rendering the other redundant or at least less
interesting). Four increasingly subjective facets of
`interestingness' are identified: the subject field
under consideration, the conspicuousness of a finding,
its novelty, and its deviation from prior knowledge. A
procedure is proposed, and tried out on two quite
different data sets, that allows for specifying
interestingness by various means and that ranks the
results in a way that takes interestingness (relevance,
evidence) as well as mutual relatedness (similarity,
affinity) into account --- manifestations of the second
and third facets of interestingness in the given data
environment.",
keywords = "PROJECTION PURSUIT, KNOWLEDGE DISCOVERY IN DATABASES,
EXPLORATORY DATA ANALYSIS, INTERESTINGNESS, PROJECT
EXPLORA",
}
Excavate Your Data, Cheryl Gerber
@Article{gerber:excavate-your:96,
  author   = {Cheryl Gerber},
  title    = {Excavate Your Data},
  journal  = {Datamation},
  year     = {1996},
  month    = may,
  volume   = {42},
  number   = {9},
  url      = {http://www.datamation.com/PlugIn/issues/1996/may1/05asoft3frame.html},
  abstract = {Datamining could be your No. 1 strategic weapon--and
              source of profit--in dissecting archival information.
              But with its roots in machine learning, this esoteric
              technology takes some time to master.},
}
ENIGMA: A System that Learns Diagnostic Knowledge, A. Giordana and L. Saitta and F. Bergadano and F. Brancadori and D. De Marchi
@Article{giordana.ea:enigma-system:93,
  author  = {A. Giordana and L. Saitta and F. Bergadano and F.
             Brancadori and D. De Marchi},
  title   = {{ENIGMA}: {A} System that Learns Diagnostic Knowledge},
  journal = {IEEE Transactions on Knowledge and Data Engineering},
  year    = {1993},
  month   = feb,
  volume  = {5},
  number  = {1},
  pages   = {15--28},
}
Statistical Inference and Data Mining, Clark Glymour and David Madigan and Daryl Pregibon and Padhraic Smyth
@Article{glymour.ea:statistical-inference:96,
  author  = {Clark Glymour and David Madigan and Daryl Pregibon and
             Padhraic Smyth},
  title   = {Statistical Inference and Data Mining},
  journal = {Communications of the ACM},
  year    = {1996},
  month   = nov,
  volume  = {39},
  number  = {11},
  pages   = {35--41},
  issn    = {0001-0782},
  address = {Carnegie Mellon Univ, Pittsburgh, Pa, 15213 Univ Calif
             San Diego, La Jolla, Ca, 92093 Washington Univ,
             Seattle, Wa At\&T Bell Labs, Murray Hill, Nj, 07974},
}
Knowledge discovery in deductive databases with large deduction results: the first step, C. L. Goh and M. Tsukamoto and S. Nishio
@Article{goh.ea:deductive-with:96,
author = "C. L. Goh and M. Tsukamoto and S. Nishio",
address = "Osaka Univ, Fac Engn, Dept Informat Syst Engn, 2-1
Yamadaoka, Suita, Osaka 565, Japan",
title = "Knowledge discovery in deductive databases with large
deduction results: the first step",
journal = "IEEE Transactions on Knowledge and Data Engineering",
year = "1996",
month = dec,
volume = "8",
number = "6",
pages = "952--956",
abstract = "Deductive databases have the ability to deduce new
facts from a set of facts using a set of rules. They
are also useful in the integration of artificial
intelligence and database. However, when recursive
rules are involved, the amount of deduced facts can
become too large to be practically stored, viewed or
analyzed. This seriously hinders the usefulness of
deductive databases. In order to overcome this problem,
we propose four methods to discover characteristic
rules from large amount of deduction results without
actually having to store all the deduction results.
This paper presents the first step in the application
of knowledge discovery techniques to deductive
databases with large deduction results.",
keywords = "attribute-oriented algorithm, characteristic rule,
data mining, deductive database, recursive rule",
}
COGIN: Symbolic Induction with Genetic Algorithms, D. P. Greene and S. F. Smith
@InProceedings{greene.ea:cogin-symbolic:92,
  author    = {D. P. Greene and S. F. Smith},
  title     = {{COGIN}: Symbolic Induction with Genetic Algorithms},
  booktitle = {Proc.\ of AAAI-92},
  year      = {1992},
  pages     = {111--116},
  keywords  = {GA},
}
Visualization for knowledge discovery, G. Grinstein and J. C. Sieg and S. Smith and M. G. Williams
@Article{grinstein.ea:visualization-for:92,
crossref = "ijis-special-issue:92",
author = "G. Grinstein and J. C. Sieg and S. Smith and M. G.
Williams",
address = "Univ Massachusetts, Lowell, Ma, 01854",
title = "Visualization for knowledge discovery",
journal = "International Journal of Intelligent Systems",
year = "1992",
volume = "7",
number = "7",
pages = "637--648",
abstract = "Although the fields of data visualization and
automated knowledge discovery (AKD) share many goals,
workers in each field have been reluctant to adopt the
tools and methods of the other field. Many AKD
researchers discourage the use of visualization tools
because they believe that dependence on human steering
will impede the development of numerical or analytical
descriptions of complex data. Many visualization
researchers are concerned that their present platforms
are being pushed to the limits of their performance by
the most advanced visualization techniques and are
therefore unwilling to incur the perceived overhead of
having a database system mediate access to the data. We
argue that these attitudes are somewhat short-sighted
and that the techniques of these two communities are
complementary. We discuss a specific visualization
system that we have developed and describe the
obstacles that must be overcome in integrating it into
an AKD system.",
annote = "Deals with Exvis system, which produces textures of
icons, each icon representing up to 15 variables + X,Y
co-ords. Also use of sound textures.",
}
Harnessing the Human in Knowledge Discovery, Georges G. Grinstein
@InProceedings{grinstein:harnessing-human:96,
  author   = {Georges G. Grinstein},
  title    = {Harnessing the Human in Knowledge Discovery},
  pages    = {384},
  crossref = {simoudis.ea:proceedings-second:96},
}
Data Mining and Tree-Based Optimization, Robert Grossman and Haim Bodek and Dave Northcutt and Vince Poor
@InProceedings{grossman.ea:tree-based-optimization:96,
  author   = {Robert Grossman and Haim Bodek and Dave Northcutt and
              Vince Poor},
  title    = {Data Mining and Tree-Based Optimization},
  pages    = {323},
  crossref = {simoudis.ea:proceedings-second:96},
}
The Terabyte Challenge: An Open, Distributed Testbed for Managing and Mining Massive Data Sets, Robert Grossman
@InProceedings{grossman:terabyte-challenge:96,
author = "Robert Grossman",
title = "The Terabyte Challenge: An Open, Distributed Testbed
for Managing and Mining Massive Data Sets",
booktitle = "CD-ROM Proceedings of Supercomputing'96",
publisher = "IEEE",
address = "Pittsburgh, PA",
month = nov,
year = "1996",
keywords = "contest",
}
Data-base mining - discovering new knowledge and competitive advantage, F. H. Grupe and M. M. Owrang
@Article{grupe.ea:data-base-discovering:95,
author = "F. H. Grupe and M. M. Owrang",
address = "Univ Nevada, Reno, Nv, 89557 American Univ,
Washington, Dc, 20016",
title = "Data-base mining - discovering new knowledge and
competitive advantage",
journal = "Information Systems Management",
year = "1995",
volume = "12",
number = "4",
pages = "26--31",
abstract = "Buried in the huge data bases assembled by large
organizations is information useful for generating new
facts and relationships that can provide significant
competitive advantage. This article describes how data
base mining extracts knowledge from existing data
bases, data base mining applications and their
limitations, and bottom-line benefits.",
}
Data mining, Hypergraph Transversals, and Machine Learning, Dimitrios Gunopulos and Roni Khardon and Heikki Mannila and Hannu Toivonen
@InProceedings{gunopulos.ea:hypergraph-transversals:97,
  author    = {Dimitrios Gunopulos and Roni Khardon and Heikki
               Mannila and Hannu Toivonen},
  title     = {Data mining, Hypergraph Transversals, and Machine
               Learning},
  booktitle = {Proceedings of the Sixteenth {ACM}
               {SIGACT}-{SIGMOD}-{SIGART} Symposium on Principles of
               Database Systems},
  address   = {Tucson, Arizona},
  year      = {1997},
  month     = "12--15 " # may,
}
Data mining - mother lode or fools gold, B. Gunter
@Article{gunter:mother-lode:96,
author = "B. Gunter",
address = "Pob 9, Hopewell, Nj, 08525",
title = "Data mining - mother lode or fools gold",
journal = "Quality Progress",
year = "1996",
volume = "29",
number = "4",
pages = "113",
}
Classification trees with Neural Network Feature Extraction, Heng Guo and Saul B. Gelfand
@Article{guo.ea:classification-trees:92,
author = "Heng Guo and Saul B. Gelfand",
title = "Classification trees with Neural Network Feature
Extraction",
journal = "IEEE Transactions on Neural Networks",
year = "1992",
volume = "3",
number = "6",
pages = "923--933",
month = nov,
keywords = "Neural Nets, binary decision trees, CART",
annote = "Uses small multilayer nets at the decision nodes of a
binary classification tree. Comparison with CART",
}
Deep Knowledge Discovery from Natural Language Texts, Udo Hahn and Klemens Schnattinger
@InProceedings{hahn.ea:deep-natural:97,
  author   = {Udo Hahn and Klemens Schnattinger},
  title    = {Deep Knowledge Discovery from Natural Language Texts},
  pages    = {175},
  crossref = {heckerman.ea:proceedings-third:97},
}
Integrating and Mining Distributed Customer Databases, Ira J. Haimowitz and Ozden Gur-Ali and Henry Schwarz
@InProceedings{haimowitz.ea:integrating-distributed:97,
title = "Integrating and Mining Distributed Customer
Databases",
author = "Ira J. Haimowitz and {\"{O}}zden G{\"{u}}r-Ali and
Henry Schwarz",
pages = "179",
crossref = "heckerman.ea:proceedings-third:97",
}
Analyzing fd inference in relational databases, J. Hale and S. Shenoi
@article{hale.ea:analyzing-fd:96,
  author      = {J. Hale and S. Shenoi},
  title       = {Analyzing {FD} inference in relational databases},
  journal     = {Data \& Knowledge Engineering},
  year        = {1996},
  volume      = {18},
  number      = {2},
  pages       = {167--183},
  affiliation = {Univ Tulsa, Dept Math \& Comp Sci, Keplinger Hall,
                 Tulsa, Ok, 74104; Univ Tulsa, Dept Math \& Comp Sci,
                 Tulsa, Ok, 74104},
  abstract    = {This paper deals with the general problem of analyzing
                 fuzzy inference based on functional dependencies (FDs)
                 in database relations. Fuzzy inference, the ability to
                 infer fuzzy set values, generalizes imprecise
                 (set-valued) inference and precise inference. Likewise,
                 fuzzy relational databases generalize their classical
                 and imprecise counterparts by supporting fuzzy
                 information storage and retrieval. Inference analysis
                 is performed using a special abstract model which
                 maintains vital links to classical, imprecise and fuzzy
                 relational database models. These links increase the
                 utility of the inference formalism in practical
                 applications involving ``catalytic inference
                 analysis'', including knowledge discovery and database
                 security.},
  keywords    = {DATABASE INFERENCE, FUNCTIONAL DEPENDENCIES, KNOWLEDGE
                 DISCOVERY, DATABASE SECURITY, FUZZY SETS},
}
A practical formalism for imprecise inference control, J. Hale and J. Threet and S. Shenoi
@article{hale.ea:practical-formalism:94,
  author      = {J. Hale and J. Threet and S. Shenoi},
  title       = {A practical formalism for imprecise inference control},
  journal     = {IFIP Transactions A: Computer Science and Technology},
  year        = {1994},
  volume      = {60},
  pages       = {139--156},
  affiliation = {Univ Tulsa, Dept Math \& Comp Sci, Keplinger Hall,
                 Tulsa, Ok, 74104},
  abstract    = {This paper describes a powerful, yet practical,
                 formalism for modeling and controlling imprecise
                 FD-based inference in relational database systems. The
                 formalism provides a canonical representation of
                 inference which unifies precise inference and the
                 primitive imprecise inference mechanisms of abduction
                 and partial deduction. Whereas other imprecise
                 (partial) inference models estimate the probability of
                 making inferences, the formalism supports the analysis
                 of the actual imprecise values inferred in a database
                 extension. Imprecise inference is analyzed by
                 transforming a precise database augmented with
                 additional ``catalytic'' relations, conveying possibly
                 imprecise a priori knowledge, into an equivalent
                 imprecise database. The analysis of imprecise inference
                 and the related inference control methodology are
                 highly flexible and robust. They can be directly
                 applied to classical, MLS, and imprecise databases.
                 With minimal modifications, they also can be used in
                 knowledge discovery or database mining.},
  keywords    = {DATABASE MANAGEMENT, GENERAL, ARTIFICIAL INTELLIGENCE,
                 DEDUCTION AND THEOREM PROVING},
}
Estimating dblearns potential for knowledge discovery in databases, H. J. Hamilton and D. R. Fudger
@article{hamilton.ea:estimating-dblearns:95,
  author      = {H. J. Hamilton and D. R. Fudger},
  title       = {Estimating {DBLEARN}'s potential for knowledge discovery
                 in databases},
  journal     = {Computational Intelligence},
  year        = {1995},
  volume      = {11},
  number      = {2},
  pages       = {280--296},
  affiliation = {Univ Regina, Dept Comp Sci, Regina, Sk S4S 0A2,
                 Canada},
  abstract    = {We propose a procedure for estimating DBLEARN's
                 potential for knowledge discovery, given a relational
                 database and concept hierarchies. This procedure is
                 most useful for evaluating alternative concept
                 hierarchies for the same database. The DBLEARN
                 knowledge discovery program uses an attribute-oriented
                 inductive-inference method to discover potentially
                 significant high-level relationships in a database. A
                 concept forest, with at most one concept hierarchy for
                 each attribute, defines the possible generalizations
                 that DBLEARN can make for a database. The potential for
                 discovery in a database is estimated by examining the
                 complexity of the corresponding concept forest. Two
                 heuristic measures are defined based on the number,
                 depth, and height of the interior nodes. Higher values
                 for these measures indicate more complex concept
                 forests and arguably more potential for discovery.
                 Experimental results using a variety of concept forests
                 and four commercial databases show that in practice
                 both measures permit quite reliable decisions to be
                 made; thus, the simplest may be most appropriate.},
  keywords    = {KNOWLEDGE DISCOVERY, CONCEPT HIERARCHIES, DISCOVERY
                 POTENTIAL, DATABASES, MACHINE LEARNING},
}
Knowledge Discovery in Databases: An Attribute-oriented Approach, Jiawei Han and Yandong Cai and Nick Cercone
@inproceedings{han.ea:attribute-oriented-approach:92,
  title     = {Knowledge Discovery in Databases: An Attribute-oriented
               Approach},
  author    = {Jiawei Han and Yandong Cai and Nick Cercone},
  booktitle = {Proceedings of the 18th {VLDB} Conference},
  address   = {Vancouver, British Columbia, Canada},
  month     = aug,
  year      = {1992},
  pages     = {547--559},
  keywords  = {dblearn},
  annote    = {simple hierarchies are used to generate attribute summaries},
}
Data-driven discovery of quantitative rules in relational databases, J. W. Han and Y. D. Cai and N. Cercone
@article{han.ea:data-driven-quantitative:93,
  author      = {J. W. Han and Y. D. Cai and N. Cercone},
  title       = {Data-driven discovery of quantitative rules in
                 relational databases},
  journal     = {IEEE Transactions on Knowledge and Data Engineering},
  year        = {1993},
  month       = feb,
  volume      = {5},
  number      = {1},
  pages       = {29--40},
  affiliation = {Simon Fraser Univ, Sch Comp Sci, Ctr Syst Sci, Burnaby
                 V5A 1S6, Bc, Canada},
  abstract    = {A quantitative rule is a rule associated with
                 quantitative information which assesses the
                 representativeness of the rule in the database. In this
                 paper, an efficient induction method is developed for
                 learning quantitative rules in relational databases.
                 With the assistance of knowledge about concept
                 hierarchies, data relevance, and expected rule forms,
                 attribute-oriented induction can be performed on the
                 database, which integrates database operations with the
                 learning process and provides a simple, efficient way
                 of learning quantitative rules from large databases.
                 Our method learns both characteristic rules and
                 classification rules. Quantitative information
                 facilitates quantitative reasoning, incremental
                 learning, and learning in the presence of noise.
                 Moreover, learning qualitative rules can be treated as
                 a special case of learning quantitative rules. Our
                 paper shows that attribute-oriented induction provides
                 an efficient and effective mechanism for learning
                 various kinds of knowledge rules from relational
                 databases.},
  keywords    = {KNOWLEDGE DISCOVERY IN DATABASES, MACHINE LEARNING,
                 ATTRIBUTE-ORIENTED INDUCTION, QUANTITATIVE RULES,
                 CHARACTERISTIC RULES, CLASSIFICATION RULES, DATA-DRIVEN
                 LEARNING ALGORITHMS},
}
DBLEARN: A Knowledge Discovery System for Large Databases, J. Han and Y. Cai and N. Cercone and Y. Huang
@inproceedings{han.ea:dblearn-system:92,
  author    = {J. Han and Y. Cai and N. Cercone and Y. Huang},
  title     = {{DBLEARN}: {A} Knowledge Discovery System for Large
               Databases},
  booktitle = {Int. Conf. on Information and Knowledge Management,
               Baltimore},
  year      = {1992},
  month     = nov,
}
DBLearn: A System Prototype for Knowledge Discovery in Relational Databases, Jiawei Han and Yongjian Fu and Yue Huang and Yandong Cai and N. Cercone
@article{han.ea:dblearn-system:94,
  author         = {Jiawei Han and Yongjian Fu and Yue Huang and Yandong
                    Cai and N. Cercone},
  title          = {{DBLearn}: {A} System Prototype for Knowledge
                    Discovery in Relational Databases},
  journal        = {SIGMOD Record (ACM Special Interest Group on
                    Management of Data)},
  volume         = {23},
  number         = {2},
  pages          = {516},
  month          = jun,
  year           = {1994},
  ISSN           = {0163-5808},
  affiliation    = {Sch. of Comput. Sci., Simon Fraser Univ., Burnaby, BC,
                    Canada},
  classification = {C6160D (Relational DBMS); C6160K (Deductive
                    databases)},
  keywords       = {DBLearn; System prototype; Knowledge discovery;
                    Relational databases; Data mining system; Knowledge
                    rule extraction; High level learning interfaces;
                    Automatic refinement; Concept hierarchies; Efficient
                    discovery algorithms; Performance; Knowledge mining;
                    Object-oriented databases; Deductive databases; Spatial
                    databases},
  thesaurus      = {Deductive databases; Knowledge acquisition; Relational
                    databases; Very large databases},
  xxcrossref     = {Anonymous:1994:ASI},
}
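DBMiner: Interactive Mining of Multiple-Level Knowledge in Relational Databases, Jiawei Han and Yongjian Fu and Wei Wang and Jenny Chiang and Osmar R. Zaiane and Krzysztof Koperski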
@inproceedings{han.ea:dbminer-interactive:96,
  author    = {Jiawei Han and Yongjian Fu and Wei Wang and Jenny Chiang and
               Osmar R. Za{\"\i}ane and Krzysztof Koperski},
  title     = {{DBMiner}: Interactive Mining of Multiple-Level Knowledge in
               Relational Databases},
  booktitle = {Proceedings of the 1996 {ACM} {SIGMOD} International
               Conference on Management of Data},
  editor    = {H. V. Jagadish and Inderpal Singh Mumick},
  address   = {Montreal, Quebec, Canada},
  pages     = {550},
  year      = {1996},
  month     = "4--6~" # jun,
}
DBMiner: A System for Mining Knowledge in Large Relational Databases, Jiawei Han and Yongjian Fu and Wei Wang and Jenny Chiang and Wan Gong and Krzysztof Koperski and Deyi Li and Yijun Lu and Amynmohamed Rajan and Nebojsa Stefanovic and Betty Xia and Osmar R. Zaiane
@inproceedings{han.ea:dbminer-system:96,
  author   = {Jiawei Han and Yongjian Fu and Wei Wang and Jenny
              Chiang and Wan Gong and Krzysztof Koperski and Deyi Li
              and Yijun Lu and Amynmohamed Rajan and Nebojsa
              Stefanovic and Betty Xia and Osmar R. Za{\"\i}ane},
  title    = {{DBMiner}: {A} System for Mining Knowledge in Large
              Relational Databases},
  pages    = {250},
  crossref = {simoudis.ea:proceedings-second:96},
}
Intelligent query answering by knowledge discovery techniques, J. W. Han and Y. Huang and N. Cercone and Y. J. Fu
@article{han.ea:intelligent-query:96,
  author      = {J. W. Han and Y. Huang and N. Cercone and Y. J. Fu},
  title       = {Intelligent query answering by knowledge discovery
                 techniques},
  journal     = {IEEE Transactions on Knowledge and Data Engineering},
  year        = {1996},
  volume      = {8},
  number      = {3},
  pages       = {373--390},
  affiliation = {Simon Fraser Univ, Sch Comp Sci, Burnaby, Bc V5A 1S6,
                 Canada; Univ Regina, Dept Comp Sci, Regina, Sk S4S 0A2,
                 Canada},
  abstract    = {Knowledge discovery facilitates querying database
                 knowledge and intelligent query answering in database
                 systems. In this paper, we investigate the application
                 of discovered knowledge, concept hierarchies, and
                 knowledge discovery tools for intelligent query
                 answering in database systems. A knowledge-rich data
                 model is constructed to incorporate discovered
                 knowledge and knowledge discovery tools. Queries are
                 classified into data queries and knowledge queries.
                 Both types of queries can be answered directly by
                 simple retrieval or intelligently by analyzing the
                 intent of query and providing generalized, neighborhood
                 or associated information using stored or discovered
                 knowledge. Techniques have been developed for
                 intelligent query answering using discovered knowledge
                 and/or knowledge discovery tools, which includes
                 generalization, data summarization, concept clustering,
                 rule discovery, query rewriting, deduction, lazy
                 evaluation, application of multiple-layered databases,
                 etc. Our study shows that knowledge discovery
                 substantially broadens the spectrum of intelligent
                 query answering and may have deep implications on query
                 answering in data- and knowledge-base systems.},
  keywords    = {RELATIONAL DATABASES, MODEL, DATABASE AND
                 KNOWLEDGE-BASE SYSTEMS, KNOWLEDGE DISCOVERY IN
                 DATABASES, KNOWLEDGE-RICH DATA MODEL, INTELLIGENT QUERY
                 ANSWERING, MULTIPLE LAYERED DATABASES, QUERY ANALYSIS
                 AND QUERY PROCESSING},
}
Mining Multi-Dimensional Association Rules Using Data Cubes, Jiawei Han and Micheline Kamber and Jenny Chiang
@techreport{han.ea:multi-dimensional-association:97,
  author      = {Jiawei Han and Micheline Kamber and Jenny Chiang},
  title       = {Mining Multi-Dimensional Association Rules Using Data
                 Cubes},
  institution = {School of Computing Science, Simon Fraser University},
  number      = {TR 97-06},
  month       = feb,
  year        = {1997},
  org         = {SFU-CMPT},
  abstract    = {Methods for mining association rules have been studied
                 extensively. However, most previous studies have been
                 confined to the mining of single dimensional and single
                 variable association rules. There are applications in
                 relational databases and data warehouses which require
                 the mining of multi-dimensional association rules. In
                 this paper, we study efficient methods for mining
                 multi-dimensional association rules using a data cube
                 structure, a popular data structure used in data
                 warehouses. Efficient algorithms are developed for
                 mining multi-dimensional association rules by either
                 using an existing data cube, when available, or
                 construction of a data cube on the fly. In both cases,
                 the algorithms outperform the direct application of a
                 table-based Apriori algorithm to the mining of
                 multi-dimensional association rules. The extension of
                 the method for mining multi-level, multi-dimensional
                 association rules and meta-rule guided mining is also
                 discussed in the paper.},
  URL         = {ftp://fas.sfu.ca/pub/cs/TR/1997/CMPT97-06.ps.Z},
}
Discovery of Multiple-Level Association Rules from Large Databases, Jiawei Han and Yongjian Fu
@techreport{han.ea:multiple-level-association:95,
  author      = {Jiawei Han and Yongjian Fu},
  title       = {Discovery of Multiple-Level Association Rules from
                 Large Databases},
  institution = {School of Computing Science, Simon Fraser University},
  number      = {TR 95-05},
  month       = mar,
  year        = {1995},
  org         = {SFU-CMPT},
  pages       = {35},
  abstract    = {Discovery of association rules from large databases
                 has been a focused topic recently in the research into
                 database mining. Previous studies discover association
                 rules at a single concept level, however, mining
                 association rules at multiple concept levels may lead
                 to finding more informative and refined knowledge from
                 data. In this paper, we study efficient methods for
                 mining multiple-level association rules from large
                 transaction databases. A top-down progressive deepening
                 method is proposed by extension of some existing
                 (single-level) association rule mining algorithms. In
                 particular, a group of algorithms for mining
                 multiple-level association rules are developed and
                 their relative performance are tested on different
                 kinds of transaction data. Relaxation of the rule
                 conditions for finding flexible multiple-level
                 association rules is also discussed. Our study shows
                 that efficient algorithms can be developed for the
                 discovery of interesting and strong multiple-level
                 association rules from large databases.},
  URL         = {ftp://ftp.fas.sfu.ca/pub/cs/techreports/1995/CMPT95-05.ps.Z},
}
Resource and Knowledge Discovery in Global Information Systems: A Multiple Layered Database Approach, Jiawei Han and Osmar R. Zaiane and Yongjian Fu
@techreport{han.ea:resource-global:94,
  author      = {Jiawei Han and Osmar R. Zaiane and Yongjian Fu},
  title       = {Resource and Knowledge Discovery in Global Information
                 Systems: {A} Multiple Layered Database Approach},
  institution = {School of Computing Science, Simon Fraser University},
  number      = {TR 94-10},
  month       = nov,
  year        = {1994},
  org         = {SFU-CMPT},
  pages       = {30},
  keywords    = {Resource Discovery, Knowledge Discovery, Data Mining,
                 Multiple Layered Database, Internet, World Wide Web,
                 Global Information Network},
  abstract    = {With huge amounts of information connected to the
                 global information network (Internet), efficient and
                 effective discovery of resource and knowledge from the
                 ``global information base'' has become an imminent
                 research issue, especially with the advent of the
                 Information Highway. In this article, a multiple
                 layered database (MLDB) approach is proposed to handle
                 the resource and knowledge discovery in global
                 information base. A multiple layered database is a
                 database formed by generalization and transformation of
                 the information, layer-by-layer, starting from the
                 original information base (treated as layer-0, the
                 primitive layer). Information retrieval, data mining,
                 and data analysis techniques can be used to extract and
                 transform information from a lower layer database to a
                 higher one. Layer-1 and higher layers of an MLDB can be
                 modeled by an extended-relational or object-oriented
                 model, constructed automatically, and updated
                 incrementally. Information at all the layers except the
                 primitive one can be stored, managed and retrieved by
                 the available database technology; resources can be
                 found by controlled search through different layers of
                 the database; and knowledge discovery can be performed
                 efficiently in such a multiple layered database.},
  URL         = {ftp://ftp.fas.sfu.ca/pub/cs/techreports/1994/CMPT94-10.ps.Z},
  note        = {(also CSS/LCCR TR94-24)},
}
Data Mining Techniques, Jiawei Han
@inproceedings{han:techniques:96,
  author    = {Jiawei Han},
  title     = {Data Mining Techniques},
  booktitle = {Proceedings of the 1996 {ACM} {SIGMOD} International
               Conference on Management of Data},
  editor    = {H. V. Jagadish and Inderpal Singh Mumick},
  address   = {Montreal, Quebec, Canada},
  pages     = {545},
  year      = {1996},
  month     = "4--6~" # jun,
}
Towards efficient induction mechanisms in database-systems, J. W. Han
@article{han:towards-efficient:94,
  author      = {J. W. Han},
  title       = {Towards efficient induction mechanisms in
                 database-systems},
  journal     = {Theoretical Computer Science},
  year        = {1994},
  volume      = {133},
  number      = {2},
  pages       = {361--385},
  affiliation = {Simon Fraser Univ, Sch Comp Sci, Burnaby V5A 1S6, Bc,
                 Canada},
  abstract    = {With the wide availability of huge amounts of data in
                 database systems, the extraction of knowledge in
                 databases by efficient and powerful induction or
                 knowledge discovery mechanisms has become an important
                 issue in the construction of new generation database
                 and knowledge-base systems. In this article, an
                 attribute-oriented induction method for knowledge
                 discovery in databases is investigated, which provides
                 an efficient, set-oriented induction mechanism for
                 extraction of different kinds of knowledge rules, such
                 as characteristic rules, discriminant rules, data
                 evolution regularities and high level dependency rules
                 in large relational databases. Our study shows that the
                 method is robust in the existence of noise and database
                 updates, is extensible to knowledge discovery in
                 advanced and/or special purpose databases, such as
                 object-oriented databases, active databases, spatial
                 databases, etc., and has wide applications.},
}
Mega-Classification: Discovering Motifs in Massive Datastreams, N. L. Harris and L. Hunter and D. J. States
@inproceedings{harris.ea:mega-classification-discovering:92,
  title     = {Mega-Classification: Discovering Motifs in Massive
               Datastreams},
  author    = {N. L. Harris and L. Hunter and D. J. States},
  booktitle = {Proc.\ of AAAI-92},
  pages     = {837--842},
  year      = {1992},
}
Knowledge discovery from telecommunication network alarm databases, K. Hatonen and M. Klemettinen and H. Mannila and P. Ronkainen and H. Toivonen
@inproceedings{hatonen.ea:telecommunication-network:96,
  author      = {K. H{\"a}t{\"o}nen and M. Klemettinen and H. Mannila and P.
                 Ronkainen and H. Toivonen},
  title       = {Knowledge discovery from telecommunication network
                 alarm databases},
  editor      = {Stanley Y. W. Su},
  booktitle   = {Proceedings of the twelfth International Conference on
                 Data Engineering, February 26--March 1, 1996, New
                 Orleans, Louisiana},
  publisher   = {IEEE Computer Society Press},
  address     = {1109 Spring Street, Suite 300, Silver Spring, MD
                 20910, USA},
  year        = {1996},
  pages       = {115--122},
  affiliation = {Dept. of Comput. Sci., Helsinki Univ., Finland},
  keywords    = {Knowledge discovery, data mining, frequent episodes,
                 telecommunication alarm databases, telecommunication
                 network management, SGML},
  URL         = {http://www.cs.Helsinki.FI/research/pmdm/datamining/ICDE96.html},
  abstract    = {A telecommunication network produces daily large
                 amounts of alarm data. The data contains hidden
                 valuable knowledge about the behavior of the network.
                 This knowledge can be used in filtering redundant
                 alarms, locating problems in the network, and possibly
                 in predicting severe faults. We describe the TASA
                 (Telecommunication Network Alarm Sequence Analyzer)
                 system for discovering and browsing knowledge from
                 large alarm databases.},
}
Learning qualitative models of dynamic systems, D. T. Hau and E. W. Coiera
@article{hau.ea:learning-qualitative:97,
  author      = {D. T. Hau and E. W. Coiera},
  title       = {Learning qualitative models of dynamic systems},
  journal     = {Machine Learning},
  year        = {1997},
  volume      = {26},
  number      = {2--3},
  pages       = {177--211},
  affiliation = {Johns Hopkins Univ, Sch Med, Baltimore, Md, 21205;
                 Hewlett Packard Labs, Bristol Bs12 6Qz, Avon, England},
  abstract    = {The automated construction of dynamic system models is
                 an important application area for ILP. We describe a
                 method that learns qualitative models from time-varying
                 physiological signals. The goal is to understand the
                 complexity of the learning task when faced with
                 numerical data, what signal processing techniques are
                 required, and how this affects learning. The
                 qualitative representation is based on Kuipers' QSIM.
                 The learning algorithm for model construction is based
                 on Coiera's GENMODEL. We show that QSIM models are
                 efficiently PAC learnable from positive examples only,
                 and that GENMODEL is an ILP algorithm for efficiently
                 constructing a QSIM model. We describe both GENMODEL
                 which performs RLGG on qualitative states to learn a
                 QSIM model, and the front-end processing and segmenting
                 stages that transform a signal into a set of
                 qualitative states. Next we describe results of
                 experiments on data from six cardiac bypass patients.
                 Useful models were obtained, representing both normal
                 and abnormal physiological states. Model variation
                 across time and across different levels of temporal
                 abstraction and fault tolerance is explored. The
                 assumption made by many previous workers that the
                 abstraction of examples from data can be separated from
                 the learning task is not supported by this study.
                 Firstly, the effects of noise in the numerical data
                 manifest themselves in the qualitative examples.
                 Secondly, the models learned are directly dependent on
                 the initial qualitative abstraction chosen.},
  keywords    = {SIMULATION, inductive logic programming, qualitative
                 modelling, system identification, PAC learning,
                 physiological modelling, cardiovascular system, data
                 mining, patient monitoring},
}
Digging For Gold, Emma Haughton
@article{haughton:digging-gold:94,
  title    = {Digging For Gold},
  author   = {Emma Haughton},
  journal  = {Computing},
  month    = jan,
  year     = {1994},
  pages    = {20--21},
  keywords = {Logica, applications, neural networks, Data Mariner, Netmap,
              4thought, autonet, recognition research},
}
KDD for Science Data Analysis: Issues and Examples, Usama Fayyad and David Haussler and Paul Stolorz
@inproceedings{haussler.ea:kdd-science:96,
  author   = {Usama Fayyad and David Haussler and Paul Stolorz},
  title    = {{KDD} for Science Data Analysis: Issues and Examples},
  pages    = {50},
  crossref = {simoudis.ea:proceedings-second:96},
}
Quantifying Inductive Bias: AI Learning Algorithms and Valiant's Learning Framework, D. Haussler
@article{haussler:quantifying-inductive:88,
  title   = {Quantifying Inductive Bias: {AI} Learning Algorithms and
             Valiant's Learning Framework},
  author  = {D. Haussler},
  journal = {Artificial Intelligence},
  volume  = {36},
  number  = {2},
  pages   = {177--221},
  month   = sep,
  year    = {1988},
}
Proceedings of the Third International Conference on Knowledge Discovery and Data Mining (KDD-97), David Heckerman and Heikki Mannila and Daryl Pregibon and Ramasamy Uthurusamy (Eds)
@proceedings{heckerman.ea:proceedings-third:97,
  editor    = {David Heckerman and Heikki Mannila and Daryl Pregibon
               and Ramasamy Uthurusamy},
  title     = {Proceedings of the Third International Conference on
               Knowledge Discovery and Data Mining ({KDD}-97)},
  booktitle = {Proceedings of the Third International Conference on
               Knowledge Discovery and Data Mining ({KDD}-97)},
  publisher = {AAAI Press},
  year      = {1997},
}
The Data Gold Rush --- Here's how corporations, researchers, and scientists are using data-mining techniques to discover everything from new customers to new galaxies, Sara Reese Hedberg
@article{hedberg:gold-rush:95,
  title   = {The Data Gold Rush --- Here's how corporations, researchers,
             and scientists are using data-mining techniques to discover
             everything from new customers to new galaxies},
  author  = {Sara Reese Hedberg},
  journal = {Byte Magazine},
  year    = {1995},
  month   = oct,
  volume  = {20},
  number  = {10},
  pages   = {83--??},
  issn    = {0360-5280},
}
Parallelism speeds data mining, S. R. Hedberg
@article{hedberg:parallelism-speeds:95,
  author      = {S. R. Hedberg},
  title       = {Parallelism speeds data mining},
  journal     = {IEEE Parallel \& Distributed Technology},
  year        = {1995},
  volume      = {3},
  number      = {4},
  pages       = {3--6},
  affiliation = {Zymogenet Inc, 1201 Eastlake Ave E, Seattle, Wa,
                 98102},
}
Searching for the mother lode - tales of the first data miners, S. R. Hedberg
@article{hedberg:searching-mother:96,
  author      = {S. R. Hedberg},
  title       = {Searching for the mother lode - tales of the first
                 data miners},
  journal     = {IEEE Expert: Intelligent Systems \& Their
                 Applications},
  year        = {1996},
  volume      = {11},
  number      = {5},
  pages       = {4--7},
  affiliation = {Mit, Spoken Language Syst Grp, Cambridge, Ma, 02139},
}
Troubleshooting CFM 56-3 Engines for the Boeing 737 --- Using CBR and Data-Mining, R. Heider
@article{heider:troubleshooting-cfm:96,
  title   = {Troubleshooting {CFM} 56-3 Engines for the {Boeing 737} ---
             Using {CBR} and Data-Mining},
  author  = {R. Heider},
  journal = {Lecture Notes in Computer Science},
  year    = {1996},
  volume  = {1168},
  pages   = {512--??},
  issn    = {0302-9743},
}
GA-Based Rule Enhancement in Concept Learning, Jukka Hekanaho
@inproceedings{hekanaho:ga-based-rule:97,
  author   = {Jukka Hekanaho},
  title    = {{GA}-Based Rule Enhancement in Concept Learning},
  crossref = {heckerman.ea:proceedings-third:97},
  pages    = {183},
}
Target-Independent Mining for Scientific Data: Capturing Transients and Trends for Phenomena Mining, Thomas H. Hinke and John Rushing and Heggere Ranganath and Sara J. Graves
@inproceedings{hinke.ea:target-independent-scientific:97,
  author   = {Thomas H. Hinke and John Rushing and Heggere Ranganath and
              Sara J. Graves},
  title    = {Target-Independent Mining for Scientific Data: Capturing
              Transients and Trends for Phenomena Mining},
  crossref = {heckerman.ea:proceedings-third:97},
  pages    = {187},
}
Zeta: A Global Method for Discretization of Continuous Variables, K. M. Ho and P. D. Scott
@inproceedings{ho.ea:zeta-global:97,
  author   = {K. M. Ho and P. D. Scott},
  title    = {Zeta: {A} Global Method for Discretization of Continuous
              Variables},
  crossref = {heckerman.ea:proceedings-third:97},
  pages    = {191},
}
Knowledge Discovery in RNA Sequence Families of HIV Using Scalable Computers, Ivo L. Hofacker and Martijn A. Huynen and Peter F. Stadler and Paul E. Stolorz
@inproceedings{hofacker.ea:rna-sequence:96,
  author   = {Ivo L. Hofacker and Martijn A. Huynen and Peter F. Stadler and
              Paul E. Stolorz},
  title    = {Knowledge Discovery in {RNA} Sequence Families of {HIV} Using
              Scalable Computers},
  crossref = {simoudis.ea:proceedings-second:96},
  pages    = {20},
}
Inferring Hierarchical Clustering Structures by Deterministic Annealing, Thomas Hofmann and Joachim M. Buhmann
@inproceedings{hofmann.ea:inferring-hierarchical:96,
  author   = {Thomas Hofmann and Joachim M. Buhmann},
  title    = {Inferring Hierarchical Clustering Structures by Deterministic
              Annealing},
  crossref = {simoudis.ea:proceedings-second:96},
  pages    = {363},
}
Induction: processes of inference, learning and discovery, John H. Holland and Keith J. Holyoak and Richard E. Nisbett and Paul R. Thagard
@book{holland.ea:induction-processes:86,
  title     = {Induction: processes of inference, learning and discovery},
  author    = {John H. Holland and Keith J. Holyoak and Richard E. Nisbett
               and Paul R. Thagard},
  series    = {Computational models of cognition and perception},
  publisher = {MIT Press},
  address   = {Cambridge},
  year      = {1986},
}
Adaptation in natural and artificial systems, John H. Holland
@book{holland:adaptation-natural:75,
  author    = {John H. Holland},
  title     = {Adaptation in Natural and Artificial Systems},
  publisher = {University of Michigan Press},
  address   = {Ann Arbor},
  year      = {1975},
}
Escaping brittleness: the possibilities of general purpose algorithms applied to parallel rule-based systems, John H. Holland
@incollection{holland:escaping-brittleness:86,
  title    = {Escaping brittleness: the possibilities of general purpose
              algorithms applied to parallel rule-based systems},
  author   = {John H. Holland},
  pages    = {593--623},
  crossref = {michalski.ea:machine-learning:86},
}
Architectural Support for Data Mining, Marcel Holsheimer and Martin L. Kersten
@techreport{holsheimer.ea:architectural-support:,
  author      = {Marcel Holsheimer and Martin L. Kersten},
  title       = {Architectural Support for Data Mining},
  institution = {CWI Amsterdam},
  address     = {PO Box 94079, 1090 GB, Amsterdam, The Netherlands},
  number      = {CS-R9429},
  year        = {1994},
  URL         = {ftp://ftp.cwi.nl/pub/CWIreports/AA/CS-R9429.ps.Z},
  abstract    = {One of the main obstacles in applying data mining
                 techniques to large, real-world databases is the lack
                 of efficient data management. In this paper, we present
                 the design and implementation of an effective two-level
                 architecture for a data mining environment. It consists
                 of a mining tool and a parallel DBMS server. The mining
                 tool organizes and controls the search process, while
                 the DBMS provides optimal response times for the few
                 query types being used by the tool. Key elements of our
                 architecture are its use of fast and simple database
                 operations, its re-use of results obtained by previous
                 queries, its maximal use of main-memory to keep the
                 database hot-set resident, and its parallel computation
                 of queries. Apart from a clear separation of
                 responsibilities, we show that this architecture leads
                 to competitive performance on large data sets.
                 Moreover, this architecture provides a flexible
                 experimentation platform for further studies in
                 optimization of repetitive database queries and quality
                 driven rule discovery schemes.
                 CR subject
                 classification (1991): Data storage representations
                 (E.2), Database systems (H.2.4) parallel systems,
                 query processing, Information search and retrieval
                 (H.3.3), Learning (I.2.6) induction, knowledge
                 acquisition
                 Keywords \& Phrases: data
                 mining, parallel databases, inductive learning,
                 knowledge discovery in databases},
}
A perspective on databases and data mining, Marcel Holsheimer and Martin L. Kersten and Heikki Mannila and Hannu Toivonen
@techreport{holsheimer.ea:perspective-on:95,
  author      = {Marcel Holsheimer and Martin L. Kersten and Heikki
                 Mannila and Hannu Toivonen},
  title       = {A perspective on databases and data mining},
  institution = {Centrum voor Wiskunde en Informatica (CWI)},
  number      = {CS-R9531},
  pages       = {10},
  ISSN        = {0169-118X},
  month       = apr # " 30",
  year        = {1995},
  keywords    = {Association rules, database techniques, generalization
                 hierarchies.},
  URL         = {ftp://ftp.cwi.nl/pub/CWIreports/AA/CS-R9531.ps.Z},
  abstract    = {We discuss the use of database methods for data
                 mining. Recently impressive results have been achieved
                 for some data mining problems using highly specialized
                 and clever data structures. We study how well one can
                 manage by using general purpose database management
                 systems. We illustrate our ideas by investigating the
                 use of a dbms for a well-researched area: the discovery
                 of association rules. We present a simple algorithm,
                 consisting of only union and intersection operations,
                 and show that it achieves quite good performance on an
                 efficient dbms. Our method can incorporate inheritance
                 hierarchies to the association rule algorithm easily.
                 We also present a technique that effectively reduces
                 the number of database operations when searching large
                 search spaces that contain only few interesting items.
                 Our work shows that database techniques are promising
                 for data mining: general architectures can achieve
                 reasonable results.},
  note        = {AA (Department of Algorithmics and Architecture)},
  annote      = {Originally contained the following fields and values -
                 booktitle, 128, note,CS-R9531},
}
Data Mining, The Search for Knowledge in Databases, Marcel Holsheimer and Arno Siebes
% NOTE: the abstract below stops where the stored text was cut off. Its
% closing delimiter and the end of the entry were missing and have been
% restored so that the entries which follow are parsed again.
@techreport{holsheimer.ea:search:94,
  author      = {Marcel Holsheimer and Arno Siebes},
  title       = {Data Mining, The Search for Knowledge in Databases},
  institution = {CWI, Amsterdam},
  type        = {Report},
  number      = {CS-R9406},
  address     = {PO Box 94079, 1090 GB Amsterdam, The Netherlands},
  year        = {1994},
  pages       = {88},
  url         = {ftp://ftp.cwi.nl/pub/CWIreports/AA/CS-R9406.ps.Z},
  keywords    = {Data Mining, Knowledge Engineering, Database
    applications, Information search and Retrieval.},
  annote      = {Large (88 pages), Includes information on ID3, AQ15,
    CN2, DBLearn, Meta-Dendral, Radix/Rx, Bacon and KEDS.},
  abstract    = {Data mining is the search for relationships and global
    patterns that exist in large databases, but are
    `hidden' among the vast amounts of data, such as a
    relationship between patient data and their medical
    diagnosis. These relationships represent valuable
    knowledge about the database and objects in the
    database and, if the database is a faithful mirror, of
    the real world registered by the database.},
}
Extraction and applications of statistical relationships in relational databases, W. C. Hou
@article{hou:extraction-applications:96,
  author      = {W. C. Hou},
  title       = {Extraction and applications of statistical
    relationships in relational databases},
  journal     = {{IEEE} Transactions on Knowledge and Data Engineering},
  volume      = {8},
  number      = {6},
  pages       = {939--945},
  year        = {1996},
  address     = {So Illinois Univ, Dept Comp Sci, Carbondale, Il,
    62901},
  abstract    = {In this paper, we discuss modeling and extraction of
    statistical relationships among attributes. Different
    methods are used for extraction of different types of
    relationships. A complete methodology for extraction is
    developed by integrating widely accepted statistical
    methods. Statistical relationships manifest embedded
    relationships in data and thus lend themselves
    naturally to estimating unknown attribute values and
    detecting unlikely values. We will carefully examine
    these applications and evaluate the usefulness of
    statistical relationships in these applications using a
    real-life database.},
  keywords    = {data mining, estimating unknown attribute values,
    integration of data mining techniques, integrity
    constraints, knowledge discovery in databases,
    statistical relationships},
}
Set-Oriented Mining for Association Rules in Relational Databases, M. Houtsma and A. Swami
@inproceedings{houtsma.ea:set-oriented-association:95,
  author      = {M. Houtsma and A. Swami},
  title       = {Set-Oriented Mining for Association Rules in
    Relational Databases},
  booktitle   = {Proceedings of the 11th International Conference on
    Data Engineering},
  editor      = {P. S. Yu and A. L. P. Chen},
  publisher   = {IEEE Computer Society Press},
  address     = {Los Alamitos, CA, USA},
  pages       = {25--34},
  month       = mar,
  year        = {1995},
}
Set-oriented data mining in relational databases, M. Houtsma and A. Swami
@article{houtsma.ea:set-oriented-relational:95,
  author      = {M. Houtsma and A. Swami},
  title       = {Set-oriented data mining in relational databases},
  journal     = {Data \& Knowledge Engineering},
  volume      = {17},
  number      = {3},
  pages       = {245--262},
  year        = {1995},
  address     = {Telemat Res Ctr, Pob 217, 7500 Ae Enschede,
    Netherlands Univ Twente, 7500 Ae Enschede, Netherlands
    Ibm Corp, Almaden Res Ctr, San Jose, Ca},
  abstract    = {Data mining is an important real-life application for
    businesses. It is critical to find efficient ways of
    mining large data sets. In order to benefit from the
    experience with relational databases, a set-oriented
    approach to mining data is needed. In such an approach,
    the data mining operations are expressed in terms of
    relational or set-oriented operations. Query
    optimization technology can then be used for efficient
    processing. In this paper, we describe set-oriented
    algorithms for mining association rules. Such
    algorithms imply performing multiple joins and thus may
    appear to be inherently less efficient than
    special-purpose algorithms. We develop new algorithms that can
    be expressed as SQL queries, and discuss optimization
    of these algorithms. After analytical evaluation, an
    algorithm named SETM emerges as the algorithm of
    choice. Algorithm SETM uses only simple database
    primitives, viz., sorting and merge-scan join.
    Algorithm SETM is simple, fast, and stable over the
    range of parameter values. It is easily parallelized
    and we suggest several additional optimizations. The
    set-oriented nature of Algorithm SETM makes it possible
    to develop extensions easily and its performance makes
    it feasible to build interactive data mining tools for
    large databases.},
  keywords    = {DATA MINING, OPTIMIZATION, SET-ORIENTED ALGORITHMS},
}
Learning in relational databases - a rough set approach, X. H. Hu and N. Cercone
@article{hu.ea:learning-relational:95,
  author      = {X. H. Hu and N. Cercone},
  title       = {Learning in relational databases - a rough set
    approach},
  journal     = {Computational Intelligence},
  volume      = {11},
  number      = {2},
  pages       = {323--338},
  year        = {1995},
  address     = {Univ Regina, Dept Comp Sci, Regina, Sk S4S 0A2,
    Canada},
  abstract    = {Knowledge discovery in databases, or data mining, is
    an important direction in the development of data and
    knowledge-based systems. Because of the huge amount of
    data stored in large numbers of existing databases, and
    because the amount of data generated in electronic
    forms is growing rapidly, it is necessary to develop
    efficient methods to extract knowledge from databases.
    An attribute-oriented rough set approach has been
    developed for knowledge discovery in databases. The
    method integrates machine-learning paradigm, especially
    learning-from-examples techniques, with rough set
    techniques. An attribute-oriented concept tree
    ascension technique is first applied in generalization,
    which substantially reduces the computational
    complexity of database learning processes. Then the
    cause-effect relationship among the attributes in the
    database is analyzed using rough set techniques, and
    the unimportant or irrelevant attributes are
    eliminated. Thus concise and strong rules with little
    or no redundant information can be learned efficiently.
    Our study shows that attribute-oriented induction
    combined with rough set theory provide an efficient and
    effective mechanism for knowledge discovery in database
    systems.},
  keywords    = {KNOWLEDGE DISCOVERY IN DATABASES, MACHINE LEARNING,
    ROUGH SET, ATTRIBUTE-ORIENTED INDUCTION},
}
Mining Knowledge Rules from Databases: A Rough Set Approach, X. Hu and N. Cercone
@inproceedings{hu.ea:rules-rough:96,
  author      = {X. Hu and N. Cercone},
  title       = {Mining Knowledge Rules from Databases: {A} Rough Set
    Approach},
  booktitle   = {Proceedings of the 12th International Conference on
    Data Engineering},
  publisher   = {IEEE Computer Society},
  address     = {Washington - Brussels - Tokyo},
  pages       = {96--105},
  month       = feb,
  year        = {1996},
}
Knowledge Discovery in Databases: An Attribute-Oriented Rough Set Approach, Tony Xiaohua Hu
@phdthesis{hu:attribute-oriented-rough:,
  author      = {Tony Xiaohua Hu},
  title       = {Knowledge Discovery in Databases: An
    Attribute-Oriented Rough Set Approach},
  url         = {http://www.cs.bham.ac.uk/~anp/dm_docs/tony_xiaohua_hu.ps.gz},
}
Conceptual Clustering and Concept Hierarchies in Knowledge Discovery., Xiaohua Hu
% The school field (required for a masters thesis) is taken from the annote.
@mastersthesis{hu:conceptual-clustering:93,
  author      = {Xiaohua Hu},
  title       = {Conceptual Clustering and Concept Hierarchies in
    Knowledge Discovery},
  school      = {Simon Fraser University},
  month       = jan,
  year        = {1993},
  url         = {ftp://fas.sfu.ca/pub/cs/theses/1993/XiaohuaHuMSc.ps.Z},
  abstract    = {Knowledge discovery is the nontrivial extraction of
    implicit, previously unknown, and potentially useful
    information from data. Knowledge discovery from a
    database is a form of machine learning where the
    discovered knowledge is represented in a high-level
    language. The growth in the size and number of existing
    databases far exceeds human abilities to analyse the
    data, which creates both a need and an opportunity for
    extracting knowledge from databases. In this thesis, I
    propose two algorithms for knowledge discovery in
    database systems. One algorithm finds knowledge rules
    associated with concepts in the different levels of the
    conceptual hierarchy; the algorithm is developed based
    on earlier attribute-oriented conceptual ascension
    techniques. The other algorithm combines a conceptual
    clustering technique and machine learning. It can find
    three kinds of rules, characteristic rules, inheritance
    rules, and domain knowledge, even in the absence of a
    conceptual hierarchy. The two algorithms are
    implemented as a component of the database learning
    system (DBLEARN) using C under Sybase/Unix environment.
    The test of DBLEARN on NSERC's grant information system
    shows that our method can discover many meaningful
    knowledge rules very quickly. The application of
    knowledge discovery in database is very wide. I will
    discuss how to apply DBLEARN to a lot of
    data-intensified areas such as Hospital's patient
    information system, customer database of telephone
    company, airplane company and bank, inventory system
    of department store and so on to find some interesting
    rules hidden among the data, and how the people in
    these companies can use these learned rules to help
    them.},
  annote      = {M.Sc Thesis. From Simon Fraser University. The
    supervisors are Nick Cercone and Jiawei Han. It
    discusses extensions to DBLearn. 88pages.},
}
Object aggregation and cluster identification - a knowledge discovery approach, X. H. Hu
@article{hu:object-aggregation:94,
  author      = {X. H. Hu},
  title       = {Object aggregation and cluster identification - a
    knowledge discovery approach},
  journal     = {Applied Mathematics Letters},
  volume      = {7},
  number      = {4},
  pages       = {29--34},
  year        = {1994},
  address     = {Univ Regina, Dept Comp Sci, Regina S4S 0A2,
    Saskatchewan, Canada},
  abstract    = {A method for object aggregation and cluster
    identification has been proposed for knowledge
    discovery in databases. By integrating conceptual
    clustering and machine learning (especially
    learning-from-examples) paradigms, the method
    classifies the data into different clusters, extracts
    the characteristics of each cluster, and discovers
    knowledge rules based on the relationships among
    different clusters. Different kinds of knowledge rules,
    including hierarchical, equivalence and inheritance
    rules can be discovered efficiently.},
  keywords    = {KNOWLEDGE DISCOVERY IN DATABASES, CONCEPTUAL
    CLUSTERING},
}
From Large to Huge: A Statistician's Reactions to KDD \& DM, Peter J. Huber
@inproceedings{huber:large-to:97,
  author      = {Peter J. Huber},
  title       = {From Large to Huge: {A} Statistician's Reactions to
    {KDD} \& {DM}},
  pages       = {304},
  crossref    = {heckerman.ea:proceedings-third:97},
}
Knowledge acquisition planning for inference from large databases., L. Hunter
% Conference paper (HICSS-23): the venue is stored in booktitle, not journal.
@inproceedings{hunter:acquisition-planning:90,
  author      = {L. Hunter},
  title       = {Knowledge acquisition planning for inference from
    large databases},
  booktitle   = {Proceedings of the 23rd {Hawaii} International
    Conference on System Sciences ({HICSS-23})},
  volume      = {2},
  pages       = {35--44},
  publisher   = {IEEE},
  year        = {1990},
  keywords    = {AI, database data base, mining, HICSS HICSS23
    HICSS90},
}
A Radial Basis Function Approach to Financial Time Series Analysis, James M. Hutchinson
@techreport{hutchinson:radial-basis:93,
  author      = {James M. Hutchinson},
  title       = {A Radial Basis Function Approach to Financial Time
    Series Analysis},
  institution = {Artificial Intelligence Laboratory, Massachusetts
    Institute of Technology (MIT)},
  address     = {Cambridge, Massachusetts},
  pages       = {160},
  month       = dec,
  year        = {1993},
  url         = {ftp://publications.ai.mit.edu/ai-publications/1000-1499/AITR-1457.ps.Z},
  abstract    = {Nonlinear multivariate statistical techniques on fast
    computers offer the potential to capture more of the
    dynamics of the high dimensional, noisy systems
    underlying financial markets than traditional models,
    while making fewer restrictive assumptions. This thesis
    presents a collection of practical techniques to
    address important estimation and confidence issues for
    Radial Basis Function networks arising from such a data
    driven approach, including efficient methods for
    parameter estimation and pruning, a pointwise
    prediction error estimator, and a methodology for
    controlling the ``data mining'' problem. Novel
    applications in the finance area are described,
    including customized, adaptive option pricing and stock
    price prediction.},
}
Data Mining: Extending the Information Warehouse Framework,
@misc{ibm:white-paper,
  key         = {ibm:white-paper},
  title       = {Data Mining: Extending the Information Warehouse
    Framework},
  note        = {IBM white paper on data mining},
  url         = {http://booksrv2.raleigh.ibm.com/cgi-bin/bookmgr/bookmgr.cmd/BOOKS/datamine},
}
Mining Data, IceBreaker
% The note field is required for unpublished entries; this item is only
% known here through its web address.
@unpublished{icebreaker:mining-data:96,
  author      = {{IceBreaker}},
  title       = {Mining Data},
  note        = {Web page},
  month       = sep,
  year        = {1996},
  url         = {http://www.bdt.com/icemfg/ice_it.htm},
  keywords    = {Data Mining},
}
Discovering Knowledge in Commercial Databases Using Modern Heuristic Techniques, B. de la Iglesia and J. C. W. Debuse and V. J. Rayward-Smith
@inproceedings{iglesia.ea:discovering-commercial:96,
  author      = {B. de la Iglesia and J. C. W. Debuse and V. J.
    Rayward-Smith},
  title       = {Discovering Knowledge in Commercial Databases Using
    Modern Heuristic Techniques},
  pages       = {44},
  crossref    = {simoudis.ea:proceedings-second:96},
}
IJIS Special issue on Knowledge Discovery in Databases and Knowledge Bases, G. Piatetsky-Shapiro (guest editor) (Ed)
% The editor field holds the name only; the guest-editor role is kept in
% annote so that the name is parsed correctly.
@article{ijis-special-issue:92,
  key         = {ijis-special-issue:92},
  editor      = {G. Piatetsky-Shapiro},
  title       = {{IJIS} Special issue on Knowledge Discovery in
    Databases and Knowledge Bases},
  journal     = {International Journal of Intelligent Systems},
  volume      = {7},
  number      = {7},
  month       = sep,
  year        = {1992},
  note        = {Special issue on Knowledge Discovery in Databases and
    Knowledge Bases, edited selection of best papers from
    AAAI KDD-91 workshop},
  annote      = {G. Piatetsky-Shapiro was guest editor of this issue.},
}
A database perspective on knowledge discovery, T. Imielinski and H. Mannila
@article{imielinski.ea:database-perspective:96,
  author      = {T. Imielinski and H. Mannila},
  title       = {A database perspective on knowledge discovery},
  journal     = {Communications of the ACM},
  volume      = {39},
  number      = {11},
  pages       = {58--64},
  month       = nov,
  year        = {1996},
  address     = {Rutgers State Univ, Dept Comp Sci, New Brunswick, Nj,
    08903 Univ Helsinki, Fin-00014 Helsinki, Finland},
}
DataMine: Application Programming Interface and Query Language for Database Mining, Tomasz Imielinski and Aashu Virmani and Amin Abdulghani
@inproceedings{imielinski.ea:datamine-application:96,
  author      = {Tomasz Imielinski and Aashu Virmani and Amin
    Abdulghani},
  title       = {DataMine: Application Programming Interface and Query
    Language for Database Mining},
  pages       = {256},
  crossref    = {simoudis.ea:proceedings-second:96},
}
Understanding Data Pattern Processing, W. H. Inmon and S. Osterfelt
@book{inmon.ea:understanding-pattern:91,
  author      = {W. H. Inmon and S. Osterfelt},
  title       = {Understanding Data Pattern Processing},
  publisher   = {QED Technical Publishing Group},
  address     = {Wellesley, MA},
  year        = {1991},
  annote      = {Piatetsky : a business-oriented, nontechnical book},
}
The Data Warehouse and Data Mining, W. H. Inmon
@article{inmon:warehouse:96,
  author      = {W. H. Inmon},
  title       = {The Data Warehouse and Data Mining},
  journal     = {Communications of the ACM},
  volume      = {39},
  number      = {11},
  pages       = {49--50},
  month       = nov,
  year        = {1996},
  issn        = {0001-0782},
}
Discovery of Relevant New Features by Generating Non-Linear Decision Trees, Andreas Ittner and Michael Schlosser
@inproceedings{ittner.ea:relevant-new:96,
  author      = {Andreas Ittner and Michael Schlosser},
  title       = {Discovery of Relevant New Features by Generating
    Non-Linear Decision Trees},
  pages       = {108},
  crossref    = {simoudis.ea:proceedings-second:96},
}
Data-Driven Discovery of Quantitative Rules in Relational Databases, J. Han and Y. Cai and N. Cercone
@article{j.ea:data-driven-quantitative:93,
  author      = {J. Han and Y. Cai and N. Cercone},
  title       = {Data-Driven Discovery of Quantitative Rules in
    Relational Databases},
  journal     = {IEEE Transactions on Knowledge and Data Engineering},
  volume      = {5},
  number      = {1},
  pages       = {29--40},
  month       = feb,
  year        = {1993},
}
Adjusting for Multiple Comparisons in Decision Tree Pruning, David Jensen and Matt Schmill
@inproceedings{jensen.ea:adjusting-multiple:97,
  author      = {David Jensen and Matt Schmill},
  title       = {Adjusting for Multiple Comparisons in Decision Tree
    Pruning},
  pages       = {195},
  crossref    = {heckerman.ea:proceedings-third:97},
}
SIPping from the Data Firehose, George H. John and Brian Lent
@inproceedings{john.ea:sipping-firehose:97,
  author      = {George H. John and Brian Lent},
  title       = {{SIP}ping from the Data Firehose},
  pages       = {199},
  crossref    = {heckerman.ea:proceedings-third:97},
}
Static Versus Dynamic Sampling for Data Mining, George H. John and Pat Langley
@inproceedings{john.ea:static-versus:96,
  author      = {George H. John and Pat Langley},
  title       = {Static Versus Dynamic Sampling for Data Mining},
  pages       = {367},
  crossref    = {simoudis.ea:proceedings-second:96},
}
Stock selection using rule induction, G. H. John and P. Miller and R. Kerber
@article{john.ea:stock-selection:96,
  author      = {G. H. John and P. Miller and R. Kerber},
  title       = {Stock selection using rule induction},
  journal     = {{IEEE} Expert-Intelligent Systems \& Their
    Applications},
  volume      = {11},
  number      = {5},
  pages       = {52--58},
  year        = {1996},
  address     = {Ibm Corp, Data Min Grp, Armonk, Ny, 10504 Stanford
    Univ, Dept Comp Sci, Stanford, Ca, 94305 Lockheed
    Martin Corp, Ctr Artificial Intelligence, Palo Alto,
    Ca, 94304},
}
Genetic-algorithm-based learning, Kenneth De Jong
% Surname given in comma form so that De Jong is not split into a middle
% name and a last name.
@incollection{jong:genetic-algorithm-based-learning:90,
  author      = {De Jong, Kenneth},
  title       = {Genetic-algorithm-based learning},
  pages       = {611--638},
  crossref    = {kodratoff.ea:machine-learning:90},
}
Seer: Maximum Likelihood Regression for Learning-Speed Curves, Ph.D. Carl Myers Kadie
@phdthesis{kadie:seer-maximum:,
  author      = {Carl Myers Kadie},
  title       = {Seer: Maximum Likelihood Regression for Learning-Speed
    Curves},
  school      = {Department of Computer Science, University of Illinois
    at Urbana-Champaign},
  url         = {ftp://ftp.cs.uiuc.edu/pub/TechReports/UIUCDCS-R-95-1874.ps.Z},
  annote      = {The research presented here focuses on modeling
    machine-learning performance},
}
Mining Generalized Term Associations: Count Propagation Algorithm, Jonghyun Kahng and Wen-Hsiang Kevin Liao and Dennis McLeod
% Authors separated with the and keyword; the order follows the citation
% key (Kahng first) -- confirm against the proceedings.
@inproceedings{kahng.ea:generalized-term:97,
  author      = {Jonghyun Kahng and Wen-Hsiang Kevin Liao and Dennis
    McLeod},
  title       = {Mining Generalized Term Associations: Count
    Propagation Algorithm},
  pages       = {203},
  crossref    = {heckerman.ea:proceedings-third:97},
}
Metarule-Guided Mining of Multi-Dimensional Association Rules Using Data Cubes, Micheline Kamber and Jiawei Han and Jenny Y. Chiang
@inproceedings{kamber.ea:metarule-guided-multi-dimensional:97,
  author      = {Micheline Kamber and Jiawei Han and Jenny Y. Chiang},
  title       = {Metarule-Guided Mining of Multi-Dimensional
    Association Rules Using Data Cubes},
  pages       = {207},
  crossref    = {heckerman.ea:proceedings-third:97},
}
Discovering functional and inclusion dependencies in relational databases, M. Kantola and H. Mannila and K. J. Raiha and H. Siirtola
@article{kantola.ea:discovering-functional:92,
  author      = {M. Kantola and H. Mannila and K. J. Raiha and H.
    Siirtola},
  title       = {Discovering functional and inclusion dependencies in
    relational databases},
  journal     = {International Journal of Intelligent Systems},
  volume      = {7},
  number      = {7},
  pages       = {591--607},
  year        = {1992},
  address     = {Univ Tampere, Tampere, Finland Univ Helsinki, Sf-00100
    Helsinki 10, Finland},
  abstract    = {We consider the problem of discovering the functional
    and inclusion dependencies that a given database
    instance satisfies. This technique is used in a
    database design tool that uses example databases to
    give feedback to the designer. If the examples show
    deficiencies in the design, the designer can directly
    modify the examples. The tool then infers new
    dependencies and the database schema can be modified,
    if necessary. The discovery of the functional and
    inclusion dependencies can also be used in analyzing an
    existing database. The problem of inferring functional
    dependencies has several connections to other topics in
    knowledge discovery and machine learning. In this
    article we discuss the use of examples in the design of
    databases, and give an overview of the complexity
    results and algorithms that have been developed for
    this problem.},
  keywords    = {DESIGN},
}
Scalable, Distributed Data Mining-An Agent Architecture, Hillol Kargupta and Ilker Hamzaoglu and Brian Stafford
@inproceedings{kargupta.ea:scalable-distributed:97,
  author      = {Hillol Kargupta and Ilker Hamzaoglu and Brian
    Stafford},
  title       = {Scalable, Distributed Data Mining-An Agent
    Architecture},
  pages       = {211},
  crossref    = {heckerman.ea:proceedings-third:97},
}
Mining for Knowledge in Databases: Goals and General Description of the INLEN system, Kenneth A. Kaufman and Ryszard S. Michalski and Larry Kerschberg
@incollection{kaufman.ea:goals-general:91,
  author      = {Kenneth A. Kaufman and Ryszard S. Michalski and Larry
    Kerschberg},
  title       = {Mining for Knowledge in Databases: Goals and General
    Description of the {INLEN} system},
  booktitle   = {Knowledge Discovery in Databases},
  editor      = {Gregory Piatetsky-Shapiro and William J. Frawley},
  edition     = {1st},
  publisher   = {AAAI Press / The MIT Press},
  address     = {Menlo Park, California},
  year        = {1991},
  crossref    = {piatetsky-shapiro.ea:knowledge-discovery:91},
}
A Method for Reasoning with Structured and Continuous Attributes in the INLEN-2 Multistrategy Knowledge Discovery System, Kenneth A. Kaufman and Ryszard S. Michalski
@inproceedings{kaufman.ea:method-reasoning:96,
  author      = {Kenneth A. Kaufman and Ryszard S. Michalski},
  title       = {A Method for Reasoning with Structured and Continuous
    Attributes in the {INLEN}-2 Multistrategy Knowledge
    Discovery System},
  pages       = {232},
  crossref    = {simoudis.ea:proceedings-second:96},
}
Reverse Engineering Databases for Knowledge Discovery, Stephen Mc Kearney and Huw Roberts
% Compound surname braced and given in comma form so that Mc is not
% treated as a given name.
@inproceedings{kearney.ea:reverse-engineering:96,
  author      = {{Mc Kearney}, Stephen and Huw Roberts},
  title       = {Reverse Engineering Databases for Knowledge
    Discovery},
  pages       = {375},
  crossref    = {simoudis.ea:proceedings-second:96},
}
Supporting Data Mining of Large Databases by Visual Feedback Queries, D. A. Keim and H.-P. Kriegel and T. Seidl
% NOTE(review): the institution (a required field) is inferred from the
% authors' affiliation recorded in another entry of this file -- confirm.
@techreport{keim.ea:supporting-large:93,
  author      = {D. A. Keim and H.-P. Kriegel and T. Seidl},
  title       = {Supporting Data Mining of Large Databases by Visual
    Feedback Queries},
  institution = {Institute for Computer Science, University of Munich},
  address     = {Muenchen},
  year        = {1993},
  descriptor  = {Anfrage-Bearbeitung, Benutzerschnittstelle, Datenbank,
    Feedback, Visualisierungskomponente},
}
Supporting Data Mining of Large Databases by Visual Feedback Queries, D. A. Keim and H.-P. Kriegel and T. Seidl
@inproceedings{keim.ea:supporting-large:94,
  author      = {D. A. Keim and H.-P. Kriegel and T. Seidl},
  title       = {Supporting Data Mining of Large Databases by Visual
    Feedback Queries},
  booktitle   = {Proceedings of the 10th International Conference on
    Data Engineering},
  editor      = {Ahmed K. Elmagarmid and Erich Neuhold},
  publisher   = {IEEE Computer Society Press},
  address     = {Houston, TX},
  pages       = {302--313},
  month       = feb,
  year        = {1994},
  url         = {http://www.dbs.informatik.uni-muenchen.de/dbs/projekt/papers/datamining.ps},
}
Visualization techniques for mining large databases: a comparison, D. A. Keim and H. P. Kriegel
@article{keim.ea:techniques-large:96,
  author      = {D. A. Keim and H. P. Kriegel},
  title       = {Visualization techniques for mining large databases: a
    comparison},
  journal     = {{IEEE} Transactions on Knowledge and Data Engineering},
  volume      = {8},
  number      = {6},
  pages       = {923--938},
  month       = dec,
  year        = {1996},
  address     = {Univ Munich, Inst Comp Sci, Oettingenstr 67, D-80538
    Munich, Germany},
  abstract    = {Visual data mining techniques have proven to be of
    high value in exploratory data analysis, and they also
    have a high potential for mining large databases. In
    this article, we describe and evaluate a new
    visualization-based approach to mining large databases.
    The basic idea of our visual data mining techniques is
    to represent as many data items as possible on the
    screen at the same time by mapping each data value to a
    pixel of the screen and arranging the pixels
    adequately. The major goal of this article is to
    evaluate our visual data mining techniques and to
    compare them to other well-known visualization
    techniques for multidimensional data: the parallel
    coordinate and stick figure visualization techniques.
    For the evaluation of visual data mining techniques, in
    the first place the perception of properties of the
    data counts, and only in the second place the CPU time
    and the number of secondary storage accesses are
    important. In addition to testing the visualization
    techniques using real data, we developed a testing
    environment for database visualizations similar to the
    benchmark approach used for comparing the performance
    of database systems. The testing environment allows the
    generation of test data sets with predefined data
    characteristics which are important for comparing the
    perceptual abilities of visual data mining
    techniques.},
  keywords    = {SPACE, data mining, explorative data analysis,
    visualizing large databases, visualizing
    multidimensional, multivariate data},
}
Using Visualization to Support Data Mining of Large Existing Databases, D. A. Keim and H.-P. Kriegel
@article{keim.ea:using-to:94,
  author      = {D. A. Keim and H.-P. Kriegel},
  title       = {Using Visualization to Support Data Mining of Large
    Existing Databases},
  journal     = {Lecture Notes in Computer Science},
  volume      = {871},
  pages       = {210--??},
  year        = {1994},
  issn        = {0302-9743},
}
VisDB: Database Exploration using Multidimensional Visualization, D. A. Keim and H. Kriegel
@article{keim.ea:visdb-database:94,
  author      = {D. A. Keim and H. Kriegel},
  title       = {Vis{DB}: Database Exploration using Multidimensional
    Visualization},
  journal     = {Computer Graphics and Applications},
  year        = {1994},
  url         = {http://www.dbs.informatik.uni-muenchen.de/dbs/projekt/papers/visdb.ps},
}
Databases and Visualization, D. A. Keim
@inproceedings{keim:databases-and:96,
  author      = {D. A. Keim},
  title       = {Databases and Visualization},
  booktitle   = {Proc. ACM SIGMOD Int. Conf. on Management of Data},
  address     = {Montreal, Canada},
  year        = {1996},
  note        = {Tutorial},
  url         = {http://www.dbs.informatik.uni-muenchen.de/~daniel/Sigmod96TutorialNotes.ps},
  annote      = {Comprehensive tutorial on Database visualisation for
    exploratory analysis},
}
Pixel-Oriented Database Visualizations, D. A. Keim
@article{keim:pixel-oriented-database:96,
  author      = {D. A. Keim},
  title       = {Pixel-Oriented Database Visualizations},
  journal     = {SIGMOD Record (ACM Special Interest Group on
    Management of Data)},
  volume      = {25},
  number      = {4},
  pages       = {35--39},
  month       = dec,
  year        = {1996},
}
Pixel-oriented Visualization Techniques for Exploring Very Large Databases, D. A. Keim
% The month was previously stored in the number field.
@article{keim:pixel-oriented-techniques:96,
  author      = {D. A. Keim},
  title       = {Pixel-oriented Visualization Techniques for Exploring
    Very Large Databases},
  journal     = {Journal of Computational and Graphical Statistics},
  month       = mar,
  year        = {1996},
  url         = {http://www.dbs.informatik.uni-muenchen.de/dbs/projekt/papers/StatisticsPaper.ps},
}
A Probabilistic Approach to Fast Pattern Matching in Time Series Databases, Eamonn Keogh and Padhraic Smyth
@inproceedings{keogh.ea:probabilistic-approach:97,
  author      = {Eamonn Keogh and Padhraic Smyth},
  title       = {A Probabilistic Approach to Fast Pattern Matching in
    Time Series Databases},
  pages       = {24},
  crossref    = {heckerman.ea:proceedings-third:97},
}
On the symbiosis of a data mining environment and a DBMS, Martin L. Kersten and Marcel Holsheimer
% CWI technical report CS-R9521 (report number taken from the URL and the
% former note field); the ISSN now lives in its own field, not in address.
@techreport{kersten.ea:on-symbiosis:95,
  author      = {Martin L. Kersten and Marcel Holsheimer},
  title       = {On the symbiosis of a data mining environment and a
    {DBMS}},
  institution = {Centrum voor Wiskunde en Informatica (CWI)},
  number      = {CS-R9521},
  month       = mar # " 30",
  year        = {1995},
  pages       = {12},
  issn        = {0169-118X},
  keywords    = {data mining, parallel databases, knowledge discovery
    in databases.},
  url         = {ftp://ftp.cwi.nl/pub/CWIreports/AA/CS-R9521.ps.Z},
  abstract    = {One of the main obstacles in applying data mining
    techniques to large, real-world databases is the lack
    of efficient data management. In this paper, we outline
    a two-level architecture, consisting of a mining tool
    and a database server. Key elements in its success are
    a clear separation of concerns: the mining tool
    organizes and controls the search process, while all
    data-handling is performed by the parallel main memory
    DBMS. Data is stored as a set of binary tables. The
    interaction consists of queries for statistical
    information. Properties of the DBMS and the search
    algorithm are exploited for optimization of the data
    handling. In particular, results of previous
    computations are re-used, and I/O activity is reduced
    by keeping a small hot-set of binary tables in
    main-memory. As test results show, this system handles
    large datasets at a competitive performance.},
  note        = {AA (Department of Algorithmics and Architecture)},
  annote      = {Originally contained the fields and values -
    note,CS-R9521 booktitle,92},
}
Clustering Sequences of Complex Objects, A. Ketterlin
@inproceedings{ketterlin:clustering-sequences:97,
  author      = {A. Ketterlin},
  title       = {Clustering Sequences of Complex Objects},
  pages       = {215},
  crossref    = {heckerman.ea:proceedings-third:97},
}
Privacy and knowledge discovery - a response, Y. T. Khaw and H. Y. Lee
@article{khaw.ea:privacy-response:95,
  author      = {Y. T. Khaw and H. Y. Lee},
  title       = {Privacy and knowledge discovery - a response},
  journal     = {{IEEE} Expert-Intelligent Systems \& Their
    Applications},
  volume      = {10},
  number      = {2},
  pages       = {58},
  year        = {1995},
  address     = {Natl Comp Board, Inst Informat Technol, Ncb Bldg, 71
    Sci Pk Dr, Singapore 0511, Singapore},
}
Learning rules with local exceptions, Jyrki Kivinen and Heikki Mannila and Esko Ukkonen
@techreport{kivinen.ea:learning-rules:93,
  author      = {Jyrki Kivinen and Heikki Mannila and Esko Ukkonen},
  title       = {Learning rules with local exceptions},
  institution = {University of Helsinki},
  year        = {1993},
}
Finding interesting rules from large sets of discovered association rules, Mika Klemettinen and Heikki Mannila and Pirjo Ronkainen and Hannu Toivonen and A. Inkeri Verkamo
@inproceedings{klemettinen.ea:finding-interesting:94,
  author      = {Mika Klemettinen and Heikki Mannila and Pirjo
    Ronkainen and Hannu Toivonen and A. Inkeri Verkamo},
  title       = {Finding interesting rules from large sets of
    discovered association rules},
  booktitle   = {Third International Conference on Information and
    Knowledge Management (CIKM'94)},
  editor      = {Nabil R. Adam and Bharat K. Bhargava and Yelena
    Yesha},
  publisher   = {ACM Press},
  pages       = {401--407},
  month       = nov,
  year        = {1994},
  url         = {ftp://ftp.cs.helsinki.fi/pub/Reports/by_Project/PMDM/Finding_Interesting_Rules_from_Large_Sets_of_Discovered_Association_Rules.ps.gz},
  keywords    = {Knowledge discovery, Data mining, Association rules,
    Rule selection, Visualization},
  abstract    = {Association rules, introduced by Agrawal, Imielinski,
    and Swami, are rules of the form ``for 90 \% of the
    rows of the relation, if the row has value 1 in the
    columns in set $W$, then it has 1 also in column $B$''.
    Efficient methods exist for discovering association
    rules from large collections of data. The number of
    discovered rules can, however, be so large that
    browsing the rule set and finding interesting rules
    from it can be quite difficult for the user. We show
    how a simple formalism of {\em rule templates} makes it
    possible to easily describe the structure of
    interesting rules. We also give examples of
    visualization of rules, and show how a visualization
    tool interfaces with rule templates.},
}
Knowledge discovery in databases and data mining, W. Kloesgen
@article{kloesgen:knowledge-discovery:96,
  author      = {W. Kloesgen},
  title       = {Knowledge discovery in databases and data mining},
  journal     = {Lecture Notes in Computer Science},
  volume      = {1079},
  pages       = {623--??},
  year        = {1996},
  issn        = {0302-9743},
}
Efficient Discovery of Interesting statements in Databases,
@TechReport{klosgen:efficient-interesting:93,
  author = "Kl{\"o}sgen, Willi",
  title = "Efficient Discovery of Interesting Statements in
           Databases",
  institution = "GMD",
  year = "1993",
}
Problems for knowledge discovery in databases and their treatment in the statistics interpreter explora, W. Klosgen
@Article{klosgen:problems-their:92,
  crossref = "ijis-special-issue:92",
  author = "Kl{\"o}sgen, W.",
  affiliation = "German Natl Res Ctr Comp Sci, St Augustin 1, Germany",
  title = "Problems for knowledge discovery in databases and
           their treatment in the {Statistics Interpreter Explora}",
  journal = "International Journal of Intelligent Systems",
  year = "1992",
  volume = "7",
  number = "7",
  pages = "649--673",
  abstract = "In this article we describe some goals and problems of
              KDD. Approaches are presented which have been
              implemented in the Statistics Interpreter Explora, a
              prototype assistant system for discovering interesting
              findings in recurrent datasets. We introduce patterns
              to identify what is interesting in data and give some
              examples of patterns for difference-, change-, and
              trend-detection. Then we summarize what must be
              specified to define a pattern. Besides some descriptive
              parts, this includes a procedural verification method.
              Object-oriented programming techniques can simplify the
              specializations of general patterns. We identify search
              as a constituent principle of discovery and introduce
              object structures as a basis to induce a graph
              structure on the search space. We mention several
              strategies for graph search and describe approaches for
              dealing with the aggregation, redundancy, and
              overlapping problems. Then we address the presentation
              of findings in natural language and graphical form,
              focusing on the methods to design good graphical
              presentations by knowledge-based techniques. Finally,
              we discuss the paradigm of an adaptive discovery
              assistant, including the problem of how to reuse the
              discovered knowledge for further discovery.",
}
What makes a compelling empirical-evaluation, K. Knight
@Article{knight:what-makes:96,
  author = "K. Knight",
  affiliation = "Univ So Calif, Inst Sci Informat, 4676 Admiralty Way,
                 Marina Del Rey, Ca, 90292; Univ Massachusetts, Dept Comp
                 Sci, Amherst, Ma, 01003; Inst Study Learning \&
                 Expertise, Palo Alto, Ca, 94306; Stanford Univ,
                 Stanford, Ca, 94305",
  title = "What makes a compelling empirical-evaluation",
  journal = "{IEEE} Expert-Intelligent Systems \& Their
             Applications",
  year = "1996",
  volume = "11",
  number = "5",
  pages = "10--14",
}
Analysing Binary Associations, Arno J. Knobbe and Pieter W. Adriaans
@inproceedings{knobbe.ea:analysing-binary:96,
  author   = {Arno J. Knobbe and Pieter W. Adriaans},
  title    = {Analysing Binary Associations},
  pages    = {311},
  crossref = {simoudis.ea:proceedings-second:96},
}
Extraction of Spatial Proximity Patterns by Concept Generalization, Edwin M. Knorr and Raymond T. Ng
@inproceedings{knorr.ea:extraction-spatial:96,
  author   = {Edwin M. Knorr and Raymond T. Ng},
  title    = {Extraction of Spatial Proximity Patterns by Concept
              Generalization},
  pages    = {347},
  crossref = {simoudis.ea:proceedings-second:96},
}
Finding aggregate proximity relationships and commonalities in spatial data mining, E. M. Knorr and R. T. Ng
@Article{knorr.ea:finding-aggregate:96,
  author = "E. M. Knorr and R. T. Ng",
  affiliation = "Univ British Columbia, Dept Comp Sci, 2366 Main Mall,
                 Vancouver, BC V6T 1Z4, Canada",
  title = "Finding aggregate proximity relationships and
           commonalities in spatial data mining",
  journal = "{IEEE} Transactions on Knowledge and Data Engineering",
  year = "1996",
  month = dec,
  volume = "8",
  number = "6",
  pages = "884--897",
  abstract = "In this paper, we study two spatial knowledge
              discovery problems involving proximity relationships
              between clusters and features. The first problem is:
              Given a cluster of points, how can we efficiently find
              features (represented as polygons) that are closest to
              the majority of points in the cluster? We measure
              proximity in an aggregate sense due to the nonuniform
              distribution of points in a cluster (e.g., houses on a
              map), and the different shapes and sizes of features
              (e.g., natural or man-made geographic features). The
              second problem is: Given n clusters of points, how can
              we extract the aggregate proximity commonalities (i.e.,
              features) that apply to most, if not all, of the n
              clusters? Regarding the first problem, the main
              contribution of the paper is the development of
              Algorithm CRH which uses geometric approximations
              (i.e., circles, rectangles, and convex hulls) to filter
              and select features. Highly scalable and incremental,
              Algorithm CRH can examine over 50,000 features and
              their spatial relationships with a given cluster in
              approximately one second of CPU time. Regarding the
              second problem, the key contribution is the development
              of Algorithm GenCom that makes use of concept
              generalization to effectively derive many meaningful
              commonalities that cannot be found otherwise.",
  keywords = "spatial knowledge discovery, concept generalization,
              proximity relationships, geometric filtering, GIS",
}
A Unified Notion of Outliers: Properties and Computation, Edwin M. Knorr and Raymond T. Ng
@inproceedings{knorr.ea:unified-notion:97,
  author   = {Edwin M. Knorr and Raymond T. Ng},
  title    = {A Unified Notion of Outliers: Properties and
              Computation},
  pages    = {219},
  crossref = {heckerman.ea:proceedings-third:97},
}
Machine Learning, an Artificial Intelligence approach, Yves Kodratoff and Ryszard S. Michalski (Eds)
@book{kodratoff.ea:machine-learning:90,
  editor    = {Yves Kodratoff and Ryszard S. Michalski},
  title     = {Machine Learning, an {Artificial Intelligence}
               approach},
  volume    = {3},
  publisher = {Morgan Kaufmann},
  address   = {San Mateo, California},
  year      = {1990},
}
Automatic Parameter Selection by Minimizing Estimated Error, Ron Kohavi and George John
@inproceedings{kohavi.ea:automatic-parameter:95,
  author        = {Ron Kohavi and George John},
  title         = {Automatic Parameter Selection by Minimizing Estimated
                   Error},
  booktitle     = {Machine Learning: Proceedings of the Twelfth
                   International Conference},
  editor        = {Armand Prieditis and Stuart Russell},
  publisher     = {Morgan Kaufmann},
  pages         = {304--312},
  month         = jul,
  year          = {1995},
  url           = {http://robotics.stanford.edu/users/ronnyk},
  contributedby = {Ronny Kohavi, ronnyk(at)sgi.com},
}
Bias Plus Variance Decomposition for Zero-One Loss Functions, Ron Kohavi and David H. Wolpert
@inproceedings{kohavi.ea:bias-plus:96,
  author        = {Ron Kohavi and David H. Wolpert},
  title         = {Bias Plus Variance Decomposition for Zero-One Loss
                   Functions},
  booktitle     = {Machine Learning: Proceedings of the Thirteenth
                   International Conference},
  editor        = {Lorenza Saitta},
  publisher     = {Morgan Kaufmann},
  pages         = {275--283},
  month         = jul,
  year          = {1996},
  url           = {http://robotics.stanford.edu/users/ronnyk},
  contributedby = {Ronny Kohavi, ronnyk(at)sgi.com},
}
Error-Based and Entropy-Based Discretization of Continuous Features, Ron Kohavi and Mehran Sahami
@inproceedings{kohavi.ea:error-based-entropy-based:96,
  author        = {Ron Kohavi and Mehran Sahami},
  title         = {Error-Based and Entropy-Based Discretization of
                   Continuous Features},
  booktitle     = {Proceedings of the Second International Conference on
                   Knowledge Discovery and Data Mining},
  pages         = {114--119},
  year          = {1996},
  crossref      = {simoudis.ea:proceedings-second:96},
  url           = {http://robotics.stanford.edu/users/ronnyk},
  url2          = {ftp://starry.stanford.edu/pub/ronnyk/disc2.ps},
  affiliation   = {Silicon Graphics Inc.; Stanford University},
  contributedby = {Ronny Kohavi, ronnyk(at)sgi.com},
}
Feature Subset Selection Using the Wrapper Method: Overfitting and Dynamic Search Space Topology, Ron Kohavi and Dan Sommerfield
@InProceedings{kohavi.ea:feature-subset:95,
  author = "Ron Kohavi and Dan Sommerfield",
  booktitle = "First International Conference on Knowledge Discovery
               and Data Mining (KDD-95)",
  title = "Feature Subset Selection Using the Wrapper Method:
           Overfitting and Dynamic Search Space Topology",
  year = "1995",
  URL = "ftp://starry.stanford.edu/pub/ronnyk/fssWrapper.ps",
  editor = "Usama M. Fayyad and Ramasamy Uthurusamy",
  keywords = "feature subset selection, relevant/irrelevant features,
              accuracy estimation, cross-validation",
  month = aug,
}
Option Decision Trees with Majority Votes, Ron Kohavi and Clayton Kunz
@inproceedings{kohavi.ea:option-decision:97,
  author        = {Ron Kohavi and Clayton Kunz},
  title         = {Option Decision Trees with Majority Votes},
  booktitle     = {Machine Learning: Proceedings of the Fourteenth
                   International Conference},
  editor        = {Doug Fisher},
  publisher     = {Morgan Kaufmann Publishers, Inc.},
  month         = jul,
  year          = {1997},
  url           = {http://robotics.stanford.edu/users/ronnyk},
  contributedby = {Ronny Kohavi, ronnyk(at)sgi.com},
}
Data Mining Using MLC++: A Machine Learning Library in C++, Ron Kohavi and Dan Sommerfield and James Dougherty
@inproceedings{kohavi.ea:using-mlc:96,
  author        = {Ron Kohavi and Dan Sommerfield and James Dougherty},
  title         = {Data Mining Using {MLC}++: {A} Machine Learning
                   Library in {C}++},
  booktitle     = {Tools with Artificial Intelligence},
  publisher     = {IEEE Computer Society Press},
  pages         = {234--245},
  year          = {1996},
  note          = {Received the best paper award},
  url           = {ftp://starry.stanford.edu/pub/ronnyk/mlc96.ps.Z},
  contributedby = {Ronny Kohavi, ronnyk(at)sgi.com},
}
Wrappers for Feature Subset Selection, Ron Kohavi and George H. John
@Article{kohavi.ea:wrappers-feature:,
  author = "Ron Kohavi and George H. John",
  title = "Wrappers for Feature Subset Selection",
  journal = "Artificial Intelligence",
  volume = "97",
  number = "1--2",
  pages = "273--324",
  year = "1997",
  URL = "http://robotics.stanford.edu/users/ronnyk",
  contributedby = "Ronny Kohavi, ronnyk(at)sgi.com",
}
Scaling Up the Accuracy of Naive-Bayes Classifiers: a Decision-Tree Hybrid, Ron Kohavi
@inproceedings{kohavi:scaling-up:96,
  author        = {Ron Kohavi},
  title         = {Scaling Up the Accuracy of {N}aive-{B}ayes
                   Classifiers: a Decision-Tree Hybrid},
  booktitle     = {Proceedings of the Second International Conference on
                   Knowledge Discovery and Data Mining},
  pages         = {202--207},
  year          = {1996},
  crossref      = {simoudis.ea:proceedings-second:96},
  url           = {ftp://starry.stanford.edu/pub/ronnyk/nbtree.ps},
  url2          = {http://robotics.stanford.edu/users/ronnyk},
  contributedby = {Ronny Kohavi, ronnyk(at)sgi.com},
}
Wrappers for Performance Enhancement and Oblivious Decision Graphs, Ron Kohavi
@PhdThesis{kohavi:wrappers-performance:95,
  author = "Ron Kohavi",
  title = "Wrappers for Performance Enhancement and Oblivious
           Decision Graphs",
  year = "1995",
  school = "Computer Science Department, Stanford University",
  address = "Stanford, CA",
  note = "STAN-CS-TR-95-1560",
  URL = "ftp://starry.stanford.edu/pub/ronnyk",
  contributedby = "Ronny Kohavi, ronnyk(at)sgi.com",
}
Predictive Data Mining with Finite Mixtures, Petri Kontkanen and Petri Myllymaki and Henry Tirri
@inproceedings{kontkanen.ea:predictive-with:96,
  author   = {Petri Kontkanen and Petri Myllymaki and Henry Tirri},
  title    = {Predictive Data Mining with Finite Mixtures},
  pages    = {176},
  crossref = {simoudis.ea:proceedings-second:96},
}
Discovery of spatial association rules in geographic information databases, K. Koperski and J. W. Han
@Article{koperski.ea:spatial-association:95,
  author = "K. Koperski and J. W. Han",
  affiliation = "Simon Fraser Univ, Sch Comp Sci, Burnaby, BC V5A 1S6,
                 Canada",
  title = "Discovery of spatial association rules in geographic
           information databases",
  journal = "Lecture Notes in Computer Science",
  year = "1995",
  volume = "951",
  pages = "47--66",
  abstract = "Spatial data mining, i.e., discovery of interesting,
              implicit knowledge in spatial databases, is an
              important task for understanding and use of spatial
              data- and knowledge- bases. In this paper, an efficient
              method for mining strong spatial association rules in
              geographic information databases is proposed and
              studied. A spatial association rule is a rule
              indicating certain association relationship among a set
              of spatial and possibly some nonspatial predicates. A
              strong rule indicates that the patterns in the rule
              have relatively frequent occurrences in the database
              and strong implication relationships. Several
              optimization techniques are explored, including a
              two-step spatial computation technique (approximate
              computation on large sets, and refined computations on
              small promising patterns), shared processing in the
              derivation of large predicates at multiple concept
              levels, etc. Our analysis shows that interesting
              association rules can be discovered efficiently in
              large spatial databases.",
}
Quantifiable Data Mining Using Principal Component Analysis, Flip Korn and Alexandros Labrinidis and Yannis Kotidis and Christos Faloutsos and Alex Kaplunovich and Dejan Perkovic
@techreport{korn.ea:quantifiable-using:97,
  author      = {Flip Korn and Alexandros Labrinidis and Yannis Kotidis
                 and Christos Faloutsos and Alex Kaplunovich and Dejan
                 Perkovic},
  title       = {Quantifiable Data Mining Using Principal Component
                 Analysis},
  institution = {University of Maryland Institute for Advanced Computer
                 Studies Dept. of Computer Science, Univ. of Maryland},
  number      = {CS-TR-3754},
  address     = {College Park, MD},
  month       = feb,
  year        = {1997},
  url         = {ftp://ftp.cs.umd.edu/pub/papers/papers/3754/3754.ps.Z},
  abstract    = {Association Rule Mining algorithms operate on a data
                 matrix (e.g., customers x products) to derive rules. We
                 propose a single-pass algorithm for mining linear rules
                 in such a matrix based on Principal Component Analysis.
                 PCA detects correlated columns of the matrix, which
                 correspond to, e.g., products that sell together.\par
                 The first contribution of this work is that we propose
                 to quantify the ``goodness'' of a set of discovered
                 rules. We define the ``guessing error'': the
                 root-mean-square error of the reconstructed values of
                 the cells of the given matrix, when we pretend that
                 they are unknown. The second contribution is a novel
                 method to guess missing/hidden values from the linear
                 rules that our method derives. For example, if somebody
                 bought \$10 of milk and \$3 of bread, our rules can
                 ``guess'' the amount spent on, say, butter. Thus, we
                 can perform a variety of important tasks such as
                 forecasting, `what-if' scenarios, outlier detection,
                 and visualization. Moreover, we show that we can
                 compute the principal components with a single pass
                 over the dataset.\par Experiments on real datasets
                 (e.g., NBA statistics) demonstrate that the proposed
                 method consistently achieves a ``guessing error'' of up
                 to 5 times lower than the straightforward
                 competitor.\par (Also cross-referenced as
                 UMIACS-TR-97-13)},
}
Mining for Causes of Cancer: Machine Learning Experiments at Various Levels of Detail, Stefan Kramer and Bernhard Pfahringer and Christoph Helma
@inproceedings{kramer.ea:causes-cancer:97,
  author   = {Stefan Kramer and Bernhard Pfahringer and Christoph
              Helma},
  title    = {Mining for Causes of Cancer: Machine Learning
              Experiments at Various Levels of Detail},
  pages    = {223},
  crossref = {heckerman.ea:proceedings-third:97},
}
Efficient Search for Strong Partial Determinations, Stefan Kramer and Bernhard Pfahringer
@inproceedings{kramer.ea:efficient-search:96,
  author   = {Stefan Kramer and Bernhard Pfahringer},
  title    = {Efficient Search for Strong Partial Determinations},
  pages    = {371},
  crossref = {simoudis.ea:proceedings-second:96},
}
The Complexity of Data Mining on the Web, Evangelos Kranakis and Danny Krizanc and Andrzej Pelc and David Peleg
@InProceedings{kranakis.ea:complexity-on:96,
  author = "Evangelos Kranakis and Danny Krizanc and Andrzej Pelc
            and David Peleg",
  title = "The Complexity of Data Mining on the Web",
  pages = "153",
  booktitle = "Proceedings of the 15th Annual {ACM} Symposium on
               Principles of Distributed Computing",
  month = may,
  publisher = "ACM",
  address = "New York",
  year = "1996",
}
Data-Mining Dynamite --- Supercharge your data-mining projects with data cleansing, data warehouses, parallel processing, and mega-storage, Cheryl D. Krivda
@article{krivda:data-mining-dynamite:95,
  author  = {Cheryl D. Krivda},
  title   = {Data-Mining Dynamite --- Supercharge your data-mining
             projects with data cleansing, data warehouses, parallel
             processing, and mega-storage},
  journal = {Byte Magazine},
  volume  = {20},
  number  = {10},
  pages   = {97--??},
  month   = oct,
  year    = {1995},
  issn    = {0360-5280},
}
Unearthing Underground Data, Cheryl D Krivda
@Article{krivda:unearthing-underground:96,
  author = "Cheryl D. Krivda",
  title = "Unearthing Underground Data",
  journal = "LAN Magazine",
  year = "1996",
  note = "May 20--June 2",
  URL = "http://www.lanmag.com/9605mine.htm",
}
Multi-class problems and discretization in ICL, W. Van Laer and S. D\v{z}eroski and L. De Raedt
@InProceedings{laer.ea:multi-class-problems:96,
  author = "Van Laer, W. and D{\v{z}}eroski, S. and De Raedt, L.",
  title = "Multi-class problems and discretization in {ICL}",
  booktitle = "Proceedings of the MLnet Familiarization Workshop on
               Data Mining with Inductive Logic Programming",
  pages = "53--60",
  year = "1996",
}
Self-Organizing Maps of Document Collections: A New Approach to Interactive Exploration, Krista Lagus and Timo Honkela and Samuel Kaski and Teuvo Kohonen
@inproceedings{lagus.ea:self-organizing-maps:96,
  author   = {Krista Lagus and Timo Honkela and Samuel Kaski and
              Teuvo Kohonen},
  title    = {Self-Organizing Maps of Document Collections: {A} New
              Approach to Interactive Exploration},
  pages    = {238},
  crossref = {simoudis.ea:proceedings-second:96},
}
Discrete Sequence Prediction and Its Applications, P. Laird
@InProceedings{laird:discrete-sequence:92,
  author = "P. Laird",
  title = "Discrete Sequence Prediction and Its Applications",
  year = "1992",
  booktitle = "Proceedings of AAAI-92",
  pages = "135--140",
}
Imputation of Missing Data Using Machine Learning Techniques, Kamakshi Lakshminarayan and Steven A. Harp and Robert Goldman and Tariq Samad
@inproceedings{lakshminarayan.ea:imputation-missing:96,
  author   = {Kamakshi Lakshminarayan and Steven A. Harp and Robert
              Goldman and Tariq Samad},
  title    = {Imputation of Missing Data Using Machine Learning
              Techniques},
  pages    = {140},
  crossref = {simoudis.ea:proceedings-second:96},
}
An Empirical Test of the Weighted Effect Approach to Generalized Prediction Using Recursive Neural Nets, Rense Lange
@inproceedings{lange:empirical-test:96,
  author   = {Rense Lange},
  title    = {An Empirical Test of the Weighted Effect Approach to
              Generalized Prediction Using Recursive Neural Nets},
  pages    = {183},
  crossref = {simoudis.ea:proceedings-second:96},
}
Data-Driven Approaches to Empirical Discovery, Pat Langley and Jan M. Zytkow
@article{langley.ea:data-driven-approaches:89,
  author  = {Pat Langley and Jan M. Zytkow},
  title   = {Data-Driven Approaches to Empirical Discovery},
  journal = {Artificial Intelligence},
  volume  = {40},
  pages   = {283--312},
  month   = sep,
  year    = {1989},
}
Rediscovering chemistry with the Bacon system, Pat Langley and Gary L. Bradshaw and Herbert A. Simon
@incollection{langley.ea:rediscovering-chemistry:86,
  author   = {Pat Langley and Gary L. Bradshaw and Herbert A.
              Simon},
  title    = {Rediscovering chemistry with the {Bacon} system},
  pages    = {307--329},
  crossref = {michalski.ea:machine-learning:86},
}
The Search for Regularity: Four Aspects of Scientific Discovery, Pat Langley and Jan M. Zytkow and Herbert A. Simon and Gary L. Bradshaw
@incollection{langley.ea:search-regularity:86,
  author   = {Pat Langley and Jan M. Zytkow and Herbert A. Simon and
              Gary L. Bradshaw},
  title    = {The Search for Regularity: Four Aspects of Scientific
              Discovery},
  pages    = {425--469},
  crossref = {michalski.ea:machine-learning:86},
}
Induction of Condensed Determinations, Pat Langley
@inproceedings{langley:induction-condensed:96,
  author   = {Pat Langley},
  title    = {Induction of Condensed Determinations},
  pages    = {327},
  crossref = {simoudis.ea:proceedings-second:96},
}
A Context-Sensitive Discretization of Numeric Attributes for Classification Learning, Changhwan Lee and Dong-Guk Shin
@InProceedings{lee.ea:context-sensitive-discretization:94,
  author = "Changhwan Lee and Dong-Guk Shin",
  title = "A Context-Sensitive Discretization of Numeric
           Attributes for Classification Learning",
  booktitle = "ECAI 94. Proceedings of the 11th European Conference on
               Artificial Intelligence",
  publisher = "John Wiley and Sons, Ltd",
  year = "1994",
  pages = "428--432",
}
Database summarization using fuzzy isa hierarchies, D. H. Lee and M. H. Kim
@Article{lee.ea:database-summarization:97,
  author = "D. H. Lee and M. H. Kim",
  affiliation = "Chonnam Natl Univ, Dept Comp Sci, Kwangju, South Korea;
                 Korea Adv Inst Sci \& Technol, Dept Comp Sci, Taejon
                 305701, South Korea",
  title = "Database summarization using fuzzy {ISA} hierarchies",
  journal = "{IEEE} Transactions on Systems, Man, and Cybernetics,
             Part {B}: Cybernetics",
  year = "1997",
  volume = "27",
  number = "1",
  pages = "68--78",
  abstract = "Summary discovery is one of the major components of
              knowledge discovery in databases, which provides the
              user with comprehensive information for grasping the
              essence from a large amount of information in a
              database. In this paper, we propose an interactive
              top-down summary discovery process which utilizes fuzzy
              ISA hierarchies as domain knowledge. We define a
              generalized tuple as a representational form of a
              database summary including fuzzy concepts. By virtue of
              fuzzy ISA hierarchies where fuzzy ISA relationships
              common in actual domains are naturally expressed, the
              discovery process comes up with more accurate database
              summaries. We also present an informativeness measure
              for distinguishing generalized tuples that delivers
              much information to users, based on Shannon's
              information theory.",
  keywords = "data mining, fuzzy set application, summary
              discovery",
}
A Hypothesis Refinement Method for Summary Discovery in Databases, Do Heon Lee and Myoung Ho Kim
@inproceedings{lee.ea:hypothesis-refinement:93,
  author    = {Do Heon Lee and Myoung Ho Kim},
  title     = {A Hypothesis Refinement Method for Summary Discovery
               in Databases},
  booktitle = {Proceedings of the 2nd International Conference on
               Information and Knowledge Management},
  editor    = {Bharat Bhargava and Timothy Finin and Yelena Yesha},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
  pages     = {274--282},
  month     = nov,
  year      = {1993},
}
Visualization support for data mining, H. Y. Lee and H. L. Ong
@Article{lee.ea:support:96,
  author = "H. Y. Lee and H. L. Ong",
  affiliation = "Inst Informat Technol, Japan Singapore Artificial
                 Intelligence Ctr, 11 Sci Pk Rd, Singapore 117685,
                 Singapore",
  title = "Visualization support for data mining",
  journal = "{IEEE} Expert-Intelligent Systems \& Their
             Applications",
  year = "1996",
  volume = "11",
  number = "5",
  pages = "69--75",
  annote = "Discusses WinViz System",
  URL = "http://jsaic.iti.gov.sg/pubs/papers/papers_archive/IEEEpub.zip",
  keywords = "visualisation, WinViz, parallel co-ordinates",
}
EURISKO: A program that learns new heuristics and domain concepts. The nature of heuristics III: Background and examples, D. Lenat
@article{lenat:eurisko-program:83,
  author  = {D. Lenat},
  title   = {{EURISKO}: {A} program that learns new heuristics and
             domain concepts. {T}he nature of heuristics {III}:
             Background and examples},
  journal = {Artificial Intelligence},
  volume  = {21},
  pages   = {61--98},
  year    = {1983},
}
Discovering Trends in Text Databases, Brian Lent and Rakesh Agrawal and Ramakrishnan Srikant
@inproceedings{lent.ea:discovering-trends:97,
  author   = {Brian Lent and Rakesh Agrawal and Ramakrishnan
              Srikant},
  title    = {Discovering Trends in Text Databases},
  pages    = {227},
  crossref = {heckerman.ea:proceedings-third:97},
}
A Framework for Integrating Fault Diagnosis and Incremental Knowledge Acquisition in Connectionist Expert Systems, J. H. Lim and H. C. Lui and P. Z. Wang
@InProceedings{lim.ea:framework-integrating:92,
  author = "J. H. Lim and H. C. Lui and P. Z. Wang",
  title = "A Framework for Integrating Fault Diagnosis and
           Incremental Knowledge Acquisition in Connectionist
           Expert Systems",
  year = "1992",
  booktitle = "Proceedings of AAAI-92",
  pages = "159--164",
}
Data mining - tools and techniques, P. R. Limb and G. J. Meggs
@Article{limb.ea:tools-techniques:94,
  author = "P. R. Limb and G. J. Meggs",
  affiliation = "British Telecommun Labs, Martlesham Heath, Ipswich IP5
                 7RE, Suffolk, England",
  title = "Data mining - tools and techniques",
  journal = "{BT} Technology Journal",
  year = "1994",
  volume = "12",
  number = "4",
  pages = "32--41",
  abstract = "The amount of data collected by large
              telecommunications companies like BT is vast. In order
              to turn this voluminous data into valuable information
              it is necessary to apply analysis techniques to build
              models and characteristics of data. This paper gives an
              overview of a range of techniques used for data
              analysis collectively known as data mining. Three broad
              categories of data mining techniques are suggested and
              the reader is introduced to popular algorithms within
              each category. References to additional algorithms are
              also presented so that the reader may gain more
              detailed information if required.",
}
Dimensionality reduction via discretization, H. Liu and R. Setiono
@Article{liu.ea:dimensionality-reduction:96,
  author = "H. Liu and R. Setiono",
  affiliation = "Natl Univ Singapore, Dept Informat Syst \& Comp Sci,
                 Singapore 0511, Singapore",
  title = "Dimensionality reduction via discretization",
  journal = "Knowledge-Based Systems",
  year = "1996",
  volume = "9",
  number = "1",
  pages = "67--72",
  abstract = "The existence of numeric data and large numbers of
              records in a database present a challenging task in
              terms of explicit concepts extraction from the raw
              data. The paper introduces a method that reduces data
              vertically and horizontally, keeps the discriminating
              power of the original data, and paves the way for
              extracting concepts. The method is based on
              discretization (vertical reduction) and feature
              selection (horizontal reduction). The experimental
              results show that (a) the data can be effectively
              reduced by the proposed method; (b) the predictive
              accuracy of a classifier (C4.5) can be improved after
              data and dimensionality reduction; and (c) the
              classification rules learned are simpler.",
  keywords = "DIMENSIONALITY REDUCTION, DISCRETIZATION, KNOWLEDGE
              DISCOVERY",
}
Using General Impressions to Analyze Discovered Classification Rules, Bing Liu and Wynne Hsu and Shu Chen
@inproceedings{liu.ea:using-general:97,
  author   = {Bing Liu and Wynne Hsu and Shu Chen},
  title    = {Using General Impressions to Analyze Discovered
              Classification Rules},
  pages    = {31},
  crossref = {heckerman.ea:proceedings-third:97},
}
Expert systems, clinical-data analyses, and knowledge discovery - the posch ai project, J. M. Long and J. R. Slagle
@Article{long.ea:expert-systems:92,
  author = "J. M. Long and J. R. Slagle",
  affiliation = "Univ Minnesota, Dept Surg, Box 290 Umhc, 420 Delaware
                 St Se, Minneapolis, Mn, 55455",
  title = "Expert systems, clinical-data analyses, and knowledge
           discovery - the {POSCH} {AI} project",
  journal = "Annals of the New York Academy of Sciences",
  year = "1992",
  volume = "670",
  pages = "146--154",
}
Application of Clausal Discovery to Temporal Databases, D. Lorenzo
@InProceedings{lorenzo:application-clausal:96,
  author = "D. Lorenzo",
  title = "Application of Clausal Discovery to Temporal
           Databases",
  booktitle = "Proceedings of the MLnet Familiarization Workshop on
               Data Mining with Inductive Logic Programming",
  pages = "25--40",
  year = "1996",
}
Effective data mining using neural networks, H. J. Lu and R. Setiono and H. Liu
@Article{lu.ea:effective-using:96,
  author = "H. J. Lu and R. Setiono and H. Liu",
  affiliation = "Natl Univ Singapore, Dept Informat Syst \& Comp Sci,
                 Lower Kent Ridge Rd, Singapore 119260, Singapore",
  title = "Effective data mining using neural networks",
  journal = "{IEEE} Transactions on Knowledge and Data Engineering",
  year = "1996",
  volume = "8",
  number = "6",
  pages = "957--961",
  abstract = "Classification is one of the data mining problems
              receiving great attention recently in the database
              community. This paper presents an approach to discover
              symbolic classification rules using neural networks.
              Neural networks have not been thought suited for data
              mining because how the classifications were made is not
              explicitly stated as symbolic rules that are suitable
              for verification or interpretation by humans. With the
              proposed approach, concise symbolic rules with high
              accuracy can be extracted from a neural network. The
              network is first trained to achieve the required
              accuracy rate. Redundant connections of the network are
              then removed by a network pruning algorithm. The
              activation values of the hidden units in the network
              are analyzed, and classification rules are generated
              using the result of this analysis. The effectiveness of
              the proposed approach is clearly demonstrated by the
              experimental results on a set of standard data mining
              test problems.",
  keywords = "data mining, neural networks, rule extraction, network
              pruning, classification",
}
NeuroRule: A Connectionist Approach to Data Mining, H. Lu and R. Setiono and H. Liu
@InProceedings{lu.ea:neurorule-connectionist:95,
  author = "H. Lu and R. Setiono and H. Liu",
  booktitle = "Proceedings of the 21st International Conference on
               Very Large Data Bases (VLDB'95)",
  title = "{NeuroRule}: {A} Connectionist Approach to Data
           Mining",
  year = "1995",
  URL = "http://www.iscs.nus.sg/~liuh/vldb95.ps",
  keywords = "Neural Networks, Data Mining, Classification Rules",
}
Efd - a hybrid knowledge statistical-based system for the detection of fraud, J. A. Major and D. R. Riedinger
@Article{major.ea:efd-hybrid:92,
  author = "J. A. Major and D. R. Riedinger",
  affiliation = "Travelers Insurance Co, Hartford, Ct, 06183",
  title = "{EFD} - a hybrid knowledge statistical-based system for
           the detection of fraud",
  crossref = "ijis-special-issue:92",
  journal = "International Journal of Intelligent Systems",
  year = "1992",
  volume = "7",
  number = "7",
  pages = "687--703",
  abstract = "EFD (Electronic Fraud Detection) assists Investigative
              Consultants in the Managed Care \& Employee Benefits
              Security Unit of The Travelers Insurance Companies in
              the detection and preinvestigative analysis of
              healthcare provider fraud. The task EFD performs,
              scanning a large population of health insurance claims
              in search of likely fraud, has never been done
              manually. Furthermore, the available database has few
              positive examples. Thus, neither existing knowledge
              engineering techniques nor statistical methods are
              sufficient for designing the identification process. To
              overcome these problems, EFD uses knowledge discovery
              techniques on two levels. First, EFD integrates expert
              knowledge with statistical information assessment to
              identify cases of unusual provider behavior. The heart
              of EFD is 27 behavioral heuristics, knowledge-based
              ways of viewing and measuring provider behavior. Rules
              operate on them to identify providers whose behavior
              merits a closer look by the Investigative Consultants.
              Second, machine learning is used to develop new rules
              and improve the identification process. Pilot
              operations involved analysis of nearly 22 000 providers
              in six metropolitan areas. The pilot is implemented in
              SAS Institute's SAS(R) System, AICorp's Knowledge Base
              Management System (KBMS(R)), and Borland
              International's Turbo Prolog(R).",
  keywords = "Statistics, Frontiers, Finance, natural language
              reports",
}
CUPID - An Iterative Knowledge Discovery Framework, Jason Mallen and Max Bramer
@Misc{mallen:cupid-iterative:,
  URL = "http://osiris.sis.port.ac.uk/technical_reports_index/kdpap.html",
  title = "{CUPID} - An Iterative Knowledge Discovery Framework",
  note = "Presented at ES94 (12/10/94)",
  year = "1994",
  author = "Jason Mallen and Max Bramer",
  affiliation = "University of Portsmouth, UK",
  abstract = "This paper describes the novel Knowledge Discovery
              system CUPID. Knowledge Discovery from Databases (KDD)
              is concerned with utilising techniques borrowed from
              fields such as machine learning (ML), statistics and
              databases to search for relationships and global
              patterns that may exist in large databases, but are
              `hidden' among the vast amounts of data. The discovered
              knowledge can be helpful for building knowledge based
              systems and data analysis. The underlying principle
              behind CUPID is the use of a quantitative measure for
              the `interest' of a hypothesis. This measure provides a
              method of ranking competing hypotheses and thus allows
              the system to store the `best' or `most interesting'
              rules describing a database. CUPID is based on the
              ITRule algorithm of (Smyth \& Goodman, 1992) and
              extends that algorithm with added functionality. CUPID
              provides four fundamental features. One, background
              knowledge in the form of attribute value generalisation
              hierarchies may be utilised. Two, prior domain
              knowledge which may be incorrect and incomplete may be
              provided by a domain expert. Three, knowledge may be
              re-used. Four, noise in the data set is handled in a
              well founded manner.",
}
Induction of Decision trees from Complex Structured Data, Michel Manago and Yves Kodratoff
@InCollection{manago.ea:induction-decision:91,
  author    = {Michel Manago and Yves Kodratoff},
  title     = {Induction of Decision trees from Complex Structured
    Data},
  booktitle = {Knowledge Discovery in Databases},
  editor    = {Gregory Piatetsky-Shapiro and William J. Frawley},
  edition   = {1st},
  pages     = {289--306},
  publisher = {AAAI Press / The MIT Press},
  address   = {Menlo Park, California},
  year      = {1991},
  crossref  = {piatetsky-shapiro.ea:knowledge-discovery:91},
}
Algorithms for inferring functional-dependencies from relations, H. Mannila and K. J. Raiha
@Article{mannila.ea:algorithms-inferring:94,
  author   = {H. Mannila and K.-J. R{\"a}ih{\"a}},
  title    = {Algorithms for inferring functional dependencies from
    relations},
  journal  = {Data \& Knowledge Engineering},
  volume   = {12},
  number   = {1},
  pages    = {83--99},
  year     = {1994},
  address  = {Univ Helsinki, Dept Comp Sci, Pob 26, Sf-90014
    Helsinki, Finland Univ Tampere, Dept Comp Sci, Sf-33101
    Tampere, Finland},
  keywords = {ARMSTRONG RELATIONS, DESIGN, FUNCTIONAL DEPENDENCIES,
    MACHINE DISCOVERY, DATA MINING, ALGORITHMS},
  abstract = {The dependency inference problem is to find a cover of
    the set of functional dependencies that hold in a given
    relation. The problem has applications in relational
    database design, in query optimization, and in
    artificial intelligence. The problem is exponential in
    the number of attributes. We develop two algorithms
    with better best case behavior than the simple one. One
    algorithm reduces the problem to computing the
    transversal of a hypergraph. The other is based on
    repeatedly sorting the relation with respect to a set
    of attributes.},
}
Discovering Frequent Episodes in Sequences, H. Mannila and H. Toivonen and A. I. Verkamo
@InProceedings{mannila.ea:discovering-frequent-episodes-in-sequences:95,
  author    = {H. Mannila and H. Toivonen and A. I. Verkamo},
  title     = {Discovering Frequent Episodes in Sequences},
  booktitle = {Proceedings of the First International Conference on
    Knowledge Discovery and Data Mining (KDD-95)},
  editor    = {U. M. Fayyad and R. Uthurusamy},
  publisher = {AAAI Press},
  address   = {Montreal, Canada},
  month     = aug,
  year      = {1995},
  url       = {ftp://ftp.cs.helsinki.fi/pub/Reports/by_Project/PMDM/Finding_Frequent_Episodes_in_Sequences.ps.gz},
  keywords  = {Knowledge discovery, Data mining, Sequence analysis,
    Episode discovery},
  abstract  = {Sequences of events describing the behavior and
    actions of users or systems can be collected in several
    domains. In this paper we consider the problem of
    recognizing frequent episodes in such sequences of
    events. An episode is defined to be a collection of
    events that occur within time intervals of a given size
    in a given partial order. Once such episodes are known,
    one can produce rules for describing or predicting the
    behavior of the sequence. We describe an efficient
    algorithm for the discovery of all frequent episodes
    from a given class of episodes, and present
    experimental results.},
}
Discovering Generalized Episodes Using Minimal Occurrences, Heikki Mannila and Hannu Toivonen
@InProceedings{mannila.ea:discovering-generalized:96,
  author   = {Heikki Mannila and Hannu Toivonen},
  title    = {Discovering Generalized Episodes Using Minimal
    Occurrences},
  pages    = {146},
  crossref = {simoudis.ea:proceedings-second:96},
}
Efficient algorithms for discovering association rules, Heikki Mannila and Hannu Toivonen and A. Inkeri Verkamo
@InProceedings{mannila.ea:efficient-algorithms:94,
  author    = {Heikki Mannila and Hannu Toivonen and A. Inkeri
    Verkamo},
  title     = {Efficient algorithms for discovering association
    rules},
  booktitle = {AAAI Workshop on Knowledge Discovery in Databases
    (KDD-94)},
  editor    = {Usama M. Fayyad and Ramasamy Uthurusamy},
  pages     = {181--192},
  publisher = {AAAI Press},
  address   = {Seattle, Washington},
  month     = jul,
  year      = {1994},
  url       = {ftp://ftp.cs.helsinki.fi/pub/Reports/by_Project/PMDM/Efficient_Algorithms_for_Discovering_Association_Rules.ps.gz},
  keywords  = {Knowledge discovery, Data mining, Association rules},
  abstract  = {Association rules are statements of the form ``for 90
    \% of the rows of the relation, if the row has value 1
    in the columns in set $W$, then it has 1 also in column
    $B$''. Agrawal, Imielinski, and Swami introduced the
    problem of mining association rules from large
    collections of data, and gave a method based on
    successive passes over the database. We give an
    improved algorithm for the problem. The method is based
    on careful combinatorial analysis of the information
    obtained in previous passes; this makes it possible to
    eliminate unnecessary candidate rules. Experiments on a
    university course enrollment database indicate that the
    method outperforms the previous one by a factor of 5.
    We also show that sampling is in general a very
    efficient way of finding such rules.},
}
Multiple Uses of Frequent Sets and Condensed Representations: Extended Abstract, Heikki Mannila and Hannu Toivonen
@InProceedings{mannila.ea:multiple-uses:96,
  author   = {Heikki Mannila and Hannu Toivonen},
  title    = {Multiple Uses of Frequent Sets and Condensed
    Representations: Extended Abstract},
  pages    = {189},
  crossref = {simoudis.ea:proceedings-second:96},
}
Data mining and machine learning (abstract), Heikki Mannila
@InProceedings{mannila:machine-learning:96,
  author    = {Heikki Mannila},
  title     = {Data mining and machine learning (abstract)},
  booktitle = {Proc. 13th International Conference on Machine
    Learning},
  pages     = {555},
  publisher = {Morgan Kaufmann},
  year      = {1996},
}
Data mining - here we go again - guest-editors introduction, B. Mark
@Article{mark:here-we:96,
  author  = {B. Mark},
  title   = {Data mining - here we go again - guest-editors
    introduction},
  journal = {{IEEE} Expert-Intelligent Systems \& Their
    Applications},
  volume  = {11},
  number  = {5},
  pages   = {18--19},
  year    = {1996},
  address = {Natl Semicond Architecture Lab, 2900 Semicond Dr, M-S
    E-100, Santa Clara, Ca, 95052},
}
Inference in mls database-systems, D. G. Marks
@Article{marks:inference-mls:96,
  author   = {D. G. Marks},
  title    = {Inference in {MLS} database systems},
  journal  = {{IEEE} Transactions on Knowledge and Data Engineering},
  volume   = {8},
  number   = {1},
  pages    = {46--55},
  year     = {1996},
  address  = {Us Dept Def, Off Infosec Comp Sci, Ft George G Meade,
    Md, 20755},
  keywords = {INFERENCE, DATABASE SECURITY, KNOWLEDGE DISCOVERY,
    MLS, QUERY PATTERNS},
  abstract = {Database systems that contain information of varying
    degrees of sensitivity pose the threat that some of the
    Low data may infer High data. This study derives
    conditions sufficient to identify such inference
    threats. First, it is reasoned that a database can only
    control material implications, as specified in formal
    logic systems. These material implications are found
    using Knowledge Discovery techniques. Material
    implications allow reasoning about outside knowledge,
    and provide the first assurance that outside knowledge
    does not assist in circumventing the inference
    controls. Database queries specify the properties of
    sets of data and are compared to help determine
    inferences. These queries are grouped into equivalence
    classes based upon their inference characteristics. A
    unique graph based model is developed for the
    equivalence classes that 1) makes such comparisons
    easy, and 2) allows implementation of an algorithm
    capable of finding those material implication rules
    where High data is inferred from Low data. This is the
    first method that offers assurance and sufficiency
    arguments that the mechanism is at least strong enough
    to protect the High data in the database from inference
    attacks that require Low data.},
}
A Comparison of Approaches for Maximizing Business Payoff of Prediction Models, Brij Masand and Gregory Piatetsky-Shapiro
@InProceedings{masand.ea:comparison-approaches:96,
  author   = {Brij Masand and Gregory Piatetsky-Shapiro},
  title    = {A Comparison of Approaches for Maximizing Business
    Payoff of Prediction Models},
  pages    = {195},
  crossref = {simoudis.ea:proceedings-second:96},
}
Systems for knowledge discovery in databases, C. J. Matheus and P. K. Chan and G. Piatetsky-Shapiro
@Article{matheus.ea:systems:93,
  author   = {C. J. Matheus and P. K. Chan and G.
    Piatetsky-Shapiro},
  title    = {Systems for knowledge discovery in databases},
  journal  = {{IEEE} Transactions on Knowledge and Data Engineering},
  volume   = {5},
  number   = {6},
  pages    = {903--913},
  month    = dec,
  year     = {1993},
  address  = {Gte Labs Inc, Tech Staff, 40 Sylvan Rd, Waltham, Ma,
    02254 Gte Labs Inc, Knowledge Discovery Databases
    Project, Waltham, Ma, 02254 Columbia Univ, Dept Comp
    Sci, New York, Ny, 10027},
  crossref = {cercone.ea:ieee-transactions:93},
  keywords = {DATABASES, DISCOVERY, KDD SYSTEMS, MACHINE LEARNING},
  annote   = {Discusses Coverstory, Explora and KDW},
  abstract = {The automated discovery of knowledge in databases is
    becoming increasingly important as the world's wealth
    of data continues to grow exponentially.
    Knowledge-discovery systems face challenging problems
    from real-world databases which tend to be dynamic,
    incomplete, redundant, noisy, sparse, and very large.
    This paper addresses these problems and describes some
    techniques for handling them. A model of an idealized
    knowledge-discovery system is presented as a reference
    for studying and designing new systems. This model is
    used in the comparison of three systems: CoverStory,
    EXPLORA, and the Knowledge Discovery Workbench. The
    deficiencies of existing systems relative to the model
    reveal several open problems for future research.},
}
Data mining and the con in econometrics - the us demand for money revisited, M. McAleer and M. R. Veall
@Article{mcaleer.ea:con-econometrics:95,
  author  = {M. McAleer and M. R. Veall},
  title   = {Data mining and the con in econometrics - the {US}
    demand for money revisited},
  journal = {Mathematics and Computers in Simulation},
  volume  = {39},
  number  = {3--4},
  pages   = {329--333},
  year    = {1995},
  address = {Univ Western Australia, Dept Econ, Nedlands, Wa 6009,
    Australia Mcmaster Univ, Dept Econ, Hamilton, On,
    Canada},
}
MDL-Based Decision Tree Pruning, Manish Mehta and Jorma Rissanen and Rakesh Agrawal
@InProceedings{mehta.ea:mdl-based-decision:95,
  author       = {Manish Mehta and Jorma Rissanen and Rakesh Agrawal},
  title        = {{MDL}-Based Decision Tree Pruning},
  booktitle    = {Proceedings of the First International Conference on
    Knowledge Discovery and Data Mining (KDD-95)},
  pages        = {216--221},
  month        = aug,
  year         = {1995},
  url          = {http://www.almaden.ibm.com/cs/people/ragrawal/papers/kdd95_mdl.ps},
  abstract-url = {http://www.almaden.ibm.com/cs/people/ragrawal/abstracts.html#mra95},
  keywords     = {Data Mining, Classification, Decision-Trees, MDL},
  abstract     = {This paper explores the application of the Minimum
    Description Length principle for pruning decision
    trees. We present a new algorithm that intuitively
    captures the primary goal of reducing the
    misclassification error. An experimental comparison is
    presented with three other pruning algorithms. The
    results show that the MDL pruning algorithm achieves
    good accuracy, small trees, and fast execution times.},
}
SLIQ: A Fast Scalable Classifier for Data Mining, M. Mehta and R. Agrawal and J. Rissanen
@Article{mehta.ea:sliq-fast:96,
  author  = {M. Mehta and R. Agrawal and J. Rissanen},
  title   = {{SLIQ}: {A} Fast Scalable Classifier for Data Mining},
  journal = {Lecture Notes in Computer Science},
  volume  = {1057},
  pages   = {18--32},
  year    = {1996},
  issn    = {0302-9743},
}
Mining geophysical-data for knowledge, E. Mesrobian and R. Muntz and E. Shek and S. Nittel and M. Larouche and M. Kriguer and C. Mechoso and J. Farrara and P. Stolorz and H. Nakamura
@Article{mesrobian.ea:geophysical-data:96,
  author  = {E. Mesrobian and R. Muntz and E. Shek and S. Nittel
    and M. Larouche and M. Kriguer and C. Mechoso and J.
    Farrara and P. Stolorz and H. Nakamura},
  title   = {Mining geophysical-data for knowledge},
  journal = {{IEEE} Expert-Intelligent Systems \& Their
    Applications},
  volume  = {11},
  number  = {5},
  pages   = {34--44},
  year    = {1996},
  address = {Univ Calif Los Angeles, Dept Comp Sci, Los Angeles,
    Ca, 90024 Univ Calif Los Angeles, Dept Atmospher Sci,
    Los Angeles, Ca, 90024 Univ Tokyo, Dept Earth \&
    Planetary Phys, Tokyo, Japan Univ Calif Los Angeles,
    Data Min Lab, Los Angeles, Ca, 90024},
}
The AQ15 inductive learning system: an overview and experiments, Ryszard S. Michalski and Igor Mozetic and Jiarong Hong and Nada Lavrac
@TechReport{michalski.ea:aq15-inductive:86,
  author      = {Ryszard S. Michalski and Igor Mozetic and Jiarong Hong
    and Nada Lavrac},
  title       = {The {AQ15} inductive learning system: an overview and
    experiments},
  institution = {University of Illinois},
  number      = {UIUCDCS-R-86-1260},
  month       = jul,
  year        = {1986},
}
Clustering, R. S. Michalski and R. E. Stepp
@Article{michalski.ea:clustering:92,
  author   = {R. S. Michalski and R. E. Stepp},
  title    = {Clustering},
  pages    = {168--176},
  key      = {Encyclopedia-of-ai:clustering},
  crossref = {shapiro:encyclopedia-artificial:92},
}
Mining for knowledge in Databases: The INLEN Architecture, Initial Implementation and First Results., R. S. Michalski and L. Kerschberg and K. A. Kaufman
@Article{michalski.ea:inlen-architecture:92,
  author  = {R. S. Michalski and L. Kerschberg and K. A. Kaufman},
  title   = {Mining for knowledge in Databases: The {INLEN}
    Architecture, Initial Implementation and First
    Results},
  journal = {Journal of Intelligent Information Systems},
  volume  = {1},
  number  = {1},
  pages   = {85--113},
  year    = {1992},
}
Learning from observation: conceptual clustering, Ryszard S. Michalski and Robert E. Stepp
@InCollection{michalski.ea:learning-observation:83,
  author    = {Ryszard S. Michalski and Robert E. Stepp},
  title     = {Learning from observation: conceptual clustering},
  booktitle = {Machine Learning, an {Artificial Intelligence}
    approach},
  pages     = {331--363},
  crossref  = {michalski.ea:machine-learning:83},
}
Machine Learning, an Artificial Intelligence approach, Ryszard S. Michalski and Jaime G. Carbonell and Tom M. Mitchell (Eds)
@Book{michalski.ea:machine-learning:83,
  editor    = {Ryszard S. Michalski and Jaime G. Carbonell and Tom M.
    Mitchell},
  title     = {Machine Learning, an {Artificial Intelligence}
    approach},
  booktitle = {Machine Learning, an {Artificial Intelligence}
    approach},
  volume    = {1},
  publisher = {Morgan Kaufmann},
  address   = {San Mateo, California},
  year      = {1983},
}
Machine Learning, an Artificial Intelligence approach, Ryszard S. Michalski and Jaime G. Carbonell and Tom M. Mitchell (Eds)
@Book{michalski.ea:machine-learning:86,
  editor    = {Ryszard S. Michalski and Jaime G. Carbonell and Tom M.
    Mitchell},
  title     = {Machine Learning, an {Artificial Intelligence}
    approach},
  booktitle = {Machine Learning, an {Artificial Intelligence}
    approach},
  volume    = {2},
  publisher = {Morgan Kaufmann},
  address   = {San Mateo, California},
  year      = {1986},
}
The multi-purpose incremental learning system AQ15 and its testing application to three medical domains, Ryszard S. Michalski and Igor Mozetic and Jiarong Hong and Nada Lavrac
@InProceedings{michalski.ea:multi-purpose-incremental:86,
  author    = {Ryszard S. Michalski and Igor Mozetic and Jiarong Hong
    and Nada Lavrac},
  title     = {The multi-purpose incremental learning system {AQ15}
    and its testing application to three medical domains},
  booktitle = {Proceedings of the 5th national conference on
    Artificial Intelligence},
  pages     = {1041--1045},
  address   = {Philadelphia},
  year      = {1986},
}
A theory and methodology of inductive learning, Ryszard S. Michalski
@InCollection{michalski:theory-methodology:83,
  author    = {Ryszard S. Michalski},
  title     = {A theory and methodology of inductive learning},
  booktitle = {Machine Learning, an {Artificial Intelligence}
    approach},
  editor    = {Ryszard S. Michalski and Jaime G. Carbonell and Tom M.
    Mitchell},
  volume    = {1},
  pages     = {83--134},
  publisher = {Morgan Kaufmann},
  address   = {San Mateo, California},
  year      = {1983},
  crossref  = {michalski.ea:machine-learning:83},
}
Fast Robust Visual Data Mining, Ted Mihalisin and John Timlin
@InProceedings{mihalisin.ea:fast-robust:97,
  author   = {Ted Mihalisin and John Timlin},
  title    = {Fast Robust Visual Data Mining},
  pages    = {231},
  crossref = {heckerman.ea:proceedings-third:97},
}
An empirical comparison of selection measures for decision tree induction, J. Mingers
@Article{mingers:empirical-comparison:89,
  author    = {J. Mingers},
  title     = {An empirical comparison of selection measures for
    decision tree induction},
  journal   = {Machine Learning},
  volume    = {3},
  number    = {4},
  pages     = {319--342},
  publisher = {Kluwer Academic},
  address   = {Boston},
  year      = {1989},
}
A framework for representing knowledge, Marvin Minsky
@InCollection{minsky:framework-representating:75,
  author    = {Marvin Minsky},
  title     = {A framework for representing knowledge},
  booktitle = {The Psychology of Computer Vision},
  editor    = {Patrick Henry Winston},
  pages     = {211--277},
  publisher = {McGraw-Hill},
  address   = {New York},
  year      = {1975},
}
Learning by experimentation: acquiring and refining problem-solving heuristics, Tom M. Mitchell and Paul E. Utgoff and Ranan Banerji
@InCollection{mitchell.ea:learning-by:83,
  author    = {Tom M. Mitchell and Paul E. Utgoff and Ranan Banerji},
  title     = {Learning by experimentation: acquiring and refining
    problem-solving heuristics},
  booktitle = {Machine Learning, an {Artificial Intelligence}
    approach},
  editor    = {Ryszard S. Michalski and Jaime G. Carbonell and Tom M.
    Mitchell},
  volume    = {1},
  pages     = {163--190},
  publisher = {Morgan Kaufmann},
  address   = {San Mateo, California},
  year      = {1983},
  crossref  = {michalski.ea:machine-learning:83},
}
Learning-theory toward genome informatics, S. Miyano
@Article{miyano:learning-theory-toward:95,
  author   = {S. Miyano},
  title    = {Learning-theory toward genome informatics},
  journal  = {{IEICE} Transactions on Information and Systems},
  volume   = {E78D},
  number   = {5},
  pages    = {560--567},
  year     = {1995},
  address  = {Kyushu Univ, Fundamental Informat Sci Res Inst,
    Fukuoka 812, Japan},
  keywords = {PAC-LEARNING, COMPUTATIONAL COMPLEXITY, KNOWLEDGE
    ACQUISITION, GENOME INFORMATICS},
  abstract = {This paper discusses some problems in Molecular
    Biology for which learning paradigms are strongly
    desired. We also present a framework of knowledge
    discovery by PAC-learning paradigm together with its
    theory and practice developed in our work for discovery
    from amino acid sequences.},
}
A rough set framework for data mining of propositional default rules, T. Mollestad and A. Skowron
@Article{mollestad.ea:rough-set:96,
  author  = {T. Mollestad and A. Skowron},
  title   = {A rough set framework for data mining of propositional
    default rules},
  journal = {Lecture Notes in Computer Science},
  volume  = {1079},
  pages   = {448--457},
  year    = {1996},
  issn    = {0302-9743},
}
The Field Matching Problem: Algorithms and Applications, Alvaro E. Monge and Charles P. Elkan
@InProceedings{monge.ea:field-matching:96,
  author   = {Alvaro E. Monge and Charles P. Elkan},
  title    = {The Field Matching Problem: Algorithms and
    Applications},
  pages    = {267},
  crossref = {simoudis.ea:proceedings-second:96},
}
Encouraging experimental results on learning CNF, Raymond J. Mooney
@TechReport{mooney:encouraging-experimental:92,
  author      = {Raymond J. Mooney},
  title       = {Encouraging experimental results on learning {CNF}},
  institution = {University of Texas},
  month       = oct,
  year        = {1992},
}
A Multistrategy Approach to Relational Knowledge Discovery in Databases, K. Morik and P. Brockhausen
@InProceedings{morik.ea:multistrategy-approach:96,
  author    = {K. Morik and P. Brockhausen},
  title     = {A Multistrategy Approach to Relational Knowledge
    Discovery in Databases},
  booktitle = {Proceedings of the 3rd International Workshop on
    Multistrategy Learning},
  pages     = {17--28},
  publisher = {AAAI Press},
  year      = {1996},
}
Applications of Machine Learning, Katharina Morik
@InProceedings{morik:applications-machine:92,
  author    = {Katharina Morik},
  title     = {Applications of Machine Learning},
  booktitle = {Proc.\ 6th European Knowledge Acquisition Workshop},
  pages     = {9--13},
  publisher = {Springer-Verlag},
  address   = {Berlin},
  year      = {1992},
  annote    = {Brief overview of ML applications, Future directions
    of ML - Integration of ML into databases (data
    mining), Multi-strategy learning, inductive logic
    programming.},
}
Data mining using probabilistic structure analysis, J. A. Morrell
@Article{morrell:using-probabilistic:97,
  author  = {J. A. Morrell},
  title   = {Data mining using probabilistic structure analysis},
  journal = {Abstracts of Papers of the American Chemical Society},
  volume  = {213},
  number  = {Pt1},
  pages   = {69-CINF},
  year    = {1997},
  address = {Monsanto Co, St Louis, Mo, 63198},
}
Advances in databases: 14th British National Conference on Databases, BNCOD 14, Edinburgh, Scotland, United Kingdom, July 3--5, 1996: proceedings, R. (Ronald) Morrison and Jessie Kennedy (Eds)
@Proceedings{morrison.ea:advances-14th:96,
  editor    = {Ronald Morrison and Jessie Kennedy},
  title     = {Advances in databases: 14th British National
    Conference on Databases, {BNCOD} 14, Edinburgh,
    Scotland, United Kingdom, July 3--5, 1996:
    proceedings},
  booktitle = {Advances in databases: 14th British National
    Conference on Databases, {BNCOD} 14, Edinburgh,
    Scotland, United Kingdom, July 3--5, 1996:
    proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {1094},
  pages     = {xi + 229},
  publisher = {Springer-Verlag Inc.},
  address   = {New York, NY, USA},
  year      = {1996},
  isbn      = {3-540-61442-7 (paperback)},
  issn      = {0302-9743},
  lccn      = {QA76.9.D3 B75 1996},
  keywords  = {Database management -- Congresses.},
  annote    = {Schema integration meta-knowledge classification and
    reuse / R. M. Duwairi, N. J. Fiddian, W. A. Gray --
    View mechanism for schema evolution in object-oriented
    DBMS / Zohra Bellahsene -- An active rule language for
    ROCK and ROLL / Andrew Dinn \ldots{} [et al.] --
    Integrity constraints in multiversion databases / Anne
    Doucet \ldots{} [et al.] -- The development of a
    semantic integrity constraint subsystem for a
    distributed database / H. Ibrahim, W. A. Gray, N. J.
    Fiddian -- Understanding the tension between transition
    rules and confidentiality / Xavier C. Delannoy --
    Extending ER for dynamic behaviour and refinement /
    Simon Wiseman, Bryony Pomeroy -- Speeding up knowledge
    discovery in large relational databases by means of a
    new discretization algorithm / Alex Alves Freitas,
    Simon H. Lavington -- Integration of load measurement
    parameters into the cost evaluation of database queries
    / Guntram Flach, Holger Meyer -- High performance OO
    traversals in Monet / Peter A. Boncz, Fred Kwakkel,
    Martin L. Kersten -- A modular compiler architecture
    for a data manipulation language / Suzanne M. Embury,
    Peter M. D. Gray -- Querying graph databases using a
    functional language extended with second order
    facilities / Robert Ayres, Peter J. H. King -- SQL+i:
    adding temporal indeterminacy to the database language
    SQL / Antony Griffiths, Babis Theodoulidis -- Pearls,
    swines and sows' ears: interface research inside a
    multinational bank / Matthew Chalmers --
    Dissemination-based information systems: your data may
    be where you least expect it / Stanley B. Zdonik --
    Microsoft database technologies: an inside view / Nigel
    Stanley -- Predicate maintained queries: an active
    OODBMS for financial applications / Mark Butterfield,
    Nicholas Caine, Stephen Ross-Talbot -- Universal data
    management / A. Bailey.},
}
Case-based reasoning - market, applications, and fit with other technologies, S. Mott
@Article{mott:case-based-reasoning:93,
  author   = {S. Mott},
  title    = {Case-based reasoning - market, applications, and fit
    with other technologies},
  journal  = {Expert Systems With Applications},
  volume   = {6},
  number   = {1},
  pages    = {97--104},
  year     = {1993},
  address  = {Cognit Syst Inc, 234 Church St, New Haven, Ct, 06903},
  abstract = {Case-based reasoning (CBR), the hit of the American
    Association of Artificial Intelligence annual
    conference in 1991 and 1992 is now enjoying a surge of
    interest in its first year of commercial availability.
    Knowledge-based system designers, developers,
    integrators, and tool vendors are now seriously
    considering the role and utility of CBR in leveraging
    the vast experience within organizations for more
    effective decision making. The potential market for CBR
    appears enormous, particularly in more complex
    problem-solving domains, but the areas of most immediate
    interest are in applications where efficient
    information processing needs are urgent, such as
    automated help desks. Early experiments pairing CBR
    with rule-based systems will soon lead to hybrid
    combinations with other ``close approximation''
    technologies, such as neural networks, fuzzy logic
    systems, genetic algorithms, and so forth. CBR appears
    headed for a sustaining role not only as a useful
    complement in knowledge-based information processing
    technology but also as an engine for ``mainstream''
    information tasks of the future (e.g., intelligent text
    processing and retrieval, data mining, and projective
    reasoning). This article will discuss this emerging
    role for CBR and its implications from a marketing
    perspective.},
}
Fast Sequential and Parallel Algorithms for Association Rule Mining: A Comparison, Andreas Mueller
@TechReport{mueller:fast-sequential:95,
  author      = {Andreas Mueller},
  title       = {Fast Sequential and Parallel Algorithms for
    Association Rule Mining: {A} Comparison},
  institution = {Dept. of Computer Science, Univ. of Maryland},
  number      = {CS-TR-3515},
  address     = {College Park, MD},
  month       = aug,
  year        = {1995},
  url         = {ftp://ftp.cs.umd.edu/pub/papers/papers/3515/3515.ps.Z},
  abstract    = {The field of knowledge discovery in databases, or
    \emph{Data Mining}, has received increasing attention during
    recent years as large organizations have begun to
    realize the potential value of the information that is
    stored implicitly in their databases. One specific data
    mining task is the mining of Association Rules,
    particularly from retail data. The task is to determine
    patterns (or rules) that characterize the shopping
    behavior of customers from a large database of previous
    consumer transactions. The rules can then be used to
    focus marketing efforts such as product placement and
    sales promotions.\par Because early algorithms required
    an unpredictably large number of IO operations,
    reducing IO cost has been the primary target of the
    algorithms presented in the literature. One of the most
    recent proposed algorithms, called PARTITION, uses a
    new TID-list data representation and a new partitioning
    technique. The partitioning technique reduces IO cost
    to a constant amount by processing one database portion
    at a time in memory. We implemented an algorithm called
    SPTID that incorporates both TID-lists and partitioning
    to study their benefits. For comparison, a
    non-partitioning algorithm called SEAR, which is based
    on a new prefix-tree data structure, is used. Our
    experiments with SPTID and SEAR indicate that TID-lists
    have inherent inefficiencies; furthermore, because all
    of the algorithms tested tend to be CPU-bound, trading
    CPU-overhead against I/O operations by partitioning did
    not lead to better performance.\par In order to scale
    mining algorithms to the huge databases (e.g., multiple
    Terabytes) that large organizations will manage in the
    near future, we implemented parallel versions of SEAR
    and SPEAR (its partitioned counterpart). The
    performance results show that, while both algorithms
    parallelize easily and obtain good speedup and scale-up
    results, the parallel SEAR version performs better than
    parallel SPEAR, despite the fact that it uses more
    communication.},
}
Neural Networks, an introduction, Berndt M{\"u}ller and Joachim Reinhardt
@Book{muller.ea:neural-networks:91,
  author    = {Berndt M{\"u}ller and Joachim Reinhardt},
  title     = {Neural Networks, an introduction},
  series    = {Physics of Neural Networks},
  publisher = {Springer-Verlag},
  address   = {Berlin},
  year      = {1991},
}
Exploring the Decision Forest: An Empirical Investigation of Occam's Razor in Decision Tree Induction, Patrick M. Murphy and Michael J. Pazzani
@Article{murphy.ea:exploring-decision:,
  author   = {Patrick M. Murphy and Michael J. Pazzani},
  title    = {Exploring the Decision Forest: An Empirical
    Investigation of Occam's Razor in Decision Tree
    Induction},
  journal  = {Journal of Artificial Intelligence Research},
  volume   = {1},
  pages    = {257--275},
  year     = {1994},
  url      = {gopher://P.GP.CS.CMU.EDU:70/00/volume1/murphy94a.ps},
  abstract = {We report on a series of experiments in which all
    decision trees consistent with the training data are
    constructed. These experiments were run to gain an
    understanding of the properties of the set of
    consistent decision trees and the factors that affect
    the accuracy of individual trees. In particular, we
    investigated the relationship between the size of a
    decision tree consistent with some training data and
    the accuracy of the tree on test data. The experiments
    were performed on a massively parallel Maspar computer.
    The results of the experiments on several artificial
    and two real world problems indicate that, for many of
    the problems investigated, smaller consistent decision
    trees are on average less accurate than the average
    accuracy of slightly larger trees.},
}
OC1: Randomized Induction of Oblique Decision Trees, S. K. Murthy and S. Kasif and S. Salzberg and R. Beigel
@InProceedings{murthy.ea:oc1-randomized:93,
  author    = {S. K. Murthy and S. Kasif and S. Salzberg and R.
    Beigel},
  title     = {{OC1}: Randomized Induction of Oblique Decision
    Trees},
  booktitle = {Proceedings of the Eleventh National Conference on
    Artificial Intelligence},
  pages     = {322--327},
  address   = {Washington, D.C.},
  year      = {1993},
}
On Growing Better Decision Trees from Data, Sreerama K. Murthy
@PhdThesis{murthy:on-growing:,
  author   = {Sreerama K. Murthy},
  title    = {On Growing Better Decision Trees from Data},
  abstract = {This thesis investigates the problem of growing
    decision trees from data, for the purposes of
    classification and prediction.},
}
Belief Network Induction, Ron Musick
@PhdThesis{musick:belief-network:,
  author   = {Ron Musick},
  title    = {Belief Network Induction},
  school   = {University of California, Berkeley},
  url      = {http://http.cs.berkeley.edu/~musick},
  abstract = {This dissertation describes BNI (Belief Network
    Inductor), a tool that automatically induces a belief
    network from a database. The fundamental thrust of this
    research program has been to provide a theoretically
    sound method of inducing a model from data, and
    performing inference over that model. Along with a
    solid grounding in probability theory, BNI has proven
    to be a quick, practical method of inducing data models
    that are highly accurate. The results include a belief
    network that stores beta distributions in the
    conditional probability tables, coupled with theorems
    demonstrating how to maintain these distributions
    through inference; techniques for applying neural
    network and other learning techniques to the task of
    conditional probability table learning; and a decision
    theoretic sampling theory which addresses scalability
    issues by characterizing the size of the sample needed
    to produce high quality inferences. The setting for
    this work is in database mining.},
}
Belief Network Induction, Ron Musick
@TechReport{musick:belief-network:95,
  author      = {Ron Musick},
  title       = {Belief Network Induction},
  institution = {EECS Computer Science Division, University of
    California, Berkeley},
  type        = {Technical Report},
  number      = {UCB//CSD-95-863},
  pages       = {104},
  month       = dec,
  year        = {1995},
  url         = {ftp://tr-ftp.cs.berkeley.edu/pub/tech-reports/csd/csd-95-863/all.ps},
  abstract    = {This dissertation describes BNI (Belief Network
    Inductor), a tool that automatically induces a belief
    network from a database. The fundamental thrust of this
    research program has been to provide a theoretically
    sound method of inducing a model from data, and
    performing inference over that model. Along with a
    solid grounding in probability theory, BNI has proven
    to be a quick, practical method of inducing data models
    that are highly accurate. The results include a belief
    network that stores beta distributions in the
    conditional probability tables, coupled with theorems
    demonstrating how to maintain these distributions
    through inference; techniques for applying neural
    network and other learning techniques to the task of
    conditional probability table learning; and a decision
    theoretic sampling theory which addresses scalability
    issues by characterizing the size of the sample needed
    to produce high quality inferences. The setting for
    this work is in database mining. Database mining is one
    of the fastest growing topics in Artificial
    Intelligence today, with industry providing at least as
    much impetus as research labs and universities. The
    general goal is to extract interesting quantities or
    relationships that are ``hidden'' in large corporate or
    scientific databases, with the potential benefits of a
    successful technology being enormous. For example,
    models can be built that characterize what types of
    customers will respond to what types of marketing
    schemes, retailers will be able to predict sales to
    help determine correct inventory levels and
    distribution schedules, and insurance companies will be
    able to predict expected claim costs and better
    classify who will buy what type of coverage.},
}
Rethinking the Learning of Belief Network Probabilities, Ron Musick
@InProceedings{musick:rethinking-learning:96,
  author   = {Musick, Ron},
  title    = {Rethinking the Learning of Belief Network Probabilities},
  pages    = {120},
  crossref = {simoudis.ea:proceedings-second:96},
}
Development of Multi-Criteria Metrics for Evaluation of Data Mining Algorithms, Gholamreza Nakhaeizadeh and Alexander Schnabl
@InProceedings{nakhaeizadeh.ea:development-multi-criteria:97,
  author   = {Nakhaeizadeh, Gholamreza and Schnabl, Alexander},
  title    = {Development of Multi-Criteria Metrics for Evaluation of Data
              Mining Algorithms},
  pages    = {37},
  crossref = {heckerman.ea:proceedings-third:97},
}
A logical language for data and knowledge bases, Shamim Naqvi and Shalom Tsur
@Book{naqvi.ea:logical-language:89,
  author    = {Naqvi, Shamim and Tsur, Shalom},
  title     = {A logical language for data and knowledge bases},
  series    = {Principles of computer science},
  publisher = {Computer Science Press},
  address   = {Rockville},
  year      = {1989},
}
Revisable knowledge discovery in databases, A. Narayanan
@Article{narayanan:revisable:96,
author = "A. Narayanan",
affiliation = "Univ Exeter, Dept Comp Sci, Exeter Ex4 4Pt, Devon,
England",
title = "Revisable knowledge discovery in databases",
journal = "International Journal of Intelligent Systems",
year = "1996",
volume = "11",
number = "2",
pages = "75--96",
abstract = "This article introduces the idea of using nonmonotonic
inheritance networks for the storage and maintenance of
knowledge discovered in data (revisable knowledge
discovery in databases). While existing data mining
strategies for knowledge discovery in databases
typically involve initial structuring through the use
of identification trees and the subsequent extraction
of rules from these trees for use in rule-based expert
systems, such strategies have difficulty in coping with
additional information which may conflict with that
already used for the automatic generation of rules. In
the worst case, the entire automatic sequence may have
to be repeated. If nonmonotonic inheritance networks
are used instead of rules for storing knowledge
discovered in databases, additional conflicting
information can be inserted directly into such
structures, thereby bypassing the need for
recompilation. (C) 1996 John Wiley \& Sons, Inc.",
keywords = "LOGIC, CIRCUMSCRIPTION, INHERITANCE",
}
High-Performance Computing Approaches for Using the WWW to Access a Large-Scale Environmental Dataset Repository, Bahram Nassersharif and Richard Marciano and Sui-ky Ling and Eugene Ho and Curt Edmonds
@InProceedings{nassersharif.ea:high-performance-computing:95,
author = "Bahram Nassersharif and Richard Marciano and Sui-ky
Ling and Eugene Ho and Curt Edmonds",
title = "High-Performance Computing Approaches for Using the
{WWW} to Access a Large-Scale Environmental Dataset
Repository",
booktitle = "Proceedings of Supercomputing'95",
publisher = "ACM/IEEE",
address = "San Diego, CA",
month = dec,
year = "1995",
keywords = "data mining, mass storage, supercomputing, World Wide
Web, Oracle, data repositories, access tools, remote
sensing, environmental data, global warming, NALC,
ecology",
abstract = "Simple html document on CD with MPEG.",
}
Efficient and Effective Clustering Methods for Spatial Data Mining, Raymond T. Ng and Jiawei Han
@InProceedings{ng.ea:efficient-and-effective-clustering-methods-for-spatial-data-mining:94,
author = "Raymond T. Ng and Jiawei Han",
title = "Efficient and Effective Clustering Methods for
Spatial Data Mining",
booktitle = "Proceedings of the Twentieth International Conference
on Very Large Data Bases",
year = "1994",
address = "Santiago, Chile",
pages = "144--155",
abstract = "Spatial data mining is the discovery of interesting
relationships and characteristics that may exist
implicitly in spatial databases. In this paper, we
explore whether clustering methods have a role to play
in spatial data mining. To this end, we develop a new
clustering method called CLARANS which is based on
randomized search. We also develop two spatial data
mining algorithms that use CLARANS. Our analysis and
experiments show that with the assistance of CLARANS,
these two algorithms are very effective and can lead to
discoveries that are difficult to find with current
spatial data mining algorithms. Furthermore,
experiments conducted to compare the performance of
CLARANS with that of existing clustering methods show
that CLARANS is the most efficient.",
}
Efficient and Effective Clustering Methods for Spatial Data Mining, R. T. Ng and J. Han
@InProceedings{ng.ea:efficient-effective:94,
author = "R. T. Ng and J. Han",
title = "Efficient and Effective Clustering Methods for Spatial
Data Mining",
editor = "Jorge B. Bocca and Matthias Jarke and Carlo Zaniolo",
booktitle = "20th International Conference on Very Large Data
Bases, September 12--15, 1994, Santiago, Chile
proceedings",
publisher = "Morgan Kaufmann Publishers",
address = "Los Altos, CA 94022, USA",
pages = "144--155",
year = "1994",
annote = "Also known as VLDB'94",
keywords = "very large data bases; VLDB",
}
Principles of Artificial Intelligence, Nils J. Nilsson
@Book{nilsson:principles-artificial-intelligence:82,
  author    = {Nilsson, Nils J.},
  title     = {Principles of {Artificial Intelligence}},
  series    = {Symbolic Computation},
  publisher = {Springer-Verlag},
  year      = {1982},
}
Some privacy issues in knowledge discovery - the OECD personal privacy guidelines, D. E. O'Leary
@Article{oleary:some-privacy:95,
author = "D. E. O'Leary",
affiliation = "Univ So Calif, 3660 Trousdale Pkwy, Los Angeles, Ca,
90089",
title = "Some privacy issues in knowledge discovery - the {OECD}
personal privacy guidelines",
journal = "IEEE Expert-Intelligent Systems \& Their
Applications",
year = "1995",
volume = "10",
number = "2",
pages = "48--52",
keywords = "ethics, privacy, ethical issues",
}
Using domain knowledge to guide database knowledge discovery, M. M. Owrang and F. H. Grupe
@Article{owrang.ea:using-domain:96,
author = "M. M. Owrang and F. H. Grupe",
affiliation = "American Univ, Dept Comp Sci, 400 Massachusetts Ave
Nw, Washington, Dc, 20016 Univ Nevada, Dept Accounting
\& Comp Informat Syst, Reno, Nv, 89557",
title = "Using domain knowledge to guide database knowledge
discovery",
journal = "Expert Systems with Applications",
year = "1996",
volume = "10",
number = "2",
pages = "173--180",
abstract = "Modern database technologies process large volumes of
data to discover new knowledge. Some large databases
make discovery computationally expensive. Additional
knowledge, known as domain or background knowledge,
hidden in the database can often guide and restrict the
search for interesting knowledge. This paper discusses
mechanisms by which domain knowledge can be used
effectively in discovering knowledge from databases. In
particular we look at the use of domain knowledge to
reduce the search as well as to optimize the hypotheses
which represent the interesting knowledge to be
discovered. Also, we discuss how to use domain
knowledge to test the validity of the discovered
knowledge. Although domain knowledge can be used to
improve database searches, it should not block the
discovery of unexpected knowledge. We provide some
guidelines to use domain knowledge properly.",
}
Pattern Discovery in Temporal Databases: A Temporal Logic Approach, Balaji Padmanabhan and Alexander Tuzhilin
@InProceedings{padmanabhan.ea:pattern-temporal:96,
  author   = {Padmanabhan, Balaji and Tuzhilin, Alexander},
  title    = {Pattern Discovery in Temporal Databases: {A} Temporal Logic
              Approach},
  pages    = {351},
  crossref = {simoudis.ea:proceedings-second:96},
}
An effective hash-based algorithm for mining association rules, Jong Soo Park and Ming-Syan Chen and P. S. Yu
@Article{park.ea:effective-hash-based:95,
  author         = {Park, Jong Soo and Chen, Ming-Syan and Yu, P. S.},
  title          = {An effective hash-based algorithm for mining association
                    rules},
  journal        = {SIGMOD Record (ACM Special Interest Group on Management
                    of Data)},
  volume         = {24},
  number         = {2},
  pages          = {175--186},
  month          = jun,
  year           = {1995},
  issn           = {0163-5808},
  affiliation    = {IBM Thomas J. Watson Res. Center, Yorktown Heights, NY,
                    USA},
  classification = {C7170 (Marketing computing); C7180 (Retailing and
                    distribution computing); C6170K (Knowledge engineering
                    techniques); C6160 (Database management systems (DBMS))},
  keywords       = {Effective hash-based algorithm; Association rules mining;
                    Sales transactions; Candidate set generation; Performance
                    bottleneck},
  thesaurus      = {Database management systems; Knowledge acquisition;
                    Pattern matching; Retail data processing; Sales
                    management},
  xxcrossref     = {Anonymous:1995:ASI},
}
An Effective Hash Based Algorithm for Mining Association Rules, Jong Soo Park and Ming-Syan Chen and Philip S. Yu
@InProceedings{park.ea:effective-hash:95,
  author    = {Park, Jong Soo and Chen, Ming-Syan and Yu, Philip S.},
  title     = {An Effective Hash Based Algorithm for Mining Association
               Rules},
  editor    = {Carey, Michael J. and Schneider, Donovan A.},
  booktitle = {Proceedings of the 1995 {ACM} {SIGMOD} International
               Conference on Management of Data},
  address   = {San Jose, California},
  pages     = {175--186},
  month     = "22--25~" # may,
  year      = {1995},
}
Intelligent Database Tools \& Applications, K. Parsaye and M. Chignell
@Book{parsaye.ea:intelligent-database:93,
  author    = {Parsaye, K. and Chignell, M.},
  title     = {Intelligent Database Tools \& Applications},
  publisher = {John Wiley},
  year      = {1993},
}
OLAP and Data Mining: Bridging the Gap, Kamran Parsaye
@Article{parsaye:olap-bridging:97,
author = "Kamran Parsaye",
title = "{OLAP} and Data Mining: Bridging the Gap",
journal = "Database Programming and Design",
year = "1997",
month = feb,
URL = "http://www.dbpd.com/parsfeb.htm",
annote = "OLAP and data mining--while very different--are both
integral to the decision-support process. By carefully
linking them, you can make sure one activity reinforces
the other.",
}
Knowledge Discovery from Epidemiological Databases, G. Pavilion
@Article{pavilion:epidemiological:96,
  author  = {Pavilion, G.},
  title   = {Knowledge Discovery from Epidemiological Databases},
  journal = {Lecture Notes in Computer Science},
  volume  = {1057},
  pages   = {201--??},
  year    = {1996},
  issn    = {0302-9743},
}
Beyond Concise and Colorful: Learning Intelligible Rules, Michael J. Pazzani and Subramani Mani and W. Rodman Shankle
@InProceedings{pazzani.ea:beyond-concise:97,
  author   = {Pazzani, Michael J. and Mani, Subramani and Shankle, W. Rodman},
  title    = {Beyond Concise and Colorful: Learning Intelligible Rules},
  pages    = {235},
  crossref = {heckerman.ea:proceedings-third:97},
}
Conditional fuzzy c-means, W. Pedrycz
@Article{pedrycz:conditional-fuzzy:96,
author = "W. Pedrycz",
affiliation = "Univ Manitoba, Dept Elect \& Comp Engn, Winnipeg, Mb
R3T 5V6, Canada",
title = "Conditional fuzzy c-means",
journal = "Pattern Recognition Letters",
year = "1996",
volume = "17",
number = "6",
pages = "625--631",
abstract = "A Fuzzy C-Means-based clustering method guided by an
auxiliary (conditional) variable is introduced. The
method reveals a structure within a family of patterns
by considering their vicinity in a feature space along
with the similarity of the values assumed by a certain
conditional variable. The usefulness of the algorithm
is exemplified in the problems of data mining.",
keywords = "FUZZY CLUSTERING, FUZZY C-MEANS, CONDITIONAL VARIABLE,
DATA MINING, RADIAL BASIS FUNCTIONS",
}
Leveraging the information asset, J. Perna
@Article{perna:leveraging-information:95,
  author         = {Perna, J.},
  title          = {Leveraging the information asset},
  journal        = {SIGMOD Record (ACM Special Interest Group on Management
                    of Data)},
  volume         = {24},
  number         = {2},
  pages          = {451--452},
  month          = jun,
  year           = {1995},
  issn           = {0163-5808},
  affiliation    = {IBM Canada Ltd., Toronto, Ont., Canada},
  classification = {C7100 (Business and administration); C6160 (Database
                    management systems (DBMS)); C7250L (Non-bibliographic
                    retrieval systems); C7250R (Information retrieval
                    techniques)},
  keywords       = {Information asset; Corporate asset; Database users;
                    Competitive advantage; Unwanted store inventory; Capital
                    outlay; Store sales data; Real time access; Production
                    point of sale information; Database mining; Multivendor
                    database connectivity; Heterogeneous clients; Customer
                    needs; Marketplace},
  thesaurus      = {Business data processing; Database management systems;
                    Information retrieval; Real-time systems},
  xxcrossref     = {Anonymous:1995:ASI},
}
Parallel Halo Finding in N-body Cosmology Simulations, David W. Pfitzner and John K. Salmon
@InProceedings{pfitzner.ea:parallel-halo:96,
  author   = {Pfitzner, David W. and Salmon, John K.},
  title    = {Parallel Halo Finding in {N}-body Cosmology Simulations},
  pages    = {26},
  crossref = {simoudis.ea:proceedings-second:96},
}
KDD-93: Progress and Challenges in Knowledge Discovery in Databases, G. Piatetsky-Shapiro and C. Matheus and P. Smyth and R. Uthurusamy
@Unpublished{piatetsky-shapiro.ea:kdd-93-progress:,
title = "{KDD}-93: Progress and Challenges in Knowledge
Discovery in Databases",
author = "G. Piatetsky-Shapiro and C. Matheus and P. Smyth and
R. Uthurusamy",
URL = "http://info.gte.com/~kdd/kdd-93-report.tex",
keywords = "Knowledge, Discovery, Databases",
note = "To be published in AI Magazine",
annote = "(a long report on AAAI-93 KDD Workshop), to be
published in AI Magazine.",
}
Kdd-93 - progress and challenges in knowledge discovery in databases, G. Piatetsky-Shapiro and C. Matheus and P. Smyth and R. Uthurusamy
@Article{piatetsky-shapiro.ea:kdd-93-progress:94,
author = "G. Piatetsky-Shapiro and C. Matheus and P. Smyth and
R. Uthurusamy",
affiliation = "Gte Labs Inc, Knowledge Discovery Databases Project,
Waltham, Ma, 02254 Jet Prop Lab, Tech Grp, Pasadena,
Ca, 91109 Gm Corp, Res Labs, Detroit, Mi, 48202",
title = "{KDD}-93 - progress and challenges in knowledge
discovery in databases",
journal = "AI Magazine",
year = "1994",
volume = "15",
number = "3",
pages = "77--82",
abstract = "Over 60 researchers from 10 countries took part in the
Third Knowledge Discovery in Databases (KDD) Workshop,
held during the Eleventh National Conference on
Artificial Intelligence in Washington, D.C. A major
trend evident at the workshop was the transition to
applications in the core KDD area of discovery of
relatively simple patterns in relational databases; the
most successful applications are appearing in the areas
of greatest need, where the databases are so large that
manual analysis is impossible. Progress has been
facilitated by the availability of commercial KDD tools
for both generic discovery and domain-specific
applications such as marketing. At the same time,
progress has been slowed by problems such as lack of
statistical rigor, overabundance of patterns, and poor
integration. Besides applications, the main themes of
this workshop were (1) the discovery of dependencies
and models and (2) integrated and interactive KDD
systems.",
}
Knowledge Discovery in Databases, Gregory Piatetsky-Shapiro and William Frawley (Eds)
@Book{piatetsky-shapiro.ea:knowledge-discovery:91,
  editor    = {Piatetsky-Shapiro, Gregory and Frawley, William},
  title     = {Knowledge Discovery in Databases},
  publisher = {The MIT Press},
  address   = {Cambridge, MA},
  year      = {1991},
  pages     = {xii + 525},
  isbn      = {0-262-66070-9 (paper)},
  lccn      = {Q325.5 .K68 1991},
}
An Overview of Issues in Developing Industrial Data Mining and Knowledge Discovery Applications, Gregory Piatetsky-Shapiro and Ron Brachman and Tom Khabaza and Willi Kloesgen and Evangelos Simoudis
@InProceedings{piatetsky-shapiro.ea:overview-issues:96,
  author   = {Piatetsky-Shapiro, Gregory and Brachman, Ron and Khabaza, Tom
              and Kloesgen, Willi and Simoudis, Evangelos},
  title    = {An Overview of Issues in Developing Industrial Data Mining and
              Knowledge Discovery Applications},
  pages    = {89},
  crossref = {simoudis.ea:proceedings-second:96},
}
Knowledge discovery workbench for exploring business databases, G. Piatetsky-Shapiro and C. J. Matheus
@Article{piatetsky-shapiro.ea:workbench-exploring:92,
author = "G. Piatetsky-Shapiro and C. J. Matheus",
affiliation = "Gte Labs Inc, Waltham, Ma, 02254",
title = "Knowledge discovery workbench for exploring business
databases",
journal = "International Journal of Intelligent Systems",
year = "1992",
volume = "7",
number = "7",
pages = "675--686",
abstract = "We describe the Knowledge Discovery Workbench, an
interactive system for database exploration. We then
illustrate KDW capabilities in data clustering,
summarization, classification, and discovery of
changes. We also examine extracting dependencies from
data and using them to order the multitude of data
patterns.",
}
Discovery, Analysis and Presentation of Strong Rules, Gregory Piatetsky-Shapiro
@InCollection{piatetsky-shapiro:analysis-presentation:91,
editor = "Gregory Piatetsky-Shapiro and William J. Frawley",
booktitle = "Knowledge Discovery in Databases",
publisher = "AAAI Press / The MIT Press",
address = "Menlo Park, California",
edition = "First",
year = "1991",
author = "Gregory Piatetsky-Shapiro",
title = "Discovery, Analysis and Presentation of Strong Rules",
pages = "229--248",
}
Discovery and Analysis of Strong Rules in Databases, G Piatetsky-Shapiro
@InProceedings{piatetsky-shapiro:analysis-strong:89,
author = "G. Piatetsky-Shapiro",
title = "Discovery and Analysis of Strong Rules in Databases",
booktitle = "Advanced Database System Symposium, Kyoto",
year = "1989",
month = dec,
}
Data mining and knowledge discovery in business databases, G. Piatetsky-Shapiro
@Article{piatetsky-shapiro:business:96,
  author  = {Piatetsky-Shapiro, G.},
  title   = {Data mining and knowledge discovery in business databases},
  journal = {Lecture Notes in Computer Science},
  volume  = {1079},
  pages   = {56--??},
  year    = {1996},
  issn    = {0302-9743},
}
Introduction, Gregory Piatetsky-Shapiro
@Article{piatetsky-shapiro:introduction:92,
  author   = {Piatetsky-Shapiro, Gregory},
  title    = {Introduction},
  pages    = {587--589},
  year     = {1992},
  crossref = {ijis-special-issue:92},
  annote   = {Definition of KDD. Scientific Discovery, Commercial Discovery,
              overview of papers.},
}
KDD-93: Proceedings of AAAI-93 Knowledge Discovery in Databases workshop, G. Piatetsky-Shapiro (Ed)
@TechReport{piatetsky-shapiro:kdd-93-proceedings:93,
  key         = {piatetsky-shapiro:kdd-93-proceedings:93},
  editor      = {Piatetsky-Shapiro, G.},
  title       = {{KDD}-93: Proceedings of {AAAI}-93 Knowledge Discovery in
                 Databases workshop},
  institution = {AAAI},
  number      = {WS-02},
  note        = {AAAI Press technical report},
  month       = jul,
  year        = {1993},
}
KDD Frequently Asked Questions, Gregory Piatetsky-Shapiro (Ed)
@Misc{piatetsky-shapiro:kdd-frequently:94,
  key          = {piatetsky-shapiro:kdd-frequently:94},
  editor       = {Piatetsky-Shapiro, Gregory},
  title        = {{KDD} Frequently Asked Questions},
  howpublished = {Published via WWW http://info.gte.com/~kdd/FAQ.txt},
  month        = "18th " # apr,
  year         = {1994},
}
KDD Nugget 94-7, Gregory Piatetsky-Shapiro (Ed)
@Misc{piatetsky-shapiro:kdd-nugget:94-7,
title = "{KDD} Nugget 94-7",
key = "piatetsky-shapiro:kdd-nugget:94-7",
editor = "Gregory Piatetsky-Shapiro",
howpublished = "Mailing list",
month = "18th " # apr,
year = "1994",
annote = "Contents: * G. Piatetsky-Shapiro, Time: Attack of the
Data Miners Business Week: Gold Mine of Data in
Customer Service ComputerWorld: Data is money, but
people are special US Census Bureau is now on WWW at
http://www.census.gov/ * Tej Anand, AT\&T Data Mining
Conference * Larry Ai, TRW Smart Charts for
Pharmaceuticals * Edwin Pednault, MDL workshop at
ML/COLT 94 * Roberto Zicari, CFP: Theory and Practice
of Object Systems",
}
Knowledge discovery in databases, G. Piatetsky-Shapiro
@Article{piatetsky-shapiro:knowledge-discovery:91b,
key_modifier = "b",
author = "G. Piatetsky-Shapiro",
affiliation = "Gte Labs Inc, Waltham, Ma, 02254",
title = "Knowledge discovery in databases",
journal = "IEEE Expert-Intelligent Systems \& Their
Applications",
year = "1991",
month = oct,
volume = "6",
number = "5",
pages = "74--76",
note = "Discussion of second AAAI workshop on KDD",
}
Knowledge discovery in personal data vs privacy - a minisymposium, G. Piatetsky-Shapiro
@Article{piatetsky-shapiro:personal-vs:95,
author = "G. Piatetsky-Shapiro",
affiliation = "Gte Labs Inc, Waltham, Ma, 02254",
title = "Knowledge discovery in personal data vs privacy - a
minisymposium",
journal = "IEEE Expert-Intelligent Systems \& Their
Applications",
year = "1995",
volume = "10",
number = "2",
pages = "46--47",
}
Knowledge discovery in databases - progress report, G. Piatetsky-Shapiro
@Article{piatetsky-shapiro:progress-report:94,
author = "G. Piatetsky-Shapiro",
affiliation = "Gte Labs Inc, 40 Sylvan Rd, Waltham, Ma, 02254",
title = "Knowledge discovery in databases - progress report",
journal = "Knowledge Engineering Review",
year = "1994",
volume = "9",
number = "1",
pages = "57--60",
}
Knowledge Discovery in Real Databases: A Report on the IJCAI-89 Workshop, Gregory Piatetsky-Shapiro
@Article{piatetsky-shapiro:real-report:91a,
  key_modifier = {a},
  author       = {Piatetsky-Shapiro, Gregory},
  title        = {Knowledge Discovery in Real Databases: {A} Report on the
                  {IJCAI}-89 Workshop},
  journal      = {AI Magazine},
  volume       = {11},
  number       = {5},
  pages        = {68--70},
  month        = jan,
  year         = {1991},
}
Special issue - knowledge discovery in data-bases and knowledge bases - introduction, G. Piatetsky-Shapiro
@Article{piatetsky-shapiro:special-issue:92,
author = "G. Piatetsky-Shapiro",
affiliation = "Gte Labs Inc, 40 Sylvan Rd, Waltham, Ma, 02254",
title = "Special issue - knowledge discovery in data-bases and
knowledge bases - introduction",
journal = "International Journal of Intelligent Systems",
year = "1992",
volume = "7",
number = "7",
pages = "587--589",
}
An application of ILP in a musical database: Learning to compose the two-voice counterpoint, U. Pompe and I. Kononenko and T. Mak\v{s}e
@InProceedings{pompe.ea:application-ilp:96,
author = "U. Pompe and I. Kononenko and T. Mak\v{s}e",
title = "An application of {ILP} in a musical database:
{L}earning to compose the two-voice counterpoint",
booktitle = "Proceedings of the MLnet Familiarization Workshop on
Data Mining with Inductive Logic Programming",
pages = "1--11",
year = "1996",
}
Starlight, star bright - data-mining the cosmos, D. Price
@Article{price:starlight-star:95,
author = "D. Price",
affiliation = "Univ Nevada, Reno, Nv, 89557 American Univ,
Washington, Dc, 20016",
title = "Starlight, star bright - data-mining the cosmos",
journal = "IEEE Expert-Intelligent Systems \& Their
Applications",
year = "1995",
volume = "10",
number = "4",
pages = "10--13",
}
Comparative benchmarking studies of various algorithms, ELENA project
@Misc{project:comparative-benchmarking:,
author = "{ELENA project}",
title = "Comparative benchmarking studies of various
algorithms",
annote = "Elena project is at:
http://www.dice.ucl.ac.be/neural-nets/ELENA/ELENA.html",
URL = "ftp://ftp.dice.ucl.ac.be/pub/neural-nets/ELENA/databases/Benchmarks.ps.Z",
}
Data Mining and Model Simplicity: A Case Study in Diagnosis, Gregory M. Provan and Moninder Singh
@InProceedings{provan.ea:model-simplicity:96,
  author   = {Provan, Gregory M. and Singh, Moninder},
  title    = {Data Mining and Model Simplicity: {A} Case Study in Diagnosis},
  pages    = {57},
  crossref = {simoudis.ea:proceedings-second:96},
}
Analysis and Visualization of Classifier Performance: Comparison under Imprecise Class and Cost Distributions, Foster Provost and Tom Fawcett
@InProceedings{provost.ea:analysis-classifier:97,
title = "Analysis and Visualization of Classifier Performance:
Comparison under Imprecise Class and Cost
Distributions",
author = "Foster Provost and Tom Fawcett",
pages = "43",
crossref = "heckerman.ea:proceedings-third:97",
abstract = "When mining data with inductive methods, we often
experiment with a wide variety of learning algorithms,
using different algorithm parameters, varying output
threshold values, and using different training
regimens. Such experimentation yields a large number of
classifiers to be evaluated and compared. In order to
compare the performance of classifiers it is necessary
to know the conditions under which they will be used;
using accuracy alone is inadequate because class
distributions and misclassification costs are rarely
uniform.",
}
Inductive Policy, F. J. Provost and B. G. Buchanan
@InProceedings{provost.ea:inductive-policy:92,
  author    = {Provost, F. J. and Buchanan, B. G.},
  title     = {Inductive Policy},
  booktitle = {Proc.\ of AAAI-92},
  pages     = {255--262},
  year      = {1992},
}
Scaling Up Inductive Algorithms: An Overview, Foster Provost and Venkateswarlu Kolluri
@InProceedings{provost.ea:scaling-up:97,
  author   = {Provost, Foster and Kolluri, Venkateswarlu},
  title    = {Scaling Up Inductive Algorithms: An Overview},
  pages    = {239},
  crossref = {heckerman.ea:proceedings-third:97},
}
The Royal Tree Problem, a Benchmark for Single and Multiple Population Genetic Programming, William F. Punch and Douglas Zongker and Erik D. Goodman
@InCollection{punch.ea:royal-tree:96,
author = "William F. Punch and Douglas Zongker and Erik D.
Goodman",
title = "The Royal Tree Problem, a Benchmark for Single and
Multiple Population Genetic Programming",
booktitle = "Advances in Genetic Programming 2",
publisher = "MIT Press",
year = "1996",
editor = "Peter J. Angeline and Kinnear, Jr., K. E.",
pages = "299--316",
chapter = "15",
address = "Cambridge, MA, USA",
keywords = "genetic algorithms, genetic programming",
ISBN = "0-262-01158-1",
abstract = "We have previously shown how a genetic algorithm (GA)
can be used to perform \emph{data mining}, the discovery of
particular/important data within large datasets, by
finding optimal data classifications using known
examples. However, these approaches, while successful,
limited data relationships to those that were \emph{fixed}
before the GA run. We report here on an extension of
our previous work, substituting a genetic program (GP)
for a GA. The GP could optimize data classification, as
did the GA, but could also determine the functional
relationships among the features. This gave improved
performance and new information on important
relationships among features. We discuss the overall approach,
and compare the effectiveness of the GA vs. GP on a
biochemistry problem, the determination of the
involvement of bound water molecules in protein
interactions.",
note = "Also available as GARAGe96-01-01",
size = "18 pages",
}
FOIL: A Midterm Report, J. Ross Quinlan and R. M. Cameron-Jones
@Article{quinlan.ea:foil-midterm:,
  author   = {Quinlan, J. Ross and Cameron-Jones, R. M.},
  title    = {{FOIL}: {A} Midterm Report},
  abstract = {FOIL is a learning system that constructs Horn clause programs
              from examples. This paper summarises the development of FOIL
              from 1989 up to early 1993 and evaluates its effectiveness on a
              non-trivial sequence of learning tasks taken from a Prolog
              programming text. Although many of these are handled reasonably
              well, the experiment highlights some weaknesses of the current
              implementation. Areas for further research are identified.},
}
C4.5: Programs for Machine Learning, J. Ross Quinlan
@Book{quinlan:c4-5:92,
  author    = {Quinlan, J. Ross},
  title     = {{C4}.5: Programs for Machine Learning},
  publisher = {Morgan Kaufmann},
  year      = {1992},
}
Comparing connectionist and symbolic learning methods, J. Ross Quinlan
@Unpublished{quinlan:comparing-connectionist:,
author = "J. Ross Quinlan",
title = "Comparing connectionist and symbolic learning
methods",
institution = "University of Sydney",
note = "University of Sydney",
}
Determining literals in inductive logic programming, J. Ross Quinlan
@InProceedings{quinlan:determining-literals:91,
author = "J. Ross Quinlan",
title = "Determining literals in inductive logic programming",
booktitle = "Proceedings of the 12th International Joint Conference
on Artificial Intelligence",
pages = "746--750",
address = "Sydney, Australia",
year = "1991",
}
The effect of noise on concept learning, J. Ross Quinlan
@InCollection{quinlan:effect-noise:86,
  author   = {Quinlan, J. Ross},
  title    = {The effect of noise on concept learning},
  pages    = {149--166},
  crossref = {michalski.ea:machine-learning:86},
}
An empirical comparison of genetic and decision-tree classifiers, J. Ross Quinlan
@InProceedings{quinlan:empirical-comparision:88,
author = "J. Ross Quinlan",
title = "An empirical comparison of genetic and decision-tree
classifiers",
booktitle = "Proceedings of the 5th International Conference on
Machine Learning",
pages = "135--141",
address = "Ann Arbor",
year = "1988",
}
Induction of Decision Trees, J. Ross Quinlan
@Article{quinlan:induction-decision:86,
  author  = {Quinlan, J. Ross},
  title   = {Induction of Decision Trees},
  journal = {Machine Learning},
  volume  = {1},
  pages   = {81--106},
  year    = {1986},
}
Learning efficient classification procedures and their application to chess end games, J. Ross Quinlan
@InCollection{quinlan:learning-efficient:83,
  author   = {Quinlan, J. Ross},
  title    = {Learning efficient classification procedures and their
              application to chess end games},
  pages    = {463--482},
  crossref = {michalski.ea:machine-learning:83},
}
Cooperation through Hierarchical Competition in Genetic Data Mining, N J Radcliffe and P D Surry
@Article{radcliffe.ea:cooperation-through:,
URL = "ftp://ftp.epcc.ed.ac.uk/pub/tr/94/tr9409.ps.Z",
title = "Cooperation through Hierarchical Competition in
Genetic Data Mining",
author = "N. J. Radcliffe and P. D. Surry",
note = "Parallel Computing Centre, Edinburgh",
}
Information Sharing and Knowledge Discovery in Large Scientific Databases : Introduction, Sudha Ram
@InProceedings{ram:information-sharing:94,
author = "Sudha Ram",
title = "Information Sharing and Knowledge Discovery in Large
Scientific Databases : Introduction",
pages = "397",
editor = "Jay F. Nunamaker and Ralph H. Sprague",
booktitle = "Proceedings of the 27th Annual Hawaii International
Conference on System Sciences. Volume 3 : Information
Systems: {DSS}/Knowledge-Based Systems",
month = jan,
publisher = "IEEE Computer Society Press",
address = "Los Alamitos, CA, USA",
year = "1994",
}
Information sharing and knowledge discovery in large scientific databases - introduction, S. Ram
@InProceedings{ram:information-sharing:95,
author = "S. Ram",
title = "Information sharing and knowledge discovery in large
scientific databases - introduction",
pages = "252",
editor = "Jay F. Nunamaker and Ralph H. Sprague",
booktitle = "Proceedings of the 28th Annual Hawaii International
Conference on System Sciences. Volume 3: Information
Systems - Decision Support and Knowledge-Based
Systems",
month = jan,
publisher = "IEEE Computer Society Press",
address = "Los Alamitos, CA, USA",
year = "1995",
}
A Knowledge-Based Equation Discovery System for Engineering Domains, R. Bharat Rao and Stephen Y. Lu
@Article{rao.ea:knowledge-based-equation:93,
  author  = {Rao, R. Bharat and Lu, Stephen Y.},
  title   = {A Knowledge-Based Equation Discovery System for Engineering
             Domains},
  journal = {IEEE Expert},
  pages   = {37--42},
  month   = aug,
  year    = {1993},
  annote  = {Deals with KEDS system},
}
Visualizing Bagged Decision Trees, J. Sunil Rao and William J. E. Potts
@InProceedings{rao.ea:visualizing-bagged:97,
  author   = {Rao, J. Sunil and Potts, William J. E.},
  title    = {Visualizing Bagged Decision Trees},
  pages    = {243},
  crossref = {heckerman.ea:proceedings-third:97},
}
Calculating salience and breadth of knowledge, L. F. Rau
@Article{rau:calculating-salience:93,
author = "L. F. Rau",
affiliation = "Ge Co, Ctr Res \& Dev, Artificial Intelligence Lab,
Schenectady, Ny, 12301",
title = "Calculating salience and breadth of knowledge",
journal = "IEEE Transactions on Knowledge and Data Engineering",
year = "1993",
volume = "5",
number = "6",
pages = "996--998",
abstract = "As computer programs grow to contain more information,
it will become more important, when faced with a new
system, to be able to ask, ``What do you know about?''
This correspondence paper overviews some recently
completed research [1] investigating three questions:
1) what it means for a computer to know what it knows
about, 2) how a computer can construct a representation
of what it knows about, and 3) how such a
representation can be used for practical applications
that advance the state-of-the-art in understanding the
content of large databases.",
keywords = "ARTIFICIAL INTELLIGENCE, COGNITIVE MODELING, COMPUTER
SCIENCE, DATABASE MANAGEMENT, INFORMATION MANAGEMENT,
INFORMATION RETRIEVAL, KNOWLEDGE DISCOVERY",
}
Genetic Programming for Improved Data Mining: An Application to the Biochemistry of Protein Interactions, M. L. Raymer and W. F. Punch and E. D. Goodman and L. A. Kuhn
@inproceedings{raymer.ea:genetic-programming:96,
  author    = {Raymer, M. L. and Punch, W. F. and Goodman, E. D. and
               Kuhn, L. A.},
  title     = {Genetic Programming for Improved Data Mining: An
               Application to the Biochemistry of Protein
               Interactions},
  booktitle = {Genetic Programming 1996: Proceedings of the First
               Annual Conference},
  editor    = {Koza, John R. and Goldberg, David E. and Fogel, David B.
               and Riolo, Rick L.},
  publisher = {MIT Press},
  address   = {Stanford University, CA, USA},
  month     = "28--31 " # jul,
  year      = {1996},
  pages     = {375--380},
  keywords  = {Genetic Programming, Genetic Algorithms},
  url       = {http://isl.cps.msu.edu/GA/papers/GARAGe96-04-01.ps},
  size      = {6 pages},
  note      = {GP-96 Also available as TR GARAGe96-04-01},
}
Parallelism speeds data mining, S. Reese Hedberg
@article{reese-hedberg:parallelism-speeds:95,
  author         = {{Reese Hedberg}, S.},
  title          = {Parallelism speeds data mining},
  journal        = {IEEE Parallel and Distributed Technology: Systems and
                    Applications},
  volume         = {3},
  number         = {4},
  pages          = {3--6},
  month          = {Winter},
  year           = {1995},
  issn           = {1063-6552},
  classification = {C6110P (Parallel programming); C6160K (Deductive
                    databases); C6170K (Knowledge engineering techniques);
                    C7120 (Financial computing); C7130 (Public
                    administration)},
  keywords       = {6-processor; artificial intelligence; banks; cash;
                    casinos; data mining; data pattern recognition; data
                    processing; deductive databases; financial data
                    processing; genetic algorithms; government; knowledge
                    acquisition; knowledge discovery; machine learning;
                    money laundering; multidimensional database querying;
                    neural networks; online application processing tools;
                    parallel processing; parallelism; pattern; prediction;
                    query processing; rule-based; siftware; statistical
                    techniques; stored data; Sun server; systems;
                    techniques; transactions; US Department of Treasury},
}
Performing Effective Feature Selection by Investigating the Deep Structure of the Data, Marco Richeldi and Pier Luca Lanzi
@inproceedings{richeldi.ea:performing-effective:96,
  author   = {Richeldi, Marco and Lanzi, Pier Luca},
  title    = {Performing Effective Feature Selection by
              Investigating the Deep Structure of the Data},
  pages    = {379},
  crossref = {simoudis.ea:proceedings-second:96},
}
Approaches to Knowledge Representation: An Introduction, G. A. Ringland and D. A. Duce (Eds)
@book{ringland.ea:approaches-to:88,
  editor    = {Ringland, G. A. and Duce, D. A.},
  title     = {Approaches to Knowledge Representation: An
               Introduction},
  publisher = {Research Studies Press Ltd.},
  address   = {Letchworth, England},
  year      = {1988},
}
Discretization of Numerical Attributes, Knut Magne Risvik
@misc{risvik:discretization-numerical:97,
  author        = {Risvik, Knut Magne},
  title         = {Discretization of Numerical Attributes},
  howpublished  = {Unpublished article. Report from undergraduate student
                   project.},
  month         = apr,
  year          = {1997},
  url           = {http://www.pvv.ntnu.no/~kmr/report/discretization.ps},
  contributedby = {Knut Magne Risvik, kmr(at)idi.ntnu.no},
}
Learning Decision Lists, Ronald L. Rivest
@article{rivest:learning-decision:87,
  author  = {Rivest, Ronald L.},
  title   = {Learning Decision Lists},
  journal = {Machine Learning},
  volume  = {2},
  pages   = {229--246},
  year    = {1987},
}
Dealing with Duplicate Tuples in Multi-Join Query Processing, Roberto J. Bayardo, Jr.
@techreport{roberto-j-bayardo:dealing-with:96,
  author      = {Bayardo, Jr., Roberto J.},
  title       = {Dealing with Duplicate Tuples in Multi-Join Query
                 Processing},
  institution = {The University of Texas at Austin, Department of
                 Computer Sciences},
  type        = {Technical Report},
  number      = {UTEXAS.CS//CS-TR-96-11},
  pages       = {7},
  month       = may,
  year        = {1996},
  keywords    = {query processing, multi-join queries, query
                 optimization},
  url         = {ftp://ftp.cs.utexas.edu/pub/techreports/tr96-11.ps.Z},
  abstract    = {This paper presents and evaluates several schemes for
                 handling duplicate tuple elimination during
                 optimization and execution of large select-project-join
                 queries. The primary issues investigated are (1)
                 precisely when to apply duplicate tuple removal during
                 query evaluation, and (2) how an optimizer should
                 predict the effects of removing duplicates. We also
                 develop a realistic model of multiple join queries
                 inspired by a proposed data-mining application.
                 Through experiments on this model, we find two critical
                 techniques for high performance execution of
                 select-project-join queries: First, the optimizer
                 should decide where duplicates are removed within the
                 query plan independent of the projections creating
                 them. Second, join algorithms should remove duplicates
                 when sorting or hashing their input, and the optimizer
                 should be capable of predicting its effects.},
}
Handling discovered structure in database-systems, J. F. Roddick and N. G. Craske and T. J. Richards
@article{roddick.ea:handling-discovered:96,
  author      = {Roddick, J. F. and Craske, N. G. and Richards, T. J.},
  title       = {Handling discovered structure in database-systems},
  journal     = {IEEE Transactions on Knowledge and Data Engineering},
  year        = {1996},
  volume      = {8},
  number      = {2},
  pages       = {227--240},
  affiliation = {Univ S Australia, Sch Comp \& Informat Sci, Adv Comp
                 Res Ctr, Levels Campus, The Levels, SA 5095, Australia;
                 Monash Univ, Dept Comp Technol, Caulfield, Vic 3145,
                 Australia; Qualitat Solut \& Res Pty Ltd, Bundoora, Vic
                 3083, Australia},
  abstract    = {Most database systems research assumes that the
                 database schema is determined by a database
                 administrator. With the recent increase in interest in
                 knowledge discovery from databases and the predicted
                 increase in the volume of data expected to be stored it
                 is appropriate to reexamine this assumption and
                 investigate how derived or induced, rather than
                 database administrator supplied, structure can be
                 accommodated and used by database systems. This paper
                 investigates some of the characteristics of inductive
                 learning and knowledge discovery as they pertain to
                 database systems and the constraints that would be
                 imposed on appropriate inductive learning algorithms is
                 discussed. A formal method of defining induced
                 dependencies (both static and temporal) is proposed as
                 the inductive analogue to functional dependencies. The
                 Boswell database system exemplifying some of these
                 characteristics is also briefly discussed.},
  keywords    = {DESIGN, RULE, INDUCTIVE DATA MODELS, KNOWLEDGE
                 DISCOVERY, TEMPORAL INFERENCE, BOSWELL},
}
Knowledge Discovery in Reaction Databases, John R. Rose and Herbert Gelernter
@inproceedings{rose.ea:reaction:93,
  author    = {Rose, John R. and Gelernter, Herbert},
  title     = {Knowledge Discovery in Reaction Databases},
  booktitle = {Proceedings of the 2nd International Conference on
               Information and Knowledge Management},
  editor    = {Bhargava, Bharat and Finin, Timothy and Yesha, Yelena},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
  month     = nov,
  year      = {1993},
  pages     = {714--716},
}
How Good Were Those Probability Predictions? The Expected Recommendation Loss (ERL) Scoring Rule, David B. Rosen
% NOTE(review): the abstract was left unterminated in the original and appears to be truncated after "measure."; restore the remainder from the paper if available.
% NOTE(review): venue, editor, publisher and year are taken from the original free-text note ("To appear in ... Kluwer, Dordrecht, 1996"); confirm the final publication details.
@inproceedings{rosen:how-good:,
  author    = {Rosen, David B.},
  title     = {How Good Were Those Probability Predictions? {The}
               Expected Recommendation Loss ({ERL}) Scoring Rule},
  booktitle = {Maximum Entropy and {Bayesian} Methods},
  editor    = {Heidbreder, G.},
  publisher = {Kluwer},
  address   = {Dordrecht, The Netherlands},
  year      = {1996},
  note      = {Proceedings of the Thirteenth International Workshop,
               August 1993. 8 pages.},
  abstract  = {We present a new way to choose an appropriate scoring
               rule for evaluating the performance of a _soft
               classifier_, i.e. of a supplier of predicted
               (inferred/estimated/learned/guessed) probabilities. A
               scoring rule (probability loss function) is a function
               of a single such prediction and the corresponding
               outcome event (true class); its expectation over the
               data space is the generalization performance of
               ultimate interest, while its sum or average over some
               benchmark test data set is an empirical performance
               measure.},
}
Discriminative vs Informative Learning, Y. Dan Rubinstein and Trevor Hastie
@inproceedings{rubinstein.ea:discriminative-vs:97,
  author   = {Rubinstein, Y. Dan and Hastie, Trevor},
  title    = {Discriminative vs Informative Learning},
  pages    = {49},
  crossref = {heckerman.ea:proceedings-third:97},
}
SE-Trees Outperform Decision Trees in Noisy Domains, Ron Rymon
@inproceedings{rymon:se-trees-outperform:96,
  author   = {Rymon, Ron},
  title    = {{SE}-Trees Outperform Decision Trees in Noisy
              Domains},
  pages    = {331},
  crossref = {simoudis.ea:proceedings-second:96},
}
Deriving Queries From Examples Using Genetic Programming, Tae-Wan Ryu and Christoph F. Eick
@inproceedings{ryu.ea:deriving-queries:96,
  author      = {Ryu, Tae-Wan and Eick, Christoph F.},
  title       = {Deriving Queries From Examples Using Genetic
                 Programming},
  booktitle   = {The Second International Conference on Knowledge
                 Discovery and Data Mining (KDD-96)},
  editor      = {Simoudis, Evangelos and Han, Jia Wei and Fayyad, Usama},
  publisher   = {AAAI},
  address     = {Portland, Oregon, USA},
  month       = aug # " 2--4",
  year        = {1996},
  pages       = {303},
  keywords    = {Genetic Programming, Genetic Algorithms, MASSON},
  url         = {http://www.cs.uh.edu/~twryu/papers/kdd96.ps},
  size        = {14 pages},
  abstract    = {This paper centers on the problem of extracting
                 intensional information for a set of objects from an
                 object-oriented database. In our approach, the
                 extracted intensional information for the given set of
                 objects are described by object-oriented queries that
                 compute this set of objects. The paper discusses the
                 architecture of a knowledge discovery system, called
                 MASSON, which employs genetic programming to find such
                 queries, moreover, we will show how interesting queries
                 that describe commonalities within a set of objects are
                 automatically generated, modified, evaluated, and
                 selected; we will also discuss how the search for the
                 _best_ query is conducted by the MASSON system. We also
                 report on an experiment that evaluated the knowledge
                 discovery capability of MASSON.},
  annote      = {KDD-96
                 http://www.aaai.org:80/Press/Proceedings/KDD/1996/kdd-96.html},
  affiliation = {University of Houston},
  crossref    = {simoudis.ea:proceedings-second:96},
}
MASSON: Discovering Commonalities in Collection of Objects using Genetic Programming, Tae-Wan Ryu and Christoph F. Eick
@inproceedings{ryu.ea:masson--discovering:96,
  author    = {Ryu, Tae-Wan and Eick, Christoph F.},
  title     = {{MASSON}: Discovering Commonalities in Collection of
               Objects using Genetic Programming},
  booktitle = {Genetic Programming 1996: Proceedings of the First
               Annual Conference},
  editor    = {Koza, John R. and Goldberg, David E. and Fogel, David B.
               and Riolo, Rick L.},
  publisher = {MIT Press},
  address   = {Stanford University, CA, USA},
  month     = "28--31 " # jul,
  year      = {1996},
  pages     = {200--208},
  keywords  = {Genetic Programming, Genetic Algorithms},
  url       = {http://www.cs.uh.edu/~twryu/papers/gp96.ps},
  size      = {9 pages},
  abstract  = {For the current flood of data, automatic tools for
               searching or analyzing data are necessary, especially
               for complex databases. Accordingly, knowledge discovery
               in databases is getting more and more attention. This
               paper centers on the problem of discovering the common
               characteristics that are shared by a set of objects
               belonging to an object-oriented database. In our
               approach, commonalities within a set of objects are
               described by object-oriented queries that compute this
               set of objects. The paper discusses the architecture of
               a knowledge discovery system, called MASSON, which
               employs genetic programming to find such queries, and
               presents an example run of the system to illustrate how
               the system works; we will show how interesting queries
               that describe commonalities within a set of objects are
               automatically generated, modified, evaluated, and
               selected; we will also discuss how the search for the
               _best_ query is conducted by the MASSON system.
               Specific problems such as the generation of constants
               in queries, how to cope with type violations and other
               constraints when creating object-oriented queries, and
               query evaluation are discussed in some detail.},
}
Learning Limited Dependence Bayesian Classifiers, Mehran Sahami
@inproceedings{sahami:learning-limited:96,
  author   = {Sahami, Mehran},
  title    = {Learning Limited Dependence Bayesian Classifiers},
  pages    = {335},
  crossref = {simoudis.ea:proceedings-second:96},
}
On Comparing Classifiers: A Critique of Current Research and Methods, Steven Salzberg
% NOTE(review): no journal or year is recorded for this item, so it is typed as misc to avoid missing-field warnings; switch back to article once the publication details are confirmed.
@misc{salzberg:on-comparing:,
  author   = {Salzberg, Steven},
  title    = {On Comparing Classifiers: {A} Critique of Current
              Research and Methods},
  url      = {http://www.cs.jhu.edu/salzberg/critique.ps},
  abstract = {Experimental machine learning research needs to
              scrutinize its approach to experimental design. If not
              done very carefully, comparative studies of
              classification algorithms can easily result in
              statistically invalid conclusions. This paper describes
              several phenomena that can, if ignored, invalidate an
              experimental comparison. It also divides machine
              learning research into several different types, and
              discusses why comparative analysis is more important
              for some than for others.},
  annote   = {homepage with decision tree papers is at:
              http://www.cs.jhu.edu/salzberg/home.html},
}
Learning concepts by asking questions, Claude Sammut and Ranan B. Banerji
@incollection{sammut.ea:learning-concepts:86,
  author   = {Sammut, Claude and Banerji, Ranan B.},
  title    = {Learning concepts by asking questions},
  pages    = {167--191},
  crossref = {michalski.ea:machine-learning:86},
}
Knowledge Discovery in Temporal Databases: The Initial Step, Mohamed H. Saraee and Babis Theodoulidis
@inproceedings{saraee.ea:temporal-initial:95,
  author    = {Saraee, Mohamed H. and Theodoulidis, Babis},
  title     = {Knowledge Discovery in Temporal Databases: The Initial
               Step},
  booktitle = {Knowledge Discovery Workshop of the International
               Conference on Deductive and Object Oriented Databases
               Workshop (DOOD)},
  address   = {Singapore},
  month     = dec,
  year      = {1995},
}
Data mining and forecasting in large-scale telecommunication networks, R. Sasisekharan and V. Seshadri and S. M. Weiss
@article{sasisekharan.ea:forecasting-large-scale:96,
  author      = {Sasisekharan, R. and Seshadri, V. and Weiss, S. M.},
  title       = {Data mining and forecasting in large-scale
                 telecommunication networks},
  journal     = {IEEE Expert: Intelligent Systems \& Their
                 Applications},
  year        = {1996},
  volume      = {11},
  number      = {1},
  pages       = {37--43},
  affiliation = {AT\&T Bell Labs, Tech Staff, Middletown, NJ, 07748;
                 Rutgers State Univ, Dept Comp Sci, New Brunswick, NJ,
                 08903},
}
Bottom-up induction of functional dependencies from relations, I. Savnik and P. A. Flach
@inproceedings{savnik.ea:bottom-up-induction:93,
  author    = {Savnik, I. and Flach, P. A.},
  title     = {Bottom-up induction of functional dependencies from
               relations},
  booktitle = {Proc.\ of AAAI-93 Workshop: Knowledge Discovery in
               Databases},
  editor    = {Piatetsky-Shapiro, G.},
  month     = jul,
  year      = {1993},
  pages     = {174--185},
  keywords  = {Functional dependency, Knowledge Discovery,
               Databases},
  url       = {ftp://martin.ijs.si/pub/CSD/Reports/CSD-TR-93-3.ps.gz},
}
Finding Latent Variable Models in Large Databases, Richard Scheines and Peter Spirtes
@article{scheines.ea:finding-latent:92,
  author   = {Scheines, Richard and Spirtes, Peter},
  title    = {Finding Latent Variable Models in Large Databases},
  pages    = {609--621},
  crossref = {ijis-special-issue:92},
}
CoverStory --- Automated news finding in marketing, J. Schmitz and G. Armstrong and J. D. C. Little
% NOTE(review): the original author field read "J. Armstrong Schmitz and Little. J. D. C."; it is reconstructed here as three authors (Schmitz, Armstrong, Little) -- confirm initials against the publication.
@article{schmitz.ea:coverstory--automated:90,
  author   = {Schmitz, J. and Armstrong, G. and Little, J. D. C.},
  title    = {{CoverStory} --- Automated news finding in marketing},
  journal  = {Decision Support Systems Transaction},
  year     = {1990},
  pages    = {46--54},
  keywords = {marketing, sales data, cranberry, ocean spray},
}
Extracting support data for a given task, B. Sch{\"o}lkopf and C. Burges and V. Vapnik
@incollection{scholkopf.ea:extracting-support:95,
  author    = {Sch{\"o}lkopf, B. and Burges, C. and Vapnik, V.},
  title     = {Extracting support data for a given task},
  booktitle = {Proceedings, First International Conference on
               Knowledge Discovery and Data Mining},
  editor    = {Fayyad, U. M. and Uthurusamy, R.},
  publisher = {AAAI Press},
  address   = {Menlo Park, CA},
  year      = {1995},
}
Book Review of Computer Systems That Learn, Alberto Segre and Geoffrey Gordon
% NOTE(review): "62" was stored in the number field but is the journal volume; first author's surname corrected from "Serge" to "Segre" -- confirm against the journal.
@article{serge.ea:book-review:93,
  author  = {Segre, Alberto and Gordon, Geoffrey},
  title   = {Book Review of {Computer Systems That Learn}},
  journal = {Artificial Intelligence},
  year    = {1993},
  volume  = {62},
  pages   = {363--378},
  annote  = {Review / overview of Computer Systems That Learn by
             Sholom M. Weiss and Casimir A. Kulikowski. Identifies
             dimensions of classification of learning methods. Looks
             at Neural Nets, Statistical Methods and Machine
             Learning approaches (ID3, CART, C4). 17 References.},
}
Using single layered neural networks for the extraction of conjunctive rules and hierarchical classifications, Sabrina Sestito and Tharam Dillon
@article{sestito.ea:using-single:91,
  author  = {Sestito, Sabrina and Dillon, Tharam},
  title   = {Using single layered neural networks for the
             extraction of conjunctive rules and hierarchical
             classifications},
  journal = {Journal of Applied Intelligence},
  volume  = {1},
  pages   = {157--173},
  year    = {1991},
}
Advances in knowledge acquisition: 9th European Knowledge Acquisition Workshop, EKAW '96, Nottingham, United Kingdom, May 14--17, 1996: proceedings, Nigel Shadbolt and Kieron O'Hara and Guus Schreiber (Eds)
@proceedings{shadbolt.ea:advances-acquisition:96,
  editor    = {Shadbolt, Nigel and O'Hara, Kieron and Schreiber, Guus},
  title     = {Advances in knowledge acquisition: 9th European
               Knowledge Acquisition Workshop, {EKAW} '96, Nottingham,
               United Kingdom, May 14--17, 1996: proceedings},
  booktitle = {Advances in knowledge acquisition: 9th European
               Knowledge Acquisition Workshop, {EKAW} '96, Nottingham,
               United Kingdom, May 14--17, 1996: proceedings},
  series    = {Lecture Notes in Artificial Intelligence and Lecture
               Notes in Computer Science},
  volume    = {1076},
  publisher = {Springer-Verlag Inc.},
  address   = {New York, NY, USA},
  year      = {1996},
  pages     = {xii + 369},
  isbn      = {3-540-61273-4 (softcover)},
  issn      = {0302-9743},
  lccn      = {QA76.73.E95 E92 1996},
  annote    = {Assumptions of problem-solving methods / Richard
               Benjamins and Christine Pierret-Golbreich --
               Problem-solving methods: making assumptions for
               efficiency reasons / Dieter Fensel and Remco Straatman
               -- The thin end of the wedge: efficiency and the
               generalised directive model methodology / Kieron O'Hara
               and Nigel Shadbolt -- Principles for libraries of task
               decomposition methods: conclusions from a case-study /
               Klas Orsvarn -- A purpose driven method for language
               comparison / The REVISE Project -- A conceptual and
               formal model of a diagnostic reasoner / Richard
               Benjamins and Manfred Aben -- Ontology construction for
               technical domains / Jan Benjamin \ldots{} [et al.] --
               Text clustering to help knowledge acquisition from
               documents / Stephane Lapalut -- A quality-based
               terminological reasoning model for text knowledge
               acquisitions / Udo Hahn, Manfred Klenner and Klemens
               Schnattinger -- Extracting conceptual knowledge from
               text using explicit relation markers / Paul R. Bowden,
               Peter Halstead and Tony G. Rose -- Structuring
               information in a distributed hypermedia system / Celia
               Ghedini Ralha -- Diagrammatic knowledge acquisition:
               elicitation, analysis and issues / Peter C.-H. Cheng --
               An approach to measuring theory quality / Edgar Sommer
               -- Some late-breaking news from the data mines and a
               preview of the KOALA system: a prospector's report /
               Franz Schmalhofer and Christoph Kozieja -- A knowledge
               acquisition tool for multi-perspective concept
               formation / Joao Jose Furtado Vasco, Colette Faucher
               and Eugene Chouraqui -- Knowledge discovery in
               databases: exploiting knowledge-level redescription /
               James Cupit and Nigel Shadbolt -- Towards painless
               knowledge acquisition / Derek Sleeman and Fraser
               Mitchell -- The acquisition of a shared task model /
               Frances Brazier, Jan Treur and Niek Wijngaards -- The
               group elicitation method: an introduction / Guy Boy --
               Formalising the repair of schedules through knowledge
               acquisition / Janet Efstathiou -- Intelligent tools for
               planning knowledge base development and verification /
               Steve A. Chien -- Configuring service recovery planning
               with the CommonKADS library / V. Arlanzon, A. Bernaras
               and I. Laresgoiti -- Domain and system influences in
               problem solving models for planning / Hugh Cottam and
               Nigel Shadbolt.},
  keywords  = {Knowledge acquisition (Expert systems) --
               Congresses.},
}
Data-based acquisition and incremental modification of classification rules, N. Shan and W. Ziarko
@article{shan.ea:data-based-acquisition:95,
  author      = {Shan, N. and Ziarko, W.},
  title       = {Data-based acquisition and incremental modification of
                 classification rules},
  journal     = {Computational Intelligence},
  year        = {1995},
  volume      = {11},
  number      = {2},
  pages       = {357--370},
  affiliation = {Univ Regina, Dept Comp Sci, Regina, SK S4S 0A2,
                 Canada},
  abstract    = {One of the most important problems in the application
                 of knowledge discovery systems is the identification
                 and subsequent updating of rules. Many applications
                 require that the classification rules be derived from
                 data representing exemplar occurrences of data patterns
                 belonging to different classes. The problem of
                 identifying such rules in data has been researched
                 within the field of machine learning, and more recently
                 in the context of rough set theory and knowledge
                 discovery in databases. In this paper we present an
                 incremental methodology for finding all maximally
                 generalized rules and for adaptive modification of them
                 when new data become available. The methodology is
                 developed in the context of rough set theory and is
                 based on the earlier idea of discernibility matrix
                 introduced by Skowron.},
  keywords    = {ROUGH SETS, DECISION RULES, KNOWLEDGE DISCOVERY,
                 MACHINE LEARNING, INCREMENTAL LEARNING, ADAPTIVE
                 SYSTEMS},
}
Discovering Classification Knowledge in Databases Using Rough Sets, Ning Shan and Wojciech Ziarko and Howard J. Hamilton and Nick Cercone
@inproceedings{shan.ea:discovering-classification:96,
  author   = {Shan, Ning and Ziarko, Wojciech and Hamilton, Howard J.
              and Cercone, Nick},
  title    = {Discovering Classification Knowledge in Databases
              Using Rough Sets},
  pages    = {271},
  crossref = {simoudis.ea:proceedings-second:96},
}
The mathematical theory of communication, Claude E. Shannon and Warren Weaver
@book{shannon.ea:mathematical-theory:49,
  author    = {Shannon, Claude E. and Weaver, Warren},
  title     = {The mathematical theory of communication},
  publisher = {University of Illinois Press},
  year      = {1949},
  annote    = {Book by the father of information theory.},
}
Encyclopedia of artificial intelligence, Stuart C. Shapiro (Ed)
@book{shapiro:encyclopedia-artificial:92,
  editor    = {Shapiro, Stuart C.},
  title     = {Encyclopedia of artificial intelligence},
  publisher = {Wiley},
  year      = {1992},
}
Combining Explanation-based and Neural Learning: An algorithm and Empirical Results, Jude W. Shavlik and Geoffrey G. Towell
@techreport{shavlik.ea:combining-explanation-based:89,
  author      = {Shavlik, Jude W. and Towell, Geoffrey G.},
  title       = {Combining Explanation-based and Neural Learning: An
                 algorithm and Empirical Results},
  institution = {University of Wisconsin},
  number      = {859},
  month       = jun,
  year        = {1989},
}
Symbolic and Neural Learning Algorithms: An Experimental Comparison (Revised), Jude W. Shavlik and Raymond J. Mooney and Geoffrey G. Towell
@techreport{shavlik.ea:symbolic-neural:90,
  author      = {Shavlik, Jude W. and Mooney, Raymond J. and Towell,
                 Geoffrey G.},
  title       = {Symbolic and Neural Learning Algorithms: An
                 Experimental Comparison (Revised)},
  institution = {Department of Computer Sciences, University of
                 Wisconsin},
  number      = {955},
  month       = aug,
  year        = {1990},
  keywords    = {Empirical Learning, Connectionism, Neural Networks,
                 Inductive Learning, ID3, Perceptron, Backpropagation},
  annote      = {To Appear in Machine Learning, Volume 6, 1991.
                 Comparison of ID3, Backprop and Perceptron on 5 large,
                 real-world data sets.},
}
Scalable Exploratory Data Mining of Distributed Geoscientific Data, Eddie C. Shek and Richard R. Muntz and Edmond Mesrobian and Kenneth Ng
@inproceedings{shek.ea:scalable-exploratory:96,
  author   = {Shek, Eddie C. and Muntz, Richard R. and Mesrobian,
              Edmond and Ng, Kenneth},
  title    = {Scalable Exploratory Data Mining of Distributed
              Geoscientific Data},
  pages    = {32},
  crossref = {simoudis.ea:proceedings-second:96},
}
A metapattern-based automated discovery loop for integrated data mining - unsupervised learning of relational patterns, W. M. Shen and B. Leng
@article{shen.ea:metapattern-based-automated:96,
  author      = {Shen, W. M. and Leng, B.},
  title       = {A metapattern-based automated discovery loop for
                 integrated data mining - unsupervised learning of
                 relational patterns},
  journal     = {IEEE Transactions on Knowledge and Data Engineering},
  year        = {1996},
  month       = dec,
  volume      = {8},
  number      = {6},
  pages       = {898--910},
  affiliation = {Univ So Calif, Inst Informat Sci, 4676 Admiralty Way,
                 Marina Del Rey, CA, 90292; Univ So Calif, Dept Comp Sci,
                 Marina Del Rey, CA, 90292; Inference Corp, Chicago, IL,
                 60631},
  abstract    = {Metapattern (also known as metaquery) is a new
                 approach for integrated data mining systems. Different
                 from a typical ``tool-box'' like integration, where
                 components must be picked and chosen by users without
                 much help, metapatterns provide a common representation
                 for intercomponent communication as well as a human
                 interface for hypothesis development and search
                 control. One weakness of this approach, however, is
                 that the task of generating fruitful metapatterns is
                 still a heavy burden for human users. In this paper, we
                 describe a metapattern generator and an integrated
                 discovery loop that can automatically generate
                 metapatterns. Experiments in both artificial and
                 real-world databases have shown that this new system
                 goes beyond the existing machine learning technologies,
                 and can discover relational patterns without requiring
                 humans to prelabel the data as positive or negative
                 examples for some given target concepts. With this
                 technology, future data mining systems could discover
                 high-quality, human comprehensible knowledge in a much
                 more efficient and focused manner, and data mining
                 could be managed easily by both expert and less expert
                 users.},
  keywords    = {induction, deduction, human interaction, integration,
                 unsupervised learning, relational concepts, metaquery,
                 metapattern},
}
Metapattern Generation for Integrated Data Mining, Wei-Min Shen and Bing Leng
@inproceedings{shen.ea:metapattern-generation:96,
  author   = {Shen, Wei-Min and Leng, Bing},
  title    = {Metapattern Generation for Integrated Data Mining},
  pages    = {152},
  crossref = {simoudis.ea:proceedings-second:96},
}
Complementary Discrimination Learning with Decision Lists, W. M. Shen
@inproceedings{shen:complementary-discrimination:92,
  author    = {Shen, W. M.},
  title     = {Complementary Discrimination Learning with Decision
               Lists},
  booktitle = {Proc.\ of AAAI-92},
  pages     = {153--158},
  year      = {1992},
}
Discovering Regularities from Knowledge Bases, Wei-Min Shen
@article{shen:discovering-regularities:92,
  author   = {Shen, Wei-Min},
  title    = {Discovering Regularities from Knowledge Bases},
  pages    = {623--635},
  crossref = {ijis-special-issue:92},
}
A survey of techniques for inference under uncertainty, F. K. J. Sheridan
@article{sheridan:survey-techniques:91,
  author  = {Sheridan, F. K. J.},
  title   = {A survey of techniques for inference under
             uncertainty},
  journal = {Artificial Intelligence Review},
  volume  = {5},
  pages   = {89--119},
  year    = {1991},
}
Trend Recognition with Time Series Database, Hiromitsu Shimakawa and Kouji Kikkawa
% NOTE(review): year 1992 is taken from the booktitle ("92") and the workshop date code "D920426-28" in the original annote (read here as 26--28 April 1992); confirm against the proceedings.
@inproceedings{shimakawa.ea:trend-recognition:,
  author    = {Shimakawa, Hiromitsu and Kikkawa, Kouji},
  title     = {Trend Recognition with Time Series Database},
  booktitle = {Future Databases '92},
  volume    = {3},
  chapter   = {46},
  pages     = {373--383},
  publisher = {World Scientific Publishing Co. Pte. Ltd.},
  address   = {Singapore},
  year      = {1992},
  annote    = {2nd Far-East Workshop on Future Database Systems,
               Kyoto, Japan, 26--28 April 1992},
}
Evaluating the Interestingness of Characteristic Rules, Micheline Kamber and Rajjan Shinghal
@inproceedings{shinghal:evaluating-interestingness:96,
  author   = {Kamber, Micheline and Shinghal, Rajjan},
  title    = {Evaluating the Interestingness of Characteristic
              Rules},
  pages    = {263},
  crossref = {simoudis.ea:proceedings-second:96},
}
Data mining applications in BT, R. Shortland and R. Scarfe
@article{shortland.ea:applications-bt:94,
  author      = {Shortland, R. and Scarfe, R.},
  title       = {Data mining applications in {BT}},
  journal     = {BT Technology Journal},
  year        = {1994},
  volume      = {12},
  number      = {4},
  pages       = {17--22},
  affiliation = {British Telecommun Labs, Martlesham Heath, Ipswich IP5
                 7RE, Suffolk, England},
  abstract    = {With the increased use of computers there is an ever
                 increasing volume of data being generated and stored.
                 This can lead to companies becoming `data rich and
                 information poor'. This paper describes how BT has used
                 data mining techniques to convert volume data into
                 high-value information which can be used to aid
                 decision making in a number of key business processes.
                 The benefit of actively using data, as opposed to
                 passively storing it, is demonstrated via a number of
                 case studies which cover areas as diverse as fault
                 diagnosis, fraud detection, market segmentation, credit
                 vetting and litigation assessment.},
}
KESO: Minimizing Database Interaction, Arno Siebes and Martin L. Kersten
@inproceedings{siebes.ea:keso-minimizing:97,
  author   = {Siebes, Arno and Kersten, Martin L.},
  title    = {{KESO}: Minimizing Database Interaction},
  pages    = {247},
  crossref = {heckerman.ea:proceedings-third:97},
}
Homogeneous Discoveries Contain no Surprises: Inferring Risk-profiles from Large Databases, Arno Siebes
% NOTE(review): institution, number and year are inferred from the report URL (CWIreports/AA/CS-R9430); confirm against the report itself.
@techreport{siebes:homogeneous-discoveries:,
  author      = {Siebes, Arno},
  title       = {Homogeneous Discoveries Contain no Surprises:
                 Inferring Risk-profiles from Large Databases},
  institution = {CWI},
  number      = {CS-R9430},
  year        = {1994},
  url         = {ftp://ftp.cwi.nl/pub/CWIreports/AA/CS-R9430.ps.Z},
  abstract    = {Many models of reality are probabilistic. For example,
                 not everyone orders crisps with their beer, but a
                 certain percentage does. Inferring such probabilistic
                 knowledge from databases is one of the major challenges
                 for data mining. Recently Agrawal et al.\ investigated
                 a class of such problems. In this paper a
                 new class of such problems is investigated, viz.,
                 inferring risk-profiles. The proto-typical example of
                 this class is: ``what is the probability that a given
                 policy-holder will file a claim with the insurance
                 company in the next year''. A risk-profile is then a
                 description of a group of insurants that have the same
                 probability for filing a claim.
                 It is shown in this
                 paper that homogeneous descriptions are the most
                 plausible risk-profiles. Moreover, under modest
                 assumptions it is shown that covers of such homogeneous
                 descriptions are essentially unique. A direct
                 consequence of this result is that it suffices to
                 search for the homogeneous description with the highest
                 associated probability.
                 The main result of this
                 paper is thus that we show that the inference problem
                 for risk-profiles reduces to the well studied problem
                 of maximising a quality function.},
  annote      = {CR subject classification (1991): Computer based
                 methods in probability and statistics (G.3), Database
                 applications (H.2.8), Information search and retrieval
                 (H.3.3) clustering, search process, Learning (I.2.6)
                 concept learning, induction, knowledge acquisition},
  keywords    = {Data Mining, Probabilistic Knowledge, Probabilistic
                 Search, Probability Theory},
}
What makes patterns interesting in knowledge discovery systems, A. Silberschatz and A. Tuzhilin
@Article{silberschatz.ea:what-makes:96,
  author = "A. Silberschatz and A. Tuzhilin",
  address = "At\&T Bell Labs, Lucent Technol, 600 Mt Ave, Murray
    Hill, Nj, 07974 Nyu, Stern Sch Business, Dept Informat
    Syst, New York, Ny, 10012",
  title = "What makes patterns interesting in knowledge discovery
    systems",
  journal = "{IEEE} Transactions on Knowledge and Data Engineering",
  year = "1996",
  volume = "8",
  number = "6",
  pages = "970--974",
  abstract = "One of the central problems in the field of knowledge
    discovery is the development of good measures of
    interestingness of discovered patterns. Such measures
    of interestingness are divided into objective
    measures---those that depend only on the structure of a
    pattern and the underlying data used in the discovery
    process, and the subjective measures---those that also
    depend on the class of users who examine the pattern.
    The focus of this paper is on studying subjective
    measures of interestingness. These measures are
    classified into actionable and unexpected, and the
    relationship between them is examined. The unexpected
    measure of interestingness is defined in terms of the
    belief system that the user has. Interestingness of a
    pattern is expressed in terms of how it affects the
    belief system. The paper also discusses how this
    unexpected measure of interestingness can be used in
    the discovery process.",
  keywords = "measures of interestingness, patterns, actionability,
    unexpectedness, belief systems",
}
Proceedings of the Second International Conference on Knowledge Discovery and Data Mining (KDD-96), Evangelos Simoudis and Jia Wei Han and Usama Fayyad (Eds)
@Proceedings{simoudis.ea:proceedings-second:96,
  title = "Proceedings of the Second International Conference on
    Knowledge Discovery and Data Mining ({KDD}-96)",
  booktitle = "Proceedings of the Second International Conference on
    Knowledge Discovery and Data Mining ({KDD}-96)",
  year = "1996",
  editor = "Evangelos Simoudis and Jia Wei Han and Usama Fayyad",
  publisher = "AAAI Press",
}
Reality check for data mining, E. Simoudis
@Article{simoudis:reality-check:96,
  author = "E. Simoudis",
  address = "Ibm Corp, Almaden Res Ctr, 650 Harry Rd, San Jose, Ca,
    95120",
  title = "Reality check for data mining",
  journal = "{IEEE} Expert-Intelligent Systems \& Their
    Applications",
  year = "1996",
  volume = "11",
  number = "5",
  pages = "26--33",
}
Extracting laws from decision tables - a rough set approach, A. Skowron
@Article{skowron:extracting-laws:95,
  author = "A. Skowron",
  address = "Warsaw Univ, Inst Math, Banacha 2, Pl-02097 Warsaw,
    Poland",
  title = "Extracting laws from decision tables - a rough set
    approach",
  journal = "Computational Intelligence",
  year = "1995",
  volume = "11",
  number = "2",
  pages = "371--388",
  abstract = "We present some methods, based on the rough set and
    Boolean reasoning approaches, for extracting laws from
    decision tables. First we discuss several procedures
    for decision rules synthesis from decision tables. Next
    we show how to apply some near-to-functional relations
    between data to data filtration. Two methods of
    searching for new classifiers (features) are described:
    searching for new classifiers in a given set of logical
    formulas, and searching for some functions
    approximating near-to-functional relations.",
  keywords = "REASONING UNDER UNCERTAINTY, ROUGH SETS, KNOWLEDGE
    DISCOVERY, MACHINE LEARNING",
}
Anytime Exploratory Data Analysis for Massive Data Sets, Padhraic Smyth and David Wolpert
@InProceedings{smyth.ea:anytime-exploratory:97,
  author   = {Padhraic Smyth and David Wolpert},
  title    = {Anytime Exploratory Data Analysis for Massive Data Sets},
  pages    = {54},
  crossref = {heckerman.ea:proceedings-third:97},
}
Detecting Atmospheric Regimes Using Cross-Validated Clustering, Padhraic Smyth and Michael Ghil and Kayo Ide and Joe Roden and Andrew Fraser
@InProceedings{smyth.ea:detecting-atmospheric:97,
  author   = {Padhraic Smyth and Michael Ghil and Kayo Ide and
              Joe Roden and Andrew Fraser},
  title    = {Detecting Atmospheric Regimes Using Cross-Validated Clustering},
  pages    = {61},
  crossref = {heckerman.ea:proceedings-third:97},
}
An information theoretic approach to rule induction from databases, P. Smyth and R. M. Goodman
@Article{smyth.ea:information-theoretic:92,
  author = "P. Smyth and R. M. Goodman",
  address = "Caltech, Jet Propuls Lab 238420, Commun Syst Res Sect,
    Pasadena, Ca, 91109 Caltech, Dept Elect Engn, Pasadena,
    Ca, 91125",
  title = "An information theoretic approach to rule induction
    from databases",
  journal = "{IEEE} Transactions on Knowledge and Data Engineering",
  year = "1992",
  volume = "4",
  number = "4",
  pages = "301--316",
  abstract = "The knowledge acquisition bottleneck in obtaining
    rules directly from an expert is well known. Hence, the
    problem of automated rule acquisition from data is a
    well-motivated one, particularly for domains where a
    database of sample data exists. In this paper we
    introduce a novel algorithm for the induction of rules
    from examples. The algorithm is novel in the sense that
    it not only learns rules for a given concept
    (classification), but it simultaneously learns rules
    relating multiple concepts. This type of learning,
    known as generalized rule induction is considerably
    more general than existing algorithms which tend to be
    classification oriented. Initially we focus on the
    problem of determining a quantitative, well-defined
    rule preference measure. In particular, we propose a
    quantity called the J-measure as an information
    theoretic alternative to existing approaches. The
    J-measure quantifies the information content of a rule
    or a hypothesis. We will outline the information
    theoretic origins of this measure and examine its
    plausibility as a hypothesis preference measure. We
    then define the ITRULE algorithm which uses the newly
    proposed measure to learn a set of optimal rules from a
    set of data samples, and we conclude the paper with an
    analysis of experimental results on real-world data.",
  keywords = "PRINCIPLE, INFERENCE, EXPERT, CROSS ENTROPY, EXPERT
    SYSTEMS, INFORMATION THEORY, MACHINE LEARNING,
    KNOWLEDGE ACQUISITION, KNOWLEDGE DISCOVERY, RULE-BASED
    SYSTEMS, RULE INDUCTION",
  month = aug,
  annote = "Describes the ITRULE system which uses the J-measure
    of average information content of a rule. Also develops
    a bound on the J-measure which is used to stop
    specialisation of a rule when no further improvement
    can be made. Info on the application of ITRULE to
    computer analysis of Bach can be found at
    http://www.gold.net/online/archive/940929_Bach.html",
}
Clustering Using Monte Carlo Cross-Validation, Padhraic Smyth
@InProceedings{smyth:clustering-using:96,
  author   = {Padhraic Smyth},
  title    = {Clustering Using Monte Carlo Cross-Validation},
  pages    = {126},
  crossref = {simoudis.ea:proceedings-second:96},
}
Learning to Extract Text-Based Information from the World Wide Web, Stephen Soderland
@InProceedings{soderland:learning-to:97,
  author   = {Stephen Soderland},
  title    = {Learning to Extract Text-Based Information from the World Wide Web},
  pages    = {251},
  crossref = {heckerman.ea:proceedings-third:97},
}
Neural and Intelligent Systems Integration: Fifth and Sixth Generation Integrated Reasoning Information Systems, Branko Soucek
@Book{soucek:neural-intelligent:91,
  author = "Branko Soucek",
  title = "Neural and Intelligent Systems Integration: Fifth and
    Sixth Generation Integrated Reasoning Information
    Systems",
  series = "Sixth Generation Computer Technologies Series",
  pages = "688",
  publisher = "Wiley-Interscience",
  year = "1991",
  ISBN = "0-471-53676-8",
  keywords = "book, text",
  abstract = "** Description ** Combines new techniques of software
    automation, system adaptation, module selection,
    self-organization and automated discovery. Presents
    results from the IRIS Group--findings from American,
    European, Korean and Japanese projects on this emerging
    discipline. Explores methods of combining well-defined
    intelligent modules for integration into intelligent
    systems. Modules include intelligent algorithms and
    programs, neural networks and computing elements, fuzzy
    data comparators and correlators, sparse distributed
    memories, expert systems, intelligent databases,
    associative and parallel processing units, and data
    acquisition, control and robot units.\par ** Partial
    Contents ** NEURAL, GENETIC, AND INTELLIGENT ALGORITHMS
    AND COMPUTING ELEMENTS. From Modules to
    Application-Oriented Integrated Systems (B. Soucek).
    Neural Network Models of Concept Learning (P. Schyns).
    Teaching Network Connections for Real-Time Object
    Recognition (S. Wilson). Neural Networks on Parallel
    Computers (H. Yoon, et al.). Neural Bit-Slice Computing
    Element (J. Yestrebsky, et al.). INTEGRATED
    NEURAL-KNOWLEDGE-FUZZY HYBRIDS. Fuzzy Data Comparator
    with Neural Network Postprocessor: A Hardware
    Implementation (P. Basehore, et al.). Injecting Symbol
    Processing Into a Connectionist Model (S. Romaniuk \&
    L. Hall). INTEGRATED REASONING, INFORMING, AND SERVING
    SYSTEMS. An Advanced Software Paradigm for Intelligent
    Systems Integration (T. Ichiko). Intelligent Data Base
    and Automatic Discovery (K. Parsaye, et al.).
    Index.\par ** Market ** Practicing Engineers and
    Scientists, Students, Researchers.",
  note = "I-0-471-53676-8 1991cloth \$89.95",
}
Using genetic algorithms for supervised concept learning, William M. Spears and Kenneth De Jong
@InProceedings{spears.ea:using-genetic:90,
  author = "William M. Spears and Kenneth De Jong",
  title = "Using genetic algorithms for supervised concept
    learning",
  booktitle = "Proceedings of tools for AI",
  organization = "IEEE",
  year = "1990",
}
Mining Association Rules with Item Constraints, Ramakrishnan Srikant and Quoc Vu and Rakesh Agrawal
@InProceedings{srikant.ea:association-rules:97,
  author   = {Ramakrishnan Srikant and Quoc Vu and Rakesh Agrawal},
  title    = {Mining Association Rules with Item Constraints},
  pages    = {67},
  crossref = {heckerman.ea:proceedings-third:97},
}
Mining Quantitative Association Rules in Large Relational Tables, Ramakrishnan Srikant and Rakesh Agrawal
@InProceedings{srikant.ea:quantitative-association:96,
  author    = {Ramakrishnan Srikant and Rakesh Agrawal},
  title     = {Mining Quantitative Association Rules in Large
               Relational Tables},
  editor    = {H. V. Jagadish and Inderpal Singh Mumick},
  booktitle = {Proceedings of the 1996 {ACM} {SIGMOD} International
               Conference on Management of Data},
  address   = {Montreal, Quebec, Canada},
  month     = "4--6~" # jun,
  year      = {1996},
  pages     = {1--12},
}
JAM: Java Agents for Meta-Learning over Distributed Databases, Salvatore Stolfo and Andreas L. Prodromidis and Shelley Tselepis and Wenke Lee and Dave W. Fan and Philip K. Chan
@InProceedings{stolfo.ea:jam-java:97,
  author   = {Salvatore Stolfo and Andreas L. Prodromidis and
              Shelley Tselepis and Wenke Lee and Dave W. Fan and
              Philip K. Chan},
  title    = {{JAM}: Java Agents for Meta-Learning over Distributed
              Databases},
  pages    = {74},
  crossref = {heckerman.ea:proceedings-third:97},
}
Harnessing Graphical Structure in Markov Chain Monte Carlo Learning, Paul E. Stolorz and Philip C. Chew
@InProceedings{stolorz.ea:harnessing-graphical:96,
  author   = {Paul E. Stolorz and Philip C. Chew},
  title    = {Harnessing Graphical Structure in Markov Chain Monte
              Carlo Learning},
  pages    = {134},
  crossref = {simoudis.ea:proceedings-second:96},
}
Quakefinder: A Scalable Data Mining System for Detecting Earthquakes from Space, Paul Stolorz and Christopher Dean
@InProceedings{stolorz.ea:quakefinder-scalable:96,
  author   = {Paul Stolorz and Christopher Dean},
  title    = {Quakefinder: {A} Scalable Data Mining System for
              Detecting Earthquakes from Space},
  pages    = {208},
  crossref = {simoudis.ea:proceedings-second:96},
}
Fast Spatio-Temporal Data Mining of Large Geophysical Datasets, P. Stolorz
@InProceedings{stolorz:fast-spatio-temporal-data-mining-of-large-geophysical-datasets:95,
  author = "P. Stolorz",
  title = "Fast Spatio-Temporal Data Mining of Large Geophysical
    Datasets",
  booktitle = "Proceedings of the First International Conference on
    Knowledge Discovery and Data Mining (KDD)",
  year = "1995",
  address = "Montreal, Canada",
  month = aug,
  publisher = "AAAI Press",
  editor = "U. M. Fayyad and R. Uthurusamy",
}
Image Feature Reduction through Spoiling: Its Application to Multiple Matched Filters for Focus of Attention, Timothy M. Stough and Carla E. Brodley
@InProceedings{stough.ea:image-feature:97,
  author   = {Timothy M. Stough and Carla E. Brodley},
  title    = {Image Feature Reduction through Spoiling: Its
              Application to Multiple Matched Filters for Focus of
              Attention},
  pages    = {255},
  crossref = {heckerman.ea:proceedings-third:97},
}
A Visual Interactive Framework for Attribute Discretization, Ramesh Subramonian and Ramana Venkata and Joyce Chen
@InProceedings{subramonian.ea:visual-interactive:97,
  author   = {Ramesh Subramonian and Ramana Venkata and Joyce Chen},
  title    = {A Visual Interactive Framework for Attribute Discretization},
  pages    = {82},
  crossref = {heckerman.ea:proceedings-third:97},
}
Exceptional Knowledge Discovery in Databases Based on Information Theory, Einoshin Suzuki and Masamichi Shimura
@InProceedings{suzuki.ea:exceptional-based:96,
  author   = {Einoshin Suzuki and Masamichi Shimura},
  title    = {Exceptional Knowledge Discovery in Databases Based on
              Information Theory},
  pages    = {275},
  crossref = {simoudis.ea:proceedings-second:96},
}
Autonomous Discovery of Reliable Exception Rules, Einoshin Suzuki
@InProceedings{suzuki:autonomous-reliable:97,
  author   = {Einoshin Suzuki},
  title    = {Autonomous Discovery of Reliable Exception Rules},
  pages    = {259},
  crossref = {heckerman.ea:proceedings-third:97},
}
Data Mining with Silicon Graphics Technology, Arun Swami
@Misc{swami:with-silicon:,
  author       = {Arun Swami},
  title        = {Data Mining with Silicon Graphics Technology},
  howpublished = {www publication},
  URL          = {http://www-europe.sgi.com/Technology/data-mining.html},
}
Undiscovered Public Knowledge: A Ten-Year Update, Don R. Swanson and Neil R. Smalheiser
@InProceedings{swanson.ea:undiscovered-public:96,
  author   = {Don R. Swanson and Neil R. Smalheiser},
  title    = {Undiscovered Public Knowledge: {A} Ten-Year Update},
  pages    = {295},
  crossref = {simoudis.ea:proceedings-second:96},
}
Uncertainty and decisions in medical informatics, P. Szolovits
@Article{szolovits:uncertainty-decisions:95,
  author = "P. Szolovits",
  address = "Mit, Comp Sci Lab, 545 Technol Sq, Cambridge, Ma,
    02139",
  title = "Uncertainty and decisions in medical informatics",
  journal = "Methods of Information in Medicine",
  year = "1995",
  volume = "34",
  number = "1--2",
  pages = "111--121",
  abstract = "This paper presents a tutorial introduction to the
    handling of uncertainty and decision-making in medical
    reasoning systems. It focuses on the central role of
    uncertainty in all of medicine and identifies the major
    themes that arise in research papers. It then reviews
    simple Bayesian formulations of the problem and pursues
    the generalization to the Bayesian network methods that
    are popular today. Decision making is presented from
    the decision analysis viewpoint, with brief mention of
    recently-developed methods. The paper concludes with
    review of more abstract characterization of
    uncertainty, and anticipates the growing importance of
    analytic and ``data mining'' techniques as growing
    amounts of clinical data become widely available.",
  keywords = "COMPUTER, NETWORKS, MODEL, DECISION SUPPORT,
    UNCERTAINTY, BAYES, GRAPH MODELS, DECISION TREES,
    INFLUENCE DIAGRAMS",
}
Visualization techniques for data mining, G. D. Tattersall and P. R. Limb
@Article{tattersall.ea:techniques:94,
  author = "G. D. Tattersall and P. R. Limb",
  address = "British Telecommun Labs, Ipswich 1P5 7Re, Suffolk,
    England",
  title = "Visualization techniques for data mining",
  journal = "{BT} Technology Journal",
  year = "1994",
  volume = "12",
  number = "4",
  pages = "23--31",
  abstract = "BT collects and stores large quantities of data from a
    variety of sources. These large data sets typically
    describe different states of a system and are difficult
    to interpret because there is no obvious way of
    abstracting and presenting data features in a
    meaningful way for a human observer. Data mining is a
    term which has recently become popular to describe
    techniques for the exploration and exploitation of
    data. In particular, a large part of data mining
    involves the visualisation of data and subsequent
    utilisation of machine-learning techniques for
    classification of data. This paper describes some
    techniques for data visualisation which enable the user
    to enhance understanding of the structure and
    properties of (often multidimensional) data prior to
    applying machine-learning techniques for further
    analysis and exploration.",
}
Program Evolution for Data Mining, Astro Teller and Manuela Veloso
@Article{teller.ea:program-evolution:95,
  author    = {Astro Teller and Manuela Veloso},
  title     = {Program Evolution for Data Mining},
  journal   = {The International Journal of Expert Systems},
  year      = {1995},
  volume    = {8},
  number    = {3},
  pages     = {216--236},
  editor    = {Sushil Louis},
  publisher = {JAI Press},
  note      = {Third Quarter. Special Issue on Genetic Algorithms and
               Knowledge Bases.},
  keywords  = {genetic algorithms, genetic programming, memory},
  URL       = {http://www.cs.cmu.edu/afs/cs/usr/astro/public/papers/Astro-ESJ.ps},
  url_2     = {ftp://cs.ucl.ac.uk/genetic/papers/Astro-ESJ.ps.Z},
  abstract  = {Around the world there are innumerable databases of
               information. The quantity of information available has
               created a high demand for automatic methods for
               searching these databases and extracting specific kinds
               of information. Unfortunately, the information in these
               databases increasingly contains signals that have no
               corresponding classification symbols. Examples include
               databases of images, sounds, etc. A few systems have
               been written to help solve these search and retrieve
               issues. But we can not write a new system for every
               kind of signal we want to recognize and extract. Some
               work has been done on automating (i.e. learning) the
               task of identifying desired signal elements. It would
               be useful to automate (learn) not just a part of the
               classification function, but the entire signal
               identification program. It would be helpful if we could
               use the same learning architecture to automatically
               create these programs for distinguishing many different
               classes of the same signal type. It would be better
               still if we could use the same learning architecture to
               create these programs even for signal types as
               different as images and sound waves. We introduce PADO
               (Parallel Architecture Discovery and Orchestration), a
               learning architecture designed to deliver this. PADO
               has at its core a variant of genetic programming (GP)
               that extends the paradigm to explore the space of
               algorithms. PADO learns the entire classification
               algorithm for an arbitrary signal type with arbitrary
               signal class distinctions. This architecture has been
               designed specifically for signal understanding and
               classification. The architecture of PADO and its
               achievements on the recovery of visual and acoustic
               signal classes from test databases are the subjects of
               this article.},
}
Interactive Knowledge Discovery from Marketing Questionnaire Using Simulated Breeding and Inductive Learning Methods, Takao Terano and Yoko Ishino
@InProceedings{terano.ea:interactive-marketing:96,
  author   = {Takao Terano and Yoko Ishino},
  title    = {Interactive Knowledge Discovery from Marketing
              Questionnaire Using Simulated Breeding and Inductive
              Learning Methods},
  pages    = {279},
  crossref = {simoudis.ea:proceedings-second:96},
}
Data Mining \& Visualization: It's All In the Interaction, Kurt Thearling
@InProceedings{thearling:its-all:97,
  author = "Kurt Thearling",
  title = "Data Mining \& Visualization: It's All In the
    Interaction",
  booktitle = "Integration of Data Mining and Data Visualization
    workshop, held in conjunction with both the KDD'97 and
    Visualization '97",
  year = "1997",
  URL = "http://www.santafe.edu/~kurt/dmviz.shtml",
  note = "Position Paper",
  annote = "What is the point of visualization? It's pretty
    simple: to let the user understand what is going on.
    Since data mining usually involves extracting
    ``hidden'' information from a database, the
    understanding process can get a bit complicated. The
    key is to put the user in a context they feel
    comfortable in and then let them poke and prod until
    they understand what they didn't see before.",
}
An Efficient Algorithm for the Incremental Updation of Association Rules in Large Databases, Shiby Thomas and Sreenath Bodagala and Khaled Alsabti and Sanjay Ranka
@InProceedings{thomas.ea:efficient-algorithm:97,
  author   = {Shiby Thomas and Sreenath Bodagala and Khaled Alsabti
              and Sanjay Ranka},
  title    = {An Efficient Algorithm for the Incremental Updation of
              Association Rules in Large Databases},
  pages    = {263},
  crossref = {heckerman.ea:proceedings-third:97},
}
The MONK's Problems, A Performance Comparison of Different Learning Algorithms, S. B. Thrun et al.
@TechReport{thrun.ea:monks-problems:91,
  author = "S. B. Thrun and others",
  title = "The {MONK}'s Problems, {A} Performance Comparison of
    Different Learning Algorithms",
  institution = "Carnegie Mellon University",
  number = "CMU-CS-91-197",
  month = dec,
  year = "1991",
  annote = "Over 100 pages, this report is really 9 short reports
    each evaluating the performance of a set of learning
    algorithms on standard test data. Contains descriptions
    of all the algorithms used. Packed full of references.
    [from the abstract] This report summarizes a comparison
    of different learning techniques which was performed at
    the 2nd European Summer School on Machine Learning,
    held in Belgium during summer 1991. A variety of
    symbolic and non-symbolic learning techniques - namely
    AQ17-DCL, AQ17-HCI, AQ17-FCLS, AQ14-NT, AQ15-GA,
    Assistant Professional, mFOIL, ID5R, IDL, ID5R-hat,
    TDIDT, ID3, AQR, CN2, CLASSWEB, ECOBWEB, PRISM,
    Backpropagation, and Cascade Correlation - are compared
    on three classification problems, the MONK's
    problems.",
}
The Extraction of Refined Rules from Knowledge-Based Neural Networks, Geoffrey G. Towell and Jude W. Shavlik
@Article{towell.ea:extraction-refined:93,
  author  = {Geoffrey G. Towell and Jude W. Shavlik},
  title   = {The Extraction of Refined Rules from Knowledge-Based
             Neural Networks},
  journal = {Machine Learning},
  volume  = {13},
  number  = {1},
  pages   = {71--101},
  year    = {1993},
}
Knowledge Acquisition Driven by Constructive and Interactive Induction, Katsuhiko Tsujino and Vlad G. Dabija and Shogo Nishida
@Article{tsujino.ea:acquisition-driven:,
  author   = {Katsuhiko Tsujino and Vlad G. Dabija and Shogo Nishida},
  title    = {Knowledge Acquisition Driven by Constructive and
              Interactive Induction},
  keywords = {Kaiser, meta-knowledge},
}
The application of rough sets-based data mining technique to differential diagnosis of meningoenchephalitis, S. Tsumoto and W. Ziarko
@Article{tsumoto.ea:application-rough:96,
  author  = {S. Tsumoto and W. Ziarko},
  title   = {The application of rough sets-based data mining
             technique to differential diagnosis of
             meningoenchephalitis},
  journal = {Lecture Notes in Computer Science},
  year    = {1996},
  volume  = {1079},
  pages   = {438--??},
  ISSN    = {0302-9743},
}
Automated Discovery of Medical Expert System Rules from Clinical Databases Based on Rough Sets, Shusaku Tsumoto and Hiroshi Tanaka
@InProceedings{tsumoto.ea:automated-medical:96,
  author   = {Shusaku Tsumoto and Hiroshi Tanaka},
  title    = {Automated Discovery of Medical Expert System Rules
              from Clinical Databases Based on Rough Sets},
  pages    = {63},
  crossref = {simoudis.ea:proceedings-second:96},
}
Bayesian Inference for Identifying Solar Active Regions, Michael Turmon and Saleem Mukhtar and Judit Pap
@InProceedings{turmon.ea:bayesian-inference:97,
  author   = {Michael Turmon and Saleem Mukhtar and Judit Pap},
  title    = {Bayesian Inference for Identifying Solar Active Regions},
  pages    = {267},
  crossref = {heckerman.ea:proceedings-third:97},
}
Deductive databases: achievements and future directions, Jeffrey D. Ullman and Carlo Zaniolo
@Article{ullman.ea:deductive-achievements:90,
  author = "Jeffrey D. Ullman and Carlo Zaniolo",
  title = "Deductive databases: achievements and future
    directions",
  journal = "SIGMOD Record (ACM Special Interest Group on
    Management of Data)",
  volume = "19",
  number = "4",
  pages = "75--82",
  month = dec,
  year = "1990",
  ISSN = "0163-5808",
  abstract = "The key concepts behind deductive databases and their
    newly developed enabling technology are reviewed. The
    declarative programming approach used for such
    databases is examined at length. Current research on
    extending the functionality and usability of deductive
    databases and on providing a synthesis of deductive
    databases with procedural and object-oriented
    approaches are described.",
  affiliation = "Stanford Univ",
  affiliationaddress = "Stanford, CA, USA",
  classification = "723; C6160Z (Other DBMS); C6170 (Expert systems)",
  keywords = "Database Systems; Reviews; Computer Programming;
    Deductive Databases; Declarative Programming;
    Object-Oriented Programming; Procedural Programming;
    Procedural databases; Declarative queries; Deductive
    databases; Rule-based style; Knowledge mining;
    Computer-aided design; Enabling technology;
    Object-oriented approaches",
  thesaurus = "Deductive databases",
}
Efficient Implementation of Data Cubes Via Materialized Views, Jeffrey D. Ullman
@InProceedings{ullman:efficient-implementation:96,
  author   = {Jeffrey D. Ullman},
  title    = {Efficient Implementation of Data Cubes Via
              Materialized Views},
  pages    = {386},
  crossref = {simoudis.ea:proceedings-second:96},
}
Papers on Rough Sets Theory and Information Systems, A team working at the University of Warsaw and Warsaw University of Technology.
@Misc{university-of-warsaw.ea:papers-on:,
  URL = "ftp://ftp.ii.pw.edu.pl/pub/Reports",
  title = "Papers on Rough Sets Theory and Information Systems",
  author = "{A team working in University of Warsaw and Warsaw
    University of Technology}",
  annote = "The main field of our work is Rough Sets Theory and
    Information Systems. Roughly speaking, our work is to
    find dependencies in experimental datasets to simulate
    decision processes with high quality.",
}
RITIO - Rule Induction Two In One, David Urpani and Xindong Wu and Jim Sykes
@InProceedings{urpani.ea:ritio-rule:96,
  author   = {David Urpani and Xindong Wu and Jim Sykes},
  title    = {{RITIO} - Rule Induction Two In One},
  pages    = {339},
  crossref = {simoudis.ea:proceedings-second:96},
}
Incremental Induction of Decision Trees, P. E. Utgoff
@Article{utgoff:incremental-induction:89,
  author = "P. E. Utgoff",
  title = "Incremental Induction of Decision Trees",
  journal = "Machine Learning",
  year = "1989",
  volume = "4",
  pages = "161--186",
  keywords = "ID3, ID5R",
}
Shift of Bias for Inductive Concept Learning, Paul E. Utgoff
@InCollection{utgoff:shift-bias:86,
  author   = {Paul E. Utgoff},
  title    = {Shift of Bias for Inductive Concept Learning},
  pages    = {107--148},
  crossref = {michalski.ea:machine-learning:86},
}
A Theory of the Learnable, Leslie G. Valiant
@Article{valiant:theory-learnable:84,
  author   = {Leslie G. Valiant},
  title    = {A Theory of the Learnable},
  journal  = {Communications of the ACM},
  year     = {1984},
  month    = nov,
  volume   = {27},
  number   = {11},
  pages    = {1134--1142},
  ISSN     = {0001-0782},
  abstract = {Humans appear to be able to learn new concepts without
              needing to be programmed explicitly in any conventional
              sense. In this paper we regard learning as the
              phenomenon of knowledge acquisition in the absence of
              specific programming. We give a precise methodology for
              studying this phenomenon from a computational
              viewpoint. It consists of choosing an appropriate
              information gathering mechanism, the learning protocol,
              and exploring the class of concepts that can be learned
              using it in a reasonable (polynomial) number of steps.
              Although inherent algorithmic complexity appears to set
              serious limits to the range of concepts that can be
              learned, we show that there are some important
              nontrivial classes of propositional concepts that can
              be learned in a realistic sense.},
  comment  = {Defines `learnability' wrt EXAMPLES and ORACLE using
              arbitrary probability measure on event space. Shows
              k-CNF learnable from examples only.},
}
Interpretation and knowledge discovery from the multilayer perceptron network - opening the black-box, M. L. Vaughn
@Article{vaughn:interpretation-multilayer:96,
  author = "M. L. Vaughn",
  address = "Cranfield Univ, Comp Informat Syst Management Grp,
    Rmcs, Swindon Sn6 8La, Wilts, England",
  title = "Interpretation and knowledge discovery from the
    multilayer perceptron network - opening the black-box",
  journal = "Neural Computing \& Applications",
  year = "1996",
  volume = "4",
  number = "2",
  pages = "72--82",
  abstract = "This paper interprets the outputs from the multilayer
    perceptron (MLP) network by finding the input data
    features at the input layer of the network which
    activate the hidden layer feature detectors. This leads
    directly to the deduction of the significant data
    inputs, the inputs that the network actually uses to
    perform the input/output mapping for a classification
    task, and the discovery of the most significant of
    these data inputs. The analysis presents a method for
    providing explanations for the network outputs and for
    representing the knowledge learned by the network in
    the form of significant input data relationships.
    During network development the explanation facilities
    and data relationships can be used for network
    validation and verification, and after development, for
    rule induction and data mining where this method
    provides a potential tool for knowledge discovery in
    databases (KDD).",
  keywords = "DATA MINING, EXPLANATION FACILITIES, INTERPRETATION,
    KNOWLEDGE DISCOVERY, RULE INDUCTION, VALIDATION AND
    VERIFICATION",
}
Knowledge discovery from databases: an introductory review, B. Vickery
@Article{vickery:introductory-review:97,
  author = "B. Vickery",
  address = "Univ Coll London, Mortimer St, London Wc1E 6Bt,
    England",
  title = "Knowledge discovery from databases: an introductory
    review",
  journal = "Journal of Documentation",
  year = "1997",
  volume = "53",
  number = "2",
  pages = "107--122",
  abstract = "The paper aims to provide a non-technical introduction
    to the new procedures being used to extract knowledge
    from databases. The reasons for developing knowledge
    discovery methods are discussed - primarily, the
    current production of very large databases that may
    include many data relations not explicit in the
    database structure. The background in machine learning
    is indicated. The methods used are described for such
    techniques as classification (sorting data into
    predefined classes), clustering (developing ab initio a
    data classification) and the detection of deviations
    from pre-established norms. Examples of the
    applications of these methods are given. The paper
    concludes with some brief thoughts about the potential
    use of knowledge discovery in the information field.",
  keywords = "KDD, introduction, review",
}
Proceedings of the 22nd International Conference on Very Large Data Bases, T. M. Vijayaraman and A. Buchmann and C. Mohan and N. L. Sarda (Eds)
@Proceedings{vijayaraman.ea:proceedings-22nd:96,
  editor = "T. M. Vijayaraman and A. Buchmann and C. Mohan and N.
    L. Sarda",
  title = "Proceedings of the 22nd International Conference on
    Very Large Data Bases",
  booktitle = "Proceedings of the 22nd International Conference on
    Very Large Data Bases",
  publisher = "Morgan Kaufmann",
  address = "San Francisco",
  year = "1996",
  ISBN = "1-55860-382-4",
  descriptor = "Data Mining, Anfragenbearbeitung, Raeumlicher
    Zugriffspfad, Datenbank, VLDB",
}
How feasible is automated discovery, Michael G. Walker
@Article{walker:how-feasible:87,
  author = "Michael G. Walker",
  title = "How feasible is automated discovery",
  journal = "IEEE Expert",
  year = "1987",
  volume = "2",
  number = "1",
  issue = "Spring",
  pages = "69--82",
  annote = "Looks at Meta-dendral, RX (radix), Bacon, Prospector,
    AM. Compares them on Domain Knowledge, Search method,
    Search Representation, Data Driven vs Model driven
    discovery, Hypothesis Testing, Signal to Noise Ratio",
}
Automated Discovery of Active Motifs in Multiple RNA Secondary Structures, Jason T. L. Wang and Bruce A. Shapiro and Dennis Shasha and Kaizhong Zhang and Chia-Yo Chang
@InProceedings{wang.ea:automated-active:96,
  author   = {Jason T. L. Wang and Bruce A. Shapiro and Dennis
              Shasha and Kaizhong Zhang and Chia-Yo Chang},
  title    = {Automated Discovery of Active Motifs in Multiple {RNA}
              Secondary Structures},
  pages    = {70},
  crossref = {simoudis.ea:proceedings-second:96},
}
Automated Discovery of Active Motifs in Three Dimensional Molecules, Xiong Wang and Jason T. L. Wang and Dennis Shasha and Bruce Shapiro and Sitaram Dikshitulu and Isidore Rigoutsos and Kaizhong Zhang
@InProceedings{wang.ea:automated-active:97,
  author   = {Xiong Wang and Jason T. L. Wang and Dennis Shasha and
              Bruce Shapiro and Sitaram Dikshitulu and Isidore
              Rigoutsos and Kaizhong Zhang},
  title    = {Automated Discovery of Active Motifs in Three
              Dimensional Molecules},
  pages    = {89},
  crossref = {heckerman.ea:proceedings-third:97},
}
Combinatorial Pattern Discovery for Scientific Data: Some Preliminary Results, J. Tsong-Li Wang and Gung-Wei Chirn and T. G. Marr and B. Shapiro and D. Shasha and K. Zhang
@Article{wang.ea:combinatorial-pattern:94,
  author         = {J. Tsong-Li Wang and Gung-Wei Chirn and T. G. Marr and
                    B. Shapiro and D. Shasha and K. Zhang},
  title          = {Combinatorial Pattern Discovery for Scientific Data:
                    Some Preliminary Results},
  journal        = {SIGMOD Record (ACM Special Interest Group on
                    Management of Data)},
  year           = {1994},
  month          = jun,
  volume         = {23},
  number         = {2},
  pages          = {115--125},
  issn           = {0163-5808},
  affiliation    = {Dept. of Comput. and Inf. Sci., New Jersey Inst. of
                    Technol., Newark, NJ, USA},
  classification = {C7330 (Biology and medicine); C1250 (Pattern
                    recognition); C1180 (Optimisation techniques); C1160
                    (Combinatorial mathematics)},
  keywords       = {Combinatorial pattern discovery; Scientific data;
                    Natural entities; Distance metric; Protein databases;
                    String edit distance; Common externally observable
                    properties; Structural description; Variable-length
                    don't cares; String matching algorithms; Discovery
                    heuristics; Optimization heuristics; Protein
                    classification; Data mining},
  thesaurus      = {Biology computing; Combinatorial mathematics; Natural
                    sciences computing; Optimisation; Pattern recognition;
                    Proteins},
  xxcrossref     = {Anonymous:1994:ASI},
}
Discovering Active Motifs in Sets of Related Protein Sequences and Using Them for Classification, J. T. L. Wang and T. G. Marr and D. Shasha and B. A. Shapiro and G.-W. Chirn
@Article{wang.ea:discovering-active:94,
  author = "J. T. L. Wang and T. G. Marr and D. Shasha and B. A.
                 Shapiro and G.-W. Chirn",
  title = "Discovering Active Motifs in Sets of Related Protein
                 Sequences and Using Them for Classification",
  journal = "Nucleic Acids Research",
  year = "1994",
  month = aug,
  volume = "22",
  number = "14",
  pages = "2769--2775",
  abstract-url = "http://hertz.njit.edu/~jason/nar94.html",
  URL = "http://hertz.njit.edu/~jason/nar94.html",
  keywords = "Data mining, combinatorial pattern discovery,
                 proteins, biochemistry",
}
Representing Discovered Patterns Using Attributed Hypergraph, Yang Wang and Andrew K. C. Wong
@InProceedings{wang.ea:representing-discovered:96,
  author   = {Yang Wang and Andrew K. C. Wong},
  title    = {Representing Discovered Patterns Using Attributed
              Hypergraph},
  pages    = {283},
  crossref = {simoudis.ea:proceedings-second:96},
}
Schema Discovery for Semistructured Data, Ke Wang and Huiqing Liu
@InProceedings{wang.ea:schema-semistructured:97,
  author   = {Ke Wang and Huiqing Liu},
  title    = {Schema Discovery for Semistructured Data},
  pages    = {271},
  crossref = {heckerman.ea:proceedings-third:97},
}
Selecting Features by Vertical Compactness of Data, Ke Wang and Suman Sundaresh
@InProceedings{wang.ea:selecting-features:97,
  author   = {Ke Wang and Suman Sundaresh},
  title    = {Selecting Features by Vertical Compactness of Data},
  pages    = {275},
  crossref = {heckerman.ea:proceedings-third:97},
}
Optimized Rule Induction, Sholom M. Weiss and Nitin Indurkhya
% NOTE(review): volume/number inferred from IEEE Expert's numbering
% (vol. 1 = 1986, December = no. 6 of a bimonthly); confirm against the journal.
@Article{weiss.ea:optimized-rule:93,
  author = "Sholom M. Weiss and Nitin Indurkhya",
  title = "Optimized Rule Induction",
  journal = "IEEE Expert",
  year = "1993",
  month = dec,
  volume = "8",
  number = "6",
  pages = "61--69",
  keywords = "Swap-1, decision rules, comparison.",
  annote = "Discusses the Swap-1 algorithm for learning decision
                 rules. It is tested on 4 real world datasets - Nettalk,
                 Heart, DNA and Rheum. Comparison with published info on
                 NN, Linear Discriminants and decision trees applied to
                 same problems. Possible extension through using a GA?
                 33 references.",
}
Rule-based Machine Learning Methods for Functional Prediction, S. M. Weiss and N. Indurkhya
% `volume` follows the URL below, which points at the journal's volume 3
% table of contents.
@Article{weiss.ea:rule-based-machine:95,
  author = "S. M. Weiss and N. Indurkhya",
  title = "Rule-based Machine Learning Methods for Functional
                 Prediction",
  journal = "Journal of Artificial Intelligence Research",
  year = "1995",
  volume = "3",
  pages = "383--403",
  abstract = "We describe a machine learning method for predicting
                 the value of a real-valued function, given the values
                 of multiple input variables. The method induces
                 solutions from samples in the form of ordered
                 disjunctive normal form (DNF) decision rules. A central
                 objective of the method and representation is the
                 induction of compact, easily interpretable solutions.
                 This rule-based decision model can be extended to
                 search efficiently for similar cases prior to
                 approximating function values. Experimental results on
                 real-world data demonstrate that the new techniques are
                 competitive with existing machine learning and
                 statistical methods and can sometimes yield superior
                 regression performance",
  URL = "http://www.cs.washington.edu/research/jair/table-of-contents-vol3.html",
}
Acquisition of Knowledge from Data, Gio C. M. Wiederhold and Michael G. Walker and Robert L. Blum and Stephen M. Downs
@InProceedings{wiederhold.ea:acquisition:86,
  author    = {Gio C. M. Wiederhold and Michael G. Walker and Robert
               L. Blum and Stephen M. Downs},
  title     = {Acquisition of Knowledge from Data},
  booktitle = {{ACM SIGART} International Symposium on Methodologies
               for Intelligent Systems},
  year      = {1986},
  address   = {Knoxville, Tennessee},
  pages     = {74--84},
}
A Bi-directional ILP Algorithm, M. Wiese
@InProceedings{wiese:bi-directional-ilp:96,
  author = "M. Wiese",
  title = "A Bi-directional {ILP} Algorithm",
  booktitle = "Proceedings of the MLnet Familiarization Workshop on
                 Data Mining with Inductive Logic Programming",
  pages = "61--72",
  year = "1996",
}
Detecting Early Indicator Cars in an Automotive Database: A Multi-Strategy Approach, Ruediger Wirth and Thomas P. Reinartz
@InProceedings{wirth.ea:detecting-early:96,
  author   = {Ruediger Wirth and Thomas P. Reinartz},
  title    = {Detecting Early Indicator Cars in an Automotive
              Database: {A} Multi-Strategy Approach},
  pages    = {76},
  crossref = {simoudis.ea:proceedings-second:96},
}
Computing as compression - an overview of the sp theory and system, J. G. Wolff
% The issue number is stored in `number` (standard styles ignore `issue`);
% the author's postal address is stored in `affiliation`, since `address`
% means the publisher's city.
@Article{wolff:computing-as:95,
  author = "J. G. Wolff",
  affiliation = "Univ Coll N Wales, Sch Electr Engn \& Comp Syst, Dean
                 St, Bangor Ll57 1Ut, Gwynedd, Wales",
  title = "Computing as compression - an overview of the {SP}
                 theory and system",
  journal = "New Generation Computing",
  year = "1995",
  volume = "13",
  number = "2",
  pages = "187--214",
  abstract = "This article is an overview of a programme of research
                 based on the conjecture that all kinds of computing and
                 formal reasoning may usefully be understood as
                 information compression by pattern matching,
                 unification and metrics-guided search. The research
                 aims to develop this idea into a theory of computing to
                 integrate and simplify diverse concepts in the field.
                 The research also aims to develop a 'new generation'
                 computing system, based on the theory, to integrate and
                 simplify diverse kinds of computing and to achieve more
                 flexibility and 'intelligence' than conventional
                 computers. Software simulations of the proposed new
                 system provide a concrete expression of the developing
                 theory and a test-bed for the ideas. The background to
                 the research is briefly reviewed including evidence
                 that information compression is a significant element
                 in biological information processing systems. Concepts
                 of information and redundancy are described as a basis
                 for describing how information compression may be
                 achieved by the comparison or matching of patterns, the
                 merging or unification of patterns which are the same,
                 together with metrics-guided search (e.g., 'hill
                 climbing', 'beam search') to maximise compression for a
                 given computational effort. The main elements of the SP
                 theory and of the proposed SP system are described with
                 a summary of developments to date. Some of the kinds of
                 computing which may be interpreted as information
                 compression are briefly reviewed. These include: the
                 'low level' workings of conventional computers;
                 information retrieval, pattern recognition and
                 de-referencing of identifiers; unsupervised inductive
                 learning (grammatical inference, data mining, automatic
                 organisation of software and of knowledge bases); the
                 execution of mathematical or computing functions;
                 deductive and probabilistic inference; parsing and
                 natural language processing; planning and problem
                 solving. Areas of uncertainty where further work is
                 needed are indicated at appropriate points throughout
                 the article.",
  keywords = "KOLMOGOROV, COMPLEXITY, MODEL, INFORMATION
                 COMPRESSION, THEORY OF COMPUTING, LEARNING, INFORMATION
                 RETRIEVAL, PATTERN RECOGNITION, DEDUCTION, ABDUCTION",
}
Extensibility in data mining systems, Stefan Wrobel and Dietrich Wettschereck and Edgar Sommer and Werner Emde
@InProceedings{wrobel.ea:extensibility-systems:96,
  author    = {Stefan Wrobel and Dietrich Wettschereck and Edgar
               Sommer and Werner Emde},
  title     = {Extensibility in data mining systems},
  booktitle = {Proc. 2nd International Conference On Knowledge
               Discovery and Data Mining},
  editor    = {Evangelos Simoudis and Jia Wei Han and Usama Fayyad},
  year      = {1996},
  month     = aug,
  pages     = {214--219},
  publisher = {AAAI Press},
  address   = {Menlo Park, CA, USA},
  url       = {ftp://ftp.gmd.de/ml-archive/GMD/papers/ML75.ps.gz},
}
The ILP description learning problem: Towards a general model-level definition of data mining in ILP, Stefan Wrobel and Saso Dzeroski
@InProceedings{wrobel.ea:ilp-description:95,
  author    = {Stefan Wrobel and Saso Dzeroski},
  title     = {The {ILP} description learning problem: Towards a
               general model-level definition of data mining in
               {ILP}},
  booktitle = {Proc. Fachgruppentreffen Maschinelles Lernen
               (FGML-95)},
  editor    = {K. Morik and J. Herrmann},
  year      = {1995},
  publisher = {Univ. Dortmund},
  address   = {44221 Dortmund},
  note      = {Research Report 580},
  url       = {ftp://ftp.gmd.de/ml-archive/GMD/papers/ML68.ps.gz},
}
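User Interactivity in Very Large Scale Data Mining, Stefan Wrobel and Dietrich Wettschereck and A. Inkeri Verkamo and Arno Siebes and Heikki Mannila and Fred Kwakkel and Willi Kl{\"o}sgen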
@InProceedings{wrobel.ea:user-interactivity:96,
  author    = {Stefan Wrobel and Dietrich Wettschereck and A. Inkeri
               Verkamo and Arno Siebes and Heikki Mannila and Fred
               Kwakkel and Willi Kl{\"o}sgen},
  title     = {User Interactivity in Very Large Scale Data Mining},
  booktitle = {Proc. FGML-96 (Annual Meeting of the GI Special
               Interest Group Machine Learning)},
  editor    = {W. Dilger and M. Schlosser and J. Zeidler and A.
               Ittner},
  year      = {1996},
  month     = aug,
  pages     = {125--130},
  publisher = {TU Chemnitz-Zwickau},
  address   = {09111 Chemnitz},
  note      = {Computer Science Technical Report No. CSR-96-06.},
  url       = {ftp://ftp.gmd.de/ml-archive/GMD/papers/ML74.ps.gz},
}
Extensibility in Data Mining Systems, Stefan Wrobel
% Same paper as wrobel.ea:extensibility-systems:96; this key is kept so that
% existing citations still resolve. Authors and page range agree with that entry.
@InProceedings{wrobel:extensibility-systems:96,
  author = "Stefan Wrobel and Dietrich Wettschereck and Edgar
                 Sommer and Werner Emde",
  title = "Extensibility in Data Mining Systems",
  pages = "214--219",
  crossref = "simoudis.ea:proceedings-second:96",
}
A graphical user-interface for knowledge discovery in databases, X. Wu and N. Cercone
% The issue number is stored in `number` (standard styles ignore `issue`);
% the authors' postal addresses are stored in `affiliation`, since `address`
% means the publisher's city.
@Article{wu.ea:graphical-user-interface:96,
  author = "X. Wu and N. Cercone",
  affiliation = "Hiroshima Univ, Fac Engn, Dept Elect Engn, Ai
                 Architecture Lab, 1-4-1 Kagamiyama, Higashihiroshima
                 739, Japan; Univ Regina, Regina, Sk S4S 0A2, Canada",
  title = "A graphical user-interface for knowledge discovery in
                 databases",
  journal = "Engineering Applications of Artificial Intelligence",
  year = "1996",
  volume = "9",
  number = "6",
  pages = "691--700",
  abstract = "This paper describes a graphical user-interface for
                 database-oriented knowledge discovery systems, DBLEARN,
                 which has been developed for extracting knowledge rules
                 from relational databases. The interface, designed
                 using a query-by-example approach, provides a
                 graphical means of specifying knowledge-discovery
                 tasks. The interface supplies a graphical browsing
                 facility to help users to perceive the nature of the
                 target database structure. In order to guide users'
                 task specification, a cooperative, menu-based guidance
                 facility has been integrated into the interface. The
                 interface also supplies a graphical interactive
                 adjusting facility for helping users to refine the task
                 specification to improve the quality of learned
                 knowledge rules. Copyright (C) 1996 Elsevier Science
                 Ltd",
  keywords = "LANGUAGE, EXAMPLE, graphical user-interfaces,
                 knowledge discovery systems, database mining, database
                 query processing, AI applications, visualisation",
}
Integration of Heuristic and Bayesian Approaches in a Pattern-Classification System, Q. Wu and P. Suetens and A. Oosterlinck
@InCollection{wu.ea:integration-heuristic:91,
  author    = {Q. Wu and P. Suetens and A. Oosterlinck},
  title     = {Integration of Heuristic and Bayesian Approaches in a
               Pattern-Classification System},
  pages     = {249--260},
  crossref  = {piatetsky-shapiro.ea:knowledge-discovery:91},
  booktitle = {Knowledge Discovery in Databases},
  editor    = {Gregory Piatetsky-Shapiro and William J. Frawley},
  publisher = {AAAI Press / The MIT Press},
  address   = {Menlo Park, California},
  edition   = {1st},
  year      = {1991},
}
Knowledge Discovery in Databases, B. Wuethrich
% `institution` is taken from the contact details at the end of the abstract.
% NOTE(review): `year` is inferred from the FTP path in `annote`
% (techreport/95/tr95-04); confirm against the report itself.
@TechReport{wuethrich:knowledge-discovery:,
  author = "B. Wuethrich",
  title = "Knowledge Discovery in Databases",
  institution = "The Hong Kong University of Science and Technology, CS
                 Dept",
  year = "1995",
  URL = "http://www.cs.ust.hk/faculty/beat/bio.html",
  abstract = "[FROM DRAFT - anp] This is a draft of a manuscript of
                 a postgraduate course taught at the Hong Kong
                 University of Science and Technology in Spring 94. The
                 course gives an introduction into the young and
                 fascinating field of knowledge discovery in databases.
                 The manuscript is suited for beginners who can leave
                 out the more advanced sections, as well as people who
                 would like to do research in this area. This manuscript
                 is partly incomplete. Table of Contents [edited -
                 Andy]
                 1. Introduction 2. Rule Languages 3.
                 Uncertainty 4. Time 5. Learning Propositional Rules and
                 Decision Trees 6. Learning Datalog Rules 7. Learning
                 Probabilistic Knowledge
                 Dr. Beat Wuethrich The Hong
                 Kong University of Science and Technology CS Dept (room
                 3512) Clear Water Bay Kowloon, Hong Kong email:
                 beat(at)cs.ust.hk",
  annote = "The link above has report divided into sections, the
                 Full report in one file is also available at
                 ftp://ftp.cs.ust.hk/pub/techreport/95/tr95-04.ps.gz",
}
Probabilistic knowledge bases, B. Wuthrich
% The issue number is stored in `number` (standard styles ignore `issue`);
% the author's postal address is stored in `affiliation`, since `address`
% means the publisher's city.
@Article{wuthrich:probabilistic-bases:95,
  author = "B. Wuthrich",
  affiliation = "Hong Kong Univ Sci \& Technol, Kowloon, Hong Kong",
  title = "Probabilistic knowledge bases",
  journal = "IEEE Transactions on Knowledge and Data Engineering",
  year = "1995",
  volume = "7",
  number = "5",
  pages = "691--698",
  abstract = "We define a new fixpoint semantics for rule-based
                 reasoning in the presence of weighted information. The
                 semantics is illustrated on a real-world application
                 requiring such reasoning. Optimizations and
                 approximations of the semantics are shown so as to make
                 the semantics amenable to very large scale real-world
                 applications. We finally prove that the semantics is
                 probabilistic and reduces to the usual fixpoint
                 semantics of stratified Datalog if all information is
                 certain. We implemented various knowledge discovery
                 systems which automatically generate such probabilistic
                 decision rules. In collaboration with a bank in Hong
                 Kong we use one such system to forecast currency
                 exchange rates.",
  keywords = "LOGIC, AXIOMATIC PROBABILITY THEORY, DATA MINING,
                 INCOMPLETE INFORMATION, KNOWLEDGE DISCOVERY IN
                 DATABASES, QUERY OPTIMIZATION AND APPROXIMATION,
                 STRATIFIED DATALOG",
}
Knowledge Discovery in Integrated Call Centers: A Framework for Effective Customer-Driven Marketing, Paul Xia
@InProceedings{xia:integrated-call:97,
  author   = {Paul Xia},
  title    = {Knowledge Discovery in Integrated Call Centers: {A}
              Framework for Effective Customer-Driven Marketing},
  pages    = {279},
  crossref = {heckerman.ea:proceedings-third:97},
}
2nd international workshop on rough sets and knowledge discovery - banff, canada, 10-15 october 1993, R. Yasdi
% The issue number is stored in `number` (standard styles ignore `issue`);
% the author's postal address is stored in `affiliation`. Proper nouns in
% the title are braced so sentence-casing styles keep their capitals.
@Article{yasdi:2nd-international:94,
  author = "R. Yasdi",
  affiliation = "Hsch Bremerhaven, Karlstadt 8, D-27568 Bremerhaven,
                 Germany",
  title = "2nd International Workshop on Rough Sets and Knowledge
                 Discovery - {Banff}, {Canada}, 10--15 {October} 1993",
  journal = "AI Communications",
  year = "1994",
  volume = "7",
  number = "2",
  pages = "128--129",
}
Learning Classification Rules from Database in the Context of Knowledge Acquisition and Representation, Ramin Yasdi
@Article{yasdi:learning-classification:91,
  author  = {Ramin Yasdi},
  title   = {Learning Classification Rules from Database in the
             Context of Knowledge Acquisition and Representation},
  journal = {IEEE Transactions on Knowledge and Data Engineering},
  year    = {1991},
  month   = sep,
  volume  = {3},
  number  = {3},
  pages   = {293--306},
}
Computing Optimized Rectilinear Regions for Association Rules, Kunikazu Yoda and Takeshi Fukuda and Yasuhiko Morimoto and Shinichi Morishita and Takeshi Tokuyama
@InProceedings{yoda.ea:computing-optimized:97,
  author   = {Kunikazu Yoda and Takeshi Fukuda and Yasuhiko Morimoto
              and Shinichi Morishita and Takeshi Tokuyama},
  title    = {Computing Optimized Rectilinear Regions for
              Association Rules},
  pages    = {96},
  crossref = {heckerman.ea:proceedings-third:97},
}
A Framework for Knowledge Discovery and Evolution in Databases (78K), Jong P. Yoon and Larry Kerschberg
% `institution` comes from the original note and `number` from the file name
% in the URL. The "(78K)" that used to trail the title is kept in `note`.
% NOTE(review): `year` is inferred from the report number (TR-93-109), while
% the note carries a 1994 date; confirm which is the publication year.
@TechReport{yoon.ea:framework-evolution:,
  author = "Jong P. Yoon and Larry Kerschberg",
  title = "A Framework for Knowledge Discovery and Evolution in
                 Databases",
  institution = "George Mason University, ISSE",
  number = "ISSE-TR-93-109",
  year = "1993",
  URL = "ftp://isse.gmu.edu/pub/techrep/by_index/ISSE-TR-93-109.ps.Z",
  note = "George Mason U, ISSE. July 03, 1994. (78K)",
}
A framework for knowledge discovery and evolution in databases, J. P. Yoon and L. Kerschberg
% The issue number is stored in `number` (standard styles ignore `issue`);
% the authors' postal address is stored in `affiliation`, since `address`
% means the publisher's city.
@Article{yoon.ea:framework-evolution:93,
  author = "J. P. Yoon and L. Kerschberg",
  affiliation = "George Mason Univ, Sch Informat Technol \& Engn, Ctr
                 Artificial Intelligence, Fairfax, Va, 22030",
  title = "A framework for knowledge discovery and evolution in
                 databases",
  journal = "IEEE Transactions on Knowledge and Data Engineering",
  year = "1993",
  month = dec,
  volume = "5",
  number = "6",
  pages = "973--979",
  abstract = "Although knowledge discovery is increasingly important
                 in databases, discovered knowledge is not always useful
                 to users. It is mainly because the discovered knowledge
                 does not fit the user's interests, or it may be
                 redundant or inconsistent with a priori knowledge.
                 Knowledge discovery in databases depends critically on
                 how well a database is characterized and how
                 consistently the existing and discovered knowledge is
                 evolved. This paper describes a novel concept for
                 knowledge discovery and evolution in databases. The key
                 issues of this work include: using a database query to
                 discover new rules; using not only positive examples
                 (answer to a query) but also negative examples to
                 discover new rules; harmonizing existing rules with the
                 new rules. The main contribution of this paper is the
                 development of a new tool for 1) characterizing the
                 exceptions in databases and 2) evolving knowledge as a
                 database evolves.",
  keywords = "ACTIVE DATABASE EVOLUTION, DATABASE MINING, EXPERTISE
                 TRANSFER, KNOWLEDGE DISCOVERY, KNOWLEDGE REFINEMENT",
}
Evaluation of Sampling for Data Mining of Association Rules, Mohammed Javeed Zaki and Srinivasan Parthasarathy and Wei Li and Mitsunori Ogihara
@TechReport{zaki.ea:evaluation-sampling:96,
  author = "Mohammed Javeed Zaki and Srinivasan Parthasarathy and
                 Wei Li and Mitsunori Ogihara",
  title = "Evaluation of Sampling for Data Mining of Association
                 Rules",
  institution = "University of Rochester, Computer Science Department",
  number = "TR 617",
  month = may,
  year = "1996",
  keywords = "data mining; association rules; random sampling;
                 Chernoff bounds",
  URL = "ftp://ftp.cs.rochester.edu/pub/papers/systems/96.tr617.Sampling_for_data_mining_of_association_rules.ps.gz",
  abstract = "Data mining is an emerging research area, whose goal
                 is to extract significant patterns or interesting rules
                 from large databases. High-level inference from large
                 volumes of routine business data can provide valuable
                 information to businesses, such as customer buying
                 patterns, shelving criterion in supermarkets, and stock
                 trends. However, many algorithms proposed for data
                 mining of association rules make repeated passes over
                 the database to determine the commonly occurring {\em
                 itemsets} (or set of items). For large databases, the
                 I/O overhead in scanning the database can be extremely
                 high. In this paper we show that random sampling of
                 transactions in the database is an effective method for
                 finding association rules. Sampling can speed up the
                 mining process by more than an order of magnitude by
                 reducing I/O costs and drastically shrinking the number
                 of transactions to be considered. We may also be able
                 to make the sampled database resident in main-memory.
                 Furthermore, we show that sampling can accurately
                 represent the data patterns in the database with high
                 confidence. We experimentally evaluate the
                 effectiveness of sampling on three databases.",
}
New Algorithms for Fast Discovery of Association Rules, M. J. Zaki and S. Parthasarathy and M. Ogihara and W. Li
@InProceedings{zaki.ea:new-algorithms:97,
  author   = {M. J. Zaki and S. Parthasarathy and M. Ogihara and W.
              Li},
  title    = {New Algorithms for Fast Discovery of Association
              Rules},
  pages    = {283},
  crossref = {heckerman.ea:proceedings-third:97},
}
Parallel Data Mining for Association Rules on Shared-memory Multi-processors, M. J. Zaki and M. Ogihara and S. Parthasarathy and W. Li
@InProceedings{zaki.ea:parallel-association:96a,
  author       = {M. J. Zaki and M. Ogihara and S. Parthasarathy and W.
                  Li},
  title        = {Parallel Data Mining for Association Rules on
                  Shared-memory Multi-processors},
  booktitle    = {CD-ROM Proceedings of Supercomputing'96},
  year         = {1996},
  month        = nov,
  publisher    = {IEEE},
  address      = {Pittsburgh, PA},
  key_modifier = {a},
}
Parallel Data Mining for Association Rules on Shared-Memory Multiprocessors, Mohammed Javeed Zaki and Mitsunori Ogihara and Srinivasan Parthasarathy and Wei Li
@TechReport{zaki.ea:parallel-association:96b,
  key_modifier = "b",
  author = "Mohammed Javeed Zaki and Mitsunori Ogihara and
                 Srinivasan Parthasarathy and Wei Li",
  title = "Parallel Data Mining for Association Rules on
                 Shared-Memory Multiprocessors",
  institution = "University of Rochester, Computer Science Department",
  number = "TR 618",
  month = may,
  pages = "22",
  year = "1996",
  keywords = "data mining; association rules; load balancing; hash
                 tree balancing; hashing; shared-memory multiprocessor",
  URL = "ftp://ftp.cs.rochester.edu/pub/papers/systems/96.tr618.Parallel_data_mining_for_association_rules.ps.gz",
  abstract = "Data mining is an emerging research area, whose goal
                 is to extract significant patterns or interesting rules
                 from large databases. High-level inference from large
                 volumes of routine business data can provide valuable
                 information to businesses, such as customer buying
                 patterns, shelving criterion in supermarkets, and stock
                 trends. Many algorithms have been proposed for data
                 mining of association rules. However, research so far
                 has mainly focused on sequential algorithms. In
                 this paper we present parallel algorithms for data
                 mining of association rules, and study the degree of
                 parallelism, synchronization, and data locality issues
                 on the SGI Power Challenge shared-memory
                 multi-processor. We further present a set of
                 optimizations for the sequential and parallel
                 algorithms. Experiments show that a significant
                 improvement of performance is achieved using our
                 proposed optimizations. We also achieved good speed-up
                 for the parallel algorithm, but we observe a need for
                 parallel I/O techniques for further performance
                 gains.",
}
Fast and Intuitive Clustering of Web Documents, Oren Zamir and Oren Etzioni and Omid Madani and Richard M. Karp
@InProceedings{zamir.ea:fast-intuitive:97,
  author   = {Oren Zamir and Oren Etzioni and Omid Madani and
              Richard M. Karp},
  title    = {Fast and Intuitive Clustering of Web Documents},
  pages    = {287},
  crossref = {heckerman.ea:proceedings-third:97},
}
Discovering concept clusters by decomposing databases, N. Zhong and S. Ohsuga
% The issue number is stored in `number` (standard styles ignore `issue`);
% the authors' postal address is stored in `affiliation`, since `address`
% means the publisher's city.
@Article{zhong.ea:discovering-concept:94,
  author = "N. Zhong and S. Ohsuga",
  affiliation = "Univ Tokyo, Adv Sci \& Technol Res Ctr, 4-6-1 Komaba,
                 Meguro Ku, Tokyo 153, Japan",
  title = "Discovering concept clusters by decomposing
                 databases",
  journal = "Data \& Knowledge Engineering",
  year = "1994",
  volume = "12",
  number = "2",
  pages = "223--244",
  abstract = "This paper introduces an approach of discovering
                 concept clusters by decomposing databases. This
                 approach is the fundamental one for developing DBI
                 which is one of sub-systems of the GLS discovery
                 system implemented by us. A key feature of this
                 approach is the formation of concept clusters or
                 sub-databases through analysis and deletion of noisy
                 data in decomposing a database. Its development is
                 based on the concept of Simon and Ando's near-complete
                 decomposability that has been most explicitly used in
                 economic theory. In this approach, the process of
                 discovering concept clusters from databases is a
                 process based on incipient hypothesis generation and
                 refinement, and many kinds of learning methods, in
                 which the methods of data-driven and knowledge-driven
                 are included, are cooperatively used in multiple
                 learning phases, so that a more robust, general
                 discovery system can be developed.",
  keywords = "KNOWLEDGE DISCOVERY, KNOWLEDGE DISCOVERY IN DATABASES,
                 CONCEPTUAL CLUSTERING, NEAR-COMPLETE DECOMPOSABILITY,
                 MULTIPLE LEARNING PHASES, INTEGRATION",
}
A hierarchical model learning approach for refining and managing concept clusters discovered from databases, N. Zhong and S. Ohsuga
% The issue number is stored in `number` (standard styles ignore `issue`);
% the authors' postal addresses are stored in `affiliation`, since `address`
% means the publisher's city.
@Article{zhong.ea:hierarchical-model:96,
  author = "N. Zhong and S. Ohsuga",
  affiliation = "Yamaguchi Univ, Fac Engn, Dept Comp Sci \& Syst Engn,
                 2557 Tokiwadai, Ube, Yamaguchi 755, Japan; Waseda Univ,
                 Sch Sci \& Engn, Dept Informat \& Comp Sci, Shinjuku
                 Ku, Tokyo 169, Japan",
  title = "A hierarchical model learning approach for refining
                 and managing concept clusters discovered from
                 databases",
  journal = "Data \& Knowledge Engineering",
  year = "1996",
  volume = "20",
  number = "2",
  pages = "227--252",
  abstract = "The contents of most databases are ever-changing, and
                 erroneous data can be a significant problem in
                 real-world databases. Therefore, the process of
                 discovering knowledge from databases is a process based
                 on incipient hypothesis generation/evaluation and
                 refinement/management. Although many systems for
                 knowledge discovery in databases have been proposed,
                 most systems have not addressed the capabilities of
                 refining/managing the discovered knowledge. This paper
                 describes a hierarchical model learning approach for
                 refining/managing concept clusters discovered from
                 databases. This approach is the basic one for
                 developing HML (Hierarchical Model Learning), which is
                 one sub-system of our GLS (Global Learning Scheme)
                 discovery system and can be cooperatively used with
                 other sub-systems of GLS such as DBI (Decomposition
                 Based Induction). By means of HML, concept clusters
                 discovered from a database by DBI can be represented as
                 the Multi-Layer Logic formulae with hierarchical
                 models in a knowledge-base and can be easily
                 refined/managed according to data change in a database
                 and/or domain knowledge. HML is based on the model
                 representation of Multi-Layer Logic (MLL). Its key
                 feature is the quantitative evaluation for selecting
                 the best representation of the MLL formulae by using
                 cooperatively a criterion based on information theory
                 and domain knowledge. Experience with a prototype of
                 HML implemented by the knowledge-based system KAUS is
                 discussed.",
  keywords = "KNOWLEDGE DISCOVERY, INDUCTION, SYSTEMS, RULES,
                 KNOWLEDGE DISCOVERY IN DATABASES, MULTILAYER LOGIC,
                 MACHINE LEARNING, INFORMATION THEORY, HIERARCHICAL
                 MODELING, REFINEMENT, MANAGEMENT",
}
KDD Process Planning, Ning Zhong and Chunnian Liu and Yoshitsugu Kakemoto and Setsuo Ohsuga
@InProceedings{zhong.ea:kdd-process:97,
  author   = {Ning Zhong and Chunnian Liu and Yoshitsugu Kakemoto
              and Setsuo Ohsuga},
  title    = {{KDD} Process Planning},
  pages    = {291},
  crossref = {heckerman.ea:proceedings-third:97},
}
System for managing and refining structural characteristics discovered from databases, N. Zhong and S. Ohsuga
% The issue number is stored in `number` (standard styles ignore `issue`);
% the authors' postal addresses are stored in `affiliation`, since `address`
% means the publisher's city.
@Article{zhong.ea:system-managing:96,
  author = "N. Zhong and S. Ohsuga",
  affiliation = "Univ Tokyo, Adv Sci \& Technol Res Ctr, Meguro Ku,
                 4-6-1 Komaba, Tokyo 153, Japan; Waseda Univ, Dept
                 Informat \& Comp Sci, Shinjuku Ku, Tokyo 169, Japan",
  title = "System for managing and refining structural
                 characteristics discovered from databases",
  journal = "Knowledge-Based Systems",
  year = "1996",
  volume = "9",
  number = "4",
  pages = "267--279",
  abstract = "Systems that allow automatic knowledge discovery from
                 databases will play an increasingly important role in
                 building/sharing large scale knowledge bases. Although
                 many systems for knowledge discovery in databases have
                 been proposed, few of them have addressed the
                 capabilities of managing and refining the discovered
                 knowledge. In particular, the contents of most
                 databases are ever changing and erroneous data can be a
                 significant problem in real-world databases. Hence, the
                 process of discovering knowledge from databases is a
                 process based on incipient hypothesis
                 generation/evaluation and refinement/management. The
                 paper describes a system named IIBR (Inheritance
                 Inference Based Refinement) for managing and refining
                 structural characteristics discovered from databases.
                 Structural characteristics are a kind of important
                 regularity hidden in databases, and are denoted by
                 regression models for describing three kinds of
                 functional relations: the exact, strong and weak ones.
                 IIBR is one subsystem of the authors' GLS (Global
                 Learning Scheme) discovery system, and can be
                 cooperatively used with other subsystems of GLS such as
                 KOSI (Knowledge Oriented Statistic Inference). By means
                 of IIBR, the structural characteristics discovered by
                 KOSI can be added to a knowledge base as the deductive
                 rules and the sets of data for showing their errors,
                 and can be easily managed and refined according to data
                 change in a database. IIBR is based on inheritance
                 inference and error analysis, as well as the model
                 representation of knowledge, multiple worlds/levels,
                 and metareasoning in the knowledge-based system KAUS.
                 Experience with a prototype of IIBR implemented by KAUS
                 is discussed.",
  keywords = "KNOWLEDGE DISCOVERY, KNOWLEDGE DISCOVERY IN DATABASES,
                 INHERITANCE INFERENCE, ERROR ANALYSIS, DATA CHANGE,
                 KNOWLEDGE REPRESENTATION",
}
A method for computing all maximally general rules in attribute-value systems, W. Ziarko and N. Shan
% The issue number is stored in `number` (standard styles ignore `issue`);
% the authors' postal address is stored in `affiliation`, since `address`
% means the publisher's city.
@Article{ziarko.ea:method-computing:96,
  author = "W. Ziarko and N. Shan",
  affiliation = "Univ Regina, Dept Comp Sci, Regina, Sk S4S 0A2,
                 Canada",
  title = "A method for computing all maximally general rules in
                 attribute-value systems",
  journal = "Computational Intelligence",
  year = "1996",
  volume = "12",
  number = "2",
  pages = "223--234",
  abstract = "A method for finding all deterministic and maximally
                 general rules for a target classification is explained
                 in detail and illustrated with examples: Maximally
                 general rules are rules with minimal numbers of
                 conditions. The method has been developed within the
                 context of the rough sets model and is based on the
                 concepts of a decision matrix and a decision function.
                 The problem of finding all the rules is reduced to the
                 problem of computing prime implicants of a group of
                 associated Boolean expressions. The method is
                 particularly applicable to identifying all potentially
                 interesting deterministic rules in a knowledge
                 discovery system but can also be used to produce
                 possible rules or nondeterministic rules with decision
                 probabilities, by adapting the method to the
                 definitions of the variable precision rough sets
                 model.",
  keywords = "KNOWLEDGE DISCOVERY, MACHINE LEARNING, ROUGH SETS,
                 RULES",
}
Introduction to the special issue on rough sets and knowledge discovery, W. Ziarko
@Article{ziarko:introduction-to:95,
  author = "W. Ziarko",
  address = "Univ Regina, Dept Comp Sci, Regina, Sk S4S 0A2,
             Canada",
  title = "Introduction to the special issue on rough sets and
           knowledge discovery",
  journal = "Computational Intelligence",
  year = "1995",
  volume = "11",
  number = "2",
  pages = "223--226",
}
Some privacy issues in knowledge discovery - oecd personal privacy guidelines - response, W. Ziarko
@Article{ziarko:some-privacy:95,
  author = "W. Ziarko",
  address = "Univ Regina, Dept Comp Sci, Regina, Sk S4S 0A2,
             Canada",
  title = "Some privacy issues in knowledge discovery - {OECD}
           personal privacy guidelines - response",
  journal = "IEEE Expert-Intelligent Systems \& Their
             Applications",
  year = "1995",
  volume = "10",
  number = "2",
  pages = "59",
  keywords = "ethics, ethical",
}
Optimal Multiple Intervals Discretization of Continuous Attributes for Supervised Learning, D. A. Zighed and R. Rakotomalala and F. Feschet
@inproceedings{zighed.ea:optimal-multiple:97,
  author   = {D. A. Zighed and R. Rakotomalala and F. Feschet},
  title    = {Optimal Multiple Intervals Discretization of Continuous Attributes for Supervised Learning},
  pages    = {295},
  crossref = {heckerman.ea:proceedings-third:97},
}
A Dataset Decomposition Approach to Data Mining and Machine Discovery, Blaz Zupan and Marko Bohanec and Ivan Bratko and Bojan Cestnik
@inproceedings{zupan.ea:dataset-decomposition:97,
  author   = {Blaz Zupan and Marko Bohanec and Ivan Bratko and Bojan Cestnik},
  title    = {A Dataset Decomposition Approach to Data Mining and Machine Discovery},
  pages    = {299},
  crossref = {heckerman.ea:proceedings-third:97},
}
Automated Pattern Mining with a Scale Dimension, Jan M. Zytkow and Robert Zembowicz
@inproceedings{zytkow.ea:automated-pattern:96,
  author   = {Jan M. Zytkow and Robert Zembowicz},
  title    = {Automated Pattern Mining with a Scale Dimension},
  pages    = {158},
  crossref = {simoudis.ea:proceedings-second:96},
}
Interactive mining for regularities in Databases, Jan M. Zytkow and John Baker
@incollection{zytkow.ea:interactive-regularities:91,
  author    = {Jan M. Zytkow and John Baker},
  title     = {Interactive mining for regularities in Databases},
  editor    = {Gregory Piatetsky-Shapiro and William J. Frawley},
  booktitle = {Knowledge Discovery in Databases},
  pages     = {31--53},
  publisher = {{AAAI Press}},
  address   = {Menlo Park, California},
  year      = {1991},
}
Mining patterns at each scale in massive data, J. Zytkow and R. Zembowicz
@Article{zytkow.ea:patterns-at:96,
  author = "J. Zytkow and R. Zembowicz",
  title = "Mining patterns at each scale in massive data",
  journal = "Lecture Notes in Computer Science",
  volume = "1079",
  pages = "139",
  year = "1996",
  ISSN = "0302-9743",
  internal-note = "Only the first page is known; the source record
                   gave the range as 139--?? (end page missing).",
}
Combining many searches in the FAHRENHEIT discovery system, Jan M. Zytkow
@inproceedings{zytkow:combining-many:87,
  author    = {Jan M. Zytkow},
  title     = {Combining many searches in the {FAHRENHEIT} discovery system},
  booktitle = {Proceedings of the fourth international workshop on machine learning},
  pages     = {281--287},
  publisher = {Morgan Kaufmann},
  address   = {San Mateo, California},
  year      = {1987},
}
Knowledge = Concepts: A Harmful Equation, Jan M. Zytkow
@inproceedings{zytkow:concepts-harmful:97,
  author   = {Jan M. Zytkow},
  title    = {Knowledge = Concepts: {A} Harmful Equation},
  pages    = {104},
  crossref = {heckerman.ea:proceedings-third:97},
}