general.bib
% Journal article (IEEE TDSC, 2004). Its annotation cross-references
% the entry laprie04 as the French version of the same material.
% `where` is a non-standard field holding the path of the local PDF.
@article{avizienis04c,
  author  = {A. Avizienis and J.-C. Laprie and B. Randell and C. Landwehr},
  title   = {Basic Concepts and Taxonomy of Dependable and Secure
             Computing},
  journal = {IEEE Trans.\ on Dependable and Secure Computing},
  volume  = {1},
  number  = {1},
  pages   = {11--33},
  month   = jan,
  year    = {2004},
  where   = {FaultTolerance/avizienis-tdsc04.pdf},
  annote  = {See~\cite{laprie04} for a French version.}
}
% Conference paper (CASES'03) comparing five fail-silent SoC
% architectures; the annotation summarises the comparison.
% `where` is a non-standard field holding the path of the local PDF.
@inproceedings{baleani03,
  author    = {M. Baleani and A. Ferrari and L. Mangeruca and
               M. Peri and S. Pezzini and
               A. Sangiovanni-Vincentelli},
  title     = {Fault-Tolerant Platforms for Automotive
               Safety-Critical Applications},
  booktitle = {International Conference on Compilers, Architectures
               and Synthesis for Embedded Systems, CASES'03},
  publisher = {ACM},
  address   = {San Jose (CA), USA},
  month     = nov,
  year      = {2003},
  where     = {FaultTolerance/baleani-cases03.pdf},
  annote    = {The authors show that fail-silent SoCs can be
               realized with limited area overhead and virtually no
               performance penalty. Five architectures are
               compared: (1) a single CPU with no error correcting
               coding techniques (ECC); (2) a lock-step dual
               processor architecture with ECC for bus and memory
               access; (3) a shared memory loosely synchronized
               dual processor architecture with ECC for memory
               access; (4) a triple modular redundant architecture
               with ECC for bus and memory access; (5) a shared
               memory dual lock-step architecture (i.e. with 4
               processors) with ECC for bus and memory
               access. For low range X-by-wire automotive systems,
               architectures (2) or (3) are the best solutions. For
               high range systems, architecture (5) is the best
               solution.}
}
% Journal article (Technique et Science Informatique, 2001) on
% aspect-oriented programming.
% The accent in the first author's surname is written as a braced
% BibTeX "special character" ({\^a}) so that sorting and label
% generation treat it as a single letter.
% `where` is a non-standard field holding the path of the local PDF.
@article{bouraqadi01,
  author  = {N. Bouraqadi-Sa{\^a}dani and T. Ledoux},
  title   = {Le Point sur la Programmation par Aspects},
  journal = {Technique et Science Informatique},
  key     = {aop},
  volume  = {20},
  number  = {4},
  pages   = {505--528},
  year    = {2001},
  where   = {Aspects/bouraqadi-tsi01.pdf}
}
% Journal article introducing basic concepts for fault-tolerant
% distributed systems; the annotation summarises its content.
% NOTE(review): volume 34, number 2 of this journal presumably dates
% from February 1991, while `year` says 1993 (the annotation mentions
% a 1993 revision). Year left unchanged -- confirm against the PDF.
% `where` is a non-standard field holding the path of the local PDF.
@article{cristian93,
  author  = {F. Cristian},
  title   = {Understanding Fault-Tolerant Distributed Systems},
  journal = {Communications of the ACM},
  volume  = {34},
  number  = {2},
  pages   = {56--78},
  month   = feb,
  year    = {1993},
  where   = {FaultTolerance/cristian-cacm93.pdf},
  annote  = {The paper proposes basic concepts which are used to
             explain hardware and software architecture for
             fault-tolerant distributed systems. For example, the
             concepts of server, depends on relation, failure,
             failure semantics. The general issues in hardware
             and software architectures are presented. For
             software fault-tolerance, the issues related to
             synchronisation (close or loose) are discussed. The
             paper was revised in 1993, but the references are
             before 1990. Some examples of industrial
             fault-tolerant architectures are given.}
}
% Survey article (ACM Computing Surveys, 1999) on fault tolerance in
% asynchronous distributed systems; the annotation summarises it.
% The ampersand in "S\&L" is escaped so the annotation can be typeset
% by LaTeX without an alignment-character error.
% `where` is a non-standard field holding the path of the local PDF.
@article{gartner99,
  author  = {F. G{\"a}rtner},
  title   = {Fundamentals of Fault-Tolerant Distributed Computing
             in Asynchronous Environments},
  journal = {ACM Computing Surveys},
  volume  = {31},
  number  = {1},
  pages   = {1--26},
  month   = mar,
  year    = {1999},
  where   = {FaultTolerance/gartner-csur.pdf},
  annote  = {AN EXCELLENT ARTICLE. The author defines four basic
             forms of fault tolerance, depending on whether the
             program satisfies both its safety (S) and liveness
             (L) properties in the presence of faults (from a
             fault class F): masking (S\&L), fail safe (S),
             non-masking (L), and none. Then he shows that a
             system cannot be fault tolerant without some form of
             redundancy. A consequence of this point of view is
             that fault tolerance requires fault detection
             (safety property), while fault masking requires
             fault correction (liveness property). The author
             also surveys different models of computation: the
             synchronous model where there are real-time bounds
             on message transmission and process response times,
             and the asynchronous model where there are no such
             bounds. The asynchronous model is more realistic but
             leads to many impossibility results regarding fault
             detection~\cite{fisher85,chandra96}.}
}
% Workshop paper (CODES'99). The annotation relates it to the
% adequation heuristic of the entries syndex-aaa and syndex-en,
% which are not defined in this part of the file.
@inproceedings{grandpierre99,
  author    = {T. Grandpierre and C. Lavarenne and Y. Sorel},
  title     = {Optimized Rapid Prototyping for Real-Time Embedded
               Heterogeneous Multiprocessors},
  booktitle = {7th International Workshop on Hardware/Software
               Co-Design, CODES'99},
  publisher = {ACM},
  address   = {Rome, Italy},
  month     = may,
  year      = {1999},
  annote    = {Heterogeneous extension, and parallel/diffusing
               routing extension, of the adequation heuristic
               described in~\cite{syndex-aaa} and
               in~\cite{syndex-en}. Some previous errors are
               fixed.}
}
% Book on fault tolerance in distributed systems; the annotation
% relates it to the concepts of the entry laprie92, which is not
% defined in this part of the file.
@book{jalote94,
  author    = {P. Jalote},
  title     = {Fault-Tolerance in Distributed Systems},
  publisher = {Prentice-Hall},
  address   = {Englewood Cliffs, New Jersey},
  year      = {1994},
  annote    = {Seems to be the most recent and complete book (in
               English) on fault-tolerance. It is based on concepts
               given in~\cite{laprie92} but contains more technical
               details.}
}
% Conference paper (ECOOP'97) on aspect-oriented programming.
% Accents in the address use the braced {\"a} form, consistent with
% the other accented names in this file.
% `where` is a non-standard field holding the path of the local PDF.
@inproceedings{kiczales97,
  author    = {G. Kiczales and J. Lamping and A. Mendhekar and
               C. Maeda and C. Videira Lopes and J.-M. Loingtier
               and J. Irwin},
  title     = {Aspect-Oriented Programming},
  booktitle = {European Conference on Object-Oriented Programming,
               ECOOP'97},
  key       = {aop},
  series    = {LNCS},
  volume    = {1241},
  pages     = {220--242},
  publisher = {Springer-Verlag},
  address   = {Jyv{\"a}skyl{\"a}, Finland},
  month     = jun,
  year      = {1997},
  where     = {Aspects/kiczales-ecoop97.pdf}
}
% Technical report (LAAS-CNRS, 2004). Its annotation cross-references
% the entry avizienis04c as the English version.
% NOTE(review): the `type` field relies on a \Techreport macro that is
% not defined in this file -- presumably supplied by the citing
% document's preamble; verify.
% `where` is a non-standard field holding the path of the local PDF.
@techreport{laprie04,
  author      = {J.-C. Laprie},
  title       = {S{\^u}ret{\'e} de fonctionnement informatique~:
                 concepts de base et terminologie},
  type        = {{\Techreport}},
  institution = {LAAS-CNRS},
  address     = {Toulouse, France},
  year        = {2004},
  where       = {Rapports/Postscript/FaultTolerance/laprie-rr04.pdf},
  annote      = {See~\cite{avizienis04c} for an English version.}
}
% Conference paper (FTCS-22, 1992), also available as research report
% LAAS 91462; the annotation summarises its definitions and case
% studies.
% The entry originally carried two `where` fields. BibTeX keeps only
% the first occurrence of a repeated field and warns about the other,
% so the path of the research-report PDF now lives in its own
% non-standard field `where-report`; `where` is unchanged.
% NOTE(review): the annotation uses the macros \NN, \ra and \text,
% none of which are defined in this file -- presumably provided by
% the citing document; verify.
@inproceedings{powell92,
  author       = {D. Powell},
  title        = {Failure Mode Assumption and Assumption Coverage},
  booktitle    = {International Symposium on Fault-Tolerant Computing,
                  FTCS-22},
  publisher    = {IEEE},
  address      = {Boston (MA), USA},
  pages        = {386--395},
  month        = jul,
  year         = {1992},
  note         = {Research report LAAS 91462},
  where        = {FaultTolerance/powell-ftcs92.pdf},
  where-report = {FaultTolerance/powell-laas91462.pdf},
  annote       = {This article introduces two new concepts: failure
                  mode assumption and assumption coverage. The service
                  delivered by a system is defined as a sequence of
                  service items $(s_i)_{i\in\NN}$, each defined as a
                  pair $(vs_i,ts_i)$, where $vs_i$ and $ts_i$ are
                  respectively the value and the time
                  of~$s_i$. Moreover, $SV_i$ and $ST_i$ are
                  respectively the sets of possible values and times
                  for~$s_i$. Then, the classes of incorrect services
                  are defined: arbitrary value errors, non-code value
                  errors, arbitrary timing errors, early timing
                  errors, late timing errors, infinitely late timing
                  errors (i.e., omission errors), and impromptu
                  errors. Then, failure mode assumptions are defined
                  as assertions: for instance, $\forall i, (ts_i \in
                  ST_i) \vee (ts_i = +\infty)$ stands for omission
                  errors. Logical implications between these
                  assertions allow the author to derive partial order
                  relationships between the failure mode
                  assumptions. This partial order $\ra$ implies that
                  if a system's fault-tolerance mechanisms will
                  correctly process errors according to assertion~$Y$,
                  then the same mechanisms will be able to process
                  errors according to assertion~$X$ if $X \ra Y$.
                  Then, the failure mode assumption coverage is
                  defined as the probability $p_X$ that the assertion
                  $X$ defining the assumed behavior of a component
                  proves to be true in practice, conditioned on the
                  fact that the component has failed: $p_X =
                  Pr\{X=true | \text{component failed}\}$. In
                  particular, the coverage of arbitrary timing/value
                  errors is equal to~1, while the coverage of a failed
                  component causing no error at all is~0. Similarly,
                  the coverage of the mechanisms of a fault-tolerant
                  system designed to process errors according to
                  assumption~$X$ is $Pr\{\text{correct error
                  processing} | X=true\}$. Hence, the overall coverage
                  is the product $Pr\{\text{correct error processing}
                  | X=true\} \times Pr\{X=true | \text{component
                  failed}\}$.
                  Finally, two case studies are presented: a
                  life-critical application without maintenance (a
                  fly-by-wire civil flight control, for which the
                  reliability should be greater than
                  $1-10^{-9}=0.999999999$ over 10 hours), and a
                  money-critical application with maintenance. In both
                  cases, several failure modes are considered for the
                  components, and the dependability of the overall
                  system is analyzed thanks to the failure mode
                  assumption coverage.}
}
% Journal article (1987) on supervisory control of discrete event
% processes.
% Initials are separated by a space ("P. J.", "W. M.") so that BibTeX
% parses each one as its own name token; glued initials such as
% "P.J." are read as a single token and abbreviating styles then
% drop the second initial.
% `where` is a non-standard field holding the path of the local PDF.
@article{ramadge87,
  author  = {P. J. Ramadge and W. M. Wonham},
  title   = {Supervisory Control of a Class of Discrete Event
             Processes},
  key     = {dcs},
  journal = {SIAM J.\ Control Optimization},
  volume  = {25},
  number  = {1},
  pages   = {206--230},
  month   = jan,
  year    = {1987},
  where   = {ControllerSynthesis/ramadge-sjco87.pdf}
}
% Journal article (1994), also issued as research report CSL-93-01;
% a survey and taxonomy of critical system properties.
% NOTE(review): journal name corrected from "Systems Safety" to
% "System Safety" -- confirm against the publisher's listing.
@article{rushby94,
  author  = {J. Rushby},
  title   = {Critical System Properties: Survey and Taxonomy},
  journal = {Reliability Engineering and System Safety},
  volume  = {43},
  number  = {2},
  pages   = {189--219},
  year    = {1994},
  note    = {Research report CSL-93-01},
  annote  = {It is a survey of the methods employed in four
             approaches used for the treatment of critical
             systems: dependability, safety, security, and
             real-time. The application of formal methods to
             these domains is also studied and a taxonomy of
             these approaches from the point of view of their
             properties is given. Well written, with a lot of
             interesting references.}
}
This file has been generated by
bibtex2html 1.69