AUTHOR = {A. Avizienis and J.-C. Laprie and B. Randell and
                  C. Landwehr},
  TITLE = {Basic Concepts and Taxonomy of Dependable and Secure
                  Computing},
  JOURNAL = {IEEE Trans.\ on Dependable and Secure Computing},
  YEAR = 2004,
  VOLUME = 1,
  NUMBER = 1,
  PAGES = {11--33},
  WHERE = {FaultTolerance/avizienis-tdsc04.pdf},
  ANNOTE = {See~\cite{laprie04} for a French version.}
  AUTHOR = {M. Baleani and A. Ferrari and L. Mangeruca and
                  M. Peri and S. Pezzini and
                  A. Sangiovanni-Vincentelli},
  TITLE = {Fault-Tolerant Platforms for Automotive
                  Safety-Critical Applications},
  BOOKTITLE = {International Conference on Compilers, Architectures
                  and Synthesis for Embedded Systems, CASES'03},
  YEAR = 2003,
  ADDRESS = {San Jose (CA), USA},
  WHERE = {FaultTolerance/baleani-cases03.pdf},
  ANNOTE = {The authors show that fail-silent SoCs can be
                  realized with limited area overhead and virtually no
                  performance penalty. Five architectures are
                  compared: (1) a single CPU with no error correcting
                  coding techniques (ECC); (2) a lock-step dual
                  processor architecture with ECC for bus and memory
                  access; (3) a shared memory loosely synchronized
                  dual processor architecture with ECC for memory
                  access; (4) a triple modular redundant architecture
                  with ECC for bus and memory access; (5) a shared
                  memory dual lock-step architecture (i.e. with 4
                  processors) with ECC for bus and memory
                  access. For low range X-by-wire automotive systems,
                  architectures (2) or (3) are the best solutions. For
                  high range systems, architecture (5) is the best
  AUTHOR = {N. Bouraqadi-Sa\^{a}dani and T. Ledoux},
  TITLE = {Le Point sur la Programmation par Aspects},
  JOURNAL = {Technique et Science Informatique},
  KEY = {aop},
  YEAR = 2001,
  VOLUME = 20,
  NUMBER = 4,
  PAGES = {505--528},
  WHERE = {Aspects/bouraqadi-tsi01.pdf}
  AUTHOR = {F. Cristian},
  TITLE = {Understanding Fault-Tolerant Distributed Systems},
  JOURNAL = {Communications of the ACM},
  YEAR = 1993,
  VOLUME = 34,
  NUMBER = 2,
  PAGES = {56--78},
  WHERE = {FaultTolerance/cristian-cacm93.pdf},
  ANNOTE = {The paper proposes basic concepts which are used to
                  explain hardware and software architecture for
                  fault-tolerant distributed systems. For example, the
                  concepts of server, depends on relation, failure,
                  failure semantics. The general issues in hardware
                  and software architectures are presented. For
                  software fault-tolerance, the issues related to
                  synchronisation (close or loose) are discussed. The
                  paper was revised in 1993, but the references are
                  before 1990. Some examples of industrial
                  fault-tolerant architectures are given.}
  AUTHOR = {F. G{\"a}rtner},
  TITLE = {Fundamentals of Fault-Tolerant Distributed Computing
                  in Asynchronous Environments},
  JOURNAL = {ACM Computing Surveys},
  YEAR = 1999,
  VOLUME = 31,
  NUMBER = 1,
  PAGES = {1--26},
  WHERE = {FaultTolerance/gartner-csur.pdf},
  ANNOTE = {AN EXCELLENT ARTICLE. The author defines four basic
                  forms of fault tolerance, depending on whether the
                  program satisfies both its safety (S) and liveness
                  (L) properties in the presence of faults (from a
                  fault class F): masking (S\&L), fail safe (S),
                  non-masking (L), and none. Then he shows that a
                  system cannot be fault tolerant without some form of
                  redundancy. A consequence of this point of view is
                  that fault tolerance requires fault detection
                  (safety property), while fault masking requires
                  fault correction (liveness property). The author
                  also surveys different models of computation: the
                  synchronous model where there are real-time bounds
                  on message transmission and process response times,
                  and the asynchronous model where there are no such
                  bounds. The asynchronous model is more realistic but
                  leads to many impossibility results regarding fault
  AUTHOR = {T. Grandpierre and C. Lavarenne and Y. Sorel},
  TITLE = {Optimized Rapid Prototyping for Real-Time Embedded
                  Heterogeneous Multiprocessors},
  BOOKTITLE = {7th International Workshop on Hardware/Software
                  Co-Design, CODES'99},
  ADDRESS = {Rome, Italy},
  YEAR = 1999,
  ANNOTE = {Heterogeneous extension, and parallel/diffusing
                  routing extension, of the adequation heuristic
                  described in~\cite{syndex-aaa} and
                  in~\cite{syndex-en}. Some previous errors are
  AUTHOR = {P. Jalote},
  TITLE = {Fault-Tolerance in Distributed Systems},
  PUBLISHER = {Prentice-Hall},
  YEAR = 1994,
  ADDRESS = {Englewood Cliffs, New Jersey},
  ANNOTE = {Seems to be the most recent and complete book (in
                  English) on fault-tolerance. It is based on concepts
                  given in~\cite{laprie92} but contains more technical
  AUTHOR = {G. Kiczales and J. Lamping and A. Mendhekar and
                  C. Maeda and C. Videira Lopes and J.-M. Loingtier
                  and J. Irwin},
  TITLE = {Aspect-Oriented Programming},
  BOOKTITLE = {European Conference on Object-Oriented Programming,
  KEY = {aop},
  PAGES = {220--242},
  YEAR = 1997,
  VOLUME = 1241,
  ADDRESS = {Jyv\"askyl\"a, Finland},
  PUBLISHER = {Springer-Verlag},
  WHERE = {Aspects/kiczales-ecoop97.pdf}
  AUTHOR = {J.-C. Laprie},
  TITLE = {S{\^u}ret{\'e} de fonctionnement informatique~:
                  concepts de base et terminologie},
  YEAR = 2004,
  TYPE = {{\Techreport}},
  ADDRESS = {Toulouse, France},
  WHERE = {Rapports/Postscript/FaultTolerance/laprie-rr04.pdf},
  ANNOTE = {See~\cite{avizienis04c} for an English version.}
  AUTHOR = {D. Powell},
  TITLE = {Failure Mode Assumption and Assumption Coverage},
  BOOKTITLE = {International Symposium on Fault-Tolerant Computing,
  YEAR = 1992,
  ADDRESS = {Boston (MA), USA},
  PAGES = {386--395},
  NOTE = {Research report LAAS 91462},
  WHERE = {FaultTolerance/powell-ftcs92.pdf},
  WHERE = {FaultTolerance/powell-laas91462.pdf},
  ANNOTE = {This article introduces two new concepts: failure
                  mode assumption and assumption coverage. The service
                  delivered by a system is defined as a sequence of
                  service items $(s_i)_{i\in\NN}$, each defined as a
                  pair $(vs_i,ts_i)$, where $vs_i$ and $ts_i$ are
                  respectively the value and the time
                  of~$s_i$. Moreover, $SV_i$ and $ST_i$ are
                  respectively the sets of possible values and times
                  for~$s_i$. Then, the classes of incorrect services
                  are defined: arbitrary value errors, non-code value
                  errors, arbitrary timing errors, early timing
                  errors, late timing errors, infinitely late timing
                  errors (i.e., omission errors), and impromptu
                  errors. Then, failure mode assumptions are defined
                  as assertions: for instance, $\forall i, (ts_i \in
                  ST_i) \vee (ts_i = +\infty)$ stands for omission
                  errors. Logical implications between these
                  assertions allow the author to derive partial order
                  relationships between the failure mode
                  assumptions. This partial order $\ra$ implies that
                  if a system's fault-tolerance mechanisms will
                  correctly process errors according to assertion~$Y$,
                  then the same mechanisms will be able to process
                  errors according to assertion~$X$ if $X \ra Y$.

                  Then, the failure mode assumption coverage is
                  defined as the probability $p_X$ that the assertion
                  $X$ defining the assumed behavior of a component
                  proves to be true in practice, conditioned on the
                  fact that the component has failed: $p_X =
                  Pr\{X=true | \text{component failed}\}$. In
                  particular, the coverage of arbitrary timing/value
                  errors is equal to~1, while the coverage of a failed
                  component causing no error at all is~0. Similarly,
                  the coverage of the mechanisms of a fault-tolerant
                  system designed to process errors according to
                  assumption~$X$ is $Pr\{\text{correct error
                  processing} | X=true\}$. Hence, the overall coverage
                  is the product $Pr\{\text{correct error processing}
                  | X=true\} \times Pr\{X=true | \text{component
                  failed}\}$.

                  Finally, two case studies are presented: a
                  life-critical application without maintenance (a
                  fly-by-wire civil flight control, for which the
                  reliability should be greater than
                  $1-10^{-9}=0.999999999$ over 10 hours), and a
                  money-critical application with maintenance. In both
                  cases, several failure modes are considered for the
                  components, and the dependability of the overall
                  system is analyzed thanks to the failure mode
                  assumption coverage.}
  AUTHOR = {P.J. Ramadge and W.M. Wonham},
  TITLE = {Supervisory Control of a Class of Discrete Event
                  Processes},
  KEY = {dcs},
  JOURNAL = {SIAM J.\ Control Optimization},
  YEAR = 1987,
  VOLUME = 25,
  NUMBER = 1,
  PAGES = {206--230},
  WHERE = {ControllerSynthesis/ramadge-sjco87.pdf}
  AUTHOR = {J. Rushby},
  TITLE = {Critical System Properties: Survey and Taxonomy},
  JOURNAL = {Reliability Engineering and Systems Safety},
  YEAR = 1994,
  VOLUME = 43,
  NUMBER = 2,
  PAGES = {189--219},
  NOTE = {Research report CSL-93-01},
  ANNOTE = {It is a survey of the methods employed in four
                  approaches used for the treatment of critical
                  systems: dependability, safety, security, and
                  real-time. The application of formal methods to
                  these domains is also studied and a taxonomy of
                  these approaches from the point of view of their
                  properties is given. Well written, with a lot of
                  interesting references.}

This file has been generated by bibtex2html 1.69