% LaTeX source for Further Exercises on Bayesian Statistics

\documentclass[oneside]{book}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{amsbsy}
\usepackage{makeidx}
\usepackage{epsf}

\setcounter{secnumdepth}{1}

\setcounter{tocdepth}{1}

% Set up environment for exercises at ends of chapters
\newcounter{qno}
\newcommand{\startqs}{\setcounter{qno}{0}\vspace{-1.5\baselineskip}}
\newcommand{\nextq}
    {\vspace{1.5\baselineskip}\noindent\addtocounter{qno}{1}\arabic{qno}.\quad}

% Allow for blank lines
\newcommand{\blankline}{\vspace{\baselineskip}\noindent}

% Define digitwidth and dotwidth (TeXbook p. 241)
\newdimen\digitwidth
\setbox0=\hbox{\rm0}
\digitwidth=\wd0
\newdimen\dotwidth
\setbox0=\hbox{\rm.}
\dotwidth=\wd0

% Notation for vectors, matrices, estimates, random variables and sample means
\newcommand{\vect}{\boldsymbol}
\newcommand{\matr}{\boldsymbol}
\newcommand{\est}{\widehat}
\newcommand{\random}{\widetilde}
\newcommand{\mean}{\overline}

% Notation for dots in subscripts
\newcommand {\bdot}{\hbox{\Huge .}}
\newcommand {\dotdot}{{\hbox{\Huge .}\kern-0.1667em\hbox{\Huge .}}}
\newcommand {\onedot}{1\kern-0.1667em\bdot}
\newcommand {\twodot}{2\kern-0.1667em\bdot}
\newcommand {\idot}{i\kern-0.1667em\bdot}
\newcommand {\jdot}{j\kern-0.1667em\bdot}
\newcommand {\mdot}{m\kern-0.1667em\bdot}
\newcommand {\dotj}{\kern-0.1667em\bdot\kern-0.1667em j}

% Define sech, arc sin and arc cos
\newcommand{\sech}{\operatorname{sech}}
\renewcommand{\arcsin}{\operatorname{arc\,sin}}
\renewcommand{\arccos}{\operatorname{arc\,cos}}

% Define Probability, Expectation, Variance, Covariance, Median, Mode
\renewcommand{\Pr}{\mbox{$\mathsf P$}}
\newcommand{\E}{\mbox{$\mathsf E$}}
\newcommand{\Var}{\mbox{$\mathcal V$}}
\newcommand{\Cov}{\mbox{$\mathcal C$}}
\newcommand{\median}{\mbox{median\,}}
\newcommand{\mode}{\mbox{mode\,}}

% Define notation for evidence
\newcommand{\Ev}{\mbox{Ev}}

% Define small common fractions for use in display formulae
\newcommand{\half}{\mbox{$\frac{1}{2}$}}
\newcommand{\smallhalf}{\mbox{\small$\frac{1}{2}$}}
\newcommand{\quarter}{\mbox{$\frac{1}{4}$}}
\newcommand{\threequarters}{\mbox{$\frac{3}{4}$}}
\newcommand{\third}{\mbox{$\frac{1}{3}$}}
\newcommand{\twothirds}{\mbox{$\frac{2}{3}$}}
\newcommand{\ninth}{\mbox{$\frac{1}{9}$}}

% Alternative notation for fractions (TeXbook, exercise 11.6)
\newcommand{\slopefrac}[2]{\leavevmode\kern.1em
\raise .5ex\hbox{\the\scriptfont0 #1}\kern-.1em
/\kern-.15em\lower .25ex\hbox{\the\scriptfont0 #2}}

% Notation for beta function
\newcommand{\Betafn}{\mbox{B}}

% Define names of distributions
\newcommand{\N}{\mbox{N}}              % A.1
\newcommand{\G}{\mbox{G}}              % A.4
\newcommand{\Ex}{\mbox{E}}             % A.4
\renewcommand{\t}{\mbox{t}}            % A.8
\newcommand{\Be}{\mbox{Be}}            % A.10
\newcommand{\B}{\mbox{B}}              % A.11
\renewcommand{\P}{\mbox{P}}            % A.12
\newcommand{\NB}{\mbox{NB}}            % A.13
\renewcommand{\H}{\mbox{H}}            % A.14
\newcommand{\U}{\mbox{U}}              % A.15
\newcommand{\UD}{\mbox{UD}}            % A.15
\newcommand{\Pa}{\mbox{Pa}}            % A.16
\newcommand{\Pabb}{\mbox{Pabb}}        % A.16
\newcommand{\M}{\mbox{M}}              % A.17
\newcommand{\BF}{\mbox{BF}}            % A.18
\newcommand{\F}{\mbox{F}}              % A.19
\newcommand{\z}{\mbox{z}}              % A.20
\newcommand{\C}{\mbox{C}}              % A.21

% Define some common bold symbols
\newcommand{\bbeta}{\mbox{$\boldsymbol\beta$}}
\newcommand{\beeta}{\mbox{$\boldsymbol\eta$}}
\newcommand{\btheta}{\mbox{$\boldsymbol\theta$}}
\newcommand{\bkappa}{\mbox{$\boldsymbol\kappa$}}
\newcommand{\blambda}{\mbox{$\boldsymbol\lambda$}}
\newcommand{\bmu}{\mbox{$\boldsymbol\mu$}}
\newcommand{\btau}{\mbox{$\boldsymbol\tau$}}
\newcommand{\bzero}{\mbox{$\boldsymbol0$}}

% Further bold symbols for use in connection with hierarchical models
\newcommand {\bpiem}{\mbox{\boldmath $\pi^{EM}$}}
\newcommand {\bhtheta}{\mbox{\boldmath $\est\theta$}}
\newcommand {\bhthetao}{\mbox{\boldmath $\est\theta^{\mbox{\scriptsize\it0}}$}}
\newcommand {\bhthetajs}{\mbox{\boldmath $\est\theta^{JS}$}}
\newcommand {\bhthetajsplus}{\mbox{\boldmath $\est\theta^{JS^{{}_+}}$}}
\newcommand {\bhthetaem}{\mbox{\boldmath $\est\theta^{EM}$}}
\newcommand {\bhthetab}{\mbox{\boldmath $\est\theta^{B}$}}
\newcommand {\bhthetaeb}{\mbox{\boldmath $\est\theta^{EB}$}}
\newcommand {\thetabar}{\mbox{$\mean\theta$}}
\newcommand {\bphi}{\mbox{\boldmath $\phi$}}
\newcommand {\BPhi}{\mbox{\boldmath $\Phi$}}
\newcommand {\bpsi}{\mbox{\boldmath $\psi$}}
\newcommand {\BPsi}{\mbox{\boldmath $\Psi$}}
\newcommand {\BSigma}{\mbox{\boldmath $\Sigma$}}

% Define transpose for matrix theory
\newcommand{\transpose}{\mbox{${}^{\text{T}}$}}

% Define differentials with roman d and thin space before
\renewcommand{\d}{\mbox{d}}
\newcommand{\dF}{\,\mbox{\d$F$}}
\newcommand{\dt}{\,\mbox{\d$t$}}
\newcommand{\du}{\,\mbox{\d$u$}}
\newcommand{\dU}{\,\mbox{\d$U$}}
\newcommand{\dx}{\,\mbox{\d$x$}}
\newcommand{\dy}{\,\mbox{\d$y$}}
\newcommand{\dz}{\,\mbox{\d$z$}}
\newcommand{\dgamma}{\,\mbox{\d$\gamma$}}
\newcommand{\dzeta}{\,\mbox{\d$\zeta$}}
\newcommand{\deta}{\,\mbox{\d$\eta$}}
\newcommand{\dtheta}{\,\mbox{\d$\theta$}}
\newcommand{\dbtheta}{\,\mbox{\d$\boldsymbol\theta$}}
\newcommand{\dkappa}{\,\mbox{\d$\kappa$}}
\newcommand{\dlambda}{\,\mbox{\d$\lambda$}}
\newcommand{\dLambda}{\,\mbox{\d$\Lambda$}}
\newcommand{\dmu}{\,\mbox{\d$\mu$}}
\newcommand{\dbmu}{\,\mbox{\d$\bmu$}}
\newcommand{\drho}{\,\mbox{\d$\rho$}}
\newcommand{\dpi}{\,\mbox{\d$\pi$}}
\newcommand{\dxi}{\,\mbox{\d$\xi$}}
\newcommand{\dphi}{\,\mbox{\d$\phi$}}
\newcommand{\dpsi}{\,\mbox{\d$\psi$}}
\newcommand{\domega}{\,\mbox{\d$\omega$}}

% Hyp for hypothesis
\newcommand{\Hyp}{\mbox{H}}

% Blackboard bold Z for the integers
\newcommand{\Z}{\mbox{$\mathbb Z$}}

% Script X for a set of possible observations
\newcommand{\X}{\mbox{$\mathcal X$}}

% EM, GEM, E-step and M-step for the EM algorithm
\newcommand{\EM}{\mbox{\textit{EM}\ }}
\newcommand{\GEM}{\mbox{\textit{GEM}\ }}
\newcommand{\Estep}{\mbox{\textit{E}-step\ }}
\newcommand{\Mstep}{\mbox{\textit{M}-step\ }}

% Omit the word Chapter at the start of chapters
\renewcommand{\chaptername}{}

\begin{document}

\pagestyle{empty}

\appendix

\setcounter{chapter}{6}

\medskip

\section{Further Exercises on Chapter \arabic{section}}

\startqs

\nextq Suppose a population consists of 100 individuals, of whom 10 per cent 
have high blood pressure.  What is the probability that at most two out
of eight individuals chosen at random from this population have high
blood pressure?
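
As a numerical check, the required probability is easy to compute in
Python (a sketch assuming \texttt{scipy} is available; sampling without
replacement from a population of 100 gives a hypergeometric
distribution, with the binomial as the with-replacement approximation):
\begin{verbatim}
from scipy import stats

# Population of 100 with 10 affected; 8 individuals drawn without replacement.
print(stats.hypergeom.cdf(2, 100, 10, 8))  # P(at most 2 affected), exact
print(stats.binom.cdf(2, 8, 0.1))          # binomial approximation
\end{verbatim}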

\nextq Consider a collection of families in which the sexes of
successive children are independent, both sexes being equally likely.
Show that in families of exactly three children, the event that the
family has children of both sexes is independent of the event that there
is at most one girl, but that this independence does not hold among
families of exactly two children or among families of exactly four
children. 

\nextq It is known that 5\% of all men and 0.25\% of all women are
colour-blind.  A person chosen by chance suffers from colour-blindness.
What is the probability that this is a man?  (It is assumed that there
is an equal number of men and women).

\nextq At a factory where bolts are produced, machines $A$, $B$, $C$
produce respectively 25\%, 35\% and 40\% of all bolts.  Of the output of
these machines, defects constitute 5\%, 4\% and 2\% respectively.  A
bolt selected at random from the production turned out to be defective.
What is the probability that it was produced by machine $A$?  machine
$B$?  machine $C$?
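
This is a direct application of Bayes' theorem; a minimal Python sketch
(no libraries needed) is:
\begin{verbatim}
# Posterior probability of each machine given a defective bolt.
prior  = {'A': 0.25, 'B': 0.35, 'C': 0.40}
defect = {'A': 0.05, 'B': 0.04, 'C': 0.02}
joint = {m: prior[m] * defect[m] for m in prior}
total = sum(joint.values())                  # overall defect rate
print({m: joint[m] / total for m in joint})  # posterior probabilities
\end{verbatim}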

\nextq Suppose a red die and a blue die are tossed.  Let $x$ be the sum
of the number showing on the red die and three times the number showing
on the blue die.  Find the density function and the distribution
function of $x$.
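
Since there are only 36 equally likely outcomes, the density and
distribution function can be found by enumeration, as in this Python
sketch:
\begin{verbatim}
from collections import Counter
from fractions import Fraction

# x = (red die) + 3 * (blue die) over the 36 equally likely outcomes.
counts = Counter(r + 3*b for r in range(1, 7) for b in range(1, 7))
acc = Fraction(0)
for x in sorted(counts):
    p = Fraction(counts[x], 36)
    acc += p
    print(x, p, acc)   # value, density, distribution function
\end{verbatim}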

\nextq Show that the probability density function
\[ p(x)=\binom{n+x-1}{x}\pi^n(1-\pi)^x \]
of the negative binomial distribution $\NB(n,\pi)$ of index $n$ and
parameter $\pi$ (as defined in Section A.13, page 284 of the book) tends
to that of the Poisson distribution $\P(\lambda)$ of mean $\lambda$ as
$n\to\infty$ while $\pi\to1$ in such a way that $n(1-\pi)=\lambda$ where
$\lambda$ is constant.

\nextq Suppose that $i$ and $j$ have independent binomial distributions
with the same parameter $\pi$ but different parameters $m$ and $n$, so
that $i\sim\B(m,\pi)$ and $j\sim\B(n,\pi)$, and let $k=i+j$.
\begin{description}
  \item[\quad(a)] Show that $k$ has a binomial distribution which also
    has parameter $\pi$ but index $m+n$.
  \item[\quad(b)] Find the distribution of $i$ conditional on the value
    of $k$.
\end{description}

\nextq Find expressions for (a) the density of $u=x^3$ and (b) the density 
of $v=x^4$ in terms of that of $x$, in both the continuous and discrete 
cases and find the resulting densities in the case where $x$ has a 
standard normal density.  What is the most important difference between 
the two examples?

\nextq Suppose that $x_1$, $x_2$ and $x_3$ are independent and all have
the same continuous distribution with density $p(x)$ and distribution
function $F(x)$.  Find the distribution function of
\[ x_{\text{med}} = \median\{x_1, x_2, x_3\} \]
in terms of $F(x)$, and thus find an expression for the density function
of $x_{\text{med}}$.

\nextq Given $p(x,y)=xy\exp[-\half(x^2+y^2)]$ where $x>0$ and $y>0$ and 
$p(x,y)=0$ otherwise, calculate (a) $\Pr(x<1)$, (b) $\Pr(x<1, y<1)$.

\nextq Show that 
$\E\,g(\random x)h(\random y)=(\E g(\random x))(\E h(\random y))$ for
arbitrary functions $g$ and $h$ if and only if $\random x$ and $\random y$ 
are independent.

\nextq Suppose that the random variable $k$ has a logarithmic series
distribution with parameter $\theta$ (where $0<\theta<1$), so that
\[ p(k) = \frac{\alpha\theta^k}{k}\qquad(k=1,2,\dots) \]
where $\alpha=-[\log(1-\theta)]^{-1}$ and thus the probabilities are the
terms in the series expansion of $-\alpha\log(1-\theta)$.  Find the mean
and variance of $k$.

\nextq A random variable $x$ is said to have a Weibull distribution if
there are values of the parameters $c$ ($>0$) and $\alpha$ ($>0$) such  
that
\[ p(x) = c\alpha^{-1}\{x/\alpha\}^{c-1}\exp[-\{x/\alpha\}^c] \]
for $x>0$ and otherwise $p(x)=0$.  Find a function $y$ of $x$ such
that $y$ has an exponential distribution (see the last subsection of
Section 2.5 or Section A.4 in the book) and hence find the mean 
and variance of $x$.

\nextq The \textit{skewness} of a random variable $x$ is defined as 
$\gamma_1=\mu_3/(\mu_2)^{3/2}$ where 
\[ \mu_n=\E(x-\E x)^n \]
(but note that some authors work in terms of $\beta_1=\gamma_1^2$).
Find the skewness of a random variable $x$ with a Poisson distribution
$\P(\mu)$ of parameter $\mu$.

\nextq Show that for any random variable $X$ and any constants $t>0$ and $c>0$
\[ \E(X+c)^2\geqslant (t+c)^2\Pr(X>t) \]
and deduce from the Parallel Axes Theorem or otherwise that if $X$ has
mean 0 and variance $\sigma^2$ then
\[ \Pr(X> t)\leqslant \frac{\sigma^2+c^2}{(t+c)^2}. \]
By minimizing the right-hand side as a function of $c$ show that
\[ \Pr(X>t) \leqslant \frac{\sigma^2}{\sigma^2+t^2}. \]

\nextq Suppose that the point $(x, y)$ is uniformly distributed over the
interior of the circle $x^2+y^2=1$.  Show that $x$ and $y$ are
uncorrelated but that they are \textit{not} independent.

\nextq Let $\vect x=(x_1,x_2,x_3)$ be a continuous random vector having
joint density 
\[ p(x_1,x_2,x_3)=\left\{\begin{array}{cl}
                   6\exp(-x_1-x_2-x_3) & \qquad (0 < x_1 < x_2 < x_3) \\
                   0                   & \qquad \text{otherwise}
                   \end{array}\right. \]
Find (a) the marginal density function of $x_2$, (b) the conditional
density of $(x_1,x_3)$ given $x_2$, (c) the marginal (joint) density of
$(x_1,x_2)$, and (d) the conditional density of $x_3$ given $(x_1,x_2)$,
and (e) the conditional density of $x_1$ given $x_2$.

\nextq Suppose that $\random\lambda$ has a (negative) exponential 
distribution with parameter 1 (see Section A.4 in the book), so that its 
mean and variance are both equal to unity.  Suppose further that 
conditional on $\random\lambda=\lambda$ the value of $k$ has a Poisson 
distribution of mean $\lambda$, so that 
$\E_{\random k|\random\lambda}\left(\random k|\random\lambda\right)
  =\random\lambda$
and
$\Var_{\random k|\random\lambda}\left(\random k|\random\lambda\right)
  =\random\lambda$.
\begin{description}
  \item[\quad(a)] Show by integrating the joint density
    $p(k,\lambda)=p(\lambda)p(k|\lambda)$ over $\lambda$ that the
    unconditional distribution of $\random k$ is geometric with $\pi=\half$
    (see Section A.13 in the book), so that $p(k)=1/2^{k+1}$.
  \item[\quad(b)] Verify that the formula
  \[   \Var\,\random k = 
       \E_{\random\lambda}\Var_{\random k|\random\lambda}
          \left(\random k|\random\lambda\right)
        +\Var_{\random\lambda}
          \E_{\random k|\random\lambda}\left(\random k|\random\lambda\right)
  \]
    derived in Section 1.5 holds in this case.
\end{description}

\newpage

\section{Further Exercises on Chapter \arabic{section}}

\startqs

\nextq Suppose that $k\sim\P(\lambda)$.  Find the standardized
likelihood as a function of $\lambda$ for given $k$.  Which of the
distributions listed in Appendix A does this represent?

\nextq Suppose we are given the following 11 observations from a normal 
distribution:
\[ 148,\ 154,\ 158,\ 160,\ 161,\ 162,\ 166,\ 170,\ 182,\ 195,\ 236 \]
and we are told that the standard deviation $\sqrt{\phi}=25$.  Find a
95\% HDR for the posterior distribution of the mean assuming the usual
reference prior.
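
With the reference prior the posterior for the mean is
$\N(\mean x,\phi/n)$, so the HDR is symmetric about $\mean x$; a Python
sketch (assuming \texttt{scipy}):
\begin{verbatim}
import numpy as np
from scipy import stats

x = np.array([148, 154, 158, 160, 161, 162, 166, 170, 182, 195, 236])
post_mean = x.mean()
post_sd = 25 / np.sqrt(len(x))   # sqrt(phi/n) with sqrt(phi) = 25
print(stats.norm.interval(0.95, loc=post_mean, scale=post_sd))
\end{verbatim}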

\nextq With the same data as in the previous question, what is the
predictive distribution for a possible future observation $x$?

\nextq Show that if a random sample of size $n=\phi^2$ is taken from an
$\N(\theta,\phi)$ distribution where $\theta$ has a prior distribution
which also has a variance of $\phi$, then the posterior distribution of
$\theta$ cannot also have variance $\phi$.

\nextq Your prior beliefs about a quantity $\theta$ are such that
\[       p(\theta) = \left\{\begin{array}{ll} 1 & (\theta \geqslant 0)    \\
                                              0 & (\theta <  0).   
                          \end{array}\right.                              \]
A random sample of size 16 is taken from an $\N(\theta, 1)$ distribution 
and the mean of the observations is observed to be 0.45.  Find a 90\% 
HDR for $\theta$.

\nextq Suppose that you have prior beliefs about an unknown quantity
$\theta$ which can be approximated by an $\N(\lambda, \phi)$ distribution, 
while my beliefs can be approximated by an $\N(\mu,\phi)$ distribution
(so that the variances are the same although the means are different).  
Suppose further that the reasons that have led us to these conclusions do 
not overlap with one another.  What distribution should represent our beliefs 
about $\theta$ when we take into account all the information available to both 
of us?

\nextq Suppose that in the situation described in Section 2.4 we have
$\lambda_{\alpha}=1.96$, that the prior density is constant within
$I_{\alpha}$, and that prior to taking the sample no value of $\theta$
outside $I_{\alpha}$ is more than twice as probable as any value of
$\theta$ inside it.  How far can the true posterior density differ from 
the normal density within $I_{\alpha}$?  What difference does it make if 
$\lambda_{\alpha}$ is increased to 3.29?

\nextq Which of the following can be expressed in data-translated form:
\begin{description}
  \item[\quad(a)] Any likelihood of the form
\[ l(\sigma|\vect y)\propto h\left(\frac{s(\vect y)}{\sigma}\right); \]
  \item[\quad(b)] The likelihood
\[ l(\xi_0|\vect x)\propto\prod p(x_i\,|\,\xi_0,\alpha,c) \]
where $p(x\,|\,\xi_0,\alpha,c)$ is the Weibull density
\[ p(x\,|\,\xi_0,\alpha,c)=c\alpha^{-1}\{(x-\xi_0)/\alpha\}^{c-1}
                     \exp[-\{(x-\xi_0)/\alpha\}^c]\qquad(\xi_0 < x); \]
  \item[\quad(c)] The likelihood
\[ l(\alpha|\vect x)\propto\prod p(x_i\,|\,0,\alpha,c) \]
where $p(x\,|\,\xi_0,\alpha,c)$ is the Weibull density as above with 
$\xi_0 = 0$.
\end{description}

\nextq Suppose you are interested in investigating how variable is the
performance of schoolchildren on a new English test, and that you begin
by trying this test out on children in 15 similar schools.  It turns out
that the average standard deviation is about 20 marks.  You then want to
try the test on a sixteenth school, which is fairly similar to those you
have already investigated, and you reckon that the data on the other
schools gives you a prior for the variance in this new school which has
a mean of 120 and is worth nine direct observations on the school.  What
is the posterior distribution for the variance if you observe a sample
of size 50 from the school with a sample standard deviation of 28.9?
Give an interval in which the variance lies with 95\% posterior
probability. 

\nextq The following are malt-extract values (in per cent of dry basis)
on malts made from Kindred barley grown at various locations in the
Mississippi Valley Barley Nurseries during 1948:
{\small
  \[ 77.7,\ 76.0,\ 76.9,\ 74.6,\ 74.7,\ 76.5,\ 74.2,\ 75.4,\ 76.0,\ 76.0,\
     73.9,\ 77.4,\ 76.6,\ 77.3. \]
}
\!\!Give 99\% HDRs for the mean and variance of the population from which
they come.

\nextq Show that if $x_j\sim\Be(\alpha,\beta)$ (cf.\ Section A.10 in the
book) for $j=1$, 2, \dots, $n$, then $\left(\prod x_j,\,\prod(1-x_j)\right)$ 
is sufficient for $(\alpha,\beta)$ given $\vect x$.  Show further that
if $\alpha$ is known, then $\prod(1-x_j)$ is sufficient for $\beta$ and that 
if $\beta$ is known then $\prod x_j$ is sufficient for $\alpha$.

\nextq Suppose that $x_1$, \dots, $x_n$ are independent, identically
distributed random variables with discrete probability density
\[ p(x\,|\,\pi,\theta)=(1-\pi)\pi^{x-\theta},\qquad
   (x=\theta,\theta+1,\theta+2,\dots), \]
where $\theta$ and $\pi$ are unknown parameters and $0<\pi<1$.  Show
that the vector $\left(\min x_j,\,\sum x_j\right)$ is sufficient for
$(\theta,\pi)$.  Show further that if $\pi$ is known, then $\min x_j$ is
sufficient for $\theta$, and that if $\theta$ is known, then $\sum x_j$
is sufficient for $\pi$.

\nextq Suppose that $x$ has a Poisson distribution of mean
$1/\lambda^2$, so that the likelihood takes the form
\[ l(\lambda|x)=\lambda^{-2x}\exp(-1/\lambda^2). \]
Find a family of conjugate priors for $\lambda$.

\nextq Show that if $y$ and $z$ are independent standard normal variates then
\[ \frac{y+z}{y-z}\sim\C(0,1). \]
(\textit{Hint:} Note that a $\C(0,1)$ distribution is the same thing as
a $\t$ distribution on 1 degree of freedom; see Section A.21 in the book.)

\nextq Suppose that $x\sim\G(\alpha,\beta)$ has a two-parameter gamma
distribution (see Section A.4 in the book) and that $y-x$ has the same
distribution (this situation arises when $x$ is the lifetime of some
component which has to be replaced and $y$ is the time when it is
replaced for a second time).  Show that the vector $(x,y)$ has a distribution
in the two-parameter exponential family.

\nextq Suppose that the results of a certain test are known, on the
basis of general theory, to be normally distributed about the same mean
$\mu$ with the same variance $\phi$, neither of which is known.  Suppose
further that your prior beliefs about $(\mu,\phi)$ can be represented by
a normal/chi-squared distribution with 
\[ \nu_0=5,\qquad S_0=250,\qquad n_0=2,\qquad \theta_0=115. \]
Now suppose that 50 observations are obtained from the population with
mean 77 and sample variance $s^2=20$.  Find the posterior distribution
of $(\mu,\phi)$.  Compare 50\% prior and posterior HDRs for $\mu$.

\nextq Suppose that your prior for $\theta$ is a $\threequarters:\quarter$
mixture of $\N(0,1)$ and $\N(0.5,1)$ and that a single observation
$x\sim\N(\theta,1)$ turns out to equal 1.5.  What is your posterior
probability that $\theta > 1$?

\nextq An experimental station has had experience with growing wheat 
which leads it to believe that the yield per plot is more or less normally 
distributed with mean 200 and standard deviation 15.  The station then 
wished to investigate the effect of a growth hormone on the yield per plot.  
In the absence of any other information, the prior distribution for the 
variance on the plots might be taken to have mean 200 and standard 
deviation 90.  As for the mean, it is expected to be about 230, and 
this information is thought to be worth about 20 observations.  Twelve plots 
treated with the hormone gave the following yields:
\begin{center}
  222,\quad 234,\quad 156,\quad 287,\quad 190,\quad 255,\quad 307,\quad 101,
      \quad 133,\quad 251,\quad 177,\quad 225.
\end{center}
Find the posterior distributions of the mean and variance.

\newpage

\section{Further Exercises on Chapter \arabic{section}}

\startqs

\nextq Give the form of the beta-binomial predictive distribution for
the number of successes in the next $m$ trials after you have observed
$n$ trials in which there were $x$ successes assuming that you used the
standard Haldane prior $\Be(0,0)$.

\nextq Find a suitable interval of 95\% posterior probability to quote
in a case where your posterior distribution for an unknown parameter
$\pi$ is $\Be(18, 13)$ and compare this interval with similar
intervals for the cases of $\Be(18.5, 13.5)$ and $\Be(19, 14)$
posteriors.  Comment on the relevance of the results to the choice of a
reference prior for the binomial distribution.
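
Equal-tailed intervals (a convenient stand-in for exact HDRs) follow
from the beta quantile function; a Python sketch:
\begin{verbatim}
from scipy import stats

# Equal-tailed 95% posterior intervals for the three beta posteriors.
for a, b in [(18, 13), (18.5, 13.5), (19, 14)]:
    print((a, b), stats.beta.ppf([0.025, 0.975], a, b))
\end{verbatim}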

\nextq Suppose that your prior beliefs about the probability $\pi$ of success
in Bernoulli trials have mean 1/4 and variance 1/48.  Give a 90\% HDR
for $\pi$ given that you have observed six successes in 10 trials.
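
Matching a beta prior to the given mean and variance and then updating
is mechanical; a Python sketch (the equal-tailed interval is used as an
approximation to the HDR):
\begin{verbatim}
from scipy import stats

m, v = 1/4, 1/48
s = m*(1 - m)/v - 1       # a + b, from the beta variance formula
a, b = m*s, (1 - m)*s     # prior parameters
# Posterior after 6 successes and 4 failures in 10 trials.
print((a, b), stats.beta.interval(0.90, a + 6, b + 4))
\end{verbatim}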

\nextq Suppose that you have a prior distribution for the probability
$\pi$ of success in a certain kind of gambling game which has mean 0.7,
and that you regard your prior information as equivalent to 16 trials.
You then play the game 30 times and win 18 times.  What is your
posterior distribution for $\pi$?

\nextq Suppose that you are interested in the proportion of females in a
certain organisation and that as a first step in your investigation
you intend to find out the sex of the first 13 members on the
membership list.  Before doing so, you have prior beliefs which you
regard as equivalent to 30\% of this data, and your prior beliefs
suggest that 50\% of the membership is female.
     
Suggest a suitable prior distribution and find its standard deviation.
     
Suppose that 4 of the first 13 members turn out to be female; find
your posterior distribution and give a 50\% posterior HDR for this 
distribution.
     
Find the mean, median and mode of the posterior distribution.
      
Would it surprise you to learn that in fact 149 of the total number
of 551 members are female?

\nextq Suppose that the distribution of a random variable $x$ is such that 
the standard deviation is always equal to the mean $\mu$.  Show that $\log x$
has a variance which is approximately constant.  What does this suggest
as a reference prior for $\mu$?

\nextq The following data (quoted by Smith, 1969, Section 20.12 from an 
article by L~F~Richardson) give the observed numbers of outbreaks of war
per year for the 432 years from 1500 to 1931 \textsc{a.d}.
\begin{center}
\begin{tabular}{lrrrrrl}
   Number of wars:   &0      &1      &2    &3     &4      &5 and more \\
   Number of years:    &223    &142    &48   &15    &4      &0.
\end{tabular}
\end{center}
Give an interval in which the mean number $\lambda$ of outbreaks of
war per year lies with 95\% probability.
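
With a prior of the form $p(\lambda)\propto\lambda^{-1/2}$ (the Jeffreys
prior; another reference prior would change the shape parameter
slightly) the posterior is a gamma distribution, as in this Python
sketch:
\begin{verbatim}
from scipy import stats

S = 0*223 + 1*142 + 2*48 + 3*15 + 4*4   # total outbreaks observed
n = 432                                 # number of years
# Posterior under p(lambda) ~ lambda**(-1/2) is G(S + 1/2) scaled by 1/n.
print(stats.gamma.interval(0.95, S + 0.5, scale=1/n))
\end{verbatim}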

\nextq Recalculate your answer to the preceding question assuming that
you had a prior distribution for $\lambda$ of mean 0.5 and standard 
deviation 0.16.

\nextq We say that $x$ has a Weibull distribution with parameters $\mu$,
$\sigma$ and $c$ if it has density
\[ \left[\frac{c}{\sigma}\right]\left[\frac{x-\mu}{\sigma}\right]^{c-1}
   \exp\left[-\left[\frac{x-\mu}{\sigma}\right]^c\,\right]\qquad
   (\mu \leqslant x < \infty). \]
Suppose that $\mu=0$ and that $c$ is known.  What prior for $\sigma$ is
suggested by Jeffreys' rule?

\nextq Suppose that the probability density function of an observation
$x$ depends on two parameters $\theta$ and $\phi$ in such a way that it
can be expressed as a product of a function of $\theta$ and $x$ and a
function of $\phi$ and $x$, and you find the prior given by the 
two-dimensional version of Jeffreys' rule.  Show that this prior 
gives the pair $(\theta,\phi)$ the distribution which results
from giving $\theta$ the Jeffreys' prior obtained by regarding $\phi$ as
constant, and $\phi$ the Jeffreys' prior obtained by regarding $\theta$ as
constant, independently of one another.

\nextq A generalized Pareto distribution due to M~Ljubo is defined by 
Johnson \textit{et al.}\ (1994--1995) by  the distribution function
\[ F(x) = 1-\left(\frac{\xi+\alpha}{x+\alpha}\right)^{\gamma}
          \text{e}^{-\beta(x-\xi)}\qquad(x > \xi)               \]
(and $F(x)=0$ for $x\leqslant\xi$).  Consider the case where $\beta=0$
and $\xi$ and $\gamma$ are known.  Use Jeffreys' rule to find a suitable
reference prior for $\alpha$.

\nextq Consider a uniform distribution on the interval $(\alpha,\beta)$,
where the values of $\alpha$ and $\beta$ are unknown, and suppose that
the joint distribution of $\alpha$ and $\beta$ is a bilateral bivariate
Pareto distribution with $\gamma=2$.  How large a random sample must be
taken from the uniform distribution in order that the coefficient of
variation (i.e.\ the standard deviation divided by the mean) of the
length $\beta-\alpha$ of the interval should be reduced to 0.01 or less? 

\nextq Suppose that observations $x_1$, $x_2$, $\dots$, $x_n$ are
available from a density
\[ p(x|\theta)\propto\exp(-\theta/x)\qquad (0 < x < \theta). \]
Explain how you would make inferences about the parameter $\theta$ 
using a conjugate prior.
     
\nextq Give an approximation to the median of the posterior distribution
for the number of tramcars given that you have observed $n$ tramcars
numbered $x_1$, $x_2$, \dots, $x_n$.

\nextq Test the assertion of Benford's Law by taking a table of physical
constants (e.g.\ the one in Abramowitz and Stegun, 1964 or 1965, Table
2.3) or tables in other standard reference works and seeing how well it
fits. 

\nextq We sometimes investigate distributions on the vertices of a
regular solid, for example a cube or a dodecahedron.  Find a Haar prior
for a distribution on the vertices of such a solid.

\nextq Suppose that the prior distribution $p(\mu,c)$ for the parameters
$\mu$ and $c$ of a distribution with a density of the form
\[ p(x\,|\,\mu,c) = \text{(const)}\times c^{-1}
                    \exp\left(-\sum|x-\mu|^3/c^3\right) \]
is uniform in $\mu$ and $c$, and that the four observations
\[ x_1=1,\qquad x_2=1,\qquad x_3=2,\qquad x_4=3, \]
are available from this distribution.  Calculate the value of the
posterior density $p(\mu,c)$ (ignoring the constant) to one decimal
place for $\mu=1$, 1.5, 2, 2.5, 3 and $c=1.2$, 1.4, 1.6, 1.8, 2.0.

Use Simpson's rule
\[ \int_a^b f(t)\dt = \frac{(b-a)}{3n}
                       \{f(t_0)+4f(t_1)+2f(t_2)+4f(t_3)+\dots+f(t_n)\} \]
to approximate the posterior density for $\mu$.

Go on to find an approximation to the posterior probability that
\[ 1.75 < \mu < 2.25. \]
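
A Python sketch of the grid evaluation and the Simpson's rule step (the
same pattern, applied in $\mu$, gives the final probability):
\begin{verbatim}
import numpy as np

x = np.array([1.0, 1.0, 2.0, 3.0])
mus = np.array([1.0, 1.5, 2.0, 2.5, 3.0])
cs  = np.array([1.2, 1.4, 1.6, 1.8, 2.0])
# Unnormalized posterior: c**(-n) * exp(-sum|x - mu|^3 / c^3), flat prior.
post = np.array([[c**(-x.size) * np.exp(-np.sum(np.abs(x - m)**3)/c**3)
                  for c in cs] for m in mus])
h = cs[1] - cs[0]
w = np.array([1, 4, 2, 4, 1]) * h / 3   # Simpson weights for four panels
p_mu = post @ w                         # posterior for mu, up to a constant
print(post)
print(p_mu / p_mu.sum())
\end{verbatim}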

\nextq Fisher (1925b, Chapter 9, Example 47) quotes a genetic example
which results in individuals of four types with probabilities
$(2+\theta)/4$, $(1-\theta)/4$, $(1-\theta)/4$ and $\theta/4$
respectively, where $\theta$ is an unknown parameter between 0 and 1.
Observations on $n$ individuals, each of which was independently one of
these four types, showed that there were $a$, $b$, $c$ and $d$ of the
respective types.
Find the maximum likelihood estimator and the Fisher information.

Show that if $a=1997$, $b=906$, $c=904$ and $d=32$ (so that $n=3839$),
then the value of the maximum likelihood estimator is 0.0357.

Show that the method of scoring starting from a simple inefficient
estimator proposed by Fisher, namely
\[ t=\{a+d-(b+c)\}/n \]
gets to within 0.001 of the maximum likelihood estimator in two
iterations. 
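
The scoring iteration $t\mapsto t+s(t)/I(t)$ is easy to check
numerically; a Python sketch:
\begin{verbatim}
a, b, c, d = 1997, 906, 904, 32
n = a + b + c + d

def score(t):   # derivative of the log-likelihood
    return a/(2 + t) - (b + c)/(1 - t) + d/t

def info(t):    # expected (Fisher) information
    return n * (1/(2 + t) + 2/(1 - t) + 1/t) / 4

t = (a + d - (b + c)) / n   # Fisher's simple inefficient estimator
for _ in range(2):
    t += score(t) / info(t)
    print(t)
\end{verbatim}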

\newpage

\section{Further Exercises on Chapter \arabic{section}}

\startqs

\nextq Show that if the prior probability $\pi_1$ of the alternative to
a hypothesis is close to zero, then the posterior probability $p_0$ of
the hypothesis satisfies $p_0\cong1-\pi_1B^{-1}$ and more exactly
$p_0\cong1-\pi_1B^{-1}-\pi_1^2\left(B^{-1}-B^{-2}\right)$.

\nextq Consider the data in Section 2.2 on the age of Ennerdale granophyre.
Using the prior based on the K/Ar method and the posterior derived in the 
book, find the prior and posterior odds and the Bayes factor for the hypothesis
that the age is greater than 400 million years.

\nextq A manufacturing process is adjusted so that the mean of a certain
dimension of the manufactured part is 20 cm.  A sample of ten parts is
checked as to this dimension, with the following results
\[ 19.66,\ 20.04,\ 19.96,\ 19.92,\ 20.04,\
   19.66,\ 19.88,\ 19.82,\ 20.10,\ 19.72. \]
Find the $P$-value for the hypothesis that the mean value has dropped
below the desired value of 20 cm.
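
The classical one-sided $t$ test gives the $P$-value directly; a Python
sketch (assuming a version of \texttt{scipy} recent enough to support
the \texttt{alternative} argument):
\begin{verbatim}
import numpy as np
from scipy import stats

x = np.array([19.66, 20.04, 19.96, 19.92, 20.04,
              19.66, 19.88, 19.82, 20.10, 19.72])
# One-sided one-sample t test of mu = 20 against mu < 20.
print(stats.ttest_1samp(x, 20.0, alternative='less'))
\end{verbatim}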

\nextq Compare the observed fraction of zeroes in the first 250 digits in 
Table 7.1 of Neave (1978) with the expected number and use Lindley's
method to see whether this data provides evidence that these digits are
not truly random.

\nextq With the data due to von Bortkiewicz (1898) quoted in question 7
of Chapter 3, would it be appropriate to reject the hypothesis that on
average one man is killed per cavalry corps per year? [Use Lindley's
method.] 

\nextq Suppose that the standard test statistic 
$z=(\mean x-\theta_0)/\sqrt{(\phi/n)}$ takes the value $z = 3$ and that 
the sample size is $n = 64$.  How close to $\theta_0$ does a value of 
$\theta$ have to be for the value of the normal likelihood function at 
$\mean x$ to be within 5\% of its value at $\theta=\theta_0$?

\nextq Show that in the situation described in Section 4.5 an
observation $z$ of order $n^{1/4-\varepsilon}$  will, 
for large $n$, result in a posterior probability very near unity for the 
null hypothesis.

\nextq Suppose that $x_1$, $x_2$, $\dots$, $x_n\sim\N(\theta_0,\phi)$ where
$\phi$ is known, and take $\phi=1$ and $n=16$.  Find a value of $\varepsilon$ 
such that over the interval
$(\theta_0-\varepsilon,\,\theta_0+\varepsilon)$ the likelihood given a value 
$\mean x$ which is $2\sqrt{(\phi/n)}$ from $\theta_0$ does not vary by more 
than $2\half$\%.

\nextq At the beginning of Section 4.5, we saw that under 
the alternative hypothesis that $\theta \sim \N(\theta_0, \psi)$ the 
predictive density for $\mean x$ was $\N(\theta_0, \psi+\phi/n)$, so that
\[   p_1(\mean x)=\{2\pi(\psi+\phi/n)\}^{-\frac{1}{2}}
                 \exp [-\half(\mean x - \theta_0)^2/(\psi+\phi/n)].       \]
Find the maximum of this density considered as a function of $n$ 
on the assumption that $z \geqslant 1$.  Hence find an upper bound for
the Bayes factor under this assumption.

\nextq Add a row to the table in the subsection `Numerical examples'
of Section 4.5 for 2-tailed $P$-value 0.02.

\nextq Mendel (1865) reported finding 2001 green seeds to 6022 yellow 
in an experiment in which his theory predicted a ratio of $1:3$.  Use 
the method employed for Weldon's dice data in Section 4.5 to test whether 
his theory is confirmed by the data.  [However, Fisher (1936) cast some 
doubt on the genuineness of the data.]
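
For the arithmetic, the standardized statistic $z$ used in the analysis
of Weldon's dice data can be assembled as follows (a sketch; the
Bayes-factor step then proceeds as in Section 4.5):
\begin{verbatim}
import numpy as np

# 2001 green out of n = 8023 seeds against the 1:3 ratio (pi0 = 1/4 green).
x, n, pi0 = 2001, 8023, 1/4
z = (x - n*pi0) / np.sqrt(n*pi0*(1 - pi0))
print(z)
\end{verbatim}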

\nextq Find a value for a Bayes factor when testing $\Hyp_0:\lambda=\lambda_0$ 
against the alternative hypothesis $\Hyp_1: \lambda\neq\lambda_0$ given a
single observation $x\sim\P(\lambda)$.  Use a gamma prior with mean
$\lambda_0$ and variance $\sigma^2$ under the alternative hypothesis.

\nextq Add a row to the table in the subsection `A bound which does not
depend on the prior distribution' of Section 4.5 for 2-tailed $P$-value 0.02.

\nextq Suppose that $x$ has a Poisson distribution $\P(\lambda)$ of 
mean $\lambda$, and that it is desired to test $\Hyp_0:\lambda=\lambda_0$ 
against the alternative hypothesis $\Hyp_1: \lambda\neq\lambda_0$.

\begin{description}
\item[\quad(a)]  Find lower bounds on the posterior probability of $\Hyp_0$ 
and on the Bayes factor for $\Hyp_0$ versus $\Hyp_1$, bounds which are valid 
for any $\rho_1(\lambda)$.

\item[\quad(b)]  If $\lambda_0=2$ and $x=6$ is observed, calculate the 
(two-tailed) $P$-value and the lower bound on the posterior probability when 
the prior probability $\pi_0$ of the null hypothesis is $\half$.
\end{description}

\nextq Twenty observations from a normal distribution of mean $\theta$ 
and variance $\phi$ are available, of which the sample mean is 1.15 and 
the sample variance is 1.5.  Compare the Bayes factors in favour of the 
null hypothesis that $\theta=\theta_0$ assuming (a) that $\phi$ is unknown 
and (b) that it is known that $\phi = 1$.

\nextq Rewrite the argument in Jeffreys (1961, Section 5.1) leading to 
\[ B \approx \sqrt{\left(\frac{\pi\nu}{2}\right)}
             \left(1+\frac{t^2}{\nu}\right)^{-(\nu-1)/2} \]
in your own words.

\nextq Suppose that in testing a point null hypothesis you find a value of 
the usual Student's $\t$ statistic of 3.9 on 11 degrees of freedom.  Would 
the methodology of Section 4.6 require you to ``think again''?

\nextq Show that for large samples the Doogian philosophy requires that
the sample size be of order $1/P^2$ if you are not to have to ``think
again''. 

\newpage

\section{Further Exercises on Chapter \arabic{section}}

\startqs

\nextq Smith (1969, Table 21.8) quotes the following data on haemoglobin 
concentration in ten animals before and after the administration of a
drug:
\[
\begin{array}{lcccccccccc}
\text{Animal}&  1  &  2  &  3  &  4  &  5  &  6  &  7  &  8  &  9  &  10 \\
\text{Before}& 151 & 147 & 158 & 141 & 155 & 160 & 153 & 140 & 152 & 145 \\
\text{After} & 148 & 145 & 155 & 136 & 152 & 159 & 150 & 137 & 148 & 142
\end{array}
\]
Investigate the mean discrepancy $\theta$ between the measurements
before and after administration of the drug, and in particular give an
interval in which you are 90\% sure that it lies.
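
Under the usual reference prior the posterior for $\theta$ is a scaled
and shifted $t$ on $n-1$ degrees of freedom centred at the mean
difference; a Python sketch:
\begin{verbatim}
import numpy as np
from scipy import stats

before = np.array([151, 147, 158, 141, 155, 160, 153, 140, 152, 145])
after  = np.array([148, 145, 155, 136, 152, 159, 150, 137, 148, 142])
d = before - after
n, dbar, s = d.size, d.mean(), d.std(ddof=1)
print(stats.t.interval(0.90, n - 1, loc=dbar, scale=s/np.sqrt(n)))
\end{verbatim}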

\nextq With the same data as in the previous question, test the
hypothesis that the drug makes no difference to the haemoglobin
concentration.

\nextq Suppose that you have grounds for believing that observations
$x_i$, $y_i$ for $i=1$, 2, \dots, $n$ are such that $x_i\sim\N(\theta,\phi_i)$ 
while $y_i\sim\N(\theta,2\phi_i)$, but that you are not prepared to 
assume that the $\phi_i$ are equal.  What statistic would you expect to 
base inferences about $\theta$ on?

\nextq How much difference would it make to the analysis of the data in
Section 5.1 on rat diet if we took $\omega=(\sqrt{\phi}+\sqrt{\psi})^2$ 
instead of $\omega=\phi+\psi$?

\nextq The values of oxygen consumption (in mm$^3$ per hour per gram weight) 
were measured for ten fish in rapid streams and for ten in slow-moving 
water.  The values found are reported by Smith (1969, Section 22.5, Problem 
1) as:
\[
\begin{array}{lrrrrrrrrrr}
\text{In rapid streams}& 117& 117& 122& 116& 135& 144& 106& 114& 99& 108 \\
\text{In slow water   }&  90&  58&  82&  82&  92&  75&  90& 142& 85& 107
\end{array}
\]
Investigate the discrepancy $\theta$ between the mean consumptions in
the two types of water, and in particular give an interval in which you
are 90\% sure that it lies
\begin{description}
\item[\quad(a)] assuming that it is known from past experience that the 
standard deviation of both sets of observations is 20, and

\item[\quad(b)] assuming simply that it is known that the standard deviations 
of the two sets of observations are equal.
\end{description}

\nextq A random sample $\vect x = (x_1, x_2, \dots, x_m)$ is available 
from an $\N(\lambda,\phi)$ distribution and a second independent random 
sample $\vect y = (y_1, y_2, \dots, y_n)$ is available from an 
$\N(\mu,3\phi)$ distribution.  Obtain, under the usual assumptions, 
the posterior distributions of $\lambda-\mu$ and of $\phi$.

\nextq In the situation described at the start of Section 5.2, what is the 
posterior distribution of the variance?  Give a 90\% HDR for the
variance in the example on the weight growth of rats.

\nextq The following table gives the values of the cephalic index found
in two random samples of skulls, one consisting of fifteen and the other
of thirteen individuals:
\begin{center}
\begin{tabular}{lrrrrrrrrrrrrrrr}
   Sample I:  & 74.1 & 77.7 & 74.4 & 74.0 & 73.8 & 79.3 & 75.8        \\ 
              & 82.8 & 72.2 & 75.2 & 78.2 & 77.1 & 78.2 & 76.3 & 76.8 \\
   Sample II: & 70.8 & 74.9 & 74.2 & 70.4 & 69.2 & 72.2 & 76.8        \\
              & 72.4 & 77.4 & 78.1 & 72.8 & 74.3 & 74.7
\end{tabular}
\end{center}
Investigate the difference $\theta$ between the mean cephalic indices of
the two populations without making any particular assumptions about
their variances, and in particular give an interval in which you are
90\% sure that it lies.

\nextq Show that if $m=n$ is large then $f_1$ in Patil's approximation is
approximately equal to unity while $f_2$ is approximately
$n^{-1}(1-\half\sin^2 2\theta)$.

\nextq Suppose that $T_x$, $T_y$ and $\theta$ are defined as in 
Section 5.3 and that
\[ T=T_x\sin\theta-T_y\cos\theta,\qquad U=T_x\sin\theta+T_y\cos\theta \]
Show that if $\gamma=\lambda+\mu$ then
\[ U=\frac{\gamma-(\mean x +\mean y)}{\sqrt{(s_x^2/m+s_y^2/n)}}
  \qquad\text{and}\qquad
  \tan \theta = \frac{s_x/\sqrt{m}}{s_y/\sqrt{n}}               \]
Show further that $U\sim\BF(\nu_x,\nu_y,\pi-\theta)$.  Can this fact be 
used to test the hypothesis that $\lambda+\mu=0$?

\nextq Show that if $x\sim\z_{\nu_1,\nu_2}$ then
\[ \frac{\nu_1\text{e}^{2x}}{\nu_2+\nu_1\text{e}^{2x}} 
   \sim \Be(\half\nu_1,\half\nu_2). \]

\nextq P G Hoel, \textit{An Introduction to Mathematical Statistics}
(5th edn), New York: Wiley, 1984, Chapter 10, Section 5.2, quotes the
following data on the yield of corn in bushels per plot on 20 experimental 
plots of ground, half of which were treated with phosphorus as a fertilizer:
\begin{center}
\begin{tabular}{lrrrrrrrrrr}
  \textit{Treated}   & 6.2& 5.7& 6.5& 6.0& 6.3& 5.8& 5.7& 6.0& 6.0& 5.8\\
  \textit{Untreated} & 5.6& 5.9& 5.6& 5.7& 5.8& 5.7& 6.0& 5.5& 5.7& 5.5
\end{tabular}
\end{center}
Find the sample variances for the treated plots and for the untreated
plots, and give an interval in which you are 95\% sure that the ratio of
the population variances lies.

\nextq Measurement errors when using two different instruments are
more or less symmetrically distributed and are believed to be reasonably 
well approximated by a normal distribution.  Twenty measurements with 
each show a sample standard deviation twice as large with one instrument 
as with the other.  Give an interval in which you are 90\% sure that the 
ratio of the true standard deviations lies.

\nextq Repeat the analysis of Di Raimondo's data in Section 5.6 on the 
effects of penicillin on mice, this time assuming that you have prior 
knowledge worth about twelve observations in each case suggesting that 
the mean chance of survival is about a quarter with the standard injection 
but about three-quarters with the penicillin injection.

\nextq Lindley (1965, Sections 7.4 and 7.6) quotes a genetical example
involving two genes which can be summarized as follows:
\[
\renewcommand{\arraystretch}{1.25}
\begin{array}{c|cc|c} \hline
                 &      B         &  \overline{B} &  \text{Total}  \\ \hline
   A             &  \phantom{1}6  &      13       &        19      \\
   \overline{A}  &     16         &      61       &        77      \\ \hline
   \text{Total}  &     22         &      74       &        96
\end{array}
\renewcommand{\arraystretch}{1}
\]
Find the posterior probability that the log odds-ratio is positive and 
compare it with the comparable probability found by using the inverse 
root-sine transformation.

\nextq Show that if $\pi \cong \rho$ then the log odds-ratio is such that
\[ \Lambda-\Lambda' \cong \frac{\alpha(1-2\alpha)}{\pi(1-\pi)}
                         +\frac{\alpha^2}{\pi^2(1-\pi)^2} \]
where $\alpha=\pi-\rho$.

\nextq The table below [quoted from Smith (1969, Section 21.17, Example 9)] 
gives the results of examining 100 schoolboys and 100 schoolgirls of similar 
ages for heart murmur:
\[
\begin{array}{l|cc|c} \hline
                & \text{Murmur} &  \text{No murmur} & \text{Total} \\ \hline
   \text{Boys}  & \phantom{1}58 &       42          &      100     \\
   \text{Girls} & \phantom{1}46 &       54          &      100     \\ \hline
   \text{Total} &     104       &       96          &      200     \\ \hline
\end{array}
\]
What are the approximate posterior odds that the proportion of boys with
heart murmurs is at least 10 per cent greater than that of girls?

\nextq Suppose that $x\sim\P(10)$, i.e.\ $x$ is Poisson of mean 10, and
$y\sim\P(20)$.  What is the approximate distribution of $2x+y$?
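
A quick simulation confirms whatever normal approximation you derive; a
Python sketch:
\begin{verbatim}
import numpy as np

rng = np.random.default_rng(0)
z = 2*rng.poisson(10, 10**6) + rng.poisson(20, 10**6)
print(z.mean(), z.var())   # compare with the approximating normal
\end{verbatim}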

\newpage

\section{Further Exercises on Chapter \arabic{section}}

\startqs

\nextq The sample correlation coefficient between length of wing and
width of band on wing in females of the butterfly
\textit{Heliconius charitonius} was determined for each of ten
populations.  The results were as follows:
\begin{center}
{\small
  \begin{tabular}{lcccccccccc}
  Population  &  1 &  2 &  3 &  4 &  5 &  6 &  7 &  8 &  9 & 10 \\
  Size        & 100& 46 & 28 & 74 & 33 & 27 & 52 & 26 & 20 & 17 \\
  Correlation &0.29&0.70&0.58&0.56&0.55&0.67&0.65&0.61&0.64&0.56
\end{tabular}
}
\end{center}

\noindent
Find an interval in which you are 90\% sure that the correlation
coefficient lies.
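
Combining the samples on the $z=\tanh^{-1}r$ scale, where each $z_i$ is
approximately normal with variance $1/(n_i-3)$ (the usual
approximation), is straightforward; a Python sketch:
\begin{verbatim}
import numpy as np

n = np.array([100, 46, 28, 74, 33, 27, 52, 26, 20, 17])
r = np.array([0.29, 0.70, 0.58, 0.56, 0.55, 0.67, 0.65, 0.61, 0.64, 0.56])
w = n - 3                                   # precisions of the z values
zbar = np.sum(w * np.arctanh(r)) / w.sum()
half = 1.645 / np.sqrt(w.sum())             # 90% interval on the z scale
print(np.tanh([zbar - half, zbar + half]))  # back on the correlation scale
\end{verbatim}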

\nextq If you obtain a number of different sample correlation
coefficients (with the same sample size in each case), all of which turn
out to be positive, and combine them appropriately, is the resulting
combined estimate less than or greater than their mean?  Does the same
answer hold without the restriction that they are all positive?

\nextq Suppose you want a 95\% HDR for the difference between two
sample correlation coefficients each based on a sample of size $n$ to be
of total length 0.2.  What sample size $n$ do you need to take?
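
Working on the $z=\tanh^{-1}r$ scale with the approximation
$\Var\,z\cong1/(n-3)$ (an assumption carried over from the preceding
questions), the difference has variance $2/(n-3)$, so the required $n$
solves $1.96\sqrt{2/(n-3)}=0.1$; a one-line Python check:
\begin{verbatim}
import numpy as np

print(np.ceil(2 * (1.96/0.1)**2 + 3))   # smallest adequate sample size
\end{verbatim}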

\nextq Show that the transformation $z=\tanh^{-1}r$ can also be
expressed as 
\[ z=\half\log\{(1+r)/(1-r)\}. \]

\nextq What prior for $\zeta$ does the Jeffreys' reference prior for
$\rho$ correspond to?

\nextq The following data consist of the yield (in hundredweights per
acre) of wheat under 7 different levels of nitrogen (N):
{
\[
\begin{array}{lccccccc}
\text{N/acre}& 40\phantom{.0} & 60\phantom{.0} & 80\phantom{.0}
             & 100\phantom{.0} & 120\phantom{.0} & 140\phantom{.0}
             & 160\phantom{.0} \\
\text{cwt/acre}& 15.9 & 18.8 & 21.6 & 25.2 & 28.7 & 30.4 & 30.7
\end{array}
\]
}
\!Give an interval in which you are 90\% sure that the yield 
of wheat will lie on a plot to which 90 units of nitrogen have 
been applied and give a similar interval in which the mean yield
of all such plots lies.
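
With the standard reference prior the resulting intervals agree
numerically with the classical regression intervals; a Python sketch
(the extra 1 in the variance factor applies to a single plot but not to
the mean of all such plots):
\begin{verbatim}
import numpy as np
from scipy import stats

N = np.array([40., 60., 80., 100., 120., 140., 160.])
y = np.array([15.9, 18.8, 21.6, 25.2, 28.7, 30.4, 30.7])
n = N.size
b, a = np.polyfit(N, y, 1)                 # slope, intercept
s2 = np.sum((y - (a + b*N))**2) / (n - 2)  # residual mean square
x0, Sxx = 90.0, np.sum((N - N.mean())**2)
t = stats.t.ppf(0.95, n - 2)               # two-sided 90% interval
for extra in (1.0, 0.0):                   # single plot, then the mean yield
    se = np.sqrt(s2 * (extra + 1/n + (x0 - N.mean())**2 / Sxx))
    print(a + b*x0 - t*se, a + b*x0 + t*se)
\end{verbatim}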

\end{document}

%