fit discrete traits
Fitting Discrete Markov Models
This notebook demonstrates fitting discrete-state, continuous-time Markov (Mk)
models and inferring ancestral states with `infer_ancestral_states_discrete_mk()`.
In [ ]:
Copied!
import toytree

# Build the example tree used by every cell below. The fixed seed makes the
# randomly generated tree the same on every run.
tree = toytree.rtree.unittree(
    12,
    seed=2,
    treeheight=1.0,
)
import toytree

# Build the example tree used by every cell below. The fixed seed makes the
# randomly generated tree the same on every run.
tree = toytree.rtree.unittree(
    12,
    seed=2,
    treeheight=1.0,
)
Simulate tip data and fit a model
In [ ]:
Copied!
# Simulate one replicate of a 3-state trait under the 'ER' model. The trait is
# named 'state' and only tip values are requested (tips_only=True).
traits = toytree.pcm.simulate_discrete_data(
    tree=tree,
    model='ER',
    nstates=3,
    trait_name='state',
    nreplicates=1,
    tips_only=True,
)

# Fit the same 3-state 'ER' model to the simulated data. With inplace=False
# the results are used from the returned object rather than from the tree.
fit = tree.pcm.infer_ancestral_states_discrete_mk(
    data=traits,
    model='ER',
    nstates=3,
    inplace=False,
)

# Display the 'model_fit' entry of the returned results.
fit['model_fit']
# Simulate one replicate of a 3-state trait under the 'ER' model. The trait is
# named 'state' and only tip values are requested (tips_only=True).
traits = toytree.pcm.simulate_discrete_data(
    tree=tree,
    model='ER',
    nstates=3,
    trait_name='state',
    nreplicates=1,
    tips_only=True,
)

# Fit the same 3-state 'ER' model to the simulated data. With inplace=False
# the results are used from the returned object rather than from the tree.
fit = tree.pcm.infer_ancestral_states_discrete_mk(
    data=traits,
    model='ER',
    nstates=3,
    inplace=False,
)

# Display the 'model_fit' entry of the returned results.
fit['model_fit']
Inspect inferred states and probabilities
In [ ]:
Copied!
# Take the 'data' entry of the fit results (later cells read its 'state_anc'
# column by node index) and preview its first rows.
df = fit['data']
df.head()
# Take the 'data' entry of the fit results (later cells read its 'state_anc'
# column by node index) and preview its first rows.
df = fit['data']
df.head()
Plot inferred states on the tree
In [ ]:
Copied!
# Ancestral state recorded for each node, looked up by node index below.
states = df['state_anc']

# Use a simple palette for three states; gray marks nodes with no state.
palette = ['#4c78a8', '#f58518', '#54a24b']

# Detect missing values with pandas rather than by comparing str(value) to
# 'nan', so that None and pd.NA are also treated as missing instead of
# reaching int() and raising.
has_state = states.notna()
node_colors = [
    palette[int(states[i])] if has_state[i] else '#c7c7c7'
    for i in range(tree.nnodes)
]

# Draw the tree with one color per node and display the canvas.
c, a, m = tree.draw(
    layout='r',
    node_colors=node_colors,
    node_sizes=14,
    tip_labels=True,
    scale_bar=True,
)
c
# Ancestral state recorded for each node, looked up by node index below.
states = df['state_anc']

# Use a simple palette for three states; gray marks nodes with no state.
palette = ['#4c78a8', '#f58518', '#54a24b']

# Detect missing values with pandas rather than by comparing str(value) to
# 'nan', so that None and pd.NA are also treated as missing instead of
# reaching int() and raising.
has_state = states.notna()
node_colors = [
    palette[int(states[i])] if has_state[i] else '#c7c7c7'
    for i in range(tree.nnodes)
]

# Draw the tree with one color per node and display the canvas.
c, a, m = tree.draw(
    layout='r',
    node_colors=node_colors,
    node_sizes=14,
    tip_labels=True,
    scale_bar=True,
)
c
Plot posterior probabilities as pie markers
Use `add_node_pie_markers` with the posterior probabilities stored on the tree
(the `state_anc_posterior` feature) to visualize ancestral state uncertainty at each node.
In [ ]:
Copied!
# Run the fit again with inplace=True so the results are stored on the tree;
# the pie markers below read the 'state_anc_posterior' feature from it.
out = tree.pcm.infer_ancestral_states_discrete_mk(
    data=traits,
    model='ER',
    nstates=3,
    inplace=True,
)

# Draw the tree first, then add the pie markers to the axes it returned.
c, a, m = tree.draw(
    layout='d',
    scale_bar=True,
    tip_labels=True,
)
tree.annotate.add_node_pie_markers(
    a,
    'state_anc_posterior',
    mask=False,
    istroke_width=1,
    size=10,
)
c
# Run the fit again with inplace=True so the results are stored on the tree;
# the pie markers below read the 'state_anc_posterior' feature from it.
out = tree.pcm.infer_ancestral_states_discrete_mk(
    data=traits,
    model='ER',
    nstates=3,
    inplace=True,
)

# Draw the tree first, then add the pie markers to the axes it returned.
c, a, m = tree.draw(
    layout='d',
    scale_bar=True,
    tip_labels=True,
)
tree.annotate.add_node_pie_markers(
    a,
    'state_anc_posterior',
    mask=False,
    istroke_width=1,
    size=10,
)
c
Parameter effects
Below are short examples showing how key fitting parameters influence inference.
In [ ]:
Copied!
import numpy as np

# Example: pass a fixed 3x3 rate matrix (zeros on the diagonal) to an 'ARD' fit.
rates = np.array([
    [0, 2.0, 0.5],
    [1.0, 0, 0.2],
    [0.8, 1.5, 0],
])
fit_rates = tree.pcm.infer_ancestral_states_discrete_mk(
    data=traits,
    model='ARD',
    nstates=3,
    fixed_rates=rates,
    inplace=False,
)

# Display the model-fit summary for the fixed-rates run.
fit_rates['model_fit']
import numpy as np

# Example: pass a fixed 3x3 rate matrix (zeros on the diagonal) to an 'ARD' fit.
rates = np.array([
    [0, 2.0, 0.5],
    [1.0, 0, 0.2],
    [0.8, 1.5, 0],
])
fit_rates = tree.pcm.infer_ancestral_states_discrete_mk(
    data=traits,
    model='ARD',
    nstates=3,
    fixed_rates=rates,
    inplace=False,
)

# Display the model-fit summary for the fixed-rates run.
fit_rates['model_fit']
In [ ]:
Copied!
# Example: fix the state frequencies to one value per state (they sum to 1)
# instead of leaving them to the fit.
freqs = np.array([0.7, 0.2, 0.1])
fit_freqs = tree.pcm.infer_ancestral_states_discrete_mk(
    data=traits,
    model='ER',
    nstates=3,
    fixed_state_frequencies=freqs,
    inplace=False,
)

# Display the model-fit summary for the fixed-frequencies run.
fit_freqs['model_fit']
# Example: fix the state frequencies to one value per state (they sum to 1)
# instead of leaving them to the fit.
freqs = np.array([0.7, 0.2, 0.1])
fit_freqs = tree.pcm.infer_ancestral_states_discrete_mk(
    data=traits,
    model='ER',
    nstates=3,
    fixed_state_frequencies=freqs,
    inplace=False,
)

# Display the model-fit summary for the fixed-frequencies run.
fit_freqs['model_fit']
In [ ]:
Copied!
# Example: supply a root prior that puts most of its weight (0.90) on the
# third state.
root_prior = np.array([0.05, 0.05, 0.90])
fit_root = tree.pcm.infer_ancestral_states_discrete_mk(
    data=traits,
    model='ER',
    nstates=3,
    root_prior=root_prior,
    inplace=False,
)

# Display the model-fit summary for the root-prior run.
fit_root['model_fit']
# Example: supply a root prior that puts most of its weight (0.90) on the
# third state.
root_prior = np.array([0.05, 0.05, 0.90])
fit_root = tree.pcm.infer_ancestral_states_discrete_mk(
    data=traits,
    model='ER',
    nstates=3,
    root_prior=root_prior,
    inplace=False,
)

# Display the model-fit summary for the root-prior run.
fit_root['model_fit']
In [ ]:
Copied!
# Example: scale overall rate
# Fit the same 'ER' model twice, changing only rate_scalar, so that the two
# model-fit summaries can be compared directly.
fit_slow = tree.pcm.infer_ancestral_states_discrete_mk(
    data=traits,
    nstates=3,
    model='ER',
    rate_scalar=0.2,
    inplace=False,
)
fit_fast = tree.pcm.infer_ancestral_states_discrete_mk(
    data=traits,
    nstates=3,
    model='ER',
    rate_scalar=2.0,
    inplace=False,
)

# A notebook cell only displays its final expression, so a bare
# fit_slow['model_fit'] on an earlier line would be silently discarded.
# Return both summaries in one labeled expression instead.
{
    'rate_scalar=0.2': fit_slow['model_fit'],
    'rate_scalar=2.0': fit_fast['model_fit'],
}
# Example: scale overall rate
# Fit the same 'ER' model twice, changing only rate_scalar, so that the two
# model-fit summaries can be compared directly.
fit_slow = tree.pcm.infer_ancestral_states_discrete_mk(
    data=traits,
    nstates=3,
    model='ER',
    rate_scalar=0.2,
    inplace=False,
)
fit_fast = tree.pcm.infer_ancestral_states_discrete_mk(
    data=traits,
    nstates=3,
    model='ER',
    rate_scalar=2.0,
    inplace=False,
)

# A notebook cell only displays its final expression, so a bare
# fit_slow['model_fit'] on an earlier line would be silently discarded.
# Return both summaries in one labeled expression instead.
{
    'rate_scalar=0.2': fit_slow['model_fit'],
    'rate_scalar=2.0': fit_fast['model_fit'],
}
Fossil constraints: with vs without
Compare inference with and without an observed state supplied for an internal node.
In [ ]:
Copied!
# Without fossil constraint: fit using only the simulated tip data.
base = tree.pcm.infer_ancestral_states_discrete_mk(
    data=traits,
    nstates=3,
    model='ER',
    inplace=False,
)

# With a fossil constraint on an internal node: extend the data to one entry
# per node index (entries absent from `traits` become missing), then set the
# entry for node tree[-2] to state 1.
fossil_series = traits.copy().reindex(range(tree.nnodes))
# Use the public Node.idx attribute rather than the private Node._idx.
fossil_series.loc[tree[-2].idx] = 1
fossil = tree.pcm.infer_ancestral_states_discrete_mk(
    data=fossil_series,
    nstates=3,
    model='ER',
    inplace=False,
)

# Plot a comparison of inferred states
palette = ['#4c78a8', '#f58518', '#54a24b']
def plot_states(df, title):
    """Draw the tree with each node colored by its 'state_anc' value.

    Reads the module-level ``tree`` (the tree to draw) and ``palette`` (one
    color per state), which must be defined before this is called.

    Parameters
    ----------
    df
        Table with a 'state_anc' column that is looked up by node index for
        every index in ``range(tree.nnodes)`` (presumably a pandas
        DataFrame, as returned under the 'data' key of a fit).
    title
        Text assigned to the label of the drawn axes.

    Returns
    -------
    The canvas returned by ``tree.draw``.
    """
    states = df['state_anc']
    # Detect missing values with pandas rather than by comparing str(value)
    # to 'nan', so that None and pd.NA are also drawn gray instead of
    # reaching int() and raising.
    has_state = states.notna()
    node_colors = [
        palette[int(states[i])] if has_state[i] else '#c7c7c7'
        for i in range(tree.nnodes)
    ]
    c, a, m = tree.draw(
        layout='r',
        node_colors=node_colors,
        node_sizes=12,
        tip_labels=True,
        scale_bar=True,
    )
    a.label.text = title
    return c
# Draw one canvas per fit, then display the unconstrained one; the
# constrained canvas (c_fossil) is displayed in the next cell.
c_base = plot_states(base['data'], 'no fossil constraint')
c_fossil = plot_states(fossil['data'], 'with fossil constraint')
c_base
# Without fossil constraint: fit using only the simulated tip data.
base = tree.pcm.infer_ancestral_states_discrete_mk(
    data=traits,
    nstates=3,
    model='ER',
    inplace=False,
)

# With a fossil constraint on an internal node: extend the data to one entry
# per node index (entries absent from `traits` become missing), then set the
# entry for node tree[-2] to state 1.
fossil_series = traits.copy().reindex(range(tree.nnodes))
# Use the public Node.idx attribute rather than the private Node._idx.
fossil_series.loc[tree[-2].idx] = 1
fossil = tree.pcm.infer_ancestral_states_discrete_mk(
    data=fossil_series,
    nstates=3,
    model='ER',
    inplace=False,
)

# Plot a comparison of inferred states
palette = ['#4c78a8', '#f58518', '#54a24b']
def plot_states(df, title):
    """Draw the tree with each node colored by its 'state_anc' value.

    Reads the module-level ``tree`` (the tree to draw) and ``palette`` (one
    color per state), which must be defined before this is called.

    Parameters
    ----------
    df
        Table with a 'state_anc' column that is looked up by node index for
        every index in ``range(tree.nnodes)`` (presumably a pandas
        DataFrame, as returned under the 'data' key of a fit).
    title
        Text assigned to the label of the drawn axes.

    Returns
    -------
    The canvas returned by ``tree.draw``.
    """
    states = df['state_anc']
    # Detect missing values with pandas rather than by comparing str(value)
    # to 'nan', so that None and pd.NA are also drawn gray instead of
    # reaching int() and raising.
    has_state = states.notna()
    node_colors = [
        palette[int(states[i])] if has_state[i] else '#c7c7c7'
        for i in range(tree.nnodes)
    ]
    c, a, m = tree.draw(
        layout='r',
        node_colors=node_colors,
        node_sizes=12,
        tip_labels=True,
        scale_bar=True,
    )
    a.label.text = title
    return c
# Draw one canvas per fit, then display the unconstrained one; the
# constrained canvas (c_fossil) is displayed in the next cell.
c_base = plot_states(base['data'], 'no fossil constraint')
c_fossil = plot_states(fossil['data'], 'with fossil constraint')
c_base
In [ ]:
Copied!
# Display the canvas that was drawn from the fossil-constrained fit.
c_fossil
# Display the canvas that was drawn from the fossil-constrained fit.
c_fossil