Shakespeare Social Networks

Othello using NetworkX

XML markup of Othello from PlayShakespeare.com

Download the code for this IPython Notebook

In [1]:
import networkx as nx
from lxml import etree
import itertools

import os

# Path to the PlayShakespeare.com XML edition of Othello.
# The default is the original author's local checkout; set the OTHELLO_XML
# environment variable to point at your own copy instead of editing this cell.
othello_file = os.environ.get(
    "OTHELLO_XML",
    "/Users/adampalay/projects/PlayShakespeare.com-XML/playshakespeare_editions/othello.xml",
)

%matplotlib inline
import matplotlib.pyplot as plt
In [2]:
# Let lxml open and decode the file itself: it reads the raw bytes and honours
# the XML encoding declaration. Reading the file as text first makes
# etree.fromstring() reject documents that carry an encoding declaration on
# Python 3, and ties decoding to the platform's default encoding.
othello_xml = etree.parse(othello_file).getroot()
In [3]:
# The <persona/> tag is going to have all the information
# we need about a character to make this graph.
# Let's take a look at what the XML looks like.
# next() grabs just the first <persona> without building a list of all of them.
persona = next(othello_xml.iterdescendants('persona'))
# encoding="unicode" makes tostring() return text rather than bytes, so the
# markup prints as readable XML (not a b'...' literal) on Python 3.
print(etree.tostring(persona, encoding="unicode"))
<persona gender="male">
<persname short="DUKE." numberOfLines="65" numberOfVerseLines="64" numberOfProseLines="1" numberOfLyricsLines="0">Duke of Venice</persname>

<persscenes numberOfScenes="1">
	<persscene>1.3</persscene>
</persscenes>
</persona>



In [4]:
# Helper that pulls the metadata we care about out of a single
# 'persona' XML element.
def extract_metadata(persona):
    """Return ``(name, gender, number_of_lines, scenes)`` for a <persona> element.

    * ``name`` is the text of the <persname> child with every run of
      whitespace replaced by a newline (one word per line).
    * ``gender`` is the element's ``gender`` attribute.
    * ``number_of_lines`` is the <persname> ``numberOfLines`` attribute as an int.
    * ``scenes`` is the set of texts of all descendant <persscene> elements.
    """
    persname = persona.find('persname')
    label = "\n".join(persname.text.split())
    sex = persona.attrib['gender']
    line_count = int(persname.attrib['numberOfLines'])
    scene_ids = {element.text for element in persona.iterdescendants('persscene')}
    return label, sex, line_count, scene_ids
In [5]:
def make_graph(play_xml, line_threshold=5):
    """Build an undirected co-appearance graph from a play's XML.

    Every <persona> descendant of ``play_xml`` with strictly more than
    ``line_threshold`` lines becomes a node keyed by the character's name,
    carrying the attributes ``gender``, ``number_of_lines`` and ``scenes``
    (a set of scene identifiers). Two characters are joined by an edge when
    they share at least one scene; the edge's ``weight`` is the number of
    scenes they have in common.

    Parameters:
        play_xml: parsed XML element whose descendants include <persona> tags.
        line_threshold: characters with this many lines or fewer are left
            out to avoid noise. Defaults to 5.

    Returns:
        The populated ``networkx.Graph``.
    """
    G = nx.Graph()
    for persona in play_xml.iterdescendants('persona'):
        name, gender, number_of_lines, scenes = extract_metadata(persona)

        # To avoid noise, only keep characters who have more than
        # `line_threshold` lines.
        if number_of_lines > line_threshold:
            G.add_node(
                name,
                gender=gender,
                number_of_lines=number_of_lines,
                scenes=scenes
            )

    # Once we've created the nodes, we can straightforwardly create
    # edges between them by iterating over all combinations
    # of two nodes.
    for (node1, data1), (node2, data2) in itertools.combinations(G.nodes(data=True), 2):
        # Since each node's 'scenes' value is a set, we can easily count
        # how many scenes two characters have in common by counting
        # how many elements are in the sets' intersection.
        scenes_together = len(data1['scenes'] & data2['scenes'])
        if scenes_together:
            G.add_edge(node1, node2, weight=scenes_together)

    return G
In [6]:
# Build the character graph for Othello from the parsed XML.
G = make_graph(othello_xml)
In [7]:
# Which characters are the most central?
# "Degree centrality" is a normalized count of the edges attached
# to a node. In this graph an edge means "shares at least one scene",
# so the score reflects how many other characters someone appears
# in a scene with. Show the seven highest-scoring characters.
sorted(
    nx.degree_centrality(G).items(),
    key=lambda pair: pair[1],
    reverse=True,
)[:7]
Out[7]:
[('Desdemona', 1.0),
 ('Iago', 1.0),
 ('Othello', 1.0),
 ('Roderigo', 0.9375),
 ('Cassio', 0.8125),
 ('Emilia', 0.75),
 ('Montano', 0.625)]
In [8]:
# Time to draw a pretty picture of Othello's social network.

# Per-node styling, listed in the same order the graph yields its nodes:
# size each node by the character's number of lines ...
node_size = [attrs['number_of_lines'] for _name, attrs in G.nodes(data=True)]
# ... and color it by gender (blue for male, red otherwise).
node_color = ['red' if attrs['gender'] != 'male' else 'blue' for _name, attrs in G.nodes(data=True)]

# Compute node positions; the 'k' argument determines how spaced out
# the nodes will be from one another.
pos = nx.spring_layout(G, k=0.5)

plt.figure(figsize=(13, 8))  # a little larger than the default figure
plt.title("Othello's Social Network", fontsize=20)
plt.axis('off')  # the axis isn't meaningful for a network drawing

nx.draw_networkx(
    G,
    pos=pos,
    node_color=node_color,
    node_size=node_size,
    font_size=14,
    edge_color='gray',  # draw edges in gray
    alpha=0.3,  # more transparency so the labels are easier to read
)