# Shakespeare Social Networks

### Othello using NetworkX

XML markup of Othello from PlayShakespeare.com

In [1]:
import networkx as nx
from lxml import etree
import itertools

%matplotlib inline
import matplotlib.pyplot as plt

In [2]:
# Parse the play's XML markup and keep its root element, which the
# following cells traverse with ``iterdescendants``.
# NOTE(review): ``othello_file`` (the path to the Othello XML file) is not
# defined in the visible cells -- confirm it is set before this cell runs.
# The file is opened in binary mode so the XML parser can honor the
# encoding declared inside the document itself.
with open(othello_file, 'rb') as f:
    othello_xml = etree.parse(f).getroot()

In [3]:
# The <persona/> tag is going to have all the information
# we need about a character to make this graph.
# Let's take a look at what the XML looks like.
# Only the first <persona/> is needed, so take it straight from the
# iterator instead of building a list of every persona in the play.
persona = next(othello_xml.iterdescendants('persona'))
# Ask for text rather than bytes so the markup prints as readable XML
# (on Python 3 the default bytes result would print as b'...').
print(etree.tostring(persona, encoding='unicode'))

<persona gender="male">
<persname short="DUKE." numberOfLines="65" numberOfVerseLines="64" numberOfProseLines="1" numberOfLyricsLines="0">Duke of Venice</persname>

</persona>


In [4]:
# Now we create a function to extract the metadata we want from
# the 'persona' XML tag.
def extract_metadata(persona):
    """Extract the character metadata stored on a <persona/> element.

    Args:
        persona: a <persona/> element carrying a ``gender`` attribute and a
            <persname/> child that has a ``numberOfLines`` attribute.

    Returns:
        A ``(name, gender, number_of_lines, scenes)`` tuple: the character
        name with its words joined by newlines, the ``gender`` attribute,
        the line count as an ``int``, and the set of text values of all
        <persscene/> descendants.
    """
    persname = persona.find('persname')
    # Put each word of the name on its own line (presumably so that long
    # names stay compact when used as node labels -- TODO confirm).
    name = "\n".join(persname.text.split())
    gender = persona.attrib['gender']
    number_of_lines = int(persname.attrib['numberOfLines'])
    # A set, so that scene membership can later be compared with set algebra.
    scenes = set(scene.text for scene in persona.iterdescendants('persscene'))
    return name, gender, number_of_lines, scenes

In [5]:
def make_graph(play_xml):
    """Build a graph of the characters who share scenes in a play.

    Args:
        play_xml: an XML element whose <persona/> descendants describe the
            play's characters (see ``extract_metadata``).

    Returns:
        An undirected ``networkx.Graph`` with one node per character who has
        more than 5 lines. Each node stores ``gender``, ``number_of_lines``
        and ``scenes``; two nodes are connected when the characters share at
        least one scene, and the edge's ``weight`` is the number of shared
        scenes.
    """
    G = nx.Graph()
    for persona in play_xml.iterdescendants('persona'):
        name, gender, number_of_lines, scenes = extract_metadata(persona)

        # To avoid noise, let's only consider characters who
        # have more than 5 lines.
        if number_of_lines > 5:
            G.add_node(
                name,
                gender=gender,
                number_of_lines=number_of_lines,
                scenes=scenes
            )

    # Once we've created the nodes, we can straightforwardly create
    # edges between them by iterating over all combinations
    # of two nodes.
    for (node1, data1), (node2, data2) in itertools.combinations(G.nodes(data=True), 2):
        # Since each node's 'scenes' value is a set, we can easily count
        # how many scenes two characters have in common by counting
        # how many elements are in the sets' intersection.
        scenes_together = len(data1['scenes'] & data2['scenes'])
        if scenes_together:
            # Only connect characters who actually appear together.
            G.add_edge(node1, node2, weight=scenes_together)

    return G

In [6]:
# Build the character graph for Othello from the parsed XML.
G = make_graph(othello_xml)

In [7]:
# Which characters are the most central in this graph?
# "Degree centrality" is a normalized measure of how many edges
# connect to a node; here that means how many other characters a
# character appears in a scene with.
# Rank the (character, centrality) pairs from highest to lowest and
# show the top seven.
sorted(
    nx.degree_centrality(G).items(),
    key=lambda pair: pair[1],
    reverse=True,
)[:7]

Out[7]:
[('Desdemona', 1.0),
('Iago', 1.0),
('Othello', 1.0),
('Roderigo', 0.9375),
('Cassio', 0.8125),
('Emilia', 0.75),
('Montano', 0.625)]

In [8]:
# Now for the good stuff: a pretty picture of Othello's social network.

# Set up the canvas first: a slightly larger figure, a title, and no
# axes (they carry no meaning for a network layout).
plt.figure(figsize=(13, 8))
plt.title("Othello's Social Network", fontsize=20)
plt.axis('off')

# Size each node by the character's number of lines.
node_size = [attrs['number_of_lines'] for _name, attrs in G.nodes(data=True)]

# Color each node by the character's gender: blue for male, red otherwise.
node_color = [
    'red' if attrs['gender'] != 'male' else 'blue'
    for _name, attrs in G.nodes(data=True)
]

# 'k' controls how far apart the layout spaces the nodes.
pos = nx.spring_layout(G, k=0.5)

nx.draw_networkx(
    G,
    pos=pos,
    node_size=node_size,
    node_color=node_color,
    edge_color='gray',  # draw the edges in gray
    alpha=0.3,  # extra transparency keeps the labels legible
    font_size=14,
)