# import the Python packages that we will need to use later on
import json
from collections import Counter
import matplotlib.pyplot as plt


# every data file for this novel sits in the directory below
from pathlib import Path
dir_pride = Path("data") / "pride_prejudice"


# locate the metadata file inside the novel's directory
novel_meta_path = dir_pride / "metadata.json"
# read the UTF-8 encoded file and parse its JSON content in one go
with novel_meta_path.open(mode="r", encoding="utf-8") as meta_file:
    novel_metadata = json.load(meta_file)


# the "novel" record holds the title and year, the "author" record the name parts;
# each record is fetched right before it is printed
novel_info = novel_metadata["novel"]
print("Title is", novel_info["title"])
print("Publication year is", novel_info["year"])
author_info = novel_metadata["author"]
print("Author is", author_info["firstname"], author_info["surname"])


# the character dictionary is stored in the same directory as the metadata
character_path = dir_pride / "characters.json"
# read the UTF-8 encoded file and parse its JSON content
with character_path.open(mode="r", encoding="utf-8") as character_file:
    characters = json.load(character_file)


# one dictionary entry per character
character_total = len(characters)
print("Novel has %d characters" % character_total)


# the dictionary keys are the definitive character names
print("All character definitive names:")
definitive_names = list(characters.keys())
print(definitive_names)


# fetch one character's record once and reuse it for both printouts
print("Aliases for Elizabeth Bennet:")
elizabeth = characters["elizabeth bennet"]
print(elizabeth["aliases"])


print("Attributes for Elizabeth Bennet:")
print(elizabeth["attributes"])


# count the character records whose attributes include "female"
female_count = sum(
    1 for record in characters.values() if "female" in record["attributes"]
)
# and, separately, those whose attributes include "male"
male_count = sum(
    1 for record in characters.values() if "male" in record["attributes"]
)
# report both tallies on a single line
print("%d female characters, %d male characters" % (female_count, male_count))


# tally every attribute occurrence across all of the character records
counts = Counter(
    attribute
    for record in characters.values()
    for attribute in record["attributes"]
)
# list the 20 most frequent attributes, one "count <tab> attribute" row each
print("Top-20 most common character attributes for this book are:")
for name, total in counts.most_common(20):
    print("%d \t %s" % (total, name))


required_attributes = ["mother", "father", "wife", "husband", "son", "daughter", "brother", "sister"]


# look up the tally for each required attribute, keeping the list order;
# counts is a Counter, so an attribute that never occurred yields 0
required_counts = [counts[name] for name in required_attributes]
# draw the tallies as a bar chart
plt.figure(figsize=(9, 5))
bars = plt.bar(required_attributes, required_counts, color="purple")
# label both axes, then render the figure
plt.xlabel("Attribute", fontsize=13)
plt.ylabel("Number of Characters", fontsize=13)
plt.show()


# each novel keeps its files in its own directory under "data"
dir_pride = Path("data") / "pride_prejudice"
dir_dracula = Path("data") / "dracula"
dir_frankenstein = Path("data") / "frankenstein"


# load the character dictionary of the first novel
character_path_pride = dir_pride / "characters.json"
characters_pride = json.loads(character_path_pride.read_text(encoding="utf-8"))
# load the character dictionary of the second novel
character_path_dracula = dir_dracula / "characters.json"
characters_dracula = json.loads(character_path_dracula.read_text(encoding="utf-8"))
# load the character dictionary of the third novel
character_path_frankenstein = dir_frankenstein / "characters.json"
characters_frankenstein = json.loads(character_path_frankenstein.read_text(encoding="utf-8"))


# chart labels, and the size of each novel's character dictionary in the same order
novel_names = ["Pride and Prejudice", "Dracula", "Frankenstein"]
total_character_counts = [
    len(cast)
    for cast in (characters_pride, characters_dracula, characters_frankenstein)
]
# draw one bar per novel
plt.figure(figsize=(8, 5))
bars = plt.bar(novel_names, total_character_counts, color="navy")
# label both axes, then render the figure
plt.xlabel("Novel", fontsize=13)
plt.ylabel("Number of Characters", fontsize=13)
plt.show()


# create a simple function to calculate the female-to-male ratio
def calc_ratio(characters):
    """Return the female-to-male character ratio for one novel.

    Args:
        characters: mapping from definitive character name to a record
            whose "attributes" entry supports membership tests.

    Returns:
        The number of records with a "female" attribute divided by the
        number with a "male" attribute. When no record has "male" the
        ratio is undefined, so instead of raising ZeroDivisionError the
        function returns ``float("inf")`` if at least one record has
        "female", and ``float("nan")`` if neither attribute occurs
        (for example, an empty dictionary).

    Raises:
        KeyError: if a character record has no "attributes" entry.
    """
    records = characters.values()
    female_count = sum(1 for record in records if "female" in record["attributes"])
    male_count = sum(1 for record in records if "male" in record["attributes"])
    if male_count == 0:
        # guard the division: a novel without male characters must not crash
        return float("inf") if female_count else float("nan")
    return female_count / male_count

# compute the ratio for every novel, in the same order as novel_names;
# the list name is reused from the previous chart but now holds ratios, not counts
total_character_counts = [
    calc_ratio(cast)
    for cast in (characters_pride, characters_dracula, characters_frankenstein)
]

# draw one bar per novel showing its ratio
plt.figure(figsize=(8, 5))
bars = plt.bar(novel_names, total_character_counts, color="teal")
# label both axes, then render the figure
plt.xlabel("Novel", fontsize=13)
plt.ylabel("Ratio of Female-to-Male Characters", fontsize=13)
plt.show()

Worksheet 3: Working with Novel Metadata (SOLUTION)

Task 1: Reading Basic Novel Metadata

Task 2: Reading a Character Dictionary

Task 3: Counting Character Attributes

Bonus Task: Comparing Novel Metadata