In [ ]:
...
# Placeholders (Ellipsis) to be filled in. Per the cell comments below,
# `head` is the beginning of the content, `tail` its final characters and
# `text` the entire contents.
text = ...
head = ...
tail = ...
In [ ]:
# We can inspect the beginning of the content
print(head)
In [ ]:
# final 25,000 characters
print(tail)
In [ ]:
# entire contents
print(text)
In [ ]:
# Placeholders (Ellipsis) to be filled in; the next cell strips each series
# and joins the three into `poems_text`.
series1 = ...
series2 = ...
series3 = ...
In [ ]:
# Strip each series in order, then glue the three pieces together with a
# separator of six consecutive newline characters.
poems_text = ('\n' * 6).join(
    section.strip() for section in (series1, series2, series3)
)
In [ ]:
# Preview the first 445 and the last 275 characters of the joined text,
# with a '---' divider printed between the two excerpts.
print(poems_text[:445])
print('\n---\n')
print(poems_text[-275:])
In [ ]:
# Show the first 200 characters, followed by '...' to mark the truncation.
print(poems_text[:200], '...')
In [ ]:
# Confirm that "[" and "]" each occur exactly once in the text
assert (poems_text.count("[") == 1) and (poems_text.count("]") == 1)
In [ ]:
# Placeholder (Ellipsis) for the cleaned text; to be filled in.
poems_text_clean = ...
In [ ]:
# Show the first 100 characters of the cleaned text.
print(poems_text_clean[:100], '...')
In [ ]:
def startswith_rn(s: str) -> bool:
    '''
    Returns: True if `s` starts with a roman numeral and False otherwise

    A roman numeral here means a well-formed, upper-case numeral (I, IV,
    XIV, ...) that is immediately followed by a period. Requiring the period
    is what keeps ordinary words that merely begin with a numeral letter
    (e.g. the pronoun "I") from being counted.

    Ex:
    startswith_rn("III. NATURE.") -> True
    startswith_rn("I'm nobody! Who are you?") -> False
    '''
    # Local import keeps this cell self-contained.
    import re

    # The lookahead demands at least one numeral letter, because every group
    # of the classic thousands/hundreds/tens/units pattern may match empty.
    numeral = (
        r'(?=[IVXLCDM])'
        r'M{0,3}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})'
        r'\.'
    )
    # re.match only matches at the beginning of the string.
    return re.match(numeral, s) is not None
In [ ]:
# Invoke the grader's check for "q2.4" (`grader` is defined outside this excerpt).
grader.check("q2.4")
In [ ]:
# Placeholder (Ellipsis) to be filled in. Presumably the text with the
# roman-numeral headings removed; confirm against the assignment.
poems_text_nonum = ...
In [ ]:
# Show the first 500 characters.
print(poems_text_nonum[:500])
In [ ]:
from pprint import pprint
In [ ]:
# pprint shows the string's repr, so newlines appear as "\n" escapes
# instead of being rendered as line breaks.
pprint(poems_text_nonum[:2000])
In [ ]:
# Placeholder (Ellipsis) to be filled in; the cells below index it and
# take its length.
poem_list = ...
In [ ]:
print(poem_list[109])
In [ ]:
print(f"There are a total of {len(poem_list)} poems.")
In [ ]:
# helper functions
def _title_number(key: str, title: str):
    '''
    Returns: 1 if `key` is exactly `title`, N if `key` is "`title` (N)",
    and None if `key` is not a (possibly numbered) copy of `title`
    '''
    if key == title:
        return 1
    prefix = f'{title} ('
    if key.startswith(prefix) and key.endswith(')'):
        digits = key[len(prefix):-1]
        if digits.isdecimal():
            return int(digits)
    return None


# Answers: what number should I increment to having now seen a duplicate?
def next_title_num(x: str) -> int:
    '''
    Returns: N + 1 if title `x` ends with a " (N)" suffix, otherwise 2
    Ex:
    next_title_num("HOPE.") -> 2
    next_title_num("HOPE. (10)") -> 11
    '''
    _, sep, tail = x.rpartition(' (')
    # Parse the whole number (not just one character) and convert it to int
    # before adding, so multi-digit suffixes work and no str + int occurs.
    if sep and tail.endswith(')') and tail[:-1].isdecimal():
        return int(tail[:-1]) + 1
    return 2


# Answers: what was the number of the previous poem with this title?
def prev_title(d: dict, k: str) -> str:
    '''
    Returns: the key in `d` that is title `k` or "`k` (N)" with the largest N
    Raises: KeyError if `d` holds no key for title `k`
    '''
    best_key, best_num = None, -1
    for key in d:
        num = _title_number(key, k)
        # Compare numerically: as strings "(10)" would sort before "(2)".
        if num is not None and num > best_num:
            best_key, best_num = key, num
    if best_key is None:
        raise KeyError(k)
    return best_key


def update(d: dict, k: str, v: str) -> None:
    '''
    Adds key-value pair 'k' & 'v' to dictionary 'd'
    Uses helper functions to increment key string if key already exists
    Dictionary is changed inplace; Returns None.
    '''
    # Membership test rather than d.get(k): an existing title whose stored
    # poem is an empty string must still count as a duplicate.
    if k in d:
        k = f'{k} ({next_title_num(prev_title(d, k))})'
    d[k] = v
In [ ]:
# A line counts as an editor title when it ends with '.' and str.isupper()
# holds (at least one cased character, none of them lowercase).
is_editor_title = lambda x: x.endswith('.') and x.isupper()
has_editor_title = lambda x: is_editor_title(x.split('\n')[0]) # check 1st line for editor title
d = {}
for p in poem_list:
# first line will always be the key
# NOTE: p.index raises ValueError when the poem contains no newline
k = p[:p.index('\n')]
if has_editor_title(p):
# find string that should be the value (poem minus title)
# YOUR CODE HERE
v = ...
# add new pair to dictionary
# update function handles altering the key if necessary
# (i.e., incrementing the numerical suffix of the title)
update(d, k, v)
In [ ]:
# Look up the poem stored under the key 'HOPE.'
print(d['HOPE.'])
In [ ]:
# Characters to strip when counting words: every non-alphabetic character
# found at the end of a whitespace-separated, lower-cased token in any poem.
punc = {
    last_char
    for last_char in (
        token[-1]
        for poem in d.values()
        for token in poem.lower().split()
    )
    if not last_char.isalpha()
}
punc
In [ ]:
# Placeholder (Ellipsis) to be filled in. Presumably the most frequent words
# across the poems once the characters in `punc` are removed; confirm
# against the assignment.
top_words = ...
In [ ]:
top_words
In [ ]:
class PoetryCollection():
"""
A titled collection of poems together with the name of their author.

Attributes
----------
author : str
full name of author
collection : dict
dictionary of (title, poem) key-value pairs
size : int
number of poems in collection
Methods
-------
random_poem(seed: int = None) -> str
returns random poem; use seed for reproducibility (default seed=None)
"""
# NOTE(review): the `...` below is an exercise placeholder; presumably the
# constructor header that receives the values assigned to the three
# attributes belongs here. Confirm against the assignment.
...
self.author = ...
self.collection = ...
self.size = ...
def random_poem(self, seed : int = None) -> str:
# Build a fresh generator from `seed` and draw one of the collection's
# values, converted to a plain str before returning.
# NOTE(review): `np` is not imported in this excerpt; presumably NumPy is
# imported elsewhere in the notebook. Confirm.
rng = np.random.default_rng(seed)
return str(rng.choice(list(self.collection.values())))
In [ ]:
# Placeholder (Ellipsis) to be filled in; the cells below read its `author`
# and `size` attributes and call its `random_poem` method.
poems = ...
In [ ]:
print('Author:', poems.author)
In [ ]:
print('Number of poems in collection:', poems.size)
In [ ]:
# A fixed seed is passed so the same poem is drawn on every run.
print(poems.random_poem(seed=109))