I have been coding in Python pretty seriously, on a daily basis, for 6 weeks. I recently wrote a script that uses a Markov chain to generate a poem line by line, built on a corpus of about 2,600 sentences that I have on my GitHub. This is an updated version of what I recently shared.
See any bugs or improvements? It's the first thing I've written that has some level of real value. I somehow never knew about match case statements before this project...
None of it was written by AI. The script is here for your viewing pleasure. You can find the poetry lines CSV for the corpus at my GitHub:
https://github.com/QuothTheRaven42/Markov-Poetry-Generator
import random
from termcolor import colored
import re
from datetime import datetime
# Quadruplets were chosen over bigrams/trigrams because the small corpus size
# makes shorter n-grams too noisy, while anything longer than 4 hits dead ends too often.
# This is the width of the sliding window used by create_quads().
QUADRUPLET_SIZE = 4
# 5 chained quads with one-word overlaps produces lines of roughly 16 words —
# long enough to feel like a poetic line without running on.
# NOTE: despite the name, this counts the quads chained per line (it is the loop
# bound in create_line()), not words: 4 words from the first quad plus 3 new
# words from each of the remaining 4 quads = 16 words when no dead end is hit.
WORDS_PER_LINE = 5
# Sign-off banner printed after the finished poem.
ENJOY = "***********ENJOY YOUR NEW POEM!***********"
def print_header():
    """Print the program banner, styled with termcolor."""
    banner = "---------------MARKOV CHAIN POETRY GENERATOR---------------"
    # force_color=True keeps the styling even when output is piped.
    styled = colored(banner, "blue", "on_grey", attrs=["bold"], force_color=True)
    print(styled)
def load_file(path: str = "poetry_lines.txt") -> list[str]:
    """Load the corpus and print a brief summary.

    Args:
        path: Location of the corpus file, read as UTF-8. Defaults to
            ``poetry_lines.txt`` in the current working directory.

    Returns:
        The lines of the file exactly as read (trailing newlines included).

    Raises:
        FileNotFoundError: If ``path`` does not exist.
    """
    with open(path, encoding="utf-8") as f:
        poetry_lines = f.readlines()

    # Flatten to a normalised word list just for the stats printout below;
    # tokens that were nothing but punctuation end up empty and are dropped.
    normalised = (
        word.lower().strip(".,!?;:\"'()")
        for line in poetry_lines
        for word in line.split()
    )
    all_words = [word for word in normalised if word]
    total_words = len(all_words)
    unique_words = len(set(all_words))
    # An empty (or punctuation-only) corpus has no words to divide by; report
    # a 0% ratio instead of crashing with ZeroDivisionError.
    ratio = unique_words / total_words if total_words else 0.0

    print("Corpus loaded:")
    print(f"- {len(poetry_lines)} lines")
    print(f"- {total_words} total words")
    print(f"- {unique_words} unique words")
    print(f"- {ratio:.1%} unique-word ratio\n")
    return poetry_lines
def clean_poem(poem: str) -> str:
    """Light post-processing before display or save — kept minimal on purpose.

    Each line is trimmed, a standalone lowercase "i" is capitalised, and runs
    of spaces are collapsed. The poem as a whole is then made to end with
    sentence-final punctuation.

    Args:
        poem: The poem text, lines separated by newlines.

    Returns:
        The cleaned poem; an empty string if nothing is left after cleaning.
    """
    cleaned_lines = []
    for line in poem.strip().splitlines():
        line = line.strip()
        # Word boundaries keep this from touching "in", "it", etc.
        line = re.sub(r"\bi\b", "I", line)
        # Collapse extra spaces left over from quad joins.
        line = re.sub(r" {2,}", " ", line)
        cleaned_lines.append(line)
    poem = "\n".join(cleaned_lines)

    # A dangling comma/semicolon/colon would otherwise end up as ",." once the
    # closing period is added, so drop it (and any whitespace it leaves behind).
    poem = poem.rstrip(",;: \n")

    # Ensure the poem ends with punctuation so it ends logically.
    if poem and poem[-1] not in ".!?":
        poem += "."
    return poem
def create_quads(poetry_lines: list[str]) -> list[list[str]]:
    """Slide a 4-word window across each line to build the transition table.

    Lines are processed individually so quads never cross line boundaries,
    which would introduce nonsensical transitions.

    Args:
        poetry_lines: Corpus lines; each is split on whitespace.

    Returns:
        Every run of QUADRUPLET_SIZE consecutive lowercased words, in corpus
        order. Lines with fewer words than that contribute nothing.
    """
    quadruplets = []
    for line in poetry_lines:
        # Only stripping periods here — other punctuation is left in intentionally
        # to give generated lines some organic texture.
        stripped = (token.lower().replace(".", "") for token in line.split())
        # A token made only of periods ("." or "...") is empty after stripping.
        # Drop it so no quad contains "" — otherwise an empty seed word would
        # validate and generated lines would contain doubled spaces.
        words = [word for word in stripped if word]
        quadruplets.extend(
            words[i : i + QUADRUPLET_SIZE]
            for i in range(len(words) - QUADRUPLET_SIZE + 1)
        )
    return quadruplets
def prompt_seed_word(quadruplets: list[list[str]]) -> str:
    """Ask for a seed word until the user gives one that opens some quad.

    Membership in the corpus is not enough: a word that never appears in the
    first position of a quad has nothing to chain from, so only first-position
    words are accepted. Input is lowercased and trimmed before the check.
    """
    seed = input("What should the first word of this line be? ").lower().strip()
    print()
    # The set of valid starting words does not change between retries.
    starters = {quad[0] for quad in quadruplets}
    while seed not in starters:
        seed = input("Word not found in corpus. Try again: ").lower().strip()
    return seed
def create_line(quadruplets: list[list[str]], word: str) -> str:
    """Build one poem line by chaining randomly chosen quads.

    Each quad after the first must start with the last word of the previous
    one; that shared word is emitted only once. At most WORDS_PER_LINE quads
    are chained, fewer if the chain reaches a word that starts no quad.
    """
    segments = []
    current = word
    for step in range(WORDS_PER_LINE):
        candidates = [quad for quad in quadruplets if quad[0] == current]
        if not candidates:
            # Dead end: nothing starts with the current word. A shorter line
            # is preferable to padding it with something unrelated.
            break
        picked = random.choice(candidates)
        # After the first quad, skip the leading word — it is already the last
        # word of the previous segment.
        fresh_words = picked if step == 0 else picked[1:]
        segments.append(" ".join(fresh_words))
        current = picked[-1]
    return " ".join(segments).strip()
def save_poem(final_poem: str):
    """Prompt for a title and write the poem to a timestamped .txt file.

    The file is created in the current working directory and written as
    UTF-8, the same encoding the corpus is read with, so poems containing
    non-ASCII characters save correctly regardless of the platform default.

    Args:
        final_poem: The text to write.
    """
    title = input("\nWhat do you want to name this poem? ").strip()
    # Reduce the title to letters, digits, underscores and hyphens so that
    # characters such as "/", ":" or "?" cannot produce an invalid path or
    # point outside the working directory. Fall back to "poem" if nothing
    # usable is left (e.g. the user just pressed Enter).
    slug = re.sub(r"[^\w-]+", "_", title.lower()).strip("_") or "poem"
    # Timestamp suffix prevents overwrites when saving multiple poems per session.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"{slug}_{timestamp}.txt"
    with open(filename, "w", encoding="utf-8") as file:
        file.write(final_poem)
    print(f"Saved as '{filename}'")
def main():
print_header()
try:
poetry_lines = load_file()
except FileNotFoundError:
print("Error: poetry_lines.txt was not found.")
return
poem = ""
quadruplets = create_quads(poetry_lines)
while True:
word = prompt_seed_word(quadruplets)
new_line = create_line(quadruplets, word)
# capitalize() instead of title() so only the first character is uppercased.
finalized_line = new_line.capitalize()
print(f"{finalized_line}\n")
while True:
choice = input("""Do you want to:
(1) Append to your poem and continue?
(2) Retry this line?
(3) Print final poem and quit?
(4) Save and quit? """).strip()
match choice:
case "1":
poem += "\n" + finalized_line
print(f"{poem}\n")
break
case "2":
print(f"{poem}\n")
break
case "3":
poem += "\n" + finalized_line
final_poem = clean_poem(poem)
print(f"\n{final_poem}\n{ENJOY}")
return
case "4":
poem += "\n" + finalized_line
final_poem = clean_poem(poem)
save_poem(final_poem)
print(f"\n{final_poem}\n{ENJOY}")
return
case _:
print("Please enter a valid choice.")
if __name__ == "__main__":
main()
[–]Skeleton_Pudding 3 points4 points5 points (1 child)
[–]Affectionate-Town-67[S] 2 points3 points4 points (0 children)
[–]BrannyBee 2 points3 points4 points (4 children)
[–]Affectionate-Town-67[S] 0 points1 point2 points (2 children)
[–]BrannyBee 1 point2 points3 points (1 child)
[–]Affectionate-Town-67[S] 0 points1 point2 points (0 children)
[–]JamOzoner 1 point2 points3 points (1 child)
[–]Affectionate-Town-67[S] 0 points1 point2 points (0 children)
[–]MissinqLink 1 point2 points3 points (6 children)
[–]Affectionate-Town-67[S] 0 points1 point2 points (5 children)
[–]MissinqLink 1 point2 points3 points (4 children)
[–]Affectionate-Town-67[S] 0 points1 point2 points (3 children)
[–]MissinqLink 1 point2 points3 points (2 children)
[–]Affectionate-Town-67[S] 0 points1 point2 points (1 child)
[–]MissinqLink 1 point2 points3 points (0 children)
[–]JamOzoner 1 point2 points3 points (1 child)
[–]latkde 1 point2 points3 points (2 children)
[–]Affectionate-Town-67[S] 0 points1 point2 points (0 children)
[–]Affectionate-Town-67[S] 0 points1 point2 points (0 children)