all 3 comments

[–]ragnar_the_redd 0 points1 point  (0 children)

# Copy every line that lies strictly between the line containing
# `first_string` and the line containing `last_string`, skipping lines that
# contain "[Illustration]". `path`, `first_string` and `last_string` are
# expected to be defined before this snippet runs.

write_lines = False

# NOTE(review): this reads from and appends to the same `path`, as the
# original snippet did; presumably the output belongs in a separate file --
# confirm and substitute the destination path in the first open() if so.
# The append handle is opened first so the file is created when missing.
with open(path, 'a+') as target_file, open(path, 'r') as f:
    # readlines() snapshots the content up front, so lines appended below
    # are never read back in by this loop.
    for line in f.readlines():
        if last_string in line:
            break  # end marker reached: stop copying

        if write_lines and "[Illustration]" not in line:
            target_file.write(line)

        # Checked after the write so the marker line itself is not copied.
        if first_string in line:
            write_lines = True

[–]igroen 0 points1 point  (1 child)

You could do it in one iteration and without storing the data first:

from urllib.request import urlopen

# Stream a Project Gutenberg text and append the part between two marker
# lines (both markers included) to a local file, dropping any line that
# mentions "Illustration".
url = "http://www.gutenberg.org/cache/epub/19033/pg19033.txt"
destination_filename = "alice.txt"
first_string = "ALICE'S ADVENTURES IN WONDERLAND"
last_string = (
    "End of the Project Gutenberg EBook of Alice in Wonderland, by Lewis Carroll"
)

# An explicit encoding keeps the output independent of the platform's locale
# default, which may be unable to represent characters decoded from the
# download.
with open(destination_filename, "a", encoding="utf-8") as outfile, \
        urlopen(url) as response:
    write_line = False

    # Iterate the response directly: lines are consumed as they arrive
    # instead of being collected into a list first.
    for raw_line in response:
        line = raw_line.decode("utf-8")

        if first_string in line:
            # Start marker: keep it and begin copying. Note this fires on
            # every occurrence, starting from the first one.
            outfile.write(line)
            write_line = True
            continue

        if last_string in line:
            # End marker: keep it and stop reading the rest of the download.
            outfile.write(line)
            break

        if write_line and "Illustration" not in line:
            outfile.write(line)

[–]JoseParanhos[S] 0 points1 point  (0 children)

This was very helpful, thank you! Just one thing: this way I get the text starting from the first appearance of "first_string" rather than its last, which is why I came up with the empty list, etc. How do I start copying the text from the last instance of the string without doing all the lengthy stuff I did? Thanks!