Friday, September 25, 2009

Chapter 5 Listing 5.4

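Here is a corrected version of Listing 5.4 for Python 3. See the notes in the other listings for an explanation of the new urllib.request module and of why decode is called on the result of each readline call (in Python 3, readline on the opened URL returns bytes rather than a string).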


import urllib.request

def countHead(url):
    """Print the body lines of the HTML page at *url* and report line counts.

    The page is read one line at a time and split into sections by looking
    for tag markers:

    * every line before the one containing ``</head>`` is counted as a
      header line;
    * lines are then skipped until one containing ``<body>`` is found;
    * from that line up to (but not including) the line containing
      ``</body>``, each line is counted as a body line and printed without
      its final character (normally the trailing newline).

    Both counts are printed at the end. Nothing is returned.

    NOTE(review): the tag literals were missing from the published listing
    (each sentinel appeared as an empty string, which makes every
    ``not in`` test False so no loop ever runs). The three tags used here
    are reconstructed from the loop structure -- confirm against the book.

    Args:
        url: Address of the page to read; anything ``urllib.request.urlopen``
            accepts.

    Raises:
        urllib.error.URLError: If the page cannot be opened.
        UnicodeDecodeError: If a line contains non-ASCII bytes.
    """
    numHeadLines = 0
    numBodyLines = 0

    # The response object is a context manager, so the connection is closed
    # even if decoding a line raises part-way through.
    with urllib.request.urlopen(url) as page:
        line = page.readline().decode('ascii')

        # readline() yields '' at end of file; without the explicit EOF
        # check a page lacking the tag would loop forever.
        while line != "" and "</head>" not in line:
            numHeadLines = numHeadLines + 1
            line = page.readline().decode('ascii')

        # Skip everything between the end of the header and the body tag.
        while line != "" and "<body>" not in line:
            line = page.readline().decode('ascii')

        while line != "" and "</body>" not in line:
            numBodyLines = numBodyLines + 1
            print(line[:-1])
            line = page.readline().decode('ascii')

    print("number of lines in header = ", numHeadLines)
    print("number of lines in body = ", numBodyLines)

# Run the demo only when this file is executed as a script, so that importing
# it does not trigger a network request.
if __name__ == "__main__":
    countHead("http://knuth.luther.edu/python/test.html")

No comments:

Post a Comment