import urllib.request
def countHead(url):
    """Print a page's body lines and report head/body line counts.

    The page at *url* is read one line at a time, in three phases:

    1. Every line up to (not including) the line containing ``</head>`` is
       counted as a header line.
    2. Lines are skipped until one containing ``<body>`` is found.
    3. Starting with that ``<body>`` line, every line up to (not including)
       the line containing ``</body>`` is printed and counted as a body line.

    Each phase also stops at end of input, so a page that lacks one of the
    tags terminates instead of looping forever. The two totals are printed
    at the end.

    Args:
        url: Address of the page to open with ``urllib.request.urlopen``.

    Returns:
        None. All results are written to standard output.

    Raises:
        urllib.error.URLError: If the page cannot be opened.
        UnicodeDecodeError: If a line contains non-ASCII bytes.
    """
    # NOTE(review): these tag markers are a reconstruction. With empty-string
    # markers every ``marker not in line`` test is False (the empty string is
    # a substring of every string) and no loop body can ever run. Confirm the
    # exact tags against the intended listing.
    head_end = "</head>"
    body_start = "<body>"
    body_end = "</body>"

    numHeadLines = 0
    numBodyLines = 0

    # The context manager closes the connection even if a read/decode fails.
    with urllib.request.urlopen(url) as page:

        def read_line():
            # readline() yields bytes; b"" (decoded to "") signals EOF.
            return page.readline().decode('ascii')

        # Phase 1: count header lines.
        line = read_line()
        while line != "" and head_end not in line:
            numHeadLines += 1
            line = read_line()

        # Phase 2: skip whatever sits between the head and the body.
        while line != "" and body_start not in line:
            line = read_line()

        # Phase 3: echo and count body lines.
        while line != "" and body_end not in line:
            numBodyLines += 1
            # Drop only a real trailing newline, so a final line without one
            # does not lose its last character.
            print(line.rstrip("\n"))
            line = read_line()

    print("number of lines in header = ", numHeadLines)
    print("number of lines in body = ", numBodyLines)
# Demo: run countHead on a sample page fetched over HTTP.
countHead("http://knuth.luther.edu/python/test.html")
Friday, September 25, 2009
Chapter 5 Listing 5.4
Here is a corrected version of Listing 5.4. See the notes in the other listings for an explanation of the new urllib.request module and of why decode is called on the result of each readline call.
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment