I am trying to scrape a website, and this is the code I have, but I am getting the error shown below. Can anyone help with this?
Python:
import urllib2
from bs4 import BeautifulSoup
from urllib import URLopener
from urllib import FancyURLopener
import traceback, sys

headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Origin': 'http://www.nrega.nic.in/netnrega/home.aspx',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Referer': 'http://mnregaweb4.nic.in/netnrega/all_lvl_details_dashboard_new.aspx',
    'Accept-Encoding': 'gzip,deflate,sdch',
    'Accept-Language': 'en-US,en;q=0.8',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3'
}

class MyOpener(FancyURLopener, object):
    version = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17'

myopener = MyOpener()
#urllib2.urlcleanup()
url = myopener.retrieve('http://mnregaweb4.nic.in/netnrega/all_lvl_details_dashboard_new.aspx')

# first HTTP request without form data
f = urllib2.urlopen(url)
#traceback.print_exc()

def run_user_code(envdir):
    source = raw_input(">>> ")
    try:
        exec source in envdir
    except:
        print "Exception in user code:"
        print '-' * 60
        traceback.print_exc(file=sys.stdout)
        print '-' * 60

envdir = {}
while 1:
    run_user_code(envdir)

soup = BeautifulSoup(f)
# parse and retrieve two vital form values
viewstate = soup.findAll("input", {"type": "hidden", "name": "__VIEWSTATE"})
eventvalidation = soup.findAll("input", {"type": "hidden", "name": "__EVENTVALIDATION"})
print viewstate[0]['value']

formData = (
    ('__EVENTVALIDATION', eventvalidation),
    ('__VIEWSTATE', viewstate),
    ('__VIEWSTATEENCRYPTED', ''),
    ('TextBox1', '106110006'),
    ('Button1', 'Show'),
)

encodedFields = urllib2.urlencode(formData)
# second HTTP request with form data
f = myopener.open(url, encodedFields)

try:
    # actually we'd better use BeautifulSoup once again to
    # retrieve results (instead of writing out the whole HTML file)
    # Besides, since the result is split into multiple pages,
    # we need to send more HTTP requests
    fout = open('census.html', 'w')
except:
    print('Could not open output file\n')

fout.writelines(f.readlines())
fout.close()
Code:
Traceback (most recent call last):
File "C:\Python27\nerga - Copy.py", line 25, in <module>
f = urllib2.urlopen(url)
File "C:\Python27\lib\urllib2.py", line 154, in urlopen
return opener.open(url, data, timeout)
File "C:\Python27\lib\urllib2.py", line 420, in open
req.timeout = timeout
AttributeError: 'tuple' object has no attribute 'timeout'
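From the traceback it looks like the problem may be that MyOpener.retrieve() returns a (filename, headers) tuple rather than a URL string, so urllib2.urlopen() ends up being handed a tuple. Is something like the sketch below (just my guess, reusing the same URL and a cut-down headers dict from above) the right way to make the first request with urllib2 directly?
Python:
import urllib2
from bs4 import BeautifulSoup

# same page as above
url = 'http://mnregaweb4.nic.in/netnrega/all_lvl_details_dashboard_new.aspx'

# build the request from the URL string itself, not from the tuple
# that MyOpener.retrieve() returns
req = urllib2.Request(url, headers={
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.17 '
                  '(KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17',
})
f = urllib2.urlopen(req)

# parse the response and pull out the hidden __VIEWSTATE field
soup = BeautifulSoup(f.read())
viewstate = soup.findAll("input", {"type": "hidden", "name": "__VIEWSTATE"})
print viewstate[0]['value']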
Thanks
Thejaswini