- #1
- 19,464
- 10,078
- TL;DR Summary
- Making a very basic web scraper and I have the error line 48 'str' object is not callable, and I can't figure it out.
Learning Python and this is a first attempt at a project.
Python:
import urllib
import requests
from bs4 import BeautifulSoup, Comment
# desktop user-agent
USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:65.0) Gecko/20100101 Firefox/65.0"
# mobile user-agent
MOBILE_USER_AGENT = "Mozilla/5.0 (Linux; Android 7.0; SM-G930V Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.125 Mobile Safari/537.36"
getkeyword = "plugin"
query = "wordpress plugin"
query = query.replace(' ', '+')
URL = f"https://google.com/search?q={query}"
headers = {"user-agent": USER_AGENT}
resp = requests.get(URL, headers=headers)
if resp.status_code == 200:
soup = BeautifulSoup(resp.content, "html.parser")
results = []
links= []
for g in soup.find_all('div', class_='r'):
anchors = g.find_all('a')
if anchors:
links.append(anchors[0]['href'])
else:
print("Google may have blocked you, try again in an hour")
i=0
for num in links:
URL = links[i]
resp = requests.get(URL, headers=headers)
if resp.status_code == 200:
soup = BeautifulSoup(resp.content, "html.parser")
if soup.find('title'):
title = soup.find('title').text
if soup.find('h1'):
h1 = soup.find('h1').text
if soup.find('h2'):
h2 = soup.find('h2').text
bodytext = soup.find('body').text
print("#" + str(i+1) + ": " + links[i])
if title:
print("Title: " + title)
if h1:
print("H1: " + h1)
if h2:
print("H2: " + h2)
if getkeyword in bodytext:
print = ("Keyword: Yes")
else:
print = ("Keyword: No")
for comments in soup.findAll(text=lambda text:isinstance(text, Comment)):
getComments = comments.extract()
if "Yoast" in getComments:
print("Yoast: Yes")
break
print("\r\r")
else:
print("Site is unavailable")
break
i += 1
del links