J'essaie d'utiliser le fichier betbrain.py de GitHub (https://github.com/gto76/betbrain-scraper), qui contient le code suivant :
#!/usr/bin/python3
#
# Usage: betbrain.py [URL or FILE] [OUTPUT-FILE]
# Scrapes odds from passed betbrain page and writes them to
# stdout, or file if specified.
import os
import sys
import urllib.request
from bs4 import BeautifulSoup
from http.cookiejar import CookieJar
import parser_betbrain
import printer
# Page scraped when the script is run without a URL or file argument.
DEFAULT_URL = 'https://www.betbrain.com/football/england/premier-league/#!/matches/'
# If no arguments are present, it parses the default page.
# Argument can be an URL or a local file.
def main():
    """Fetch the page, parse the matches, and emit them as text.

    The input source and the optional output file are both taken from
    ``sys.argv`` (see ``getHtml`` and ``output``).
    """
    # Parse whatever getHtml() hands back with the stdlib HTML parser.
    soup = BeautifulSoup(getHtml(sys.argv), "html.parser")
    matches = parser_betbrain.getMatches(soup)
    # Render the matches and send the text to stdout or the output file.
    output(printer.matchesToString(matches), sys.argv)
def getHtml(argv):
    """Return the HTML source selected by the command-line arguments.

    Args:
        argv: Argument list in ``sys.argv`` layout; ``argv[1]``, when
            present, is either a URL or a path to a local file.

    Returns:
        Whatever ``scrap`` or ``readFile`` returns for the chosen source.
    """
    if len(argv) <= 1:
        # No argument given: fall back to the default page.
        return scrap(DEFAULT_URL)
    elif argv[1].startswith("http"):
        # Anything starting with "http" (http:// or https://) is fetched.
        return scrap(argv[1])
    else:
        # Everything else is treated as a local file path.
        return readFile(argv[1])
# Returns html file located at URL.
def scrap(url):
    """Open *url* with a cookie-aware opener and return the response.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response object returned by ``opener.open(url)``.

    On a malformed URL (``ValueError``) or a failed request
    (``urllib.error.URLError``, which includes SSL certificate
    verification failures), the problem is reported through ``error``,
    which prints the message to stderr and exits with status 1.
    """
    # Imported by name so the module-level ``urllib`` binding is not shadowed.
    from urllib.error import URLError

    cj = CookieJar()
    opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
    try:
        return opener.open(url)
    except ValueError:
        error("Invalid URL: " + url)
    except URLError as err:
        # Network, HTTP and SSL failures surface here; report them the same
        # way as every other failure instead of leaking a raw traceback.
        error("Could not fetch " + url + ": " + str(err))
def readFile(path):
    """Open the local file at *path* as UTF-8 text and return the file object.

    The handle is returned still open; the caller consumes it.
    If the file cannot be opened, the failure is reported through
    ``error``, which prints the message to stderr and exits with status 1.
    """
    try:
        handle = open(path, encoding='utf8')
    except IOError:
        error("Invalid input filename: " + path)
    else:
        return handle
def output(string, argv):
    """Send *string* to the output file named in *argv*, or to stdout.

    Args:
        string: Text to emit.
        argv: Argument list in ``sys.argv`` layout; ``argv[2]``, when
            present, is the output file path.
    """
    if len(argv) > 2:
        # An output file was given on the command line.
        writeFile(argv[2], string)
        return
    print(string)
def writeFile(path, string):
    """Write *string* to the file at *path* as UTF-8, replacing its contents.

    Args:
        path: Destination file path.
        string: Text to write.

    If the file cannot be opened or written (``IOError``), the failure is
    reported through ``error``, which prints the message to stderr and
    exits with status 1.
    """
    try:
        # The context manager closes the file even when write() raises.
        with open(path, "w", encoding='utf8') as fo:
            fo.write(string)
    except IOError:
        error("Invalid output filename: " + path)
def error(msg):
    """Print *msg* to stderr, prefixed with this script's file name, and exit.

    The process terminates with exit status 1; this function never returns.
    """
    script_name = os.path.basename(__file__)
    print(script_name + ": " + msg, file=sys.stderr)
    sys.exit(1)
# Run main() only when the file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
Cependant, lorsque je l'exécute, il échoue avec l'erreur suivante :
Traceback (most recent call last):
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py", line 1318, in do_open
encode_chunked=req.has_header('Transfer-encoding'))
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py", line 1239, in request
self._send_request(method, url, body, headers, encode_chunked)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py", line 1285, in _send_request
self.endheaders(body, encode_chunked=encode_chunked)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py", line 1234, in endheaders
self._send_output(message_body, encode_chunked=encode_chunked)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py", line 1026, in _send_output
self.send(msg)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py", line 964, in send
self.connect()
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py", line 1400, in connect
server_hostname=server_hostname)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/ssl.py", line 401, in wrap_socket
_context=self, _session=session)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/ssl.py", line 808, in __init__
self.do_handshake()
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/ssl.py", line 1061, in do_handshake
self._sslobj.do_handshake()
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/ssl.py", line 683, in do_handshake
self._sslobj.do_handshake()
ssl.SSLError: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed (_ssl.c:749)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/Users/Daniel/Desktop/betbrain-scraper-master 2/betbrain.py", line 71, in <module>
main()
File "/Users/Daniel/Desktop/betbrain-scraper-master 2/betbrain.py", line 22, in main
html = getHtml(sys.argv)
File "/Users/Daniel/Desktop/betbrain-scraper-master 2/betbrain.py", line 30, in getHtml
return scrap(DEFAULT_URL)
File "/Users/Daniel/Desktop/betbrain-scraper-master 2/betbrain.py", line 41, in scrap
return opener.open(url)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py", line 526, in open
response = self._open(req, data)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py", line 544, in _open
'_open', req)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py", line 504, in _call_chain
result = func(*args)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py", line 1361, in https_open
context=self._context, check_hostname=self._check_hostname)
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py", line 1320, in do_open
raise URLError(err)
urllib.error.URLError: <urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed (_ssl.c:749)>
Comment puis-je résoudre ce problème? J'exécute Python 3.6.0 sur MacOS 10.12.1
Ouvrez un terminal et regardez:
/Applications/Python 3.6/Install Certificates.command
Python 3.6 sur MacOS utilise une version intégrée d'OpenSSL, qui n'utilise pas le magasin de certificats système. Plus de détails ici .
(Pour être explicite : les utilisateurs de macOS peuvent probablement résoudre le problème en ouvrant le Finder et en double-cliquant sur « Install Certificates.command ».)
J'ai exécuté ceci sur MacOS /Applications/Python\ 3.6/Install\ Certificates.command
Sur CentOS Linux, Python3.6, j'ai édité ce fichier (faites d'abord une copie de sauvegarde)
/usr/lib/python3.6/site-packages/certifi/cacert.pem
À la fin de ce fichier, j'ai ajouté mon certificat public, copié depuis mon fichier .pem. Vous devriez pouvoir obtenir ce fichier .pem auprès de votre fournisseur de certificat SSL.
Voici comment je l'ai corrigé:
nltk.download()
La fenêtre graphique de téléchargement s'ouvre et tous les paquets sont installés.