Salut les gars,
Je suis nouveau sur Python/Anaconda/Jupyter/NumPy, pandas, etc. À chaque fois que j'exécute le code ci-dessous, je reçois une erreur HTTP 500 à la fin. Est-ce vraiment un problème de serveur (comme le suggère le code 500) ou est-ce que je fais quelque chose de mal ?
Entrée dans jupyter:
from sklearn.datasets import fetch_mldata
# Request the 'MNIST original' dataset; this is the call that raises the HTTPError in the traceback.
mnist = fetch_mldata('MNIST original')
Résultat:
---------------------------------------------------------------------------
HTTPError Traceback (most recent call last)
<ipython-input-1-15dc285fb373> in <module>()
1 from sklearn.datasets import fetch_mldata
----> 2 mnist = fetch_mldata('MNIST original')
e:\ProgramData\Anaconda3\lib\site-packages\sklearn\datasets\mldata.py in fetch_mldata(dataname, target_name, data_name, transpose_data, data_home)
140 urlname = MLDATA_BASE_URL % quote(dataname)
141 try:
--> 142 mldata_url = urlopen(urlname)
143 except HTTPError as e:
144 if e.code == 404:
e:\ProgramData\Anaconda3\lib\urllib\request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
221 else:
222 opener = _opener
--> 223 return opener.open(url, data, timeout)
224
225 def install_opener(opener):
e:\ProgramData\Anaconda3\lib\urllib\request.py in open(self, fullurl, data, timeout)
530 for processor in self.process_response.get(protocol, []):
531 meth = getattr(processor, meth_name)
--> 532 response = meth(req, response)
533
534 return response
e:\ProgramData\Anaconda3\lib\urllib\request.py in http_response(self, request, response)
640 if not (200 <= code < 300):
641 response = self.parent.error(
--> 642 'http', request, response, code, msg, hdrs)
643
644 return response
e:\ProgramData\Anaconda3\lib\urllib\request.py in error(self, proto, *args)
562 http_err = 0
563 args = (dict, proto, meth_name) + args
--> 564 result = self._call_chain(*args)
565 if result:
566 return result
e:\ProgramData\Anaconda3\lib\urllib\request.py in _call_chain(self, chain, kind, meth_name, *args)
502 for handler in handlers:
503 func = getattr(handler, meth_name)
--> 504 result = func(*args)
505 if result is not None:
506 return result
e:\ProgramData\Anaconda3\lib\urllib\request.py in http_error_302(self, req, fp, code, msg, headers)
754 fp.close()
755
--> 756 return self.parent.open(new, timeout=req.timeout)
757
758 http_error_301 = http_error_303 = http_error_307 = http_error_302
e:\ProgramData\Anaconda3\lib\urllib\request.py in open(self, fullurl, data, timeout)
530 for processor in self.process_response.get(protocol, []):
531 meth = getattr(processor, meth_name)
--> 532 response = meth(req, response)
533
534 return response
e:\ProgramData\Anaconda3\lib\urllib\request.py in http_response(self, request, response)
640 if not (200 <= code < 300):
641 response = self.parent.error(
--> 642 'http', request, response, code, msg, hdrs)
643
644 return response
e:\ProgramData\Anaconda3\lib\urllib\request.py in error(self, proto, *args)
568 if http_err:
569 args = (dict, 'default', 'http_error_default') + orig_args
--> 570 return self._call_chain(*args)
571
572 # XXX probably also want an abstract factory that knows when it makes
e:\ProgramData\Anaconda3\lib\urllib\request.py in _call_chain(self, chain, kind, meth_name, *args)
502 for handler in handlers:
503 func = getattr(handler, meth_name)
--> 504 result = func(*args)
505 if result is not None:
506 return result
e:\ProgramData\Anaconda3\lib\urllib\request.py in http_error_default(self, req, fp, code, msg, hdrs)
648 class HTTPDefaultErrorHandler(BaseHandler):
649 def http_error_default(self, req, fp, code, msg, hdrs):
--> 650 raise HTTPError(req.full_url, code, msg, hdrs, fp)
651
652 class HTTPRedirectHandler(BaseHandler):
HTTPError: HTTP Error 500: INTERNAL SERVER ERROR
J'ai également eu la même erreur et j'ai dû désactiver le pare-feu. Sur le Macbook, sélectionnez Préférences Système> Sécurité et confidentialité> Pare-feu> Désactiver le pare-feu.
from sklearn.datasets import fetch_mldata

try:
    mnist = fetch_mldata('MNIST original')
except Exception:
    # fetch_mldata failed for any reason: fall back to a copy of the same
    # .mat file hosted on GitHub and rebuild the dictionary by hand.
    import os

    from scipy.io import loadmat
    from six.moves import urllib

    mnist_path = os.path.join(".", "datasets", "mnist-original.mat")

    # open(..., "wb") does not create missing folders, so make sure the
    # "datasets" directory exists before writing into it.
    mnist_dir = os.path.dirname(mnist_path)
    if not os.path.isdir(mnist_dir):
        os.makedirs(mnist_dir)

    # download dataset from github.
    mnist_alternative_url = "https://github.com/amplab/datascience-sp14/raw/master/lab7/mldata/mnist-original.mat"
    response = urllib.request.urlopen(mnist_alternative_url)
    try:
        # Read everything first so a failed download never leaves an
        # empty or truncated file on disk.
        content = response.read()
    finally:
        response.close()
    with open(mnist_path, "wb") as f:
        f.write(content)

    mnist_raw = loadmat(mnist_path)
    mnist = {
        "data": mnist_raw["data"].T,
        "target": mnist_raw["label"][0],
        "COL_NAMES": ["label", "data"],
        "DESCR": "mldata.org dataset: mnist-original",
    }
    print("Done!")
J'ai aussi la même erreur que toi. Voici quelques solutions possibles qui ne nécessitent pas ce serveur.
Si vous avez installé TensorFlow, vous pouvez obtenir les données MNIST de la manière suivante :
from tensorflow.examples.tutorials.mnist import input_data

# Load MNIST through TensorFlow's tutorial helper.
# NOTE(review): "MNIST" is presumably the local directory used for the data files; confirm.
m = input_data.read_data_sets("MNIST")
Ensuite, par exemple, len(m.train.images) vaut 55000.
Si vous n'avez pas tensorflow, vous pouvez obtenir cet ensemble de données en suivant les instructions ici .
J'ai trouvé une bonne solution ici: https://github.com/Lasagne/Lasagne/blob/master/examples/mnist.py
Il télécharge le jeu de données depuis le site web de Yann LeCun ( http://yann.lecun.com/exdb/mnist/ ).
import os
try:
    from urllib.request import urlretrieve  # Python 3
except ImportError:
    from urllib import urlretrieve  # Python 2

import numpy as np
def download(filename, source='http://yann.lecun.com/exdb/mnist/'):
    """Download ``source + filename`` and save it locally as ``filename``.

    Parameters
    ----------
    filename : str
        Name of the remote file; also used as the local destination path.
    source : str
        URL prefix that ``filename`` is appended to.
    """
    print("Downloading %s" % filename)
    urlretrieve(source + filename, filename)
# We then define functions for loading MNIST images and labels.
# For convenience, they also download the requested files if needed.
import gzip
def load_mnist_images(filename):
    """Load an MNIST image file, downloading it first if it is missing.

    Parameters
    ----------
    filename : str
        Path of the gzip-compressed image file in Yann LeCun's binary format.

    Returns
    -------
    numpy.ndarray
        float32 array shaped (examples, 1, 28, 28) with values in
        [0, 255/256].
    """
    if not os.path.exists(filename):
        download(filename)
    # Read the inputs in Yann LeCun's binary format.
    with gzip.open(filename, 'rb') as f:
        # The first 16 bytes are skipped; every following byte is one pixel.
        data = np.frombuffer(f.read(), np.uint8, offset=16)
    # The inputs are vectors now, we reshape them to monochrome 2D images,
    # following the shape convention: (examples, channels, rows, columns)
    data = data.reshape(-1, 1, 28, 28)
    # The inputs come as bytes, we convert them to float32 in range [0,1].
    # (Actually to range [0, 255/256], for compatibility to the version
    # provided at http://deeplearning.net/data/mnist/mnist.pkl.gz.)
    return data / np.float32(256)
def load_mnist_labels(filename):
    """Load an MNIST label file, downloading it first if it is missing.

    Parameters
    ----------
    filename : str
        Path of the gzip-compressed label file in Yann LeCun's binary format.

    Returns
    -------
    numpy.ndarray
        One-dimensional uint8 array holding one label per example.
    """
    if not os.path.exists(filename):
        download(filename)
    # Read the labels in Yann LeCun's binary format.
    with gzip.open(filename, 'rb') as f:
        # The first 8 bytes are skipped; every following byte is one label.
        data = np.frombuffer(f.read(), np.uint8, offset=8)
    # The labels are vectors of integers now, that's exactly what we want.
    return data
# Load the training and test images/labels; each call downloads its file
# first when it is not already present locally.
X_train = load_mnist_images('train-images-idx3-ubyte.gz')
y_train = load_mnist_labels('train-labels-idx1-ubyte.gz')
X_test = load_mnist_images('t10k-images-idx3-ubyte.gz')
y_test = load_mnist_labels('t10k-labels-idx1-ubyte.gz')
Voici un autre emplacement pour télécharger le jeu de données MNIST (référencé depuis https://github.com/ageron/handson-ml/blob/master/03_classification.ipynb ).
from six.moves import urllib
from sklearn.datasets import fetch_mldata

try:
    mnist = fetch_mldata('MNIST original')
except urllib.error.HTTPError:
    print("Could not download MNIST data from mldata.org, trying alternative...")

    # Alternative method to load MNIST, if mldata.org is down
    from scipy.io import loadmat

    mnist_alternative_url = "https://github.com/amplab/datascience-sp14/raw/master/lab7/mldata/mnist-original.mat"
    mnist_path = "./mnist-original.mat"
    response = urllib.request.urlopen(mnist_alternative_url)
    try:
        # Read everything first so a failed download never leaves an
        # empty or truncated file on disk.
        content = response.read()
    finally:
        response.close()
    with open(mnist_path, "wb") as f:
        f.write(content)

    # Rebuild the same keys the rest of the snippet expects from `mnist`.
    mnist_raw = loadmat(mnist_path)
    mnist = {
        "data": mnist_raw["data"].T,
        "target": mnist_raw["label"][0],
        "COL_NAMES": ["label", "data"],
        "DESCR": "mldata.org dataset: mnist-original",
    }
    print("Success!")
J'ai trouvé cette solution sur https://github.com/ageron/handson-ml/issues/7 et celle-ci m'a été très utile. Il suffit de télécharger le fichier à partir de https://github.com/amplab/datascience-sp14/raw/master/lab7/mldata/mnist-original.mat
après cela, utilisez ce script:
from scipy.io import loadmat
mnist_path = "my/local/path/mnist-original.mat" # set this to the path where the downloaded file is located
# Read the .mat file and repackage its "data" and "label" entries into a dictionary.
mnist_raw = loadmat(mnist_path)
mnist = {
"data": mnist_raw["data"].T,
"target": mnist_raw["label"][0],
"COL_NAMES": ["label", "data"],
"DESCR": "mldata.org dataset: mnist-original",
}
print("Success!")
Ceci est pour Python 3.6 :
import os
from urllib.request import urlretrieve
import numpy as np
def download(filename, source='http://yann.lecun.com/exdb/mnist/'):
    """Fetch ``filename`` from under ``source`` and store it under the same name.

    Parameters
    ----------
    filename : str
        Remote file name, reused as the local destination path.
    source : str
        Base URL that ``filename`` is appended to.
    """
    print("Downloading %s" % filename)
    urlretrieve(source + filename, filename)
# We then define functions for loading MNIST images and labels.
# For convenience, they also download the requested files if needed.
import gzip
def load_mnist_images(filename):
    """Return the images stored in ``filename`` as a float32 array.

    The file is downloaded first when it does not exist locally.

    Parameters
    ----------
    filename : str
        Path of the gzip-compressed image file in Yann LeCun's binary format.

    Returns
    -------
    numpy.ndarray
        Array shaped (examples, 1, 28, 28), dtype float32, values in
        [0, 255/256].
    """
    if not os.path.exists(filename):
        download(filename)
    # Read the inputs in Yann LeCun's binary format.
    with gzip.open(filename, 'rb') as f:
        # offset=16 skips the leading 16 bytes; the rest is one byte per pixel.
        data = np.frombuffer(f.read(), np.uint8, offset=16)
    # The inputs are vectors now, we reshape them to monochrome 2D images,
    # following the shape convention: (examples, channels, rows, columns)
    data = data.reshape(-1, 1, 28, 28)
    # The inputs come as bytes, we convert them to float32 in range [0,1].
    # (Actually to range [0, 255/256], for compatibility to the version
    # provided at http://deeplearning.net/data/mnist/mnist.pkl.gz.)
    return data / np.float32(256)
def load_mnist_labels(filename):
    """Return the labels stored in ``filename`` as a uint8 vector.

    The file is downloaded first when it does not exist locally.

    Parameters
    ----------
    filename : str
        Path of the gzip-compressed label file in Yann LeCun's binary format.

    Returns
    -------
    numpy.ndarray
        One-dimensional uint8 array, one entry per example.
    """
    if not os.path.exists(filename):
        download(filename)
    # Read the labels in Yann LeCun's binary format.
    with gzip.open(filename, 'rb') as f:
        # offset=8 skips the leading 8 bytes; the rest is one byte per label.
        data = np.frombuffer(f.read(), np.uint8, offset=8)
    # The labels are vectors of integers now, that's exactly what we want.
    return data
# Load the training and test images/labels; each call downloads its file
# first when it is not already present locally.
X_train = load_mnist_images('train-images-idx3-ubyte.gz')
y_train = load_mnist_labels('train-labels-idx1-ubyte.gz')
X_test = load_mnist_images('t10k-images-idx3-ubyte.gz')
y_test = load_mnist_labels('t10k-labels-idx1-ubyte.gz')
En retard pour la fête, mais j'ai eu la même erreur et ma solution simple était d'exécuter les deux commandes séparément, comme:
from sklearn import datasets
puis assurez-vous d'exécuter la commande suivante dans une cellule séparée du notebook Jupyter :
# Same fetch as before, but with data_home pointing at the local 'datasets/' folder.
mnist_data = datasets.fetch_mldata('MNIST original', data_home='datasets/')