python - BaseHttpServer returned code 404 with the Cyrillic alphabet -
I have the following problem.
I am using BaseHTTPServer.
import BaseHTTPServer
import os


class reqhandler(BaseHTTPServer.BaseHTTPRequestHandler):
    """HTTP handler that serves files from the current working directory."""

    def __init__(self, request, client_address, server):
        BaseHTTPServer.BaseHTTPRequestHandler.__init__(
            self, request, client_address, server)

    def do_GET(self):
        """Handle a GET request by serving the file named by the URL path.

        The method name must be spelled ``do_GET``: the base class
        dispatches on ``'do_' + <HTTP method>`` and the lookup is
        case-sensitive.
        """
        # NOTE: self.path is the raw request path and is still
        # percent-encoded here. It is only UTF-8 decoded, not URL-unquoted,
        # so a non-ASCII path (which a browser sends as %D0%A2...) will not
        # match any file name on disk and ends in a 404.
        self.performreq(self.path.decode('utf-8'))

    def performreq(self, req):
        """Send the file addressed by *req* to the client, or a 404.

        :param req: request path as a unicode string, starting with '/'.
        """
        root = os.getcwdu()
        # Resolve against the served directory and refuse anything that
        # escapes it (e.g. "/../../etc/passwd").
        fname = os.path.abspath(os.path.join(root, req.lstrip('/')))
        if not fname.startswith(os.path.join(root, u'')):
            self.send_error(404)
            return

        # Open the file before emitting any status line, so that a missing
        # file yields a clean 404 instead of a 404 body after a 200 header.
        try:
            f = open(fname, 'rb')
        except IOError:
            print('no file ' + fname)
            self.send_error(404)
            return

        with f:  # closes the file even if writing to the socket fails
            self.send_response(200, "ok!")
            self.send_header('Content-Type', 'text/xml; charset=utf-8')
            self.end_headers()
            for chunk in f:
                self.wfile.write(chunk)
        print('file ' + fname + " ok")


if __name__ == '__main__':
    server = BaseHTTPServer.HTTPServer(('', 8081), reqhandler)
    print('server ok!')
    server.serve_forever()
If the path to the file contains Cyrillic characters, for example
http://localhost:8081/ТРА/Понедельник/Пн.doc
then I get a 404 response code.
Thank you.
URLs are not encoded as UTF-8; they are URL-encoded (percent-encoded). Decode that too, using the urllib.unquote()
function:
# urllib.unquote() turns the percent-escapes (%D0%A2...) back into raw
# bytes; those bytes are UTF-8 and can then be decoded to unicode.
from urllib import unquote

# Inside the handler's GET method:
self.performreq(unquote(self.path).decode('utf-8'))
Demo:
>>> from urllib import unquote
>>> path = '/%d0%a2%d0%a0%d0%90/%d0%9f%d0%be%d0%bd%d0%b5%d0%b4%d0%b5%d0%bb%d1%8c%d0%bd%d0%b8%d0%ba/%d0%9f%d0%bd.doc'
>>> unquote(path).decode('utf8')
u'/\u0422\u0420\u0410/\u041f\u043e\u043d\u0435\u0434\u0435\u043b\u044c\u043d\u0438\u043a/\u041f\u043d.doc'
>>> print unquote(path).decode('utf8')
/ТРА/Понедельник/Пн.doc
Comments
Post a Comment