mysql - Error using utf-8 filenames in python script -


i have seemingly impossible conundrum , hope guys can point me in right direction. have been coming , leaving project weeks , think time solve it, hopefully.

i making script supposed read bunch of .xls excel files directory structure, parse contents , load mysql database. now, in main function, list of (croatian) file names gets passed xlrd, , problem lies.

the environment date freebsd 9.1.

i following error when executing script:

mars:~/20130829> python megascript.py  python version: 2.7.5  filesstem encoding is: utf-8 removing error.log if exists... doesn't. done! connecting database... done! mysql database version: 5.6.13 loading pilots... done! loading tehnicians... done! loading aircraft registrations... done! loading file list... done! processing files... /2006/1_siječanj.xls traceback (most recent call last):   file "megascript.py", line 540, in <module>     main()   file "megascript.py", line 491, in main     data = readxlsfile(files, 'upis', piloti, tehnicari, helikopteri)   file "megascript.py", line 129, in readxlsfile     workbook = open_workbook(f)   file "/usr/local/lib/python2.7/site-packages/xlrd-0.9.2-py2.7.egg/xlrd/__init__.py",     line 394, in open_workbook     f = open(filename, "rb") ioerror: [errno 2] no such file or directory: u'/2006/1_sije\u010danj.xls' 

i have included complete output make code fow easier follow. suppose problem in xlrd not accepting utf-8 file list. i'm not sure how around without messing around xlrd code though. ideas?

here goes code:

#! /usr/bin/env/python # -#*- coding: utf-8 -*- import os, sys, getopt, codecs, csv, mysqldb, platform mmap import mmap,access_read xlrd import open_workbook, xldate_as_tuple  # define constants  nalet_out = '' putnici_out = '' db_host = 'localhost' db_user = 'user' db_pass = 'pass' db_database = 'eth' start_dir = u'./' error_file = start_dir + 'mega_error.log'  # functions     def isnumber(s): # check if string number     try:         float(s)         return true     except valueerror:         return false  def getmonth(f): # izvuci mjesec iz imena datoteke u formatu "1_sijecanj.xls"      temp = os.path.basename(f)     temp = temp.split('_')     mjesec = int(temp[0])     return mjesec  def getyear(f): # izvuci godinu iz path     f = f.split('/')     godina = f[-2]     return godina  def databaseversion(cur): # print mysql database version     try:         cur.execute("select version()")         result = cur.fetchone()     except mysqldb.error, e:         try:             print "mysql error [%d]: %s]" % (e.args[0], e.args[1])         except indexerror:             print "mysql  error: %s" % (e.args[0], e.args[1])      print "mysql database version: %s" % result  def getquery(cur, sql_query): # perform passed query on passed database     try:         cur.execute(sql_query)         result = cur.fetchall()     except mysqldb.error, e:         try:             print "mysql error [%d]: %s]" % (e.args[0], e.args[1])         except indexerror:             print "mysql  error: %s" % (e.args[0], e.args[1])     return result  def getfiles():      files = []      # find subdirectories     in [x[0] x in os.walk(start_dir)]:          if (i != '.' , isnumber(os.path.basename(i))):              # find files in subdirectories             j in [y[2] y in os.walk(i)]:                  # every file in file list                 y in j:                     fn, fe = os.path.splitext(y)                     is_mj = fn.split("_")                     if(fe == '.xls' , y.find('_') , isnumber(is_mj[0])):                         mj = fn.split('_')                         files.append(i.lstrip('.') + "/" + y)      # sort list cronologically     files.sort(key=lambda x: getmonth(x))     files.sort(key=lambda x: getyear(x))      return files  def errhandle(f, datum, var, vrijednost, ispravka = "null"): # error information, print on screen , write error.log      f = unicode(str(f), 'utf-8')     datum = unicode(str(datum), 'utf-8')     var = unicode(str(var), 'utf-8')     try:         vrijednost = unicode(str(vrijednost.decode('utf-8')), 'utf-8')     except unicodeencodeerror:         vrijednost = vrijednost     ispravka = unicode(str(ispravka), 'utf-8')      err_f = codecs.open(error_file, 'a+', 'utf-8')     line = f + ": " + datum + " " + var + "='" + vrijednost\                     + "' ispravka='" + ispravka + "'"      #print "%s" % line      err_f.write(line)     err_f.close()  def readxlsfile(files, sheet, piloti, tehnicari, helikopteri): # read xls file , return list of rows      data = []     nalet = []     putn = []     id_index = 0      # every file in list     f in files:         print "%s" % f         temp = f.split('/')         godina = str(temp[-2])         temp = os.path.basename(f).split('_')         mjesec = str(temp[0])          workbook = open_workbook(f)         sheet = workbook.sheet_by_name('upis')          # every row doesn't contain '' or 'posada' or 'dan' etc...         ri in range(sheet.nrows):             if sheet.cell(ri,1).value!=''\                , sheet.cell(ri,2).value!='posada'\                , sheet.cell(ri,1).value!='dan'\                , (sheet.cell(ri,2).value!=''):                  temp = sheet.cell(ri, 1).value                 temp = temp.split('.')                 dan = temp[0]                  # datum                 datum = "'" + godina + "-" + mjesec + "-" + dan + "'"                  # kapetan                 kapetan = ''                 kapi=''                 if sheet.cell(ri, 2).value == "":                     kapetan = "null"                 else:                     kapetan = sheet.cell(ri, 2).value                     if kapetan[-1:] == " ":                         errhandle(f, datum, 'kapetan', kapetan, kapetan[-1:])                         kapetan = kapetan[:-1]                     if(kapetan):                         try:                             kapi = [x[0] x in piloti if x[2].lower() == kapetan]                             kapi = kapi[0]                         except valueerror:                             errhandle(f, datum, 'kapetan', kapetan, '')                             kapetan = ''                         except indexerror:                             errhandle(f, datum, 'kapetan', kapetan, '')                             kapi = 'null'                     else:                         kapi="null"                  # kopilot                 kopilot = ''                 kopi = ''                 if sheet.cell(ri, 3).value == "":                     kopi = "null"                 else:                     kopilot = sheet.cell(ri, 3).value                     if kopilot[-1:] == " ":                         errhandle(f, datum,'kopilot', kopilot,\                                   kopilot[:-1])                     if(kopilot):                         try:                             kopi = [x[0] x in piloti if x[2].lower() == kopilot]                             kopi = kopi[0]                         except valueerror:                             errhandle(f, datum,'kopilot', kopilot, '')                         except indexerror:                             errhandle(f, datum, 'kopilot', kopilot, '')                             kopi = 'null'                     else:                         kopi="null"                  # teh 1                 teh1 = ''                 t1i = ''                 if sheet.cell(ri, 4).value=='':                     t1i = 'null'                 else:                     teh1 = sheet.cell(ri, 4).value                     if teh1[-1:] == " ":                         errhandle(f, datum,'teh1', teh1, teh1[:-1])                         teh1 = 'null'                     if(teh1):                         try:                             t1i = [x[0] x in tehnicari if x[2].lower() == teh1]                             t1i = t1i[0]                         except valueerror:                             errhandle(f, datum,'teh1', teh1, '')                         except indexerror:                             errhandle(f, datum, 'teh1', teh1, '')                             t1i = 'null'                         else:                             t1i="null"                  # teh 2                 teh2=''                 t2i=''                 if sheet.cell(ri, 5).value=='':                     t2i = "null"                 else:                     teh2 = sheet.cell(ri, 5).value                     if teh2[-1:] == " ":                         errhandle(f, datum,'teh2', teh2, teh2[-1:])                         teh2 = ''                     if(teh2):                         try:                             t2i = [x[0] x in tehnicari if x[2].lower() == teh2]                             t2i = t2i[0]                         except valueerror:                             errhandle(f, datum,'teh2', teh2, 'null')                             t2i = 'null'                         except indexerror:                             errhandle(f, datum,'teh2', teh2, 'null')                             t2i = 'null'                     else:                         t2i="null"                  # oznaka                 oznaka = ''                 heli = ''                 if sheet.cell(ri, 6).value=="":                     oznaka = errhandle(f, datum, "helikopter", oznaka, "")                 else:                     oznaka = str(int(sheet.cell(ri, 6).value))                     try:                         heli = [x[0] x in helikopteri if x[0] == oznaka]                     except valueerror:                         errhandle(f, datum, 'helikopter', oznaka, '')                     except indexerror:                         errhandle(f, datum, 'helikopter', oznaka, '')                         heli = ''                  # uvjeti                 uvjeti = sheet.cell(ri, 9).value                  # letova                 letova_dan = 0                 letova_noc = 0                 letova_ifr = 0                 letova_sim = 0                 if sheet.cell(ri, 7).value == "":                     errhandle(f, datum, 'letova', letova, '')                 else:                     letova = str(int(sheet.cell(ri, 7).value))                  if uvjeti=="vfr":                     letova_dan = letova                 elif uvjeti=="ifr":                     letova_ifr = letova                 elif uvjeti=="sim":                     letova_sim = letova                 else:                     letova_noc = letova                  #block time                 bt_dan = "'00:00:00'"                 bt_noc = "'00:00:00'"                 bt_ifr = "'00:00:00'"                 bt_sim = "'00:00:00'"                 try:                     bt_tpl = xldate_as_tuple(sheet.cell(ri, 8).value, workbook.datemode)                     bt_m = bt_tpl[4]                     bt_h = bt_tpl[3]                     bt = "'" + str(bt_h).zfill(2)+":"+str(bt_m)+":00'"                 except valueerror or indexerror:                     errhandle(f, datum, 'bt', sheet.cell(ri,8).value, '')                 if uvjeti[:3]=="vfr":                     bt_dan = bt                 elif uvjeti[:3]=="ifr":                     bt_ifr = bt                 elif uvjeti[:3]=="sim":                     bt_sim = bt                 elif uvjeti[:2] == "no":                     bt_noc = bt                 else:                     errhandle(f, datum, 'uvjeti', uvjeti, '')                  # vrsta leta                 vrsta = "'" + sheet.cell(ri, 10).value + "'"                  # vjezba                 vjezba = 'null';                 try:                     vjezba = sheet.cell(ri, 11).value                     if vjezba == '':                         # many results                         #errhandle(f, datum, 'vjezba', vjezba, '')                         vjezba = 'null'                     if vjezba == "?":                         errhandle(f, datum, 'vjezba', str(vjezba), '')                         vjezba = 'null'                     if str(vjezba) == 'i':                         errhandle(f, datum, 'vjezba', str(vjezba), '')                         vjezba = 'null'                     if str(vjezba)[-1:] == 'i':                         errhandle(f, datum, 'vjezba', str(vjezba),\                         str(vjezba).rstrip('i'))                         vjezba = str(vjezba).rstrip('i')                     if str(vjezba).find(' ') != -1:                         errhandle(f, datum, 'vjezba', str(vjezba), str(vjezba).split(' ')[0])                         vjezba = str(vjezba).split(' ')                         vjezba = vjezba[0]                     if str(vjezba)[-1:] == 'm':                         errhandle(f, datum, 'vjezba', str(vjezba), str(vjezba).rstrip('m'))                         vjezba = str(vjezba).rstrip('m')                     if str(vjezba).find(';') != -1:                         errhandle(f, datum, 'vjezba', str(vjezba), str(vjezba).split(';')[0])                         temp = str(vjezba).split(';')                         vjezba = temp[0]                     if str(vjezba).find('/') != -1:                         errhandle(f, datum, 'vjezba', str(vjezba), str(vjezba).split('/')[0])                         temp = str(vjezba).split('/')                         vjezba = temp[0]                     if str(vjezba).find('-') != -1:                         errhandle(f, datum, 'vjezba', str(vjezba), str(vjezba).split('-')[0])                         temp = str(vjezba).split('-')                         vjezba = temp[0]                     if str(vjezba).find(',') != -1:                         errhandle(f, datum, 'vjezba', str(vjezba), str(vjezba).split(',')[0])                         temp = str(vjezba).split(',')                         vjezba = temp[0]                     if str(vjezba).find('_') != -1:                         errhandle(f, datum, 'vjezba', str(vjezba), str(vjezba).split('_')[0])                         temp = str(vjezba).split('_')                         vjezba = temp[0]                     if str(vjezba) == 'bo':                         errhandle(f, datum, 'vjezba', str(vjezba), '')                         vjezba = 'null'                     if str(vjezba).find(' ') != -1:                         if str(vjezba) == 'pp 300':                             errhandle(f, datum, 'vjezba', str(vjezba), str(vjezba).split(' ')[1])                             temp = str(vjezba).split(' ')                             vjezba = temp[1]                         else:                             errhandle(f, datum, 'vjezba', str(vjezba), str(vjezba).split(' ')[0])                             temp = str(vjezba).split(' ')                             vjezba = temp[0]                     if str(vjezba) == 'pp':                         errhandle(f, datum, 'vjezba', str(vjezba), '')                         vjezba = ''                 except unicodeencodeerror:                     errhandle(f, datum, 'unicode error! vjezba', vjezba, '')                  if vjezba != 'null':                     vjezba = int(float(vjezba))                  # visinska slijetanja                  # putnici                 vp1 = str(sheet.cell(ri, 12).value)                 bp1 = str(sheet.cell(ri, 13).value)                 vp2 = str(sheet.cell(ri, 14).value)                 bp2 = str(sheet.cell(ri, 15).value)                  # teret                 teret = ''                 teret = str(sheet.cell(ri, 16).value)                 if teret == '':                     teret = 0                  # baja                 baja = ''                 if sheet.cell(ri, 17).value == '':                     baja = 0                 else:                     baja = int(sheet.cell(ri, 17).value) / 2 # dodano /2 da se dobiju tone    # redosljed csv                 id_index = id_index + 1                  row = [id_index, datum, kapi, kopi, t1i, t2i, oznaka,\                        letova, letova_dan, letova_noc, letova_ifr,\                        letova_sim, bt, bt_dan, bt_noc, bt_ifr,\                        bt_sim, vrsta, vjezba, teret, baja]                  row = [str(i) in row]                 nalet.append(row)                  putn = []                 if bp1 != '':                     put = [id_index, vp1, bp1]                     putn.append(put)                 if bp2 != '':                     put = [id_index, vp2, bp2]                     putn.append(put)      data.append(nalet)     data.append(putn)     return data  def main():      # python version     print "\npython version: %s \n" % platform.python_version()      # print filesystem encoding     print "filesstem encoding is: %s" % sys.getfilesystemencoding()      # remove error file if exists     print "removing error.log if exists..."     try:         os.remove(error_file)         print "it did."     except oserror:         print "it doesn't."         pass     print "done!"      # connect database     print "connecting database..."     db = mysqldb.connect(db_host, db_user, db_pass, db_database,\                          use_unicode=true, charset='utf8')     cur=db.cursor()     print "done!"      # database version      databaseversion(cur)      # load pilots, tehnicians , helicopters db      print "loading pilots..."     sql_query = "select eth_osobnici.id, eth_osobnici.ime,\     eth_osobnici.prezime eth_osobnici right join \     eth_letacka_osposobljenja on eth_osobnici.id=\     eth_letacka_osposobljenja.id_osobnik \     eth_letacka_osposobljenja.vrsta_osposobljenja='kapetan' \     or eth_letacka_osposobljenja.vrsta_osposobljenja='kopilot'"      #piloti = []     #piloti = getquery(cur, sql_query)       piloti=[]     temp = []     temp = getquery(cur, sql_query)     row in temp:         piloti.append(row)     print "done!"      print "loading tehnicians..."     sql_query = "select eth_osobnici.id, eth_osobnici.ime,\     eth_osobnici.prezime eth_osobnici right join \     eth_letacka_osposobljenja on eth_osobnici.id=\     eth_letacka_osposobljenja.id_osobnik \     eth_letacka_osposobljenja.vrsta_osposobljenja='tehničar 1' \     or eth_letacka_osposobljenja.vrsta_osposobljenja='tehničar 2'"      tehnicari=[]     temp = []     temp = getquery(cur, sql_query)     row in temp:         tehnicari.append(row)     print "done!"      print "loading aircraft registrations..."     sql_query = "select id eth_helikopteri"      helikopteri=[]     temp = []     temp = getquery(cur, sql_query)     row in temp:         helikopteri.append(row)     print "done!"      # file names process     print "loading file list..."     files = getfiles()     print "done!"      # process files array     print "processing files..."     data = readxlsfile(files, 'upis', piloti, tehnicari, helikopteri)     print "done!"      # enter new information in database     result = 0      print "reseting database..."     sql_query = "delete eth_nalet"     cur.execute(sql_query)     db.commit()     sql_query = "alter table eth_nalet auto_increment=0"     cur.execute(sql_query)     db.commit()     print "done!"      print "loading data in 'eth_nalet'..."      row in data[0]:         sql_query = """insert eth_nalet (id, datum, kapetan,          kopilot, teh1, teh2, registracija, letova_uk, letova_dan,          letova_noc, letova_ifr, letova_sim, block_time, block_time_dan,          block_time_noc, block_time_ifr, block_time_sim, vrsta_leta,         vjezba, teret, baja) values (%s)""" % (", ".join(row))          cur.execute(sql_query)         db.commit()      print "done!"      print "loading data in 'eth_putnici'..."      row in data[1]:         sql_query = """insert eth_putnici (id_leta,         vrsta_putnika, broj_putnika) values (%s)""" % (", ".join(row))          cur.execute(sql_query)         db.commit()      print "done!"      # close database connection     print "closing database connection..."     if cur:         cur.close()     if db:         db.close()     print "database closed!"  if __name__ == '__main__':         main() 

i apologize not translating comments in code, old project of mine , tend make comments in english now. if needs explanation please fire away.

the funny thing if print file list screen, display fine. when passed xlrd don't seem in right format.

respectfully, me

i managed find error! wasn't due encoding error after all. logic error.

in function getfiles() stripped leading "." file list, , didn't strip "./" ought to. so, naturally file names "/2006/1_siječanj.xls" instead of "2006/1_siječanj.xls" should be. ioerror , not not unicodeencodeerror. , result of oversight script tried find absolute path instead of relative path.

well embarrassing. thank guys, hope post helps else pay more attention error types python throws @ us.


Comments

Popular posts from this blog

java.util.scanner - How to read and add only numbers to array from a text file -

rewrite - Trouble with Wordpress multiple custom querystrings -