sql.py 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384
  1. # Use of this source code is governed by a BSD-style
  2. # license that can be found in the LICENSE file.
  3. # Copyright 2019 The OSArchiver Authors. All rights reserved.
  4. """
  5. Implementation of CSV writer (SQL data -> SQL file)
  6. """
  7. import logging
  8. import re
  9. import pymysql
  10. from osarchiver.destination.file.base import Formatter
  11. class Sql(Formatter):
  12. """
  13. The class implement a formatter of SQL type which is able to convert a list
  14. of dict of data into one file of SQL statement
  15. """
  16. def get_handler(self, handler=None, file_to_handle=None):
  17. """
  18. Return a file handler if it already exists or create a new one
  19. """
  20. if handler not in self.handlers:
  21. self.handlers[handler] = {}
  22. self.handlers[handler]['file'] = file_to_handle
  23. self.handlers[handler]['fh'] = open(file_to_handle,
  24. 'w',
  25. encoding='utf-8')
  26. return self.handlers[handler]['fh']
  27. def write(self, database=None, table=None, data=None):
  28. """
  29. The write method which should be implemented because of ineherited
  30. Formatter class
  31. The name of the file is of the form <database>.<table>.sql
  32. The SQL statement is:
  33. INSERT INTO <database>.<table> (col1, col2, ... ) VALUES
  34. (val1, val2, ... )
  35. ON DUPLICATE KEY UPDATE <primary_key>=<primary_key>
  36. This will help in importing again a file without removing already
  37. inserted lines
  38. """
  39. destination_file = '{directory}/{db}.{table}.sql'.format(
  40. directory=self.directory, db=database, table=table)
  41. key = '{db}.{table}'.format(db=database, table=table)
  42. writer = self.get_handler(handler=key, file_to_handle=destination_file)
  43. lines = []
  44. primary_key = self.source.get_table_primary_key(database=database,
  45. table=table)
  46. for item in data:
  47. # Build columns insert part
  48. # iterate over keys or values of dict is consitent in python 3
  49. columns = '`' + '`, `'.join(item.keys()) + '`'
  50. # SQL scaping, None is changed to NULL
  51. values = [
  52. pymysql.escape_string(str(v)) if v is not None else 'NULL'
  53. for v in item.values()
  54. ]
  55. placeholders = "'" + "', '".join(values) + "'"
  56. # Remove the simple quote around NULL statement to be understood as
  57. # a MysQL NULL key word.
  58. placeholders = re.sub("'NULL'", "NULL", placeholders)
  59. # The SQL statement
  60. sql = "INSERT INTO {database}.{table} ({columns}) VALUES "\
  61. "({placeholders}) ON DUPLICATE KEY UPDATE {pk} = {pk};\n".\
  62. format(
  63. database=database,
  64. table=table,
  65. columns=columns,
  66. placeholders=placeholders,
  67. pk=primary_key
  68. )
  69. lines.append(sql)
  70. logging.info("%s formatter: writing %s lines in %s", self.name,
  71. len(data), destination_file)
  72. if not self.dry_run:
  73. writer.writelines(lines)
  74. else:
  75. logging.debug("[DRY RUN] No data writen in %s", destination_file)