; s3_migration_cluster_config.ini
; Configuration file for the s3_migration_cluster master and worker nodes.
  2. [Basic]
  3. JobType = PUT
  4. # JobType = PUT | GET
  5. # PUT means EC2 is not in the same account as Destination Bucket.
  6. # GET means EC2 is not in the same account as Source Bucket.
  7. # PUT表示EC2跟目标S3不在一个Account,GET表示EC2跟源S3不在一个Account.
  8. table_queue_name = s3_migration_file_list
  9. # table_queue_name ,如果CloudFormation/CDK部署,这个值会被自动替换
  10. sqs_queue_name = s3_migration_file_list
  11. # sqs_queue_name,如果CloudFormation/CDK部署,这个值会被自动替换
  12. ssm_parameter_bucket = s3_migration_bucket_para
  13. # ssm_parameter_bucket ,如果CloudFormation/CDK部署,这个值会被自动替换
  14. ssm_parameter_credentials = s3_migration_credentials
  15. ;需要在ssm parameter store手工新建一个名为 "s3_migrate_credentials" 的 parameter(CloudFormation/CDK 没设这个)
  16. ;这个是跟EC2不在一个Account体系下的另一个Account的access_key
  17. ;例如EC2在Global,则这个是China Account access_key,反之EC2在中国,这就是Global Account
  18. ;EC2本Account的权限会从 IAM Role 获取。以下是这个参数的例子:
  19. ;{
  20. ; "aws_access_key_id": "your_aws_access_key_id",
  21. ; "aws_secret_access_key": "your_aws_secret_access_key",
  22. ; "region": "cn-northwest-1"
  23. ;}
  24. Des_bucket_default = hawkey999
  25. Des_prefix_default = s3-migration-test/
  26. # 默认目标桶信息是给S3新增文件自动触发SQS的场景,用来配置目标桶/前缀的。
  27. # 对于Jobsender扫描S3并派发Job的场景,不需要配置这两项。即使配置了,程序看到SQS消息里面有就会使用消息里面的目标桶/前缀
  28. [Mode]
  29. StorageClass = STANDARD
  30. # STANDARD|REDUCED_REDUNDANCY|STANDARD_IA|ONEZONE_IA|INTELLIGENT_TIERING|GLACIER|DEEP_ARCHIVE
  31. ResumableThreshold = 5
  32. # 单位MBytes,小于该值的文件,则开始传文件时不查S3上已有的Multipart Upload,不做断点续传,而直接覆盖,节省性能
  33. MaxRetry = 20
  34. # 单个Part上传失败后,最大重试次数, type = int
  35. MaxThread = 20
  36. # 单文件同时working的Thread进程数量, type = int
  37. MaxParallelFile = 5
  38. # 并行操作文件数量, type = int
  39. JobTimeout = 3500
  40. # 单个文件传输超时时间。Seconds 秒, sqs队列inVisibletime 设置为 3600,中间留了100秒间隔
  41. JobsenderCompareVersionId = False
  42. # True: When Jobsender compare source/destination bucket list, Jobsender get the destination versionId from DDB
  43. UpdateVersionId = False
  44. # True: When Worker start a new object job(multipart upload), head source s3 to get object new versionId
  45. # If you disable JobsenderCompareVersionId, the jobsender sends job with null versionId:
  46. # then you should enable this UpdateVersionId feature, to get the real versionId, not 'null'
  47. # so to avoid in special case transfer half new object half old object
  48. GetObjectWithVersionId = False
  49. # True: Worker get object with versionId
  50. # Even the object has no version, the versionId 'null' can still work for getting lastest object.
  51. # If Bucket owner only allow you to get object, but not allow to get object version, then need to disable this feature.
  52. [Debug]
  53. CleanUnfinishedUpload = False
  54. # 建议设置 False。遇到存在现有的未完成 multipart upload id 时,可以找出来进行续传。
  55. # True 启动传输文件时,自动清理掉 S3 上所有的未完成 multipart upload id 。也就是不做断点续传了。
  56. # 如果调整过ChunkSize 可以用这个功能把上传过的清理掉。
  57. LoggingLevel = INFO
  58. # 日志输出级别 WARNING | INFO | DEBUG
  59. # 日常建议设置为 WARNING
  60. LocalProfileMode = False
  61. # 一般设置为False
  62. # True: get credential from local profile; False: get from EC2 IAM Role
  63. # If debug on local, os type as Darwin, you don't have to set True, I will recognize it
  64. ifVerifyMD5Twice = False
  65. # 是否做两次的MD5校验
  66. # ifVerifyMD5Twice 为True则一个文件完成上传合并分片之后再次进行整个文件的ETag校验MD5。
  67. # 对于S3_TO_S3,该开关True会在断点续传的时候,也重新"下载"源文件中所有已传过的分片来计算MD5,但不会重新上传。
  68. # 所以启用该模式下,推荐 worker node 与源S3在同Region,否则延迟会较大
  69. # 该开关不影响每个分片上传时候的校验,即使为False也会校验每个分片MD5,但完成文件合并后则不再校验。
  70. # Danger!!! Don't change ChunkSize unless you well understand how to clean uploaded parts
  71. ChunkSize = 5
  72. # 单位:MBytes,文件分片大小,不小于5M,如果发现对于某个文件分片会多于10000个,本程序会自动针对这个文件先扩大ChunkSize_auto再分片
  73. # 不建议修改该参数。如修改,请务必手工清除所有已上传而又未完成文件合并的 Parts,否则:
  74. # 会导致文件错误。对于启用了ifVerifyMD5Twice为True的,会校验整个文件从而发现并重传。而没有打开的话则会导致最终文件是错误的。