Programing

iOS 4.0 iTunes 백업에서 Manifest.mbdb 파일을 구문 분석하는 방법

lottogame 2020. 9. 24. 08:11
반응형

iOS 4.0 iTunes 백업에서 Manifest.mbdb 파일을 구문 분석하는 방법


iOS 4.0에서 Apple은 백업 프로세스를 재 설계했습니다.

iTunes는 Manifest.plist 파일에 백업 파일과 관련된 파일 이름 목록을 저장하는 데 사용되었지만 iOS 4.0에서는이 정보를 Manifest.mbdb로 이동했습니다.

iOS 4.0 장치로 백업하고 ~ / Library / Application Support / MobileSync / Backup 폴더를 살펴보면이 파일의 예를 볼 수 있습니다 (가장 최근 날짜가있는 하위 폴더 내부를보십시오).

다음은 텍스트 편집기에서 파일이 어떻게 보이는지 보여주는 스크린 샷입니다.

대체 텍스트
(출처 : supercrazyawesome.com )

iOS 4.0 용 (무료) iPhone Backup Extractor 앱 ( http://supercrazyawesome.com )을 업데이트 할 수 있도록 Cocoa 애플리케이션으로 어떻게 구문 분석 합니까?


감사합니다, user374559 및 reneD-해당 코드와 설명은 매우 유용합니다.

일부 Python에서 정보를 구문 분석하고 Unix ls-l과 같은 형식으로 인쇄합니다.

#!/usr/bin/env python
import sys

def getint(data, offset, intsize):
    """Retrieve an integer (big-endian) and new offset from the current offset"""
    value = 0
    while intsize > 0:
        value = (value<<8) + ord(data[offset])
        offset = offset + 1
        intsize = intsize - 1
    return value, offset

def getstring(data, offset):
    """Retrieve a string and new offset from the current offset into the data"""
    if data[offset] == chr(0xFF) and data[offset+1] == chr(0xFF):
        return '', offset+2 # Blank string
    length, offset = getint(data, offset, 2) # 2-byte length
    value = data[offset:offset+length]
    return value, (offset + length)

def process_mbdb_file(filename):
    mbdb = {} # Map offset of info in this file => file info
    data = open(filename).read()
    if data[0:4] != "mbdb": raise Exception("This does not look like an MBDB file")
    offset = 4
    offset = offset + 2 # value x05 x00, not sure what this is
    while offset < len(data):
        fileinfo = {}
        fileinfo['start_offset'] = offset
        fileinfo['domain'], offset = getstring(data, offset)
        fileinfo['filename'], offset = getstring(data, offset)
        fileinfo['linktarget'], offset = getstring(data, offset)
        fileinfo['datahash'], offset = getstring(data, offset)
        fileinfo['unknown1'], offset = getstring(data, offset)
        fileinfo['mode'], offset = getint(data, offset, 2)
        fileinfo['unknown2'], offset = getint(data, offset, 4)
        fileinfo['unknown3'], offset = getint(data, offset, 4)
        fileinfo['userid'], offset = getint(data, offset, 4)
        fileinfo['groupid'], offset = getint(data, offset, 4)
        fileinfo['mtime'], offset = getint(data, offset, 4)
        fileinfo['atime'], offset = getint(data, offset, 4)
        fileinfo['ctime'], offset = getint(data, offset, 4)
        fileinfo['filelen'], offset = getint(data, offset, 8)
        fileinfo['flag'], offset = getint(data, offset, 1)
        fileinfo['numprops'], offset = getint(data, offset, 1)
        fileinfo['properties'] = {}
        for ii in range(fileinfo['numprops']):
            propname, offset = getstring(data, offset)
            propval, offset = getstring(data, offset)
            fileinfo['properties'][propname] = propval
        mbdb[fileinfo['start_offset']] = fileinfo
    return mbdb

def process_mbdx_file(filename):
    mbdx = {} # Map offset of info in the MBDB file => fileID string
    data = open(filename).read()
    if data[0:4] != "mbdx": raise Exception("This does not look like an MBDX file")
    offset = 4
    offset = offset + 2 # value 0x02 0x00, not sure what this is
    filecount, offset = getint(data, offset, 4) # 4-byte count of records 
    while offset < len(data):
        # 26 byte record, made up of ...
        fileID = data[offset:offset+20] # 20 bytes of fileID
        fileID_string = ''.join(['%02x' % ord(b) for b in fileID])
        offset = offset + 20
        mbdb_offset, offset = getint(data, offset, 4) # 4-byte offset field
        mbdb_offset = mbdb_offset + 6 # Add 6 to get past prolog
        mode, offset = getint(data, offset, 2) # 2-byte mode field
        mbdx[mbdb_offset] = fileID_string
    return mbdx

def modestr(val):
    def mode(val):
        if (val & 0x4): r = 'r'
        else: r = '-'
        if (val & 0x2): w = 'w'
        else: w = '-'
        if (val & 0x1): x = 'x'
        else: x = '-'
        return r+w+x
    return mode(val>>6) + mode((val>>3)) + mode(val)

def fileinfo_str(f, verbose=False):
    if not verbose: return "(%s)%s::%s" % (f['fileID'], f['domain'], f['filename'])
    if (f['mode'] & 0xE000) == 0xA000: type = 'l' # symlink
    elif (f['mode'] & 0xE000) == 0x8000: type = '-' # file
    elif (f['mode'] & 0xE000) == 0x4000: type = 'd' # dir
    else: 
        print >> sys.stderr, "Unknown file type %04x for %s" % (f['mode'], fileinfo_str(f, False))
        type = '?' # unknown
    info = ("%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s" % 
            (type, modestr(f['mode']&0x0FFF) , f['userid'], f['groupid'], f['filelen'], 
             f['mtime'], f['atime'], f['ctime'], f['fileID'], f['domain'], f['filename']))
    if type == 'l': info = info + ' -> ' + f['linktarget'] # symlink destination
    for name, value in f['properties'].items(): # extra properties
        info = info + ' ' + name + '=' + repr(value)
    return info

verbose = True
if __name__ == '__main__':
    mbdb = process_mbdb_file("Manifest.mbdb")
    mbdx = process_mbdx_file("Manifest.mbdx")
    for offset, fileinfo in mbdb.items():
        if offset in mbdx:
            fileinfo['fileID'] = mbdx[offset]
        else:
            fileinfo['fileID'] = "<nofileID>"
            print >> sys.stderr, "No fileID found for %s" % fileinfo_str(fileinfo)
        print fileinfo_str(fileinfo, verbose)

iOS 5에서는 Manifest.mbdx 파일이 제거되었습니다. 이 기사에서는 도메인과 경로가 Manifest.mbdb에 있고 ID 해시가 SHA1로 생성 될 수 있기 때문에 어쨌든 중복되었습니다.

다음은 galloglass의 코드 업데이트이므로 iOS 5 장치의 백업과 함께 작동합니다. 유일한 변경 사항은 process_mbdx_file ()을 제거하고 process_mbdb_file ()에 몇 줄을 추가하는 것입니다.

많은 앱과 파일을 사용하여 iPhone 4S 및 iPad 1의 백업으로 테스트했습니다.

#!/usr/bin/env python
import sys
import hashlib

mbdx = {}

def getint(data, offset, intsize):
    """Retrieve an integer (big-endian) and new offset from the current offset"""
    value = 0
    while intsize > 0:
        value = (value<<8) + ord(data[offset])
        offset = offset + 1
        intsize = intsize - 1
    return value, offset

def getstring(data, offset):
    """Retrieve a string and new offset from the current offset into the data"""
    if data[offset] == chr(0xFF) and data[offset+1] == chr(0xFF):
        return '', offset+2 # Blank string
    length, offset = getint(data, offset, 2) # 2-byte length
    value = data[offset:offset+length]
    return value, (offset + length)

def process_mbdb_file(filename):
    mbdb = {} # Map offset of info in this file => file info
    data = open(filename).read()
    if data[0:4] != "mbdb": raise Exception("This does not look like an MBDB file")
    offset = 4
    offset = offset + 2 # value x05 x00, not sure what this is
    while offset < len(data):
        fileinfo = {}
        fileinfo['start_offset'] = offset
        fileinfo['domain'], offset = getstring(data, offset)
        fileinfo['filename'], offset = getstring(data, offset)
        fileinfo['linktarget'], offset = getstring(data, offset)
        fileinfo['datahash'], offset = getstring(data, offset)
        fileinfo['unknown1'], offset = getstring(data, offset)
        fileinfo['mode'], offset = getint(data, offset, 2)
        fileinfo['unknown2'], offset = getint(data, offset, 4)
        fileinfo['unknown3'], offset = getint(data, offset, 4)
        fileinfo['userid'], offset = getint(data, offset, 4)
        fileinfo['groupid'], offset = getint(data, offset, 4)
        fileinfo['mtime'], offset = getint(data, offset, 4)
        fileinfo['atime'], offset = getint(data, offset, 4)
        fileinfo['ctime'], offset = getint(data, offset, 4)
        fileinfo['filelen'], offset = getint(data, offset, 8)
        fileinfo['flag'], offset = getint(data, offset, 1)
        fileinfo['numprops'], offset = getint(data, offset, 1)
        fileinfo['properties'] = {}
        for ii in range(fileinfo['numprops']):
            propname, offset = getstring(data, offset)
            propval, offset = getstring(data, offset)
            fileinfo['properties'][propname] = propval
        mbdb[fileinfo['start_offset']] = fileinfo
        fullpath = fileinfo['domain'] + '-' + fileinfo['filename']
        id = hashlib.sha1(fullpath)
        mbdx[fileinfo['start_offset']] = id.hexdigest()
    return mbdb

def modestr(val):
    def mode(val):
        if (val & 0x4): r = 'r'
        else: r = '-'
        if (val & 0x2): w = 'w'
        else: w = '-'
        if (val & 0x1): x = 'x'
        else: x = '-'
        return r+w+x
    return mode(val>>6) + mode((val>>3)) + mode(val)

def fileinfo_str(f, verbose=False):
    if not verbose: return "(%s)%s::%s" % (f['fileID'], f['domain'], f['filename'])
    if (f['mode'] & 0xE000) == 0xA000: type = 'l' # symlink
    elif (f['mode'] & 0xE000) == 0x8000: type = '-' # file
    elif (f['mode'] & 0xE000) == 0x4000: type = 'd' # dir
    else: 
        print >> sys.stderr, "Unknown file type %04x for %s" % (f['mode'], fileinfo_str(f, False))
        type = '?' # unknown
    info = ("%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s" % 
            (type, modestr(f['mode']&0x0FFF) , f['userid'], f['groupid'], f['filelen'], 
             f['mtime'], f['atime'], f['ctime'], f['fileID'], f['domain'], f['filename']))
    if type == 'l': info = info + ' -> ' + f['linktarget'] # symlink destination
    for name, value in f['properties'].items(): # extra properties
        info = info + ' ' + name + '=' + repr(value)
    return info

verbose = True
if __name__ == '__main__':
    mbdb = process_mbdb_file("Manifest.mbdb")
    for offset, fileinfo in mbdb.items():
        if offset in mbdx:
            fileinfo['fileID'] = mbdx[offset]
        else:
            fileinfo['fileID'] = "<nofileID>"
            print >> sys.stderr, "No fileID found for %s" % fileinfo_str(fileinfo)
        print fileinfo_str(fileinfo, verbose)

이 작업을 완료했습니다. 즉, Python 용 백업 디코더 라이브러리의 iOS 4 + iTunes 9.2 업데이트-http: //www.iki.fi/fingon/iphonebackupdb.py

그것은 내가 필요한 것을 거의 문서화하지 않지만 거기에서 아이디어를 자유롭게 복사 할 수 있습니다 ;-)

(적어도 내 백업으로 잘 작동하는 것 같습니다.)


여기에서 MBDB / MBDX 형식에 대한 정보와 간단한 설명을 찾을 수 있습니다.

http://code.google.com/p/iphonebackupbrowser/

이것은 백업 파일을 탐색하는 내 응용 프로그램입니다. iTunes 9.2와 함께 제공되는 새 파일의 형식을 문서화하려고했습니다.


이 파이썬 스크립트는 굉장합니다.

여기 내 Ruby 버전 (사소한 개선 포함)과 검색 기능이 있습니다. (iOS 5의 경우)

# encoding: utf-8
require 'fileutils'
require 'digest/sha1'

class ManifestParser
  def initialize(mbdb_filename, verbose = false)
    @verbose = verbose
    process_mbdb_file(mbdb_filename)
  end

  # Returns the numbers of records in the Manifest files.
  def record_number
    @mbdb.size
  end

  # Returns a huge string containing the parsing of the Manifest files.
  def to_s
    s = ''
    @mbdb.each do |v|
      s += "#{fileinfo_str(v)}\n"
    end
    s
  end

  def to_file(filename)
    File.open(filename, 'w') do |f|
      @mbdb.each do |v|
        f.puts fileinfo_str(v)
      end
    end
  end

  # Copy the backup files to their real path/name.
  # * domain_match Can be a regexp to restrict the files to copy.
  # * filename_match Can be a regexp to restrict the files to copy.
  def rename_files(domain_match = nil, filename_match = nil)
    @mbdb.each do |v|
      if v[:type] == '-' # Only rename files.
        if (domain_match.nil? or v[:domain] =~ domain_match) and (filename_match.nil? or v[:filename] =~ filename_match)
          dst = "#{v[:domain]}/#{v[:filename]}"
          puts "Creating: #{dst}"
          FileUtils.mkdir_p(File.dirname(dst))
          FileUtils.cp(v[:fileID], dst)
        end
      end
    end
  end

  # Return the filename that math the given regexp.
  def search(regexp)
    result = Array.new
    @mbdb.each do |v|
      if "#{v[:domain]}::#{v[:filename]}" =~ regexp
        result << v
      end
    end
    result
  end

  private
  # Retrieve an integer (big-endian) and new offset from the current offset
  def getint(data, offset, intsize)
    value = 0
    while intsize > 0
      value = (value<<8) + data[offset].ord
      offset += 1
      intsize -= 1
    end
    return value, offset
  end

  # Retrieve a string and new offset from the current offset into the data
  def getstring(data, offset)
    return '', offset + 2 if data[offset] == 0xFF.chr and data[offset + 1] == 0xFF.chr # Blank string
    length, offset = getint(data, offset, 2) # 2-byte length
    value = data[offset...(offset + length)]
    return value, (offset + length)
  end

  def process_mbdb_file(filename)
    @mbdb = Array.new
    data = File.open(filename, 'rb') { |f| f.read }
    puts "MBDB file read. Size: #{data.size}"
    raise 'This does not look like an MBDB file' if data[0...4] != 'mbdb'
    offset = 4
    offset += 2 # value x05 x00, not sure what this is
    while offset < data.size
      fileinfo = Hash.new
      fileinfo[:start_offset] = offset
      fileinfo[:domain], offset = getstring(data, offset)
      fileinfo[:filename], offset = getstring(data, offset)
      fileinfo[:linktarget], offset = getstring(data, offset)
      fileinfo[:datahash], offset = getstring(data, offset)
      fileinfo[:unknown1], offset = getstring(data, offset)
      fileinfo[:mode], offset = getint(data, offset, 2)
      if (fileinfo[:mode] & 0xE000) == 0xA000 # Symlink
        fileinfo[:type] = 'l'
      elsif (fileinfo[:mode] & 0xE000) == 0x8000 # File
        fileinfo[:type] = '-'
      elsif (fileinfo[:mode] & 0xE000) == 0x4000 # Dir
        fileinfo[:type] = 'd'
      else
        # $stderr.puts "Unknown file type %04x for #{fileinfo_str(f, false)}" % f['mode']
        fileinfo[:type] = '?'
      end
      fileinfo[:unknown2], offset = getint(data, offset, 4)
      fileinfo[:unknown3], offset = getint(data, offset, 4)
      fileinfo[:userid], offset = getint(data, offset, 4)
      fileinfo[:groupid], offset = getint(data, offset, 4)
      fileinfo[:mtime], offset = getint(data, offset, 4)
      fileinfo[:atime], offset = getint(data, offset, 4)
      fileinfo[:ctime], offset = getint(data, offset, 4)
      fileinfo[:filelen], offset = getint(data, offset, 8)
      fileinfo[:flag], offset = getint(data, offset, 1)
      fileinfo[:numprops], offset = getint(data, offset, 1)
      fileinfo[:properties] = Hash.new
      (0...(fileinfo[:numprops])).each do |ii|
        propname, offset = getstring(data, offset)
        propval, offset = getstring(data, offset)
        fileinfo[:properties][propname] = propval
      end
      # Compute the ID of the file.
      fullpath = fileinfo[:domain] + '-' + fileinfo[:filename]
      fileinfo[:fileID] = Digest::SHA1.hexdigest(fullpath)
      # We add the file to the list of files.
      @mbdb << fileinfo
    end
    @mbdb
  end

  def modestr(val)
    def mode(val)
      r = (val & 0x4) ? 'r' : '-'
      w = (val & 0x2) ? 'w' : '-'
      x = (val & 0x1) ? 'x' : '-'
      r + w + x
    end
    mode(val >> 6) + mode(val >> 3) + mode(val)
  end

  def fileinfo_str(f)
    return "(#{f[:fileID]})#{f[:domain]}::#{f[:filename]}" unless @verbose
    data = [f[:type], modestr(f[:mode]), f[:userid], f[:groupid], f[:filelen], f[:mtime], f[:atime], f[:ctime], f[:fileID], f[:domain], f[:filename]]
    info = "%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s" % data
    info += ' -> ' + f[:linktarget] if f[:type] == 'l' # Symlink destination
    f[:properties].each do |k, v|
      info += " #{k}=#{v.inspect}"
    end
    info
  end
end

if __FILE__ == $0
  mp = ManifestParser.new 'Manifest.mbdb', true
  mp.to_file 'filenames.txt'
end

나는 galloglas의 코드가 마음에 들었고 응용 프로그램별로 총 크기의 정렬 된 목록을 표시하도록 주요 기능을 변경했습니다.

verbose = True
if __name__ == '__main__':
    mbdb = process_mbdb_file("Manifest.mbdb")
    mbdx = process_mbdx_file("Manifest.mbdx")
    sizes = {}
    for offset, fileinfo in mbdb.items():
        if offset in mbdx:
            fileinfo['fileID'] = mbdx[offset]
        else:
            fileinfo['fileID'] = "<nofileID>"
            print >> sys.stderr, "No fileID found for %s" % fileinfo_str(fileinfo)
        print fileinfo_str(fileinfo, verbose)
        if (fileinfo['mode'] & 0xE000) == 0x8000:
        sizes[fileinfo['domain']]= sizes.get(fileinfo['domain'],0) + fileinfo['filelen']
    for domain in sorted(sizes, key=sizes.get):
        print "%-60s %11d (%dMB)" % (domain, sizes[domain], int(sizes[domain]/1024/1024))

이렇게하면 어떤 응용 프로그램이 모든 공간을 차지하고 있는지 파악할 수 있습니다.


MBDB 파일 판독기의 Java 구현을 찾는 사람들을 위해 몇 가지가 있습니다.


Galloglass의 답변 덕분입니다. 이 코드는 Python 2.7에서 잘 작동합니다. 내가 말하고 싶은 것은 단 하나뿐입니다. manifest.mbdb 파일을 읽을 때 바이너리 모드를 사용해야합니다. 그렇지 않으면 모든 콘텐츠를 읽지는 않습니다.

또한 Python 3.4에서 코드가 작동하도록 약간 변경했습니다. 다음은 코드입니다.

#!/usr/bin/env python
import sys
import hashlib

mbdx = {}

def getint(data, offset, intsize):
    """Retrieve an integer (big-endian) and new offset from the current offset"""
    value = 0
    while intsize > 0:
        value = (value << 8) + data[offset]
        offset = offset + 1
        intsize = intsize - 1
    return value, offset

def getstring(data, offset):
    """Retrieve a string and new offset from the current offset into the data"""
    if chr(data[offset]) == chr(0xFF) and chr(data[offset + 1]) == chr(0xFF):
        return '', offset + 2  # Blank string
    length, offset = getint(data, offset, 2)  # 2-byte length
    value = data[offset:offset + length]
    return value.decode(encoding='latin-1'), (offset + length)

def process_mbdb_file(filename):
    mbdb = {}  # Map offset of info in this file => file info
    data = open(filename, 'rb').read()  # 'b' is needed to read all content at once
    if data[0:4].decode() != "mbdb": raise Exception("This does not look like an MBDB file")
    offset = 4
    offset = offset + 2  # value x05 x00, not sure what this is
    while offset < len(data):
        fileinfo = {}
        fileinfo['start_offset'] = offset
        fileinfo['domain'], offset = getstring(data, offset)
        fileinfo['filename'], offset = getstring(data, offset)
        fileinfo['linktarget'], offset = getstring(data, offset)
        fileinfo['datahash'], offset = getstring(data, offset)
        fileinfo['unknown1'], offset = getstring(data, offset)
        fileinfo['mode'], offset = getint(data, offset, 2)
        fileinfo['unknown2'], offset = getint(data, offset, 4)
        fileinfo['unknown3'], offset = getint(data, offset, 4)
        fileinfo['userid'], offset = getint(data, offset, 4)
        fileinfo['groupid'], offset = getint(data, offset, 4)
        fileinfo['mtime'], offset = getint(data, offset, 4)
        fileinfo['atime'], offset = getint(data, offset, 4)
        fileinfo['ctime'], offset = getint(data, offset, 4)
        fileinfo['filelen'], offset = getint(data, offset, 8)
        fileinfo['flag'], offset = getint(data, offset, 1)
        fileinfo['numprops'], offset = getint(data, offset, 1)
        fileinfo['properties'] = {}
        for ii in range(fileinfo['numprops']):
            propname, offset = getstring(data, offset)
            propval, offset = getstring(data, offset)
            fileinfo['properties'][propname] = propval
        mbdb[fileinfo['start_offset']] = fileinfo
        fullpath = fileinfo['domain'] + '-' + fileinfo['filename']
        id = hashlib.sha1(fullpath.encode())
        mbdx[fileinfo['start_offset']] = id.hexdigest()
    return mbdb

def modestr(val):
    def mode(val):
        if (val & 0x4):
            r = 'r'
        else:
            r = '-'
        if (val & 0x2):
            w = 'w'
        else:
            w = '-'
        if (val & 0x1):
            x = 'x'
        else:
            x = '-'
        return r + w + x
    return mode(val >> 6) + mode((val >> 3)) + mode(val)

def fileinfo_str(f, verbose=False):
    if not verbose: return "(%s)%s::%s" % (f['fileID'], f['domain'], f['filename'])
    if (f['mode'] & 0xE000) == 0xA000:
        type = 'l'  # symlink
    elif (f['mode'] & 0xE000) == 0x8000:
        type = '-'  # file
    elif (f['mode'] & 0xE000) == 0x4000:
        type = 'd'  # dir
    else:
        print >> sys.stderr, "Unknown file type %04x for %s" % (f['mode'], fileinfo_str(f, False))
        type = '?'  # unknown
    info = ("%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s" %
            (type, modestr(f['mode'] & 0x0FFF), f['userid'], f['groupid'], f['filelen'],
             f['mtime'], f['atime'], f['ctime'], f['fileID'], f['domain'], f['filename']))
    if type == 'l': info = info + ' -> ' + f['linktarget']  # symlink destination
    for name, value in f['properties'].items():  # extra properties
        info = info + ' ' + name + '=' + repr(value)
    return info

verbose = True
if __name__ == '__main__':
    mbdb = process_mbdb_file(
        r"Manifest.mbdb")
    for offset, fileinfo in mbdb.items():
        if offset in mbdx:
            fileinfo['fileID'] = mbdx[offset]
        else:
            fileinfo['fileID'] = "<nofileID>"
            print >> sys.stderr, "No fileID found for %s" % fileinfo_str(fileinfo)
        print(fileinfo_str(fileinfo, verbose))

참고 URL : https://stackoverflow.com/questions/3085153/how-to-parse-the-manifest-mbdb-file-in-an-ios-4-0-itunes-backup

반응형