#!/usr/bin/env python # -*- coding: iso-8859-15 -*- # # Copyright (c) 2007-2009 René Leonhardt, Germany. # Copyright (c) 2007 Hampus Wessman, Sweden. # # Website: http://code.google.com/p/metalink-library/ # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published # by the Free Software Foundation; either version 2.1 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA import binascii, datetime, glob, math, os, re, sys, time, urllib, urlparse, xml.dom from xml.dom.minidom import parse, Node from xml.sax.saxutils import escape # Globals current_version = "1.2" generator="Metalink Library %s" % current_version fs_encoding = sys.getfilesystemencoding() preference_ed2k = "95" verbose = None # Command-line options _opts = {} def usage_and_exit(error_msg=None, options=''): progname = os.path.basename(sys.argv[0]) stream = error_msg and sys.stderr or sys.stdout if error_msg: print >> stream, "ERROR: %s\n" % error_msg print >> stream, "Metalink Library %s by Rene Leonhardt and Hampus Wessman" % current_version if error_msg is False: sys.exit() params = [progname] * 14 params.append(options and "\n\nOptions:\n" + options or '') print >> stream, """ Usage: %s [FILE|DIRECTORY]... Create Metalink and BitTorrent files by parsing download files and mirror URLs. Helper files will be searched and parsed automatically: .metalink, .torrent, .mirrors, .md5, .sha1, .sha256 (sum, SUMS), .sig. Glob wildcard expressions are allowed for filenames (openproj-1.4*). Torrents will only be created for single files with chunks (parsed or scanned). Chunks will only be imported from single-file torrents. Automatic mirror parsing for SourceForge, Eclipse, MySQL and PostgreSQL URLs. Examples: # Parse file1, search helper files file1.* and generate file1.metalink. # In addition, create file1.torrent (if exists, create file1.torrent.new). %s file1 --create-torrent=http://linuxtracker.org/announce.php # Parse directory, search download and helper files *.* and generate # *.metalink for all non-helper files bigger than 1 MB. # First metalink file with no download file match will be the template # for download files with no corresponding metalink file. %s directory # Upgrade to new release with single metalink template. # In addition, generate gzip compressed files to save server bandwidth. %s --version=1.1 file-1.0.zip.metalink file-1.1* --compress-gzip # Update file-1.0*.metalink files with new version number 1.1, # parse file-1.1* and file-1.1*.torrent and generate file-1.1*.metalink. %s --version=1.1 file-1.0*.metalink # Update version number in existing SourceForge .metalink files (including filename and mirrors), # remove hashes and P2P mirrors and create phpMyAdmin-3.1.4-*.metalink. %s phpMyAdmin-3.1.3-*.metalink --version=3.1.4 # If the metalink files were generated without --version, you still can update the version # information in filenames and mirrors by providing the old version number. %s phpMyAdmin-3.1.3-*.metalink --version=3.1.4 --old-version=3.1.3 # After downloading, a BitTorrent file can easily be generated in a second command-line run: # Parse finished download, update .metalink with hashes, piece checksums and P2P links and create .torrent. %s openproj-1.4.zip.metalink --overwrite --create-torrent=http://example.tracker.org/announce # Define URL prefix to save the original .metalink download URL: # http://openoffice.org/url/prefix/file1.metalink %s http://openoffice.org/url/prefix/ file1 # Mirror mode: create openproj-1.4.zip.metalink with all SourceForge mirrors (static list, no download). %s --mirrors=http://downloads.sourceforge.net/openproj/openproj-1.4.zip # Or you can even use a direct download link (without mirror selection) %s --mirrors=http://superb-west.dl.sourceforge.net/sourceforge/openproj/openproj-1.4.zip # Download the complete mirror list and create cdt-master-6.0.0.zip.metalink. %s --mirrors="http://www.eclipse.org/downloads/download.php?file=/tools/cdt/releases/galileo/dist/cdt-master-6.0.0.zip" # Also works for Eclipse links: http://www.eclipse.org/downloads/download.php?file=/technology/epp/downloads/release/galileo/R/eclipse-php-galileo-win32.zip # Mirror files may contain preference and country values, i.e. ooop.mirrors: http://puzzle.dl.sourceforge.net/ooop/ 1 http://mesh.dl.sourceforge.net/ooop/ au ftp://optusnet.dl.sourceforge.net/sourceforge/o/oo/ooop/ be 100 http://belnet.dl.sourceforge.net/ooop/ 90 br http://ufpr.dl.sourceforge.net/ooop/ # Append filename automatically to mirrors ending with / and generate several metalink files: %s OOo_3.0.1_090128_Win32Intel_install.exe OOo_3.0.1_090128_Win32Intel_install_de.exe ooop.mirrors # Generated: OOo_3.0.1_090128_Win32Intel_install.exe.metalink # Generated: OOo_3.0.1_090128_Win32Intel_install_de.exe.metalink # http://mesh.dl.sourceforge.net/ooop/OOo_3.0.1_090128_Win32Intel_install.exe # Download and parse mirrors automatically, guess --filename and --filter from URL filename %s --mirrors=http://wwwmaster.postgresql.org/download/mirrors-ftp/source/v8.4.0/postgresql-8.4.0.tar.bz2 # Generated: postgresql-8.4.0.tar.bz2.metalink %s""" % tuple(params), sys.exit(error_msg and 1 or 0) def get_first(x): try: return x[0] except: return x def check_rfc822_date(date): if date.strip() != "": _date = re.sub(r' (\+0000|GMT|UTC?|Z)$', '', date) try: time.strptime(_date, "%a, %d %b %Y %H:%M:%S") except ValueError, e: return False return True def encode_text(text, encoding='utf-8'): return text.decode(fs_encoding).encode(encoding) def split_values(value_list, return_array=True, separator=',', separator2=''): '''Return splitted list of comma-separated value_list''' if not value_list or not isinstance(value_list, basestring): if return_array: return [] return value_list values = [] for value in value_list.split(separator): value = value.strip() if not value: continue if separator2: values.append(split_values(value, True, separator2)) elif value not in values: values.append(value) return values # Uses compression if available # HINT: Use httplib2 if possible def get_url(url): if not url: return '' import urllib2 headers = {'Accept-encoding': 'gzip;q=1.0, deflate;q=0.9, identity;q=0.5', 'User-agent': 'Mozilla/5.0 (X11; U; Linux x86_64; de; rv:1.9.1) Gecko/20090701 Ubuntu/9.04 (jaunty) Firefox/3.5'} req = urllib2.Request(url, '', headers) def uncompress(page): content = page.read() info = page.info() encoding = info.get("Content-Encoding") if encoding in ('gzip', 'x-gzip', 'deflate'): from cStringIO import StringIO if encoding == 'deflate': import zlib content = StringIO(zlib.decompress(content)).read() else: import gzip content = gzip.GzipFile(fileobj=StringIO(content)).read() #info['Content-Length'] = str(len(content)) #del info['Content-Encoding'] return content try: f = urllib2.urlopen(req) return uncompress(f) except Exception, e: # urllib2.URLError print >> sys.stderr, 'Download error:', e return '' def unique(seq, key=None, range=None): d = {} if key is not None: return [d.setdefault(e[key],range and e[range[0]:range[1]] or e) for e in seq if e[key] not in d] return [d.setdefault(e,e) for e in seq if e not in d] def generate_verification_and_resources(self, add_p2p=True, protocols=[], is_child=True): text = '' indentation = is_child and ' ' or ' ' # Verification if self.hashes.pieces or self.signature or self.hashes.has_one('ed2k md5 sha1 sha256'): text += indentation + ' ' + os.linesep # TODO: ed2k really allowed? for hash, value in sorted(self.hashes.get_multiple('ed2k md5 sha1 sha256').items()): text += '%s %s%s' % (indentation, hash, value.lower(), os.linesep) # TODO: Why len(self.pieces) > 1 ? if len(self.hashes.pieces): text += indentation + ' ' + os.linesep for id, piece in enumerate(self.hashes.pieces): text += indentation + ' '+piece+'' + os.linesep text += indentation + ' ' + os.linesep if self.signature.strip() != "": text += '%s %s%s' % (indentation, self.signature_type, self.signature, os.linesep) text += indentation + ' ' + os.linesep # Add missing P2P resources implicitly if hashes are available if add_p2p and 'ed2k' in self.hashes and self.size and getattr(self, 'filename', '') and 'ed2k' not in protocols: aich = 'aich' in self.hashes and ('h=' + self.hashes['aich'].upper() + '|') or '' url = "ed2k://|file|%s|%s|%s|%s/" % (urllib.quote(os.path.basename(self.filename)), self.size, self.hashes['ed2k'].upper(), aich) self.add_url(url, "ed2k", "", preference_ed2k, "", is_child) if add_p2p and ((self.size and getattr(self, 'filename', '')) or self.hashes.has_one('btih ed2k sha1')) and 'magnet' not in protocols: magnet = {} hashes = [] if getattr(self, 'filename', ''): magnet['dn'] = os.path.basename(self.filename) if self.size: magnet['xl'] = self.size if 'sha1' in self.hashes: hashes.append("urn:sha1:%s" % self.hashes['sha1'].upper()) if 'ed2k' in self.hashes: hashes.append("urn:ed2k:%s" % self.hashes['ed2k'].lower()) # Another way of including the ED2K hash: hashes.append("urn:ed2khash:%s" % self.hashes['ed2k'].lower()) # TODO: tiger-tree root hash: http://wiki.shareaza.com/static/MagnetsMakeAndUse # TODO: kzhash if magnet or hashes: params = urllib.urlencode(magnet) if hashes: params += (params and '&' or '') + 'xt=' + '&xt='.join(hashes) url = "magnet:?%s" % params self.add_url(url, "magnet", "", "90", "", is_child) if 'btih' in self.hashes: url = "magnet:?xt=urn:btih:%s" % self.hashes['btih'].upper() self.add_url(url, "magnet", "", "99", "", is_child) if self.resources: if getattr(self, 'maxconn_total', '') and "" != self.maxconn_total.strip() and "-" != self.maxconn_total.strip(): text += indentation + ' ' + os.linesep else: text += indentation + " " + os.linesep for res in self.resources: details = '' if res.location.strip() != "": details += ' location="'+res.location.lower()+'"' if res.preference.strip() != "": details += ' preference="'+res.preference+'"' if res.conns.strip() != "" and res.conns.strip() != "-" : details += ' maxconnections="'+res.conns+'"' text += '%s %s%s' % (indentation, res.type, details, escape(res.url), os.linesep) text += indentation + ' ' + os.linesep return text # return 0=no valid URL, 1=URL prefix, 2=normal URL def is_url(url): u = urlparse.urlparse(url, '', False) if not (u[0] and u[1] and u[2]): return 0 _is_url = u[0] in 'http https ftp ftps'.split() and u[1] and u[2] if not _is_url: return 0 return u[2][-1] == '/' and 1 or 2 # Create a gzip compressed file from data (use highest compression=9) def compress_file(outfile, data, read_file=False): try: import gzip zfile = gzip.GzipFile(outfile, 'wb') if read_file: infile = open(data, 'rb') data = infile.read() infile.close() zfile.write(data) zfile.close() return True except: return False def main(args=[]): global _opts, verbose if isinstance(args, basestring): args = [args] # Optional speed improvement try: import psyco psyco.full() except ImportError: pass # Read arguments and options optParser = OptParser(['compress-gzip','Generate gzip compressed files (.metalink.gz, .torrent.gz) in addition to save server bandwidth', 'create-torrent=sURLs','Create torrent with given tracker URLs (comma separates groups, space separates group members: "t1, t2a t2b")', 'create-torrent-only=sURLs','The same as --create-torrent, but do not generate Metalink files', 'directory|d=sDIRECTORY','Use output directory (instead of input file directory or current directory)', 'filename=sTEXT','Relative file path to append to mirror list', 'filter=sTEXT','Filter mirror URLs by a search text', 'filter-regex=sTEXT','Filter mirror URLs by a regular expression', 'filter-from=sTEXT','Parse mirror urls after (and including) regular expression', 'filter-to=sTEXT','Parse mirror urls until (and including) regular expression', 'mirrors=sURL|FILE','Parse URL or file for mirrors (special support for SourceForge and Eclipse)', 'old-version=sTEXT','Old version to replace by --version (only necessary if no old version information is available)', 'overwrite','Overwrite existing files (otherwise append .new)', 'preference=sTEXT','Set default preference (1-100), extended: http=80,ftp=90,bittorrent=100', 'recursive|R-sFILE PATTERN','Parse directories recursively', 'template|t=sFILE','Metalink template file', 'url-prefix=sURL','URL prefix (where metalink should be placed online)', 'verbose|v','Verbose output', 'V','Show program version and exit', 'help|h','Print this message and exit\n\nMetalink options:', 'changelog=sTEXT','Changelog', 'copyright=sTEXT','Copyright', 'description=sTEXT','Description', 'identity=sTEXT','Identity', 'language=sISO-CODE','ISO-639/3166 code of language (en-US)', 'license-name=sTEXT','Name of the license', 'license-url=sURL','URL of the license', 'logo=sURL','Logo URL', 'origin=sURL','Absolute or relative URL to this metalink file (online)', 'os=sOS','Operating system ("Source", "Linux-x86", ...)', 'publisher-name=sTEXT','Name of the publisher', 'publisher-url=sURL','URL of the publisher', 'refreshdate=sDATE','RFC 822 date of refresh (for type "dynamic")', 'releasedate=sDATE','RFC 822 date of release', 'screenshot=sURL','Screenshot(s) URL', 'tags=sTEXT','Comma-separated list of tags', 'type=sTEXT','Type of this metalink file ("dynamic" or "static")', 'upgrade=sTYPE','Upgrade type ("install", "uninstall, reboot, install" or "uninstall, install")', 'version=sTEXT','Version of the file']) _args = args + sys.argv[1:] _opts, args, stdin, errors = optParser.parse(_args) if _opts['verbose'] is not None: verbose = _opts['verbose'] if _opts['help'] or errors: usage_and_exit(os.linesep.join(errors), optParser.getHelp()) if _opts['V']: usage_and_exit(False) # Sanitize options # TODO: check rest of _opts _opts['tags'] = split_values(_opts['tags'], False) files = {} files_not_found = [] files_skipped = [] m = Metalink() _files = [] _directories = [] _hashes = {} _hashes_general = Hashes() _metalinks = {} _metalink_general = '' _mirrors = {} _mirrors_general = Mirrors() _signatures = {} _torrents = {} if _opts['template'] and os.path.isfile(_opts['template']): _files.append(_opts['template']) if _opts['url_prefix'] and 1 != is_url(_opts['url_prefix']): _opts['url_prefix'] = False if _opts['create_torrent_only']: _opts['create_torrent'] = split_values(_opts['create_torrent_only'], True, ',', ' ') elif _opts['create_torrent']: _opts['create_torrent'] = split_values(_opts['create_torrent'], True, ',', ' ') if _opts['mirrors'] and re.search(r'\s', _opts['mirrors']): _opts['mirrors'] = [_mirror for _mirror in re.split(r'\s+', _opts['mirrors']) if is_url(_mirror)] elif _opts['mirrors'] and not (is_url(_opts['mirrors']) or os.path.isfile(_opts['mirrors'])): _opts['mirrors'] = False if _opts['preference']: preferences = split_values(_opts['preference'], True, ',', '=') protocols = m.supported_protocols() p1 = [p for p in preferences if len(p) == 1 and p[0].isdigit() and 0 <= int(p[0]) <= 100] p2 = [p for p in preferences if len(p) == 2 and p[1].isdigit() and p[0] in protocols and 0 <= int(p[1]) <= 100] if p1 or p2: _opts['preference'] = {} if p1: _opts['preference'] = dict([ [p, p1[-1][0] ] for p in protocols]) for p in p2: _opts['preference'][p[0]] = p[1] else: _opts['preference'] = False # Search files and url_prefix for arg in args: if os.path.isdir(arg): _directories.append(arg) elif os.path.isfile(arg): file = os.path.realpath(arg) _files.append(file) # Search parallel helper files _files.extend(m.find_helper_files(file)) elif is_url(arg): if 1 == is_url(arg): if not _opts['url_prefix']: _opts['url_prefix'] = arg else: # Add mirror _mirrors_general.parse('', arg) else: # Try glob expression (wildcards) for file in glob.glob(arg): if os.path.isfile(file): _files.append(file) # Search parallel helper files _files.extend(m.find_helper_files(file)) elif os.path.isdir(file): _directories.append(arg) else: files_not_found.append(arg) if files_not_found: _set_opt('filename', files_not_found[0]) if _opts['recursive'] and not _directories: _directories.append('.') for directory in _directories: for root, dirs, filenames in os.walk(os.path.realpath(directory)): for file in [os.path.join(root, file) for file in filenames]: _files.append(file) # Search parallel helper files _files.extend(m.find_helper_files(file)) if not _opts['recursive']: break _files = unique(_files) # Categorize and filter files (hashes, mirrors, torrents, signatures) for file in _files: _file = os.path.basename(file) if _file.endswith('.metalink'): _metalinks[_file[:-9]] = file elif _file.endswith('.torrent'): _torrents[_file[:-8]] = file elif _file.endswith('.mirrors') or _file.lower() == 'mirrors': key = _file.lower() == 'mirrors' and _file or _file[:-8] _mirrors[key] = file elif m.hashes.is_hash_file(_file): hash_file = m.hashes.last_hash_file if hash_file not in _hashes: _hashes[hash_file] = {} if hash_file == _file: key = os.path.dirname(file) else: key = _file[len(hash_file)+1:] _hashes[hash_file][key] = file elif m.hashes.is_signature_file(_file): hash_file = m.hashes.last_hash_file if hash_file not in _signatures: _signatures[hash_file] = {} _signatures[hash_file][_file[len(hash_file)+1:]] = file _signatures[m.hashes.last_hash_file] = file elif os.stat(file).st_size > 1000000: files[_file] = file else: files_skipped.append(file) if files_skipped: files_skipped.sort() print >> sys.stderr, "Skipped the following files:\n%s" % "\n".join(files_skipped) # Metalink update mode if not files and len(_metalinks): if verbose is not False: print 'Metalink update mode (apply options and create torrents)' for filename, file in _metalinks.items(): m = Metalink(False) m.load_file(file, False) if _opts['version']: new_version = _opts['version'] else: new_version = m.version if _opts['old_version']: old_version = _opts['old_version'] else: old_version = m.version if old_version and old_version != new_version: new_file = (os.path.dirname(file) and os.path.dirname(file) + os.sep or '') + filename.replace(old_version, new_version) + '.metalink' else: new_file = file # Parse mirrors before changing filename local_file = new_file[:-9] mirrors = local_file + '.mirrors' if os.path.isfile(mirrors): m.parse_mirrors(mirrors) elif len(_mirrors) == 1: m.parse_mirrors(_mirrors.values().pop()) elif os.path.basename(local_file) in _mirrors: m.parse_mirrors(_mirrors[os.path.basename(local_file)]) elif _opts['mirrors']: if isinstance(_opts['mirrors'], list): m.parse_mirrors(data="\n".join(_opts['mirrors'])) elif is_url(_opts['mirrors']): m.parse_mirrors('', _opts['mirrors']) else: m.parse_mirrors(_opts['mirrors']) if old_version != new_version: m.change_filename(new_version, old_version, False) # Parse parallel files torrent = local_file + '.torrent' if os.path.isfile(torrent): m.parse_torrent(torrent) if os.path.isfile(local_file): m.scan_file(local_file) # Force current creation date (may be overwritten by command-line option afterwards) m.pubdate = '' m.apply_command_line_options() if os.path.isfile(new_file) and not _opts['overwrite']: new_file += '.new' m.generate(new_file) return # Mirror update mode if not files and len(_metalinks) == 1 and len(_mirrors) == 1: files[_metalinks.keys()[0]] = _metalinks.keys()[0] # Filter general help files for filename in set(_metalinks.keys()).difference(set(files.keys())): # TODO: Parse general metalink only once _metalink_general = _metalinks.pop(filename) break if files: for filename in set(_mirrors.keys()).difference(set(files.keys())): _mirrors_general.parse(_mirrors.pop(filename)) for filename in set(_hashes.keys()).difference(set(files.keys())): for file in _hashes[filename].values(): _hashes_general.parse(file) # Mirror download mode if len(_mirrors) == 1 and not _opts['mirrors']: _opts['mirrors'] = _mirrors.popitem()[1] if not files and _opts['mirrors']: if not files_not_found: files_not_found.append('') for filename in files_not_found: if filename: _set_opt('filename', filename) if isinstance(_opts['mirrors'], list): m.parse_mirrors(data="\n".join(_opts['mirrors'])) elif is_url(_opts['mirrors']): m.parse_mirrors('', _opts['mirrors']) else: m.parse_mirrors(_opts['mirrors']) local_file = m.file.mirrors.get_filename() if m.file.filename and not local_file: local_file = m.file.filename else: m.file.filename = local_file directory = _get_opt('directory') if directory: local_file = directory + os.sep + local_file torrent = local_file + '.torrent' if os.path.isfile(torrent): m.parse_torrent(torrent) if os.path.isfile(local_file): m.scan_file(local_file) m.generate(True) m.reset() return if not files: usage_and_exit(None, optParser.getHelp()) # 'No files to process' for filename, file in files.items(): if verbose is not False: print 'Processing %s' % file m = Metalink() # Parse metalink template if filename in _metalinks: m.load_file(_metalinks[filename]) elif _metalink_general: m.load_file(_metalink_general) # Force pubdate to be the current timestamp m.pubdate = '' # Overwrite old mirror filenames from template m.change_filename(filename) if filename in _mirrors: m.clear_res('http ftp https ftps') m.parse_mirrors(_mirrors[filename], '', '', False, True) # m.file.mirrors.change_filename(filename) elif _mirrors_general.mirrors: _mirrors_general.change_filename(filename) m.file.mirrors.add(_mirrors_general, True) elif _opts['mirrors']: if isinstance(_opts['mirrors'], list): m.parse_mirrors(data="\n".join(_opts['mirrors'])) elif is_url(_opts['mirrors']): m.parse_mirrors('', _opts['mirrors']) else: m.parse_mirrors(_opts['mirrors']) # Parse torrent files if filename in _torrents: m.parse_torrent(_torrents[filename]) elif len(_torrents) == len(files) == 1: m.parse_torrent(_torrents.values()[0]) # Parse signature file if filename in _signatures: m.import_signature(_signatures[filename]) # Parse hash files _hashes_general.set_file(file) m.file.hashes.update(_hashes_general) if filename in _hashes: m.file.hashes.files = _hashes[filename].values() m.file.hashes.parse_files() m.file.hashes.set_file(file) if os.path.isfile(file): # Scan file for remaining hashes m.scan_file(file) m.generate(True) class Resource(object): def __init__(self, url, type="default", location="", preference="", conns=""): self.errors = [] self.url = url self.location = location if type == "default" or type.strip() == "": if url.endswith(".torrent"): self.type = "bittorrent" else: chars = url.find(":") self.type = url[:chars] else: self.type = type self.preference = str(preference) if conns.strip() == "-" or conns.strip() == "": self.conns = "-" else: self.conns = conns def validate(self): if self.url.strip() == "": self.errors.append("Empty URLs are not allowed!") allowed_types = ["ftp", "ftps", "http", "https", "rsync", "bittorrent", "magnet", "ed2k"] if not self.type in allowed_types: self.errors.append("Invalid URL: " + self.url + '.') elif self.type in ['http', 'https', 'ftp', 'ftps', 'bittorrent']: m = re.search(r'\w+://.+\..+/.*', self.url) if m is None: self.errors.append("Invalid URL: " + self.url + '.') if self.location.strip() != "": iso_locations = ["AF", "AX", "AL", "DZ", "AS", "AD", "AO", "AI", "AQ", "AG", "AR", "AM", "AW", "AU", "AT", "AZ", "BS", "BH", "BD", "BB", "BY", "BE", "BZ", "BJ", "BM", "BT", "BO", "BA", "BW", "BV", "BR", "IO", "BN", "BG", "BF", "BI", "KH", "CM", "CA", "CV", "KY", "CF", "TD", "CL", "CN", "CX", "CC", "CO", "KM", "CG", "CD", "CK", "CR", "CI", "HR", "CU", "CY", "CZ", "DK", "DJ", "DM", "DO", "EC", "EG", "SV", "GQ", "ER", "EE", "ET", "FK", "FO", "FJ", "FI", "FR", "GF", "PF", "TF", "GA", "GM", "GE", "DE", "GH", "GI", "GR", "GL", "GD", "GP", "GU", "GT", "GG", "GN", "GW", "GY", "HT", "HM", "VA", "HN", "HK", "HU", "IS", "IN", "ID", "IR", "IQ", "IE", "IM", "IL", "IT", "JM", "JP", "JE", "JO", "KZ", "KE", "KI", "KP", "KR", "KW", "KG", "LA", "LV", "LB", "LS", "LR", "LY", "LI", "LT", "LU", "MO", "MK", "MG", "MW", "MY", "MV", "ML", "MT", "MH", "MQ", "MR", "MU", "YT", "MX", "FM", "MD", "MC", "MN", "ME", "MS", "MA", "MZ", "MM", "NA", "NR", "NP", "NL", "AN", "NC", "NZ", "NI", "NE", "NG", "NU", "NF", "MP", "NO", "OM", "PK", "PW", "PS", "PA", "PG", "PY", "PE", "PH", "PN", "PL", "PT", "PR", "QA", "RE", "RO", "RU", "RW", "SH", "KN", "LC", "PM", "VC", "WS", "SM", "ST", "SA", "SN", "RS", "SC", "SL", "SG", "SK", "SI", "SB", "SO", "ZA", "GS", "ES", "LK", "SD", "SR", "SJ", "SZ", "SE", "CH", "SY", "TW", "TJ", "TZ", "TH", "TL", "TG", "TK", "TO", "TT", "TN", "TR", "TM", "TC", "TV", "UG", "UA", "AE", "GB", "US", "UM", "UY", "UZ", "VU", "VE", "VN", "VG", "VI", "WF", "EH", "YE", "ZM", "ZW", "UK"] if not self.location.upper() in iso_locations: self.errors.append(self.location + " is not a valid country code.") if self.preference != "": try: pref = int(self.preference) if pref < 0 or pref > 100: self.errors.append("Preference must be between 0 and 100, not " + self.preference + '.') except: self.errors.append("Preference must be a number, between 0 and 100.") if self.conns.strip() != "" and self.conns.strip() != "-": try: conns = int(self.conns) if conns < 1: self.errors.append("Max connections must be at least 1, not " + self.conns + '.') elif conns > 20: self.errors.append("You probably don't want max connections to be as high as " + self.conns + '!') except: self.errors.append("Max connections must be a positive integer, not " + self.conns + ".") # TODO: Validate ed2k MD4/AICH and magnet SHA1 hash return len(self.errors) == 0 def __repr__(self): return '%s, %s, %s, %s' % (self.url, self.type, self.location, self.preference) class Metafile(object): def __init__(self): self.hashes = Hashes() self.mirrors = Mirrors() self.reset() def clear_res(self, types=''): if not types.strip(): self.resources = [] self.urls = [] else: _types = types.strip().split() self.resources = [res for res in self.resources if res.type not in _types] self.urls = [res.url for res in self.resources] def add_url(self, url, type="default", location="", preference="", conns="", add_to_child=True): if url not in self.urls: l = self.mirrors.parse_link(url, location, False, preference) if l: self.resources.append(Resource(l[0], l[1], l[2], l[3], conns)) self.urls.append(url) return True return False def add_res(self, res): if res.url not in self.urls: self.resources.append(res) self.urls.append(res.url) return True return False def import_signature(self, file): fp = open(file, "rb") self.signature = fp.read() fp.close() return True def scan_file(self, filename, use_chunks=True, max_chunks=255, chunk_size=256, progresslistener=None): if verbose: print "Scanning file..." # Filename and size self.filename = os.path.basename(filename) if not self.hashes.filename: self.hashes.filename = self.filename size = os.stat(filename).st_size self.size = str(size) known_hashes = self.hashes.get_multiple('ed2k md5 sha1 sha256') # If all hashes and pieces are already known, do nothing if 4 == len(known_hashes) and self.hashes.pieces: return True piecelength_ed2k = 9728000 # Force maximum size for piece checksums to 512 KiB: # http://en.wikipedia.org/wiki/BitTorrent_(protocol)#Creating_and_publishing_torrents maxlength = 524288 # Calculate piece length if use_chunks: minlength = chunk_size*1024 self.hashes.piecelength = 1024 while self.hashes.piecelength < maxlength and (size / self.hashes.piecelength > max_chunks or self.hashes.piecelength <= minlength): self.hashes.piecelength *= 2 if verbose: print "Using piecelength", self.hashes.piecelength, "(" + str(self.hashes.piecelength / 1024) + " KiB)" numpieces = size / self.hashes.piecelength if numpieces < 2: use_chunks = False hashes = {} # ADDED: MD4 for calculating ed2k hashes # TODO: AICH ed2k hashes (allow much better error recognition and repair, 180 KB pieces instead of 9500 KB) # Try to use hashlib try: import hashlib hashes['md4'] = hashlib.new('md4') hashes['md5'] = hashlib.md5() hashes['sha1'] = hashlib.sha1() hashes['sha256'] = hashlib.sha256() except: # Import deprecated modules import md5 import sha # Try old MD4 lib try: import Crypto.Hash.MD4 hashes['md4'] = Crypto.Hash.MD4.new() except: hashes['md4'] = None print >> sys.stderr, "Hashlib not available. No support for SHA-256%s" % (hashes['md4'] and "." or " and ED2K.") hashes['md5'] = md5.new() hashes['sha1'] = sha.new() hashes['sha256'] = None if hashes['md4']: md4piecehash = None if size > piecelength_ed2k: md4hash_copy = hashes['md4'].copy() md4piecehash = md4hash_copy.copy() length_ed2k = 0 sha1hash_copy = hashes['sha1'].copy() piecehash = sha1hash_copy.copy() piecenum = 0 length = 0 # If some hashes are already available, do not calculate them if 'ed2k' in known_hashes: known_hashes['md4'] = known_hashes['ed2k'] del known_hashes['ed2k'] for hash in known_hashes.keys(): hashes[hash] = None # TODO: Don't calculate pieces if already known self.hashes.pieces = [] if not self.hashes.piecetype: self.hashes.piecetype = "sha1" num_reads = math.ceil(size / 4096.0) reads_per_progress = int(math.ceil(num_reads / 100.0)) reads_left = reads_per_progress progress = 0 fp = open(filename, "rb") while True: data = fp.read(4096) if data == "": break # Progress updating if progresslistener: reads_left -= 1 if reads_left <= 0: reads_left = reads_per_progress progress += 1 result = progresslistener.Update(progress) if get_first(result) == False: if verbose: print "Cancelling scan!" return False # Process the data if hashes['md5']: hashes['md5'].update(data) if hashes['sha1']: hashes['sha1'].update(data) if hashes['sha256']: hashes['sha256'].update(data) left = len(data) if hashes['md4']: if md4piecehash: l = left numbytes_ed2k = 0 while l > 0: if length_ed2k + l <= piecelength_ed2k: if numbytes_ed2k: md4piecehash.update(data[numbytes_ed2k:]) else: md4piecehash.update(data) length_ed2k += l l = 0 else: numbytes_ed2k = piecelength_ed2k - length_ed2k md4piecehash.update(data[:numbytes_ed2k]) length_ed2k = piecelength_ed2k l -= numbytes_ed2k if length_ed2k == piecelength_ed2k: hashes['md4'].update(md4piecehash.digest()) md4piecehash = md4hash_copy.copy() length_ed2k = 0 else: hashes['md4'].update(data) while use_chunks and left > 0: if length + left <= self.hashes.piecelength: piecehash.update(data) length += left left = 0 else: numbytes = self.hashes.piecelength - length piecehash.update(data[:numbytes]) length = self.hashes.piecelength data = data[numbytes:] left -= numbytes if length == self.hashes.piecelength: if verbose: print "Done with piece hash", len(self.hashes.pieces) self.hashes.pieces.append(piecehash.hexdigest()) piecehash = sha1hash_copy.copy() length = 0 if use_chunks: if length > 0: if verbose: print "Done with piece hash", len(self.hashes.pieces) self.hashes.pieces.append(piecehash.hexdigest()) if verbose: print "Total number of pieces:", len(self.hashes.pieces) fp.close() if hashes['md4']: if md4piecehash and length_ed2k: hashes['md4'].update(md4piecehash.digest()) self.hashes['ed2k'] = hashes['md4'].hexdigest() for hash in 'md5 sha1 sha256'.split(): if hashes[hash]: self.hashes[hash] = hashes[hash].hexdigest() # TODO: Why len(self.pieces) < 2 ? if len(self.hashes.pieces) < 2: self.hashes.pieces = [] # Convert to string self.hashes.piecelength = str(self.hashes.piecelength) if verbose: print "done" if progresslistener: progresslistener.Update(100) return True def validate(self): for url in 'screenshot logo'.split(): if getattr(self, url).strip() != "": if not self.validate_url(getattr(self, url)): self.errors.append("Invalid URL: " + getattr(self, url) + '.') if not self.resources and not self.mirrors: self.errors.append("You need to add at least one URL!") for hash, length in {'md5':32, 'sha1':40, 'sha256':64}.items(): if hash in self.hashes: if re.match(r'^[0-9a-fA-F]{%d}$' % length, self.hashes[hash]) is None: self.errors.append("Invalid %s hash." % hash) if self.size.strip() != "": try: size = int(self.size) if size < 0: self.errors.append("File size must be at least 0, not " + self.size + '.') except: self.errors.append("File size must be an integer, not " + self.size + ".") if self.maxconn_total.strip() != "" and self.maxconn_total.strip() != "-": try: conns = int(self.maxconn_total) if conns < 1: self.errors.append("Max connections must be at least 1, not " + self.maxconn_total + '.') elif conns > 20: self.errors.append("You probably don't want max connections to be as high as " + self.maxconn_total + '!') except: self.errors.append("Max connections must be a positive integer, not " + self.maxconn_total + ".") if self.upgrade.strip() != "": if self.upgrade not in ["install", "uninstall, reboot, install", "uninstall, install"]: self.errors.append('Upgrade must be "install", "uninstall, reboot, install", or "uninstall, install".') return len(self.errors) == 0 def validate_url(self, url): if url.endswith(".torrent"): type = "bittorrent" else: chars = url.find(":") type = url[:chars] allowed_types = ["ftp", "ftps", "http", "https", "rsync", "bittorrent", "magnet", "ed2k"] if not type in allowed_types: return False elif type in ['http', 'https', 'ftp', 'ftps', 'bittorrent']: if re.search(r'\w+://.+\..+/.*', url) is None: return False return True def generate_file(self, add_p2p=True): if self.filename.strip() != "": text = ' ' + os.linesep else: text = ' ' + os.linesep # File info # TODO: relations for attr in 'identity size version language os changelog description logo mimetype releasedate screenshot upgrade'.split(): if "" != getattr(self, attr).strip(): text += " <%s>%s%s" % (attr, escape(getattr(self, attr)), attr, os.linesep) if self.tags: text += ' ' + ','.join(unique(self.tags)) + "" + os.linesep # Add mirrors for url, type, location, preference in self.mirrors.mirrors: # Add filename for relative urls if '/' == url[-1]: url += os.path.basename(self.filename) self.add_url(url, type, location, preference) text += generate_verification_and_resources(self, add_p2p, self.get_protocols()) text += ' ' + os.linesep return text # Return list of found resource types def get_protocols(self): found = {} for res in self.resources: if res.type not in found: found[res.type] = res.url return found # Call with filename or url def parse_torrent(self, filename='', url=''): torrent = Torrent(filename, url) torrent.parse() if not self.description: self.description = torrent.comment self.filename = torrent.files[0][0] self.size = str(torrent.files[0][1]) if not self.hashes.filename: self.hashes.filename = self.filename self.hashes['btih'] = torrent.infohash self.hashes.pieces = torrent.pieces self.hashes.piecelength = str(torrent.piecelength) self.hashes.piecetype = 'sha1' if url and not filename: self.add_url(url, "bittorrent", "", "100") return torrent.files # Call with filename, url or text def parse_mirrors(self, filename='', url='', data='', plain=False, remove_others=False): if filename: filename = filename.strip() if url: url = url.strip() if filename and filename.endswith('.torrent'): return self.parse_torrent(filename) elif url and url.endswith('.torrent'): return self.parse_torrent('', url) mirrors = Mirrors(filename, url) mirrors.parse('', '', data, plain) self.mirrors.add(mirrors, remove_others) # Call with filename, url or text def parse_hashes(self, filename='', url='', data='', force_type='', filter_name=''): hashes = Hashes(filename, url) if self.filename: hashes.filename = self.filename hashes.parse('', data, force_type, filter_name) # TODO: Better setting of dict key self.hashes.filename = hashes.filename self.hashes.update(hashes) def change_filename(self, new, old='', overwrite_filename=True): if overwrite_filename and not old: old = self.filename if not old or not new: return False self.filename = self.filename.replace(old, new) self.mirrors.change_filename(new, old) # Clear resources containing size and hashes self.clear_res('ed2k magnet') self.hashes.init() self.size = '' old = urllib.quote(old) new = urllib.quote(new) self.urls = [] for res in self.resources: res.url = res.url.replace(old, new) self.urls.append(res.url) return True def remove_other_mirrors(self, mirrors): _types = "bittorrent ed2k magnet".split() self.resources = [res for res in self.resources if res.type in _types or res.url in mirrors.urls] self.urls = [res.url for res in self.resources] self.mirrors.remove_other_mirrors(mirrors) def replace_hashes(self, hashes): old = hashes.filename hashes.filename = self.filename for hash, value in hashes.get_multiple('ed2k md5 sha1 sha256').items(): self.hashes[hash] = value hashes.filename = old def get_urls(self): return [res.url for res in self.resources] def reset(self): """Reset mutable attributes to allow object reuse""" self.changelog = "" self.description = "" self.filename = "" self.identity = "" self.language = "" self.logo = "" self.maxconn_total = "" self.mimetype = "" self.os = "" self.releasedate = "" self.screenshot = "" self.signature = "" self.signature_type = "" self.size = "" self.tags = [] self.upgrade = "" self.version = "" self.resources = [] self.urls = [] self.errors = [] self.hashes.reset() self.mirrors.reset() class Metalink(object): def __init__(self, overwrite_with_opts=True): self.hashes = Hashes() self.file = Metafile() self.reset(overwrite_with_opts) def apply_command_line_options(self): attrs = {} for opt in 'changelog copyright description filename_absolute generator identity license_name license_url logo origin pubdate publisher_name publisher_url refreshdate releasedate screenshot tags type upgrade version url_prefix os language'.split(): if _get_opt(opt): attrs[opt] = _opts[opt] self.setattrs(attrs) def create_torrent(self, torrent_trackers, torrent): if not self.file.filename: return ['file name must be non-empty'] if not self.file.size: return ['file size must be non-empty'] t = Torrent(torrent) data = {'comment':encode_text(self.description), 'files':[[encode_text(self.file.filename), int(self.file.size)]], 'piece length':int(self.file.hashes.piecelength), 'pieces':self.file.hashes.pieces, 'trackers':torrent_trackers, 'created by':generator, 'encoding':'UTF-8'} return t.create(data) def clear_res(self, types=''): self.file.clear_res(types) def add_url(self, url, type="default", location="", preference="", conns="", add_to_child=True): if add_to_child: return self.file.add_url(url, type, location, preference, conns) if url not in self.urls: l = self.file.mirrors.parse_link(url, location, False, preference) if l: self.resources.append(Resource(l[0], l[1], l[2], l[3], conns)) self.urls.append(url) return True return False def add_res(self, res): return self.file.add_res(res) def import_signature(self, file): return self.file.import_signature(file) def scan_file(self, filename, use_chunks=True, max_chunks=255, chunk_size=256, progresslistener=None): self.filename_absolute = filename return self.file.scan_file(filename, use_chunks, max_chunks, chunk_size, progresslistener) # TODO: get_errors() merges self errors and self.files errors def validate(self): for url in 'publisher_url license_url origin screenshot logo'.split(): if getattr(self, url).strip() != "": if not self.validate_url(getattr(self, url)): self.errors.append("Invalid %s%s: %s." % (url, url[-4:] != '_url' and ' URL' or '', getattr(self, url))) for d in 'pubdate refreshdate releasedate'.split(): if not check_rfc822_date(getattr(self, d)): self.errors.append("%s must be of format RFC 822: %s" % (d, getattr(self, d))) if self.type.strip() != "": if self.type.lower() not in ["dynamic", "static"]: self.errors.append("Type must be either dynamic or static.") if self.upgrade.strip() != "": if self.upgrade.lower().replace(' ', '') not in ["install", "uninstall,reboot,install", "uninstall,install"]: self.errors.append('Upgrade must be "install", "uninstall, reboot, install", or "uninstall, install".') valid_files = True for f in self.files: valid_files = f.validate() and valid_files return valid_files and len(self.errors) == 0 def get_errors(self): errors = self.errors for file in self.files: errors.extend(file.errors) return errors def validate_url(self, url): return self.file.validate_url(url) def generate(self, filename='', add_p2p=True): text = '' + os.linesep origin = "" if _get_opt('url_prefix'): self.url_prefix = _get_opt('url_prefix') if self.url_prefix: text += '%s' % (self.url_prefix, os.linesep) if not self.origin: if filename and filename is not True: metalink = os.path.basename(filename) else: metalink = os.path.basename(self.filename_absolute) if not metalink and _get_opt('filename'): metalink = os.path.basename(_get_opt('filename')) if not metalink.endswith('.metalink'): metalink += '.metalink' self.origin = self.url_prefix + metalink if self.origin.strip() != "": origin = 'origin="'+self.origin+'" ' pubdate = self.pubdate or time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime()) if 'dynamic' == self.type and self.refreshdate: refreshdate = '" refreshdate="' + self.refreshdate else: refreshdate = '' type = "" if self.type.strip() != "": type = 'type="'+self.type+'" ' _generator = self.generator and self.generator or generator text += '' + os.linesep text += self.generate_info() text += " " + os.linesep # Add multi-file torrent information text += generate_verification_and_resources(self, add_p2p, [], False) text_start = text text_end = ' ' + os.linesep text_end += '' text_files = '' for f in self.files: text = f.generate_file(add_p2p) text_files += text # TODO: Save separate .metalink for multi-file metalinks text = text_start + text_files + text_end try: data = text.encode('utf-8') except: data = text.decode('latin1').encode('utf-8') if filename: if filename is True: filename = (self.filename_absolute or self.file.filename or 'NEW') + '.metalink' elif not filename: filename = 'NEW.metalink' directory = _get_opt('directory') if directory: if not os.path.isdir(directory): print >> sys.stderr, 'ERROR: output directory %s does not exist' % directory return False filename = directory + os.sep + os.path.basename(filename) # Create backup if os.path.isfile(filename) and not _get_opt('overwrite'): filename += '.new' # os.rename(filename, filename + '.bak') compress_gzip = _get_opt('compress_gzip') if not _get_opt('create_torrent_only'): fp = open(filename, "wb") fp.write(data) fp.close() print '%s%s' % (verbose is not False and 'Generated: ' or '', filename) if compress_gzip: if compress_file(filename + '.gz', data): print '%s%s' % (verbose is not False and 'Generated: ' or '', filename + '.gz') create_torrent = _get_opt('create_torrent') if create_torrent: torrent = filename.endswith('.new') and filename[:-4] or filename torrent = (torrent.endswith('.metalink') and torrent[:-9] or torrent) + '.torrent' if os.path.isfile(torrent) and not _get_opt('overwrite'): torrent += '.new' _errors = self.create_torrent(create_torrent, torrent) if _errors: print 'ERROR while generating %s:\n%s' % (torrent, "\n".join(_errors)) elif compress_gzip: if compress_file(torrent + '.gz', torrent, True): print '%s%s' % (verbose is not False and 'Generated: ' or '', torrent + '.gz') return True return data def generate_info(self): text = "" # Publisher info if self.publisher_name.strip() != "" or self.publisher_url.strip() != "": text += ' ' + os.linesep if self.publisher_name.strip() != "": text += ' ' + self.publisher_name + '' + os.linesep if self.publisher_url.strip() != "": text += ' ' + self.publisher_url + '' + os.linesep text += ' ' + os.linesep # License info if self.license_name.strip() != "" or self.license_url.strip() != "": text += ' ' + os.linesep if self.license_name.strip() != "": text += ' ' + self.license_name + '' + os.linesep if self.license_url.strip() != "": text += ' ' + self.license_url + '' + os.linesep text += ' ' + os.linesep # Release info for attr in 'identity version copyright description logo releasedate screenshot upgrade changelog'.split(): if "" != getattr(self, attr).strip(): text += " <%s>%s%s" % (attr, escape(getattr(self, attr)), attr, os.linesep) if self.tags: text += ' ' + ','.join(unique(self.tags)) + "" + os.linesep return text def load_file(self, filename, overwrite_with_opts=True): try: doc = parse(filename) except: raise Exception("Failed to parse metalink file! Please select a valid metalink.") try: for attr in 'origin pubdate refreshdate type'.split(): setattr(self, attr, self.get_attribute(doc.documentElement, attr)) publisher = self.get_tag(doc, "publisher") if publisher is not None: self.publisher_name = self.get_tagvalue(publisher, "name") self.publisher_url = self.get_tagvalue(publisher, "url") license = self.get_tag(doc, "license") if license is not None: self.license_name = self.get_tagvalue(license, "name") self.license_url = self.get_tagvalue(license, "url") for attr in 'identity version copyright description logo releasedate screenshot upgrade changelog'.split(): setattr(self, attr, self.get_tagvalue(doc, attr)) self.tags = split_values(self.get_tagvalue(doc, "tags")) files = self.get_tag(doc, "files") if files is None: raise Exception("Failed to parse metalink. Found no tag.") metafiles = self.get_tag(files, "file", False) if metafiles is None: raise Exception("Failed to parse metalink. It must contain exactly one file description.") for index, file in enumerate(metafiles): if file.hasAttribute("name"): self.file.filename = file.getAttribute("name") for attr in 'identity size version language os changelog description logo mimetype releasedate screenshot upgrade'.split(): setattr(self.file, attr, self.get_tagvalue(file, attr)) # TODO: self.file.relations = self.get_tagvalue(file, "relations") if self.version == "": self.version = self.file.version self.file.tags = split_values(self.get_tagvalue(file, "tags")) self.file.hashes.filename = os.path.basename(self.file.filename) verification = self.get_tag(file, "verification") if verification is not None: signature = self.get_tag(verification, "signature") if signature is not None: # TODO: Support optional file="linux.sign" attribute self.file.signature = self.get_text(signature, False) self.file.signature_type = self.get_attribute(signature, "type") for hash in verification.getElementsByTagName("hash"): # TODO: Double check can be removed # TODO: Is ed2k hash really allowed? Used by Metalink Gen - http://metalink.packages.ro # TODO: Support the rest of allowed hash types: md4 sha384 sha512 rmd160 tiger crc32 if hash in verification.childNodes: if hash.hasAttribute("type"): if hash.getAttribute("type").lower() in "ed2k md5 sha1 sha256".split(): self.file.hashes[hash.getAttribute("type").lower()] = self.get_text(hash).lower() pieces = self.get_tag(verification, "pieces") if pieces is not None: if pieces.hasAttribute("type") and pieces.hasAttribute("length"): self.file.hashes.piecetype = pieces.getAttribute("type") self.file.hashes.piecelength = pieces.getAttribute("length") self.file.hashes.pieces = [] for hash in pieces.getElementsByTagName("hash"): self.file.hashes.pieces.append(self.get_text(hash).lower()) else: print >> sys.stderr, "Load error: missing attributes in " resources = self.get_tag(file, "resources") num_urls = 0 if resources is not None: self.file.maxconn_total = self.get_attribute(resources, "maxconnections") if self.file.maxconn_total.strip() == "": self.file.maxconn_total = "-" for resource in resources.getElementsByTagName("url"): type = self.get_attribute(resource, "type") location = self.get_attribute(resource, "location") preference = self.get_attribute(resource, "preference") conns = self.get_attribute(resource, "maxconnections") # TODO: Should get_text() result not be already stripped? url = self.get_text(resource).strip() self.add_url(url, type, location, preference, conns) num_urls += 1 if num_urls == 0: raise Exception("Failed to parse metalink. Found no URLs!") if index < len(metafiles) - 1: self.add_file() self.rewind() if overwrite_with_opts: self.apply_command_line_options() except xml.dom.DOMException, e: raise Exception("Failed to load metalink: " + str(e)) finally: doc.unlink() def get_attribute(self, element, attribute): if element.hasAttribute(attribute): return element.getAttribute(attribute) return "" def get_tagvalue(self, node, tag): nodelist = node.getElementsByTagName(tag) if len(nodelist): return self.get_text(nodelist[0]) return "" # TODO: Rename only_first if unclear def get_tag(self, node, tag, only_first=True): nodelist = node.getElementsByTagName(tag) if len(nodelist): if len(nodelist) == 1 and only_first: return nodelist[0] return nodelist return None def get_text(self, node, strip=True): text = "" for n in node.childNodes: if n.nodeType == Node.TEXT_NODE: text += n.data if strip: return text.strip() return text # Automatical string representation, i.e. "print metalink" def __str__(self): return self.generate() # Call with filename or url def parse_torrent(self, filename='', url=''): files = self.file.parse_torrent(filename, url) if not self.description: # Set torrent comment as description self.description = self.file.description if len(files) > 1: self.hashes['btih'] = self.file.hashes['btih'] self.hashes.pieces = self.file.hashes.pieces self.hashes.piecelength = self.file.hashes.piecelength self.hashes.piecetype = self.file.hashes.piecetype self.file.description = '' self.file.hashes['btih'] = '' self.file.hashes.pieces = [] if url and not filename: self.resources.append(self.file.resources.pop()) current_key = self.key() else: # Remove single file description self.file.description = '' for name, size in files[1:]: self.add_file() self.file.filename = name self.file.size = str(size) if len(files) > 1: self.seek(current_key) # Call with filename, url or text def parse_mirrors(self, filename='', url='', data='', plain=False, remove_others=False): return self.file.parse_mirrors(filename, url, data, plain, remove_others) # Call with filename, url or text def parse_hashes(self, filename='', url='', data='', force_type='', filter_name=''): return self.file.parse_hashes(filename, url, data, force_type, filter_name) def setattrs(self, attrs): '''Set multiple attribute values.''' for attr, value in attrs.items(): if hasattr(self, attr): setattr(self, attr, value) else: setattr(self.file, attr, value) def change_filename(self, new, old='', overwrite_filename=True): _old = old or os.path.basename(self.filename_absolute) or self.file.filename if _old: self.origin = self.origin.replace(_old, new) return self.file.change_filename(new, old, overwrite_filename) def remove_other_mirrors(self, mirrors): self.file.remove_other_mirrors(mirrors) def replace_hashes(self, hashes): self.file.replace_hashes(hashes) def is_helper_file(self, file): filename, extension = os.path.splitext(os.path.basename(file)) # Skip filenames without extension if filename and not extension and filename.upper() in 'MD5SUMS SHA1SUMS SHA256SUMS'.split(): return True if not (filename and len(extension) > 1): return False return extension[1:].lower() in 'metalink torrent mirrors md5 sha1 sha256 md5sum sha1sum sha256sum asc gpg sig'.split() def find_helper_files(self, file): files = [] # Skip helper files if self.is_helper_file(file): return files for helper in 'metalink torrent mirrors'.split(): if os.path.isfile(file + '.' + helper): files.append(file + '.' + helper) hashes = Hashes() hashes.find_files(file) files.extend(hashes.files) files.extend(hashes.find_signatures(file)) return files def add_file(self): self.file = Metafile() self.files.append(self.file) self._valid = True def rewind(self): self.file = self.files[0] self._valid = True def prev(self): self._valid = True key = self.key() if key is not None and self.seek(key - 1): return self.file return False def current(self): if not self._valid: return False return self.file def key(self): if not self._valid: return None return self.files.index(self.file) def next(self): key = self.key() if key is not None and self.seek(key + 1): return self.file self._valid = False return False def end(self): self._valid = True self.file = self.files[-1] # Seek to metafile directly by index (or TODO: filename) def seek(self, key): try: self.file = self.files[key] return True except: return False def valid(self): return self._valid # Access metafile directly by index (or TODO: filename) def __getitem__(self, key): try: return self.files[key] except: pass # Remove metafile directly by index (or TODO: filename) def __delitem__(self, key): try: current_key = self.key() del self.files[key] except: return None if not self.files: self.file = Metafile() self.files.append(self.file) elif current_key == key: if len(self.files) > current_key: self.seek(current_key) else: self.end() def __setitem__(self, key, value): raise Exception("Setting metafiles is not supported.") # Does metafile with index exist? (or TODO: filename) def __contains__(self, key): try: self.files[key] return True except: return False def __iter__(self): return iter(self.files) def supported_protocols(self): return 'http ftp rsync bittorrent ed2k magnet'.split() def reset(self, overwrite_with_opts=True): """Reset mutable attributes to allow object reuse""" self.changelog = "" self.copyright = "" self.description = "" self.filename_absolute = "" self.generator = "" self.identity = "" self.license_name = "" self.license_url = "" self.logo = "" self.origin = "" self.pubdate = "" self.publisher_name = "" self.publisher_url = "" self.refreshdate = "" self.releasedate = "" self.screenshot = "" self.tags = [] self.type = "" self.upgrade = "" self.version = "" self.resources = [] self.signature = "" self.signature_type = "" self.size = "" self.urls = [] self.errors = [] self.url_prefix = '' self._valid = True self.hashes.reset() self.file.reset() # For multi-file torrent data self.files = [self.file] if overwrite_with_opts: self.apply_command_line_options() class Torrent(object): def __init__(self, filename='', url=''): self.filename = filename self.url = url self.comment = '' self.files = [] self.infohash = '' self.piecelength = 0 self.pieces = [] def parse(self, data=''): '''Main function to decode bencoded data and extract important information''' if not data and (self.filename or self.url): if self.filename: fp = open(self.filename, "rb") data = fp.read() fp.close() else: data = get_url(self.url) if not data: return {} self.data = data self.pos = 0 root = self.bdecode() del self.data del self.pos if 'comment' in root: self.comment = root['comment'] if 'info' in root and set(['pieces', 'piece length', 'name']).issubset(set(root['info'].keys())): info = root['info'] name = info['name'].strip() if 'length' in info: self.files.append((name, info['length'])) if 'files' in info: # Multi-file torrent: info['name'] is directory name and prefix for all file names name = [name] for f in info['files']: if 'length' in f and 'path' in f: self.files.append(('/'.join(name + f['path']), f['length'])) self.piecelength = info['piece length'] # Only decoding of single-file torrents is possible if len(self.files) == 1: self.pieces = self.decode_pieces(info['pieces']) return root def decode_pieces(self, pieces): if isinstance(pieces, str) and len(pieces) and len(pieces) % 20 == 0: def divide(seq, size): return [seq[i:i+size] for i in xrange(0, len(seq), size)] return [binascii.hexlify(piece) for piece in divide(pieces, 20)] return [] def encode_pieces(self, pieces): if isinstance(pieces, list) and len(pieces): return "".join([binascii.unhexlify(piece) for piece in pieces]) return '' def create(self, data, filename=''): errors = [] # Check given data if not 'files' in data: errors.append('files not found in torrent data') elif not isinstance(data['files'], list): errors.append('files must be a list of files') elif len(data['files']) != 1: errors.append('files must contain only a single file at the moment') else: for file in data['files']: if not isinstance(file, list) or len(file) != 2 or not isinstance(file[0], basestring) or not isinstance(file[1], (int, long)): errors.append('elements of files must be a list of file data (name, size)') break if not 'piece length' in data: errors.append('piece length not found in torrent data') elif not isinstance(data['piece length'], (int, long)) or not data['piece length']: errors.append('piece length must be a number') if not 'pieces' in data: errors.append('pieces not found in torrent data') elif not isinstance(data['pieces'], list) or not data['pieces']: errors.append('pieces must be a non-empty list') if not 'trackers' in data: errors.append('trackers not found in torrent data') elif not isinstance(data['trackers'], (basestring, list)): errors.append('trackers must be passed as string or list of tracker groups') elif isinstance(data['trackers'], basestring): trackers = split_values(data['trackers'], True, ',', ' ') else: trackers = data['trackers'] if 'trackers' not in locals(): pass elif not trackers: errors.append('list of trackers must not be empty') else: for tracker_group in trackers: if not isinstance(tracker_group, list) or not tracker_group: errors.append('elements of trackers must be a list of tracker URLs (tracker group)') break for tracker in tracker_group: if not isinstance(tracker, basestring) or len(tracker) < 10: errors.append('elements of tracker groups must be strings') break for key in 'created by,comment'.split(','): if key in data and not isinstance(data[key], basestring): errors.append('%s must be a string' % key) if not filename and not self.filename: errors.append('no output filename given') if errors: return errors # Create torrent root = {} for key in 'created by,comment'.split(','): if key in data and len(data[key]) > 2: root[key] = encode_text(data[key]) root['announce'] = trackers[0][0] if len(trackers) > 1 or len(trackers[0]) > 1: root['announce-list'] = trackers # At the moment only single-file torrents can be created because of missing pieces hashing for multi-file torrents # Multiple-file torrents may contain subdirectories (so no basename!) root['info'] = {} file = data['files'][0] root['info']['name'] = encode_text(os.path.basename(file[0])) root['info']['length'] = file[1] root['info']['piece length'] = data['piece length'] root['info']['pieces'] = self.encode_pieces(data['pieces']) root['creation date'] = int(time.time()) # Write file file = filename or self.filename if os.path.isfile(file) and not _get_opt('overwrite'): file += '.new' fp = open(file, "wb") fp.write(self.bencode(root)) fp.close() print '%s%s' % (verbose is not False and 'Generated: ' or '', file) return errors def bdecode(self): c = self.data[self.pos] if 'd' == c: d = {} self.pos += 1 while not self._is_end(): start = self.pos + 6 key = self._process_string() d[key] = self.bdecode() if not self.infohash and 'info' == key: try: import hashlib hashfunc = hashlib.sha1 except: # Import deprecated modules import sha hashfunc = sha.new self.infohash = hashfunc(self.data[start:self.pos]).hexdigest().upper() self.pos += 1 return d elif c == 'l': l = [] self.pos += 1 while not self._is_end(): l.append(self.bdecode()) self.pos += 1 return l elif c == 'i': self.pos += 1 pos = self.data.find('e', self.pos) i = int(self.data[self.pos:pos]) self.pos = pos + 1 return i if c.isdigit(): return self._process_string() raise TypeError('Invalid bencoded string') def _process_string(self): pos = self.data.find(':', self.pos) length = int(self.data[self.pos:pos]) self.pos = pos + 1 text = self.data[self.pos:self.pos+length] self.pos += length return text def _is_end(self): return self.data[self.pos] == 'e' def bencode(self, x): from cStringIO import StringIO s = StringIO() self._bencode_value(x, s) return s.getvalue() def _bencode_value(self, x, s): t = type(x) if t in (int, long, bool): s.write('i%de' % x) elif isinstance(x, basestring): s.write('%d:%s' % (len(x), x)) elif t in (list, tuple): s.write('l') for e in x: self._bencode_value(e, s) s.write('e') elif t is dict: s.write('d') keys = x.keys() keys.sort() for k in keys: self._bencode_value(k, s) self._bencode_value(x[k], s) s.write('e') else: raise TypeError('Unsupported data type to bencode: %s' % t.__name__) class Mirrors(object): def __init__(self, filename='', url=''): self.locations = "af ax al dz as ad ao ai aq ag ar am aw au at az bs bh bd bb by be bz bj bm bt bo ba bw bv br io bn bg bf bi kh cm ca cv ky cf td cl cn cx cc co km cg cd ck cr ci hr cu cy cz dk dj dm do ec eg sv gq er ee et fk fo fj fi fr gf pf tf ga gm ge de gh gi gr gl gd gu gt gg gn gw gy ht hm va hn hk hu is in id ir iq ie im il it jm jp je jo kz ke ki kp kr kw kg la lv lb ls lr ly li lt lu mo mk mg mw my mv ml mt mh mq mr mu yt mx fm md mc mn me ms ma mz mm na nr np nl an nc nz ni ne ng nu nf mp no om pk pw ps pa pg py pe ph pn pl pt pr qa re ro ru rw sh kn lc pm vc ws sm st sa sn rs sc sl sg sk si sb so za gs es lk sd sr sj sz se ch sy tw tj tz th tl tg tk to tt tn tr tm tc tv ug ua ae gb us um uy uz vu ve vn vg vi wf eh ye zm zw uk".split() self.search_eclipse = re.compile(r'http://(?:(?:www\.)?eclipse\.org/[^/]+/download\.php\?file=([^&]+)|[^.]+\.eclipse.org(/eclipse/downloads/drops/[^/]+/)download\.php\?dropFile=([^&]+))') self.search_sourceforge = re.compile(r'https?://(?:downloads|(?:[^.]+)?\.?dl)\.(?:sourceforge|sf)\.net(/[^/]+/[^?]+)|https?://(?:sourceforge|sf).net/project/downloading.php\?group_id=\d+\&filename=([^&]+)') self.search_mysql = re.compile(r'(http://dev.mysql.com/get/Downloads/)([^/]+/)([^/]+)(?:/from/)?.*') self.search_link = re.compile(r'(?:(\d{1,3}|[A-Za-z]{2})\s+)?(?:(\d{1,3}|[A-Za-z]{2})\s+)?((?:(ftps?|https?|rsync|ed2k)://|(magnet):\?)[^" <>\r\n]+)') self.search_links = re.compile(r'(?:(\d{1,3}|[A-Za-z]{2})\s+)?(?:(\d{1,3}|[A-Za-z]{2})\s+)?((?:(?:ftps?|https?|rsync|ed2k)://|magnet:\?)[^" <>\r\n]+)') self.search_location = re.compile(r'(?:ftps?|https?|rsync)://([^/]*?([^./]+\.([^./]+)))/') self.search_btih = re.compile(r'xt=urn:btih:[a-zA-Z0-9]{32}') self.domains = {'postgresql.org':'at', 'tarpoon.org':'fr', 'carroll.aset.psu.edu':'us', 'yoxos.com':'de', 'ialto.org':'fr', 'linux-bg.org':'bg', 'fpt.net':'vn', 'harvard.edu':'us', 'sourceshare.org':'us', 'bevc.net':'si', 'ovh.net':'fr', 'clarkson.edu':'us', 'yousendit.com':'us', 'lunarpages.com':'us', 'kgt.org':'de', 'vt.edu':'us', 'lupaworld.com':'cn', 'pdx.edu':'us', 'mainseek.com':'pl', 'vmmatrix.net':'cn', 'mirrormax.net':'us', 'cn99.com':'cn', 'anl.gov':'us', 'mirrorservice.org':'gb', 'oleane.net':'fr', 'proxad.net':'fr', 'osuosl.org':'us', 'telia.net':'dk', 'mtu.edu':'us', 'utah.edu':'us', 'oakland.edu':'us', 'stanford.edu':'us', 'rit.edu':'us', 'calpoly.edu':'us', 'supp.name':'cz', 'wayne.edu':'us', 'tummy.com':'us', 'dotsrc.org':'dk', 'ubuntu.com':'sp', 'wmich.edu':'us', 'smenet.org':'us', 'bay13.net':'de', 'saix.net':'za', 'vlsm.org':'id', 'ac.uk':'gb', 'optus.net':'au', 'esat.net':'ie', 'unrealradio.org':'us', 'dudcore.net':'us', 'filearena.net':'au', 'ale.org':'us', 'linux.org':'se', 'ipacct.com':'bg', 'planetmirror.com':'au', 'tds.net':'us', 'ac.yu':'sp', 'stealer.net':'de', 'co.uk':'gb', 'iu.edu':'us', 'jtlnet.com':'us', 'umn.edu':'us', 'rfc822.org':'de', 'opensourcemirrors.org':'us', 'xmission.com':'us', 'xtec.net':'es', 'nullnet.org':'us', 'ubuntu-es.org':'es', 'roedu.net':'ro', 'mithril-linux.org':'jp', 'gatech.edu':'us', 'ibiblio.org':'us', 'kangaroot.net':'be', 'comactivity.net':'se', 'prolet.org':'bg', 'actuatechina.com':'cn', 'areum.biz':'kr', 'daum.net':'kr', 'daum.net':'kr', 'calvin.edu':'us', 'columbia.edu':'us', 'crazeekennee.com':'us', 'buffalo.edu':'us', 'uta.edu':'us', 'software-mirror.com':'us', 'unixheads.org':'us', 'optusnet.dl.sourceforge.net':'au', 'belnet.dl.sourceforge.net':'be', 'ufpr.dl.sourceforge.net':'br', 'puzzle.dl.sourceforge.net':'ch', 'switch.dl.sourceforge.net':'ch', 'dfn.dl.sourceforge.net':'de', 'mesh.dl.sourceforge.net':'de', 'ovh.dl.sourceforge.net':'fr', 'heanet.dl.sourceforge.net':'ie', 'garr.dl.sourceforge.net':'it', 'jaist.dl.sourceforge.net':'jp', 'surfnet.dl.sourceforge.net':'nl', 'nchc.dl.sourceforge.net':'tw', 'kent.dl.sourceforge.net':'uk', 'easynews.dl.sourceforge.net':'us', 'internap.dl.sourceforge.net':'us', 'superb-east.dl.sourceforge.net':'us', 'superb-west.dl.sourceforge.net':'us', 'umn.dl.sourceforge.net':'us'} self.reset(filename, url) def parse(self, filename='', url='', data='', plain=False): '''Main function to parse mirror data''' _filename = '' _url = '' from_url = False if not data and (filename or url or self.filename or self.url): if filename or (self.filename and not url): _filename = filename or self.filename fp = open(_filename, "rb") data = fp.read() fp.close() else: from_url = True _url = url and url or self.url data, _filename = self.parse_url(_url) if not re.search(r"[\n\r]", data): data = get_url(data) if not data: return False # Filter links filter = _get_opt('filter') filter_regex = _get_opt('filter_regex') if filter_regex: filter_regex = re.compile(filter_regex) filter_from = _get_opt('filter_from') if filter_from: m = re.search(filter_from, data) if m: data = data[m.start():] filter_to = _get_opt('filter_to') if filter_to: m = re.search(filter_to, data) if m: data = data[:m.end()] _filename = _get_opt('filename') or _filename # Try to default filter by filename for URLs filtered = self.filter_custom or filter or filter_regex or filter_from or filter_to if from_url and not filtered: name = _filename or os.path.basename(_url) if '.' in name and '?' not in name and '#' not in name: filter = name filtered = True # Search links if plain: links = unique([self.search_link.search(line).group(1, 2, 3) for line in data.splitlines() if line.strip() and self.search_link.search(line)], 2) else: if self.search_links_custom: links = unique(self.search_links_custom.findall(data)) self.search_links_custom = None else: links = unique(self.search_links.findall(data), 2) if self.filter_custom: links = [l for l in [self.filter_custom(l) for l in links] if l] else: links = [l for l in links if (not filter or filter in l[2]) and (not filter_regex or filter_regex.search(l[2]))] # Try to guess from URL and parsed links if not _filename: name = os.path.basename(_url) if name and '.' in name and '?' not in name and '#' not in name: for link in links: if os.path.basename(self.filter_custom and link or link[2]) == name: _filename = name break # Filter afterwards if _filename and from_url and not filtered: links = [l for l in links if os.path.basename(l[2]) == _filename] # Append filename if _filename: for i, link in enumerate(links): if link[2][-1] == '/': links[i] = (link[0], link[1], link[2] + _filename) self.mirrors.extend([link for link in [self.parse_link(link, is_match=not self.filter_custom) for link in links] if link]) if self.filter_custom: self.filter_custom = None return True def parse_url(self, url): m = self.search_eclipse.match(url) if m: group = m.groups() filepath = group[0] is None and ''.join(group[1:]) or group[0] return 'http://www.eclipse.org/downloads/download.php?file=' + filepath + '&format=xml', os.path.basename(filepath) m = self.search_sourceforge.match(url) if m: filepath = max(m.groups()) # Path unknown, only filename -> download and parse URL if filepath[0] != '/': content = get_url(url) m = re.search('sourceforge.net(/sourceforge/[^/]+/' + re.escape(filepath) + ')', content) if not m: return '' filepath = m.group(1) mirrors = ['http://' + mirror + filepath for mirror in self.domains.keys() if mirror.endswith('.sourceforge.net')] return "\n".join(mirrors), os.path.basename(filepath) m = self.search_mysql.match(url) if m: group = m.groups() filepath = ''.join(group) self.search_links_custom = re.compile(r']*href="([^"]+)"') search_link = re.compile(r'^/get/(Downloads/[^/]+/[^/]+)/from/(.+)') def filter(link): m = search_link.search(link) return m and m.group(2) + m.group(1) or False self.filter_custom = filter return filepath + '/from/pick', os.path.basename(filepath) return url, '' # Return list (link, type, location, preference, language) def parse_link(self, link, location='', check_duplicate=True, preference='', is_match=False): m = self.search_link.match(is_match and link[2] or link) if m: # Check for location and preference information if is_match and not location: location = link[0] if is_match and not preference: preference = link[1] if location and location.isdigit() or preference and re.match('^[A-Za-z]{2}$', preference): location, preference = (preference, location) group = m.groups() type = group[2].endswith('.torrent') and 'bittorrent' or group[3] or group[4] # P2P links are not allowed to include location location = type not in 'bittorrent ed2k magnet'.split() and self.parse_location(group[2], location) or '' if group[2] in self.urls: if check_duplicate: print >> sys.stderr, 'Duplicate mirror found:', group[2] return None else: self.urls.append(group[2]) preference = self.parse_preference(group[2], type, preference) return [group[2], type, location, preference] print >> sys.stderr, 'Invalid mirror link:', link return None # Return location if a valid 2-letter country code can be found def parse_location(self, link, location=''): if location and re.match('^[A-Za-z]{2}$', location): return location.lower() m = self.search_location.match(link) if m: group = m.groups() if group[2] in self.locations: return group[2] if group[1] in self.domains: return self.domains[group[1]] if group[0] in self.domains: return self.domains[group[0]] # Support ftp.us.postgresql.org style domain names prefix = group[0][:-(len(group[1])+1)] if len(group[0]) > len(group[1]) and re.search(r'(^|[^.]+\.)[a-z]{2}$', prefix) and prefix[-2:] in self.locations: return prefix[-2:] if location: self.domains[group[1]] = location return location if not group[0] in self.__class__.unknown_domains: print >> sys.stderr, 'Country unknown for:', group[0] self.__class__.unknown_domains.append(group[0]) return location def parse_preference(self, link, type, preference=0): try: preference = int(preference) if 0 < preference <= 100: return str(preference) except: pass if _get_opt('preference', type): return _get_opt('preference', type) if 'bittorrent' == type: return '100' if 'ed2k' == type: return preference_ed2k if 'magnet' == type: if self.search_btih.search(link): return '99' return '90' if 'ftp' == type: return '30' return '10' def change_filename(self, new, old=''): if not new: return False if self.mirrors and not old: for url, type, location, preference in self.mirrors: if type not in "bittorrent ed2k magnet".split(): old = os.path.basename(url) break if old: old = urllib.quote(old) new = urllib.quote(new) self.urls = [] for mirror in self.mirrors: # Rename file if old: mirror[0] = mirror[0].replace(old, new) # Or append new name elif mirror[0][-1] == '/': mirror[0] += new self.urls.append(mirror[0]) return True def add(self, mirrors, remove_others=False): if remove_others: self.remove_other_mirrors(mirrors) for mirror in mirrors.mirrors: if mirror[0] not in self.urls: self.mirrors.append(mirror) self.urls.append(mirror[0]) def remove_other_mirrors(self, mirrors): types = "bittorrent ed2k magnet".split() self.mirrors = [mirror for mirror in self.mirrors if mirror[1] in types or mirror[0] in mirrors.urls] self.urls = [mirror[0] for mirror in self.mirrors] # basename of first URL, which is no P2P link def get_filename(self): p2p = 'bittorrent ed2k magnet'.split() for mirror in self.mirrors: if mirror[1] not in p2p: return urllib.unquote(os.path.basename(mirror[0])) return '' def reset(self, filename='', url=''): """Reset mutable attributes to allow object reuse""" self.filename = filename self.url = url self.search_links_custom = None self.filter_custom = None self.mirrors = [] self.urls = [] def __repr__(self): return "\n".join([link[0] for link in self.mirrors]) unknown_domains = [] class Hashes(object): def __init__(self, filename='', url=''): self.search_hashes = r"^(([a-z0-9]{32,64})\s+(?:\?(AICH|BTIH|EDONKEY|SHA1|SHA256))?\*?([^\r\n]+))" # aich=ED2K AICH hash, btih=BitTorrent infohash (= magnet:?xt=urn:btih link) self.verification_hashes = 'md4 md5 sha1 sha256 sha384 sha512 rmd160 tiger crc32 btih ed2k aich' self.reset(filename, url) def init(self): self.pieces = [] self.hashes = {} for hash in self.verification_hashes.split(): self.hashes[hash] = {} def set_file(self, filename): if not filename.strip(): return False self.filename_absolute = filename self.filename = os.path.basename(filename) for extension in 'md5sum sha1sum sha256sum md5 sha1 sha256'.split(): if self.filename.lower().endswith('.' + extension): self.filename = self.filename[: - len(extension) -1] break return True def parse(self, filename='', data='', force_type='', filter_name=''): '''Main function to parse hash data''' self.set_file(filename) if not data and (self.filename or self.url): if self.filename: fp = open(self.filename_absolute or self.filename, "rb") data = fp.read() fp.close() else: data = get_url(self.url) if not data: return 0 count = 0 for line, hash, type, name in re.findall(self.search_hashes, data, re.MULTILINE): name = name.strip() if filter_name and filter_name != name: continue if 'EDONKEY' == type: type = 'ED2K' if type in ('ED2K', 'AICH', 'BTIH'): for _type, length in {'ED2K':32, 'AICH':32, 'BTIH':40}.items(): if _type == type: if len(hash) != length: print >> sys.stderr, 'Invalid %s hash: %s' % (type, line.strip()) elif not force_type or force_type.upper() == _type: self.hashes[_type.lower()][name] = hash count += 1 break else: for _type, length in {'md5':32, 'sha1':40, 'sha256':64}.items(): if len(hash) == length and not force_type or force_type.lower() == _type: self.hashes[_type][name] = hash count += 1 break return count # Find hash files parallel to filename def find_files(self, filename=''): if not filename: filename = self.filename name = '' if not filename or not os.path.dirname(filename): # Search in working directory directory = os.getcwd() if filename: name = filename + '.' elif os.path.isdir(filename): # Search only for general files in directory directory = os.path.realpath(filename) else: # Search for general and specific files in directory directory = os.path.dirname(filename) name = os.path.basename(filename) + '.' directory += os.sep files = [] # Add general files for f in 'MD5SUMS SHA1SUMS SHA256SUMS'.split(): files.append(directory + f) # Add specific files if name: for f in 'md5 sha1 sha256'.split(): files.append(directory + name + f) files.append(directory + name + f + 'sum') found_files = [f for f in files if os.path.isfile(f)] self.files.extend(found_files) return len(found_files) def is_hash_file(self, file): _file = os.path.basename(file) for hash in 'md5sum sha1sum sha256sum md5 sha1 sha256'.split(): if _file.lower().endswith('.' + hash): self.last_hash_file = _file[: - len(hash) - 1] return True for hash in 'MD5SUMS SHA1SUMS SHA256SUMS'.split(): if _file.upper() == hash: self.last_hash_file = _file return True return False # Find signature files parallel to filename # TODO: Move signatures into Hashes class def find_signatures(self, filename=''): if not filename: filename = self.filename name = '' if not filename or not os.path.dirname(filename): # Search in working directory directory = os.getcwd() if filename: name = filename + '.' elif os.path.isdir(filename): # Search only for general files in directory directory = os.path.realpath(filename) else: # Search for general and specific files in directory directory = os.path.dirname(filename) name = os.path.basename(filename) + '.' directory += os.sep files = [] # Add specific files if name: for f in 'asc gpg.sig gpg sig'.split(): files.append(directory + name + f) found_files = [f for f in files if os.path.isfile(f)] return found_files def is_signature_file(self, file): _file = os.path.basename(file) for signature in 'asc gpg.sig gpg sig'.split(): if _file.lower().endswith('.' + signature): self.last_hash_file = _file[: - len(signature) - 1] return True return False # TODO: filter_name def parse_files(self): self.url = '' for file in self.files: self.parse(file) def has(self, hash): hash = hash.lower() if hash not in self.hashes or not self.hashes[hash]: return False h = self.hashes[hash] if self.filename: return self.filename in h and "" != h[self.filename].strip() return 1 == len(h) and "" != h.values()[0].strip() def has_one(self, hashes): for hash in hashes.split(): if self.has(hash): return True def get(self, hash): hash = hash.lower() if not self.has(hash): return "" if self.filename: return self.hashes[hash][self.filename].strip() return self.hashes[hash].values()[0].strip() def get_all(self): return self.get_multiple(" ".join(self.hashes.keys())) def get_multiple(self, hashes): hashes_found = {} for hash in hashes.lower().split(): if self.has(hash): hashes_found[hash] = self.get(hash) return hashes_found def remove(self, hashes): for hash in hashes.lower().split(): if self.has(hash): self.hashes[hash].clear() def update(self, hashes): for hash, value in hashes.get_multiple(self.verification_hashes).items(): if hash not in self: self[hash] = value # Array-access methods def __getitem__(self, hash): return self.get(hash) def __delitem__(self, hash): self.remove(hash) def __setitem__(self, hash, value): self.hashes[hash.lower()][self.filename or len(self.hashes[hash])] = value def __contains__(self, hash): return self.has(hash) def reset(self, filename='', url=''): """Reset mutable attributes to allow object reuse""" self.filename = '' self.filename_absolute = '' self.set_file(filename) self.url = url self.hashes = {} self.init() self.last_hash_file = '' self.pieces = [] self.piecelength = 0 self.piecetype = '' self.files = [] class OptParser(object): def __init__(self, long_options = []): self.opts = {} self._opts = {} self._positions = [] self.errors = [] self.init(long_options) def addError(self, msg): if not msg in self.errors: self.errors.append(msg) def parseValue(self, val, is_bool=False, inverse=False): if not is_bool: return val if val is None: return not inverse value = val.strip().lower() if value in '1 True yes y on enable'.split(): return not inverse if value in '0 False no n off disable'.split(): return inverse return val def getOpt(self, opt): if not opt in self.opts: self.addError("Option '%s' is unknown" % opt) return [None, False, 0] _opt = self._opts[self.opts[opt]] is_bool = 'bool' == _opt['type'] required = _opt['required'] return [_opt, is_bool, required] def cmp_option_length(self, a, b): len_a = len(a) len_b = len(b) if len_a != len_b: return cmp(len_a, len_b) return cmp(a, b) def getHelp(self): help = '' _options = [] _len = 0 for key in self._positions: option = self._opts[key] _opt = [] for opt in sorted(option['options'], self.cmp_option_length): _opt.append('%s%s' % (len(opt) > 1 and '--' or '-', opt)) _opt = ', '.join(_opt) if option['explanation'] and option['required']: _opt += '%s=%s%s' % (1 != option['required'] and '[' or '', option['explanation'], 1 != option['required'] and ']' or '') _options.append((_opt, option['help'] + "\n")) # TODO: os.linesep if len(_opt) > _len: _len = len(_opt) _len += 2 for key, option in _options: help += ' ' + key.ljust(_len) + option return help # options string|array String means short options, dict means ZendFramework like long options # ['file|f=sFILE'=>'Input file (-: use STDIN)'] def init(self, options = []): if not isinstance(options, list): return False for i in range(0, len(options), 2): _long, help = options[i:i+2] # Only associative array keys allowed if isinstance(_long, (int, long)) or _long.isdigit(): continue _long = _long.strip() if '' == _long: continue required = 0 _type = 'bool' explanation = '' match = re.search('(=|-)([isw])(.+)?', _long) if match: _long = _long[:match.start()] match = match.groups() required = '=' == match[0] and 1 or 2 _type = 'i' == match[1] and 'int' or 'string' explanation = match[2] is not None and match[2] or '' opts = unique(_long.split('|')) self._positions.append(_long) for opt in opts: # Overwrite existing short options if len(opt) == 1 and opt in self._opts: del self._opts[opt] self.opts[opt] = _long self._opts[_long] = {'type':_type, 'required':required, 'help':help, 'explanation':explanation, 'options':opts} return True def parse(self, args, convert_hyphen=True): stdin = False opts = {} non_opts = [] # Predefine empty values for option in self._opts.values(): for opt in option['options']: opts[convert_hyphen and opt.replace('-', '_') or opt] = None if not args: return [opts, non_opts, stdin, []] # args = list(args) length = len(args) skip = False for i, arg in enumerate(args): if skip: skip = False continue arg = arg.strip() if arg == '': continue if arg[0] != '-': non_opts.append(arg) elif arg == '-': stdin = True elif arg == '--': self.addError("Unknown option '--'") elif len(arg) > 1 and arg[1] == '-': opt = arg[2:] value = None has_value = '=' in arg if has_value: opt, value = opt.split('=', 1) if not has_value and opt in self.opts and self._opts[self.opts[opt]]['required'] and i < length - 1 and (0 == len(args[i+1]) or '-' != args[i+1][0]): has_value = True value = args[i+1] skip = True default = True if not opt in self.opts and re.match('^(disable|no)-', opt): opt = opt[re.match('^(disable|no)-', opt).end():] default = False _opt, is_bool, required = self.getOpt(opt) if not _opt: continue if is_bool: value = self.parseValue(value, is_bool, not default) if not required and has_value and (not is_bool or not isinstance(value, bool)): self.addError("--%s allows no value" % opt) elif 1 == required and not has_value: self.addError("--%s requires a value" % opt) elif not has_value: value = default if value is not None: for option in _opt['options']: opts[convert_hyphen and option.replace('-', '_') or option] = value else: # Parse short option default = True if re.match('^-(disable|no)-', arg): arg = arg[re.match('^-(disable|no)-', arg).end() - 1:] default = False opt = '' _len = len(arg) for j in range(1, _len): if arg[j] == '=': if _opt: if required: if _len - 1 == j: value = '' else: value = self.parseValue(arg[j+1:], is_bool, not default) for option in _opt['options']: opts[option] = value else: self.addError("-%s allows no value" % opt) break opt = arg[j] _opt, is_bool, required = self.getOpt(opt) if not _opt: continue value = default if j < _len - 1 and '=' == arg[j+1]: if j == _len - 2: value = '' else: value = self.parseValue(arg[j+2:], is_bool, not default) elif required and j == _len - 1 and i < length - 1 and (0 == len(args[i+1]) or '-' != args[i+1][0]): value = self.parseValue(args[i+1], is_bool, not default) skip = True if 1 == required and isinstance(value, bool) and not is_bool: self.addError("-%s requires a value" % opt) else: for option in _opt['options']: opts[option] = value return [opts, non_opts, stdin, self.errors] def doGetopt(args, long_options=[]): optParser = OptParser(long_options) return optParser.parse(args) def _get_opt(opt, key=None): if opt not in _opts: return None if key is not None: try: return _opts[opt][key] except: return None return _opts[opt] def _set_opt(opt, value, key=None): if key is not None: if opt not in _opts: _opts[opt] = {} _opts[opt][key] = value else: _opts[opt] = value if __name__ == '__main__': main()