Class | Gem::Indexer |
In: |
lib/rubygems/indexer.rb
|
Parent: | Object |
Top level class for building the gem repository index.
build_legacy | [RW] | Build indexes for RubyGems older than 1.2.0 when true |
build_modern | [RW] | Build indexes for RubyGems 1.2.0 and newer when true |
dest_directory | [R] | Index install location |
dest_latest_specs_index | [R] | Latest specs index install location |
dest_prerelease_specs_index | [R] | Prerelease specs index install location |
dest_specs_index | [R] | Specs index install location |
directory | [R] | Index build directory |
Create an indexer that will index the gems in directory.
# File lib/rubygems/indexer.rb, line 56
#
# Creates an indexer for the gems found under +directory+.
#
# +directory+ is stored as the install location (+dest_directory+); the
# index itself is built in a per-process directory below Dir.tmpdir.
#
# Recognized +options+ keys: :build_legacy and :build_modern (both default
# to true), plus :rss_title, :rss_host and :rss_gems_host.
#
# Raises a RuntimeError when Builder::XChar is not defined.
def initialize(directory, options = {})
  %w[fileutils tmpdir zlib].each { |lib| require lib }

  unless defined?(Builder::XChar)
    raise "Gem::Indexer requires that the XML Builder library be installed:" \
          "\n\tgem install builder"
  end

  settings = { :build_legacy => true, :build_modern => true }.merge options

  @build_legacy = settings[:build_legacy]
  @build_modern = settings[:build_modern]

  @rss_title     = settings[:rss_title]
  @rss_host      = settings[:rss_host]
  @rss_gems_host = settings[:rss_gems_host]

  @dest_directory = directory
  @directory      = File.join Dir.tmpdir, "gem_generate_index_#{$$}"

  # Every index file name carries the Marshal format version.
  version           = Gem.marshal_version
  marshal_name      = "Marshal.#{version}"
  specs_name        = "specs.#{version}"
  latest_specs_name = "latest_specs.#{version}"
  prerelease_name   = "prerelease_specs.#{version}"

  # Files and directories inside the temporary build directory.
  @master_index      = File.join @directory, 'yaml'
  @marshal_index     = File.join @directory, marshal_name
  @quick_dir         = File.join @directory, 'quick'
  @quick_marshal_dir = File.join @quick_dir, marshal_name
  @quick_index       = File.join @quick_dir, 'index'
  @latest_index      = File.join @quick_dir, 'latest_index'

  @specs_index            = File.join @directory, specs_name
  @latest_specs_index     = File.join @directory, latest_specs_name
  @prerelease_specs_index = File.join @directory, prerelease_name

  # Where the modern specs indexes end up once installed.
  @dest_specs_index            = File.join @dest_directory, specs_name
  @dest_latest_specs_index     = File.join @dest_directory, latest_specs_name
  @dest_prerelease_specs_index = File.join @dest_directory, prerelease_name

  @rss_index = File.join @directory, 'index.rss'

  @files = []
end
Abbreviate the spec for downloading. Abbreviated specs are only used for searching, downloading and related activities and do not need deployment specific information (e.g. list of files). So we abbreviate the spec, making it much smaller for quicker downloads.
# File lib/rubygems/indexer.rb, line 114
#
# Empties the list-valued fields of +spec+ that are not needed for
# searching or downloading (files, test files, rdoc options, extra rdoc
# files and the certificate chain). Mutates and returns +spec+.
def abbreviate(spec)
  %w[files test_files rdoc_options extra_rdoc_files cert_chain].each do |field|
    spec.public_send "#{field}=", []
  end

  spec
end
Builds the various indices.
# File lib/rubygems/indexer.rb, line 126
#
# Runs every enabled index builder against +index+ and then compresses the
# generated indexes. Returns whatever compress_indicies returns.
def build_indicies(index)
  # Marshal gemspecs are used by both modern and legacy RubyGems, so they
  # are always built first.
  builders = [:build_marshal_gemspecs]
  builders << :build_legacy_indicies if @build_legacy
  builders << :build_modern_indicies if @build_modern
  builders << :build_rss

  builders.each { |builder| send builder, index }

  compress_indicies
end
Builds indices for RubyGems older than 1.2.x.
# File lib/rubygems/indexer.rb, line 139
#
# Writes the result of +index.dump+ to the Marshal master index file and
# records both that file and its ".Z" companion in @files.
def build_legacy_indicies(index)
  say "Generating Marshal master index"

  Gem.time 'Generated Marshal master index' do
    open(@marshal_index, 'wb') { |out| out.write index.dump }
  end

  # The ".Z" file does not exist yet; it is produced by compress_indicies.
  @files.push @marshal_index, "#{@marshal_index}.Z"
end
Builds Marshal quick index gemspecs.
# File lib/rubygems/indexer.rb, line 155
#
# Writes one deflated, marshalled gemspec per entry of +index.gems+ into the
# quick Marshal directory, reporting progress as it goes.
#
# Records the quick Marshal directory in @files and returns the list of
# gemspec files that were written.
def build_marshal_gemspecs(index)
  reporter = ui.progress_reporter index.size,
                                  "Generating Marshal quick index gemspecs for #{index.size} gems",
                                  "Complete"

  written = []

  Gem.time 'Generated Marshal quick index gemspecs' do
    index.gems.each do |original_name, spec|
      target  = File.join @quick_marshal_dir, "#{original_name}.gemspec.rz"
      payload = Gem.deflate Marshal.dump(spec)

      open(target, 'wb') { |out| out.write payload }

      written << target
      reporter.updated original_name
    end

    reporter.done
  end

  @files << @quick_marshal_dir

  written
end
Build a single index for RubyGems 1.2 and newer
# File lib/rubygems/indexer.rb, line 186
#
# Dumps a single RubyGems 1.2+ index to +file+.
#
# +index+ is a collection of specs (an Array of specs, or a Hash-like
# collection whose values are specs). +name+ is only used in progress
# messages.
#
# The dumped data is an Array of [name, version, platform] tuples. Specs
# whose original platform is not a String are skipped with a warning and
# leave no entry in the index.
def build_modern_index(index, file, name)
  say "Generating #{name} index"

  Gem.time "Generated #{name} index" do
    open(file, 'wb') do |io|
      tuples = index.map do |*entry|
        # We have to splat here because latest_specs is an array,
        # while the others are hashes. See the TODO in source_index.rb
        spec = entry.flatten.last
        platform = spec.original_platform

        # win32-api-1.0.4-x86-mswin32-60
        unless String === platform
          alert_warning "Skipping invalid platform in gem: #{spec.full_name}"
          next
        end

        platform = Gem::Platform::RUBY if platform.empty?
        [spec.name, spec.version, platform]
      end

      # +next+ above yields nil for every skipped spec; drop those so they
      # do not end up as [nil, nil, nil] tuples in the dumped index.
      Marshal.dump compact_specs(tuples.compact), io
    end
  end
end
Builds indices for RubyGems 1.2 and newer. Handles the full, latest and prerelease specs indices.
# File lib/rubygems/indexer.rb, line 216
#
# Builds the full, latest and prerelease specs indexes from +index+ and
# records each index file, together with its ".gz" companion, in @files.
def build_modern_indicies(index)
  targets = [
    [:released_specs,   @specs_index,            'specs'],
    [:latest_specs,     @latest_specs_index,     'latest specs'],
    [:prerelease_specs, @prerelease_specs_index, 'prerelease specs'],
  ]

  targets.each do |reader, path, label|
    build_modern_index index.public_send(reader).sort, path, label
  end

  # The ".gz" files do not exist yet; compress_indicies creates them.
  @files += targets.flat_map { |_, path, _| [path, "#{path}.gz"] }
end
Builds an RSS feed of the gems released in the past two days, according to each gem's date.
# File lib/rubygems/indexer.rb, line 237
#
# Writes an RSS 2.0 feed to the RSS index file, listing every spec in
# +index+ whose date lies between yesterday and today (inclusive), and
# records the feed in @files.
#
# Does nothing (apart from an optional warning) when either @rss_host or
# @rss_gems_host is unset. Specs whose gem file cannot be stat'ed are left
# out of the feed.
def build_rss(index)
  if @rss_host.nil? || @rss_gems_host.nil?
    if Gem.configuration.really_verbose
      alert_warning "no --rss-host or --rss-gems-host, RSS generation disabled"
    end
    return
  end

  require 'cgi'
  require 'time' # Time.parse and Time#rfc2822
  require 'rubygems/text'

  extend Gem::Text

  Gem.time 'Generated rss' do
    open @rss_index, 'wb' do |io|
      rss_host = CGI.escapeHTML @rss_host
      rss_title = CGI.escapeHTML(@rss_title || 'gems')

      io.puts <<-HEADER
<?xml version="1.0"?>
<rss version="2.0">
<channel>
<title>#{rss_title}</title>
<link>http://#{rss_host}</link>
<description>Recently released gems from http://#{rss_host}</description>
<generator>RubyGems v#{Gem::VERSION}</generator>
<docs>http://cyber.law.harvard.edu/rss/rss.html</docs>
      HEADER

      today = Gem::Specification::TODAY
      yesterday = today - 86400

      recent = index.select do |_, spec|
        spec_date = spec.date

        released = case spec_date
                   when Date then Time.parse(spec_date.to_s)
                   when Time then spec_date
                   end

        # Dates that are neither a Date nor a Time never match.
        released && released >= yesterday && released <= today
      end

      recent.sort_by { |_, spec| [-spec.date.to_i, spec] }.each do |_, spec|
        gem_path = CGI.escapeHTML "http://#{@rss_gems_host}/gems/#{spec.file_name}"
        size = File.stat(spec.loaded_from).size rescue next

        description = spec.description || spec.summary || ''
        authors = Array spec.authors
        emails = Array spec.email

        # Each entry is "email (author)"; an email without a matching author
        # is listed on its own instead of being dropped.
        authors = emails.zip(authors).map do |email, author|
          if author && !author.empty?
            "#{email} (#{author})"
          else
            email
          end
        end.join ', '

        description = description.split(/\n\n+/).map do |chunk|
          format_text chunk, 78
        end

        description = description.join "\n\n"

        item = <<-ITEM
<item>
<title>#{CGI.escapeHTML spec.full_name}</title>
<description>
<pre>#{CGI.escapeHTML description.chomp}</pre>
</description>
<author>#{CGI.escapeHTML authors}</author>
<guid>#{CGI.escapeHTML spec.full_name}</guid>
<enclosure url="#{gem_path}"
length="#{size}" type="application/octet-stream" />
<pubDate>#{spec.date.rfc2822}</pubDate>
        ITEM

        item << "<link>#{CGI.escapeHTML spec.homepage}</link>\n" if spec.homepage

        item << "</item>\n"

        io.puts item
      end

      io.puts "</channel>\n</rss>\n"
    end
  end

  @files << @rss_index
end
Collect specifications from .gem files from the gem directory.
# File lib/rubygems/indexer.rb, line 352
#
# Loads the specification of every file in +gems+ (by default the result of
# gem_file_list) into a new Gem::SourceIndex and returns that index.
#
# Zero-length files and files whose name does not match their spec are
# skipped with a warning. Any other failure while processing a gem is
# reported and that gem is skipped; signals are reported and re-raised.
def collect_specs(gems = gem_file_list)
  index = Gem::SourceIndex.new

  reporter = ui.progress_reporter gems.size,
                                  "Loading #{gems.size} gems from #{@dest_directory}",
                                  "Loaded all gems"

  Gem.time 'loaded' do
    gems.each do |gemfile|
      if File.size(gemfile.to_s).zero?
        alert_warning "Skipping zero-length gem: #{gemfile}"
        next
      end

      begin
        spec = Gem::Format.from_file_by_path(gemfile).spec
        spec.loaded_from = gemfile

        # The file name must contain the spec's original name.
        if gemfile !~ /\/#{Regexp.escape spec.original_name}.*\.gem\z/i
          expected_name = spec.full_name

          if spec.original_name != spec.full_name
            expected_name << " (#{spec.original_name})"
          end

          alert_warning "Skipping misnamed gem: #{gemfile} should be named #{expected_name}"
          next
        end

        abbreviate spec
        sanitize spec

        index.add_spec spec, spec.original_name

        reporter.updated spec.original_name
      rescue SignalException
        alert_error "Received signal, exiting"
        raise
      rescue Exception => e
        alert_error "Unable to process #{gemfile}\n#{e.message} (#{e.class})\n\t#{e.backtrace.join "\n\t"}"
      end
    end

    reporter.done
  end

  index
end
Compacts Marshal output for the specs index data source by using identical objects as much as possible.
# File lib/rubygems/indexer.rb, line 425
#
# Returns a copy of +specs+ (an Array of [name, version, platform] tuples)
# in which equal names, versions and platforms are all represented by the
# first object seen with that value, so Marshal can emit shared references.
def compact_specs(specs)
  # Each table maps a value to the first object seen with that value.
  names, versions, platforms = Array.new(3) do
    Hash.new { |seen, value| seen[value] = value }
  end

  specs.map do |(name, version, platform)|
    [names[name], versions[version], platforms[platform]]
  end
end
Compress filename with extension.
# File lib/rubygems/indexer.rb, line 442
#
# Deflates the contents of +filename+ and writes the result next to it as
# "+filename+.+extension+". The original file is left in place.
def compress(filename, extension)
  deflated = Gem.deflate Gem.read_binary(filename)

  open("#{filename}.#{extension}", 'wb') { |out| out.write deflated }
end
Compresses the indices on disk.
# File lib/rubygems/indexer.rb, line 404
#
# Compresses the generated indexes: the legacy Marshal index is deflated to
# a ".Z" file and verified, and the modern specs indexes are gzipped.
def compress_indicies
  say "Compressing indicies"

  Gem.time 'Compressed indicies' do
    if @build_legacy
      legacy_extension = 'Z'

      compress @marshal_index, legacy_extension
      # Re-read the compressed file to make sure it inflates to the original.
      paranoid @marshal_index, legacy_extension
    end

    if @build_modern
      gzip @specs_index
      gzip @latest_specs_index
      gzip @prerelease_specs_index
    end
  end
end
List of gem file names to index.
# File lib/rubygems/indexer.rb, line 455
#
# Returns the paths of all "*.gem" files in the "gems" subdirectory of the
# destination directory.
def gem_file_list
  pattern = File.join @dest_directory, "gems", "*.gem"

  Dir[pattern]
end
Builds and installs indices.
# File lib/rubygems/indexer.rb, line 462
#
# Builds all indexes in the temporary build directory and installs them
# into the destination directory. The temporary directory is always removed
# afterwards.
def generate_index
  begin
    make_temp_directories
    build_indicies collect_specs
    install_indicies
  rescue SignalException
    # A signal aborts the run quietly; the ensure clause still cleans up.
  ensure
    FileUtils.rm_rf @directory
  end
end
Zlib::GzipWriter wrapper that gzips filename on disk.
# File lib/rubygems/indexer.rb, line 475
#
# Writes a gzipped copy of +filename+ to "+filename+.gz". The original file
# is left in place.
def gzip(filename)
  target = "#{filename}.gz"

  Zlib::GzipWriter.open(target) do |gz|
    gz.write Gem.read_binary(filename)
  end
end
Installs generated indices into the destination directory.
# File lib/rubygems/indexer.rb, line 484
#
# Moves everything recorded in @files from the temporary build directory
# into the destination directory, replacing whatever is already there.
def install_indicies
  verbose = Gem.configuration.really_verbose

  say "Moving index into production dir #{@dest_directory}" if verbose

  pending = @files.dup

  if pending.include? @quick_dir
    # The whole quick directory is moved by the loop below, so the Marshal
    # directory nested inside it must not be moved separately.
    pending.delete @quick_marshal_dir
  elsif pending.include? @quick_marshal_dir
    # Only the quick Marshal directory was built: move it to the same
    # relative location under the destination directory.
    pending.delete @quick_marshal_dir

    target = File.join @dest_directory, @quick_marshal_dir.sub(@directory, '')

    FileUtils.mkdir_p File.dirname(target), :verbose => verbose
    FileUtils.rm_rf target, :verbose => verbose
    FileUtils.mv @quick_marshal_dir, target, :verbose => verbose,
                 :force => true
  end

  relative_paths = pending.map { |path| path.sub @directory, '' }

  relative_paths.each do |relative|
    FileUtils.rm_rf File.join(@dest_directory, relative), :verbose => verbose
    FileUtils.mv File.join(@directory, relative), @dest_directory,
                 :verbose => verbose, :force => true
  end
end
Make directories for index generation
# File lib/rubygems/indexer.rb, line 522
#
# Recreates the temporary build directory from scratch, together with the
# quick Marshal directory inside it.
def make_temp_directories
  # Start from a clean slate: drop anything left at the build location.
  FileUtils.rm_rf @directory

  FileUtils.mkdir_p @directory, :mode => 0700
  FileUtils.mkdir_p @quick_marshal_dir
end
Ensures that the file at path and the inflated contents of the compressed file at path with extension are identical.
# File lib/rubygems/indexer.rb, line 531
#
# Verifies that the compressed file "+path+.+extension+" inflates to exactly
# the contents of +path+.
#
# Raises a RuntimeError naming both files when the contents differ.
def paranoid(path, extension)
  compressed_path = "#{path}.#{extension}"

  data = Gem.read_binary path
  compressed_data = Gem.read_binary compressed_path

  unless data == Gem.inflate(compressed_data)
    raise "Compressed file #{compressed_path} does not match uncompressed file #{path}"
  end
end
Sanitize the descriptive fields in the spec. Sometimes non-ASCII characters will garble the site index. Non-ASCII characters will be replaced by their XML entity equivalent.
# File lib/rubygems/indexer.rb, line 545
#
# Runs the summary, description, post-install message and every author of
# +spec+ through sanitize_string. Mutates and returns +spec+.
def sanitize(spec)
  [:summary, :description, :post_install_message].each do |field|
    spec.public_send "#{field}=", sanitize_string(spec.public_send(field))
  end

  spec.authors = spec.authors.map { |author| sanitize_string author }

  spec
end
Sanitize a single string.
# File lib/rubygems/indexer.rb, line 557
#
# Returns +string+ encoded by Builder::XChar.encode, falling back to
# String#to_xs when that raises a NameError or NoMethodError. A nil or
# false +string+ is returned unchanged.
def sanitize_string(string)
  return string unless string

  # HACK the #to_s is in here because RSpec has an Array of Arrays of
  # Strings for authors.  Need a way to disallow bad values on gemspec
  # generation.  (Probably won't happen.)
  text = string.to_s

  begin
    Builder::XChar.encode text
  rescue NameError, NoMethodError
    text.to_xs
  end
end
Performs an in-place update of the repository from newly added gems. Only works for modern indices, and sets build_legacy to false when run.
# File lib/rubygems/indexer.rb, line 576
#
# Updates the installed modern indexes in place with the gems whose mtime
# is not older than the installed specs index. Sets @build_legacy to false.
#
# Every installed file gets the mtime of the newest gem file found, which
# is what the next run compares gem mtimes against. Calls
# terminate_interaction when there are no new gems. The temporary build
# directory is always removed afterwards.
def update_index
  @build_legacy = false

  make_temp_directories

  specs_mtime = File.stat(@dest_specs_index).mtime
  newest_mtime = Time.at 0

  updated_gems = gem_file_list.select do |gem|
    gem_mtime = File.stat(gem).mtime
    newest_mtime = gem_mtime if gem_mtime > newest_mtime
    gem_mtime >= specs_mtime
  end

  if updated_gems.empty?
    say 'No new gems'
    terminate_interaction 0
  end

  index = collect_specs updated_gems

  files = build_marshal_gemspecs index

  Gem.time 'Updated indexes' do
    update_specs_index index.released_gems, @dest_specs_index, @specs_index
    update_specs_index index.released_gems, @dest_latest_specs_index, @latest_specs_index
    update_specs_index(index.prerelease_gems, @dest_prerelease_specs_index,
                       @prerelease_specs_index)
  end

  compress_indicies

  verbose = Gem.configuration.really_verbose

  say "Updating production dir #{@dest_directory}" if verbose

  files << @specs_index
  files << "#{@specs_index}.gz"
  files << @latest_specs_index
  files << "#{@latest_specs_index}.gz"
  files << @prerelease_specs_index
  files << "#{@prerelease_specs_index}.gz"

  files = files.map do |path|
    path.sub @directory, ''
  end

  files.each do |file|
    src_name = File.join @directory, file
    dst_name = File.join @dest_directory, file

    FileUtils.mv src_name, dst_name, :verbose => verbose,
                 :force => true

    # Stamp the installed file itself (not its directory) so the next run
    # sees the newest gem's mtime on the specs index.
    File.utime newest_mtime, newest_mtime, dst_name
  end
ensure
  # Never leave the temporary build directory behind, even on early exit.
  FileUtils.rm_rf @directory
end
Combines specs in index and source then writes out a new copy to dest. For a latest index, does not ensure the new file is minimal.
# File lib/rubygems/indexer.rb, line 638
#
# Loads the marshalled tuple list stored in +source+, appends a
# [name, version, platform] tuple for every spec in +index+, then writes
# the de-duplicated, sorted and compacted list to +dest+.
#
# A nil or empty original platform is recorded as Gem::Platform::RUBY.
def update_specs_index(index, source, dest)
  merged = Marshal.load Gem.read_binary(source)

  added = index.map do |_, spec|
    platform = spec.original_platform
    platform = Gem::Platform::RUBY if platform.nil? || platform.empty?

    [spec.name, spec.version, platform]
  end

  merged.concat added
  merged = compact_specs merged.uniq.sort

  open(dest, 'wb') { |out| Marshal.dump merged, out }
end