# frozen_string_literal: true

require "octokit"
require "retriable"

module GitHubChangelogGenerator
  # A fetcher responsible for all requests to GitHub and all basic
  # manipulation of the related data (such as filtering, validating, etc.)
  #
  # Example:
  #   fetcher = GitHubChangelogGenerator::OctoFetcher.new(options)
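  #
  #   # Typical call sequence using the public methods defined below:
  #   tags = fetcher.get_all_tags
  #   issues, pull_requests = fetcher.fetch_closed_issues_and_pr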
  class OctoFetcher
    PER_PAGE_NUMBER = 100
    MAX_THREAD_NUMBER = 25
    MAX_FORBIDDEN_RETRIES = 100
    CHANGELOG_GITHUB_TOKEN = "CHANGELOG_GITHUB_TOKEN"
    GH_RATE_LIMIT_EXCEEDED_MSG = "Warning: Can't finish operation: GitHub API rate limit exceeded, changelog may be " \
      "missing some issues. You can limit the number of issues fetched using the `--max-issues NUM` argument."
    NO_TOKEN_PROVIDED = "Warning: No token provided (-t option) and variable $CHANGELOG_GITHUB_TOKEN was not found. " \
      "This script can make only 50 requests to the GitHub API per hour without a token!"

    # @param options [Hash] Options passed in
    # @option options [String] :user GitHub username
    # @option options [String] :project GitHub project
    # @option options [String] :since Only issues updated at or after this time are returned. This is a timestamp in ISO 8601 format: YYYY-MM-DDTHH:MM:SSZ, e.g. Time.parse("2016-01-01 10:00:00").iso8601
    # @option options [Boolean] :http_cache Use ActiveSupport::Cache::FileStore to cache HTTP requests
    # @option options [String] :cache_file If using http_cache, the cache file path
    # @option options [String] :cache_log If using http_cache, the cache log file path
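    #
    # Illustrative construction (the option values below are hypothetical,
    # not taken from the original source):
    #
    #   fetcher = GitHubChangelogGenerator::OctoFetcher.new(
    #     user: "octocat",
    #     project: "Hello-World",
    #     since: Time.parse("2016-01-01 10:00:00").iso8601,
    #     http_cache: true
    #   )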
    def initialize(options = {}) # rubocop:disable Metrics/CyclomaticComplexity
      @options = options || {}
      @user = @options[:user]
      @project = @options[:project]
      @since = @options[:since]
      @http_cache = @options[:http_cache]
      @cache_file = @options.fetch(:cache_file, "/tmp/github-changelog-http-cache") if @http_cache
      @cache_log = @options.fetch(:cache_log, "/tmp/github-changelog-logger.log") if @http_cache
      init_cache if @http_cache

      @github_token = fetch_github_token

      @request_options = { per_page: PER_PAGE_NUMBER }
      @github_options = {}
      @github_options[:access_token] = @github_token unless @github_token.nil?
      @github_options[:api_endpoint] = @options[:github_endpoint] unless @options[:github_endpoint].nil?

      client_type = @options[:github_endpoint].nil? ? Octokit::Client : Octokit::EnterpriseAdminClient
      @client = client_type.new(@github_options)
    end
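
    # Sets up Faraday's HTTP cache middleware for Octokit so that repeated
    # API requests are served from the file-store cache configured above.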
    def init_cache
      middleware_opts = {
        serializer: Marshal,
        store: ActiveSupport::Cache::FileStore.new(@cache_file),
        logger: Logger.new(@cache_log),
        shared_cache: false
      }
      stack = Faraday::RackBuilder.new do |builder|
        builder.use Faraday::HttpCache, middleware_opts
        builder.use Octokit::Response::RaiseError
        builder.adapter Faraday.default_adapter
        # builder.response :logger
      end
      Octokit.middleware = stack
    end

    # Fetch all tags from repo
    #
    # @return [Array<Hash>] array of tags
    def get_all_tags
      print "Fetching tags...\r" if @options[:verbose]

      check_github_response { github_fetch_tags }
    end

    # Returns the number of pages for an API call
    #
    # @return [Integer] total number of pages for this API call
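    #
    # Example, mirroring the call made in github_fetch_tags below:
    #
    #   count_pages = calculate_pages(@client, "tags", {})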
    def calculate_pages(client, method, request_options)
      # Makes the first API call so that we can call last_response
      check_github_response do
        client.send(method, user_project, @request_options.merge(request_options))
      end

      last_response = client.last_response

      if (last_pg = last_response.rels[:last])
        querystring_as_hash(last_pg.href)["page"].to_i
      else
        1
      end
    end

    # Fill input array with tags
    #
    # @return [Array<Hash>] array of tags in repo
    def github_fetch_tags
      tags = []
      page_i = 0
      count_pages = calculate_pages(@client, "tags", {})

      iterate_pages(@client, "tags", {}) do |new_tags|
        page_i += PER_PAGE_NUMBER
        print_in_same_line("Fetching tags... #{page_i}/#{count_pages * PER_PAGE_NUMBER}")
        tags.concat(new_tags)
      end
      print_empty_line

      if tags.count == 0
        Helper.log.warn "Warning: Can't find any tags in repo. \
Make sure that you push tags to the remote repo via 'git push --tags'"
      else
        Helper.log.info "Found #{tags.count} tags"
      end

      # tags are a Sawyer::Resource. Convert to hash
      tags.map { |h| stringify_keys_deep(h.to_hash) }
    end

    # Fetches all closed issues and separates them into pull requests and
    # pure issues (a pull request is a kind of issue in GitHub terms)
    #
    # @return [Array] two-element array of issues [Array<Hash>] and pull requests [Array<Hash>]
    def fetch_closed_issues_and_pr
      print "Fetching closed issues...\r" if @options[:verbose]
      issues = []
      options = {
        state: "closed",
        filter: "all",
        labels: nil
      }
      options[:since] = @since unless @since.nil?

      page_i = 0
      count_pages = calculate_pages(@client, "issues", options)

      iterate_pages(@client, "issues", options) do |new_issues|
        page_i += PER_PAGE_NUMBER
        print_in_same_line("Fetching issues... #{page_i}/#{count_pages * PER_PAGE_NUMBER}")
        issues.concat(new_issues)
        break if @options[:max_issues] && issues.length >= @options[:max_issues]
      end
      print_empty_line
      Helper.log.info "Received issues: #{issues.count}"

      issues = issues.map { |h| stringify_keys_deep(h.to_hash) }

      # separate arrays of issues and pull requests:
      issues.partition do |x|
        x["pull_request"].nil?
      end
    end

    # Fetch all pull requests. They are needed to detect the :merged_at attribute
    #
    # @return [Array<Hash>] all pull requests
    def fetch_closed_pull_requests
      pull_requests = []
      options = { state: "closed" }

      unless @options[:release_branch].nil?
        options[:base] = @options[:release_branch]
      end

      page_i = 0
      count_pages = calculate_pages(@client, "pull_requests", options)

      iterate_pages(@client, "pull_requests", options) do |new_pr|
        page_i += PER_PAGE_NUMBER
        log_string = "Fetching merged dates... #{page_i}/#{count_pages * PER_PAGE_NUMBER}"
        print_in_same_line(log_string)
        pull_requests.concat(new_pr)
      end
      print_empty_line

      Helper.log.info "Pull Request count: #{pull_requests.count}"
      pull_requests.map { |h| stringify_keys_deep(h.to_hash) }
    end

    # Fetch events for all issues and add them to each issue's 'events' key
    #
    # @param [Array] issues
    # @return [void]
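    #
    # Issues are processed in slices of MAX_THREAD_NUMBER, one thread per
    # issue in the slice; each thread only writes to its own issue hash, so
    # the only shared mutable state is the progress counter.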
    def fetch_events_async(issues)
      i = 0
      threads = []

      issues.each_slice(MAX_THREAD_NUMBER) do |issues_slice|
        issues_slice.each do |issue|
          threads << Thread.new do
            issue["events"] = []
            iterate_pages(@client, "issue_events", issue["number"], {}) do |new_event|
              issue["events"].concat(new_event)
            end
            issue["events"] = issue["events"].map { |h| stringify_keys_deep(h.to_hash) }
            print_in_same_line("Fetching events for issues and PR: #{i + 1}/#{issues.count}")
            i += 1
          end
        end
        threads.each(&:join)
        threads = []
      end

      # to clear line from prev print
      print_empty_line

      Helper.log.info "Fetching events for issues and PR: #{i}"
    end

    # Fetch tag time from repo
    #
    # @param [Hash] tag GitHub data item about a Tag
    #
    # @return [Time] time of the specified tag
    def fetch_date_of_tag(tag)
      commit_data = check_github_response { @client.commit(user_project, tag["commit"]["sha"]) }
      commit_data = stringify_keys_deep(commit_data.to_hash)

      commit_data["commit"]["committer"]["date"]
    end

    # Fetch commit for specified event
    #
    # @param [Hash] event
    # @return [Hash] commit data
    def fetch_commit(event)
      check_github_response do
        commit = @client.commit(user_project, event["commit_id"])
        commit = stringify_keys_deep(commit.to_hash)
        commit
      end
    end

    private
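
    # Recursively converts all Hash keys to strings.
    #
    # Illustrative behavior (hypothetical input, not from the original source):
    #
    #   stringify_keys_deep(name: { first: "Octo" })
    #   #=> { "name" => { "first" => "Octo" } }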
    def stringify_keys_deep(indata)
      case indata
      when Array
        indata.map do |value|
          stringify_keys_deep(value)
        end
      when Hash
        indata.each_with_object({}) do |(k, v), output|
          output[k.to_s] = stringify_keys_deep(v)
        end
      else
        indata
      end
    end

    MovedPermanentlyError = Class.new(RuntimeError)

    # Iterates through all pages until there are no more :next pages to
    # follow, yielding the result for each page
    #
    # @param [Octokit::Client] client
    # @param [String] method (eg. 'tags')
    # @param [Array] args remaining arguments; a trailing Hash is merged into the request options
    #
    # @yield [Sawyer::Resource] An Octokit-provided response (which can be empty)
    #
    # @return [Integer] total number of pages
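    #
    # Example, as used in github_fetch_tags above:
    #
    #   iterate_pages(@client, "tags", {}) do |new_tags|
    #     tags.concat(new_tags)
    #   end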
    def iterate_pages(client, method, *args)
      request_opts = extract_request_args(args)
      args.push(@request_options.merge(request_opts))

      number_of_pages = 1

      check_github_response { client.send(method, user_project, *args) }
      last_response = client.last_response
      if last_response.status == 301
        raise MovedPermanentlyError, last_response.data[:url]
      end

      yield(last_response.data)

      until (next_one = last_response.rels[:next]).nil?
        number_of_pages += 1

        last_response = check_github_response { next_one.get }
        yield(last_response.data)
      end

      number_of_pages
    end
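
    # Pulls a trailing options Hash out of the argument list, if present, so
    # it can be merged with the default request options above.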
    def extract_request_args(args)
      if args.size == 1 && args.first.is_a?(Hash)
        args.delete_at(0)
      elsif args.size > 1 && args.last.is_a?(Hash)
        args.delete_at(args.length - 1)
      else
        {}
      end
    end

    # A wrapper that retries the given block and rescues GitHub errors
    #
    # @return [Object] whatever the given block returns
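    #
    # Example, as used in fetch_date_of_tag above:
    #
    #   commit_data = check_github_response { @client.commit(user_project, tag["commit"]["sha"]) }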
    def check_github_response
      Retriable.retriable(retry_options) do
        yield
      end
    rescue MovedPermanentlyError => e
      Helper.log.error("#{e.class}: #{e.message}")
      sys_abort("The repository has moved, please update your configuration")
    rescue Octokit::Forbidden => e
      Helper.log.error("#{e.class}: #{e.message}")
      sys_abort("Exceeded retry limit")
    rescue Octokit::Unauthorized => e
      Helper.log.error("#{e.class}: #{e.message}")
      sys_abort("Error: wrong GitHub token")
    end

    # Exponential backoff
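    # Note: with multiplier 1.0 and rand_factor 0.0 (below), Retriable waits
    # a constant base_interval between tries rather than growing the delay.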
    def retry_options
      {
        on: [Octokit::Forbidden],
        tries: MAX_FORBIDDEN_RETRIES,
        base_interval: sleep_base_interval,
        multiplier: 1.0,
        rand_factor: 0.0,
        on_retry: retry_callback
      }
    end

    def sleep_base_interval
      1.0
    end

    def retry_callback
      proc do |exception, try, elapsed_time, next_interval|
        Helper.log.warn("RETRY - #{exception.class}: '#{exception.message}'")
        Helper.log.warn("#{try} tries in #{elapsed_time} seconds and #{next_interval} seconds until the next try")
        Helper.log.warn GH_RATE_LIMIT_EXCEEDED_MSG
        Helper.log.warn @client.rate_limit
      end
    end

    def sys_abort(msg)
      abort(msg)
    end

    # Print the given string with a carriage return, so the next print
    # overwrites it on the same line
    #
    # @param [String] log_string
    def print_in_same_line(log_string)
      print log_string + "\r"
    end

    # Print a long line of spaces on the same line to clear the previous message
    def print_empty_line
      print_in_same_line("                                                                       ")
    end

    # Returns the GitHub token. Uses the value provided by the --token option
    # first, otherwise falls back to the CHANGELOG_GITHUB_TOKEN environment
    # variable.
    #
    # @return [String, nil]
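    #
    # Illustrative precedence (hypothetical values):
    #
    #   # with @options[:token] = "cli-token"  => "cli-token"
    #   # otherwise                            => ENV["CHANGELOG_GITHUB_TOKEN"] or nil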
    def fetch_github_token
      env_var = @options[:token] ? @options[:token] : (ENV.fetch(CHANGELOG_GITHUB_TOKEN, nil))

      Helper.log.warn NO_TOKEN_PROVIDED unless env_var

      env_var
    end

    # @return [String] helper to return GitHub "user/project"
    def user_project
      "#{@options[:user]}/#{@options[:project]}"
    end

    # Returns a Hash of all querystring variables in the given URI
    #
    # @param [String] uri eg. https://api.github.com/repositories/43914960/tags?page=37&foo=1
    # @return [Hash] of all GET variables (values are strings), eg. { 'page' => '37', 'foo' => '1' }
    def querystring_as_hash(uri)
      Hash[URI.decode_www_form(URI(uri).query || "")]
    end
  end
end