
# Copyright (C) 2014 Simon Kainz <simon@familiekainz.at>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# he Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# On Debian GNU/Linux systems, the complete text of the GNU General
# Public License can be found in `/usr/share/common-licenses/GPL-2'.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, you can find it on the World Wide
# Web at http://www.gnu.org/copyleft/gpl.html, or write to the Free
# Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
# MA 02110-1301, USA.



use strict;
use warnings;
use lib '.';


package DUCK;
our $VERSION ='0.5';

use File::Which;
use WWW::Curl::Easy;
use strict;
use IPC::Open3;
use IO::Select;
use Net::DNS;
use Regexp::Common qw /Email::Address/;
use Email::Address;

my $callbacks;

my $self;
my $helpers={
    svn =>0,
    bzr =>0,
    git =>0,
    darcs =>1, # Uses WWW::Curl::Easy
    hg => 0,
    browser =>1 # This works as we use WWW::Curl::Easy;
};

my $tools=
{
    git => {
	cmd => 'git',
	args => ['ls-remote','%URL%']
    },
	    
    hg =>{
		cmd => 'hg',
		args => ['id','%URL%']
	},

    bzr => {
		cmd => 'bzr',
		args => ['-Ossl.cert_reqs=none','log','%URL%']
    },

    svn => {
	cmd => 'svn',
	args => ['--non-interactive','--trust-server-cert','info','%URL%']
}
	    
	     
};

sub new {
    my $class = shift;
     $self = {};
     bless $self, $class;
    $self->__find_helpers();
    return $self;
}

sub cb()
{
    $callbacks=
    {
	
	"Vcs-Browser" =>\&browser,
	"Vcs-Darcs" =>\&darcs,
	"Vcs-Git" =>\&git,
	"Vcs-Hg" =>\&hg,
	"Vcs-Svn" =>\&svn,
        "Vcs-Bzr" =>\&bzr,
	"Homepage" => \&browser,
	"URL" => \&browser,
	"Email" => \&email,
	"Maintainer" => \&maintainer,
	"Uploaders" => \&uploaders
	
    };
    
    return $callbacks;
}

sub __find_helpers()
{

    $helpers->{git}=1 unless !defined (which('git'));
    $helpers->{svn}=1 unless !defined (which('svn'));
    $helpers->{hg}=1 unless !defined (which('hg'));
    $helpers->{bzr}=1 unless !defined (which('bzr'));
    


}

sub git()
{
    my ($url)=@_;

    my @urlparts=split(/\s+/,$url);
    
    if ($urlparts[1])
    {
	if ($urlparts[1] eq "-b" && $urlparts[2])
	{
	    push(@{$tools->{'git'}->{'args'}},'-b '.$urlparts[2]);
	}
    }
    return __run_helper('git',$urlparts[0]);
}

sub bzr()
{
    my ($url)=@_;
    return __run_helper('bzr',$url);
}


sub hg()
{
    my ($url)=@_;
    return __run_helper('hg',$url);
}

sub svn()
{
    my ($url)=@_;
    return __run_helper('svn',$url);
}

sub browser()
{
    my ($url)=@_;
    
    $url =~ s/\.*$//g;

    return __run_browser($url);
}

sub darcs()
{
    my ($url)=@_;
    my $darcsurltemp=$url;
    $darcsurltemp =~ s/\/$//;
    $darcsurltemp.='/_darcs/hashed_inventory';
    return __run_browser($darcsurltemp);
}




sub uploaders()
{
    my ($line_uploaders)=@_;
    $line_uploaders =~ s/\n/ /g;
    my @emails;

    if ($line_uploaders =~ /@/)
    {
	@emails = ($line_uploaders =~ /($RE{Email}{Address})/g );
    }
    my $res;
    
    foreach my $email(@emails)
    {
	my $r=check_domain($email);
       
	if ($r->{retval}>0)
	{
	    if (!$res->{retval})
	    {
		$res=$r;
	    } else
	    {
		$res->{retval}=$r->{retval};
		$res->{response}.="\n".$r->{response};
		$res->{url}="foo";
	    }
	    
	}
	
    }
    
    if (!$res->{retval})
    {
	$res->{'retval'}=0;
	$res->{'response'}="";
	$res->{'url'}=$line_uploaders;
    }
    return $res;

}

sub maintainer()
{
    my ($email)=@_;
     return check_domain($email);
}



sub email()
{
    my ($email) =@_;
    return check_domain($email);
}

sub __run_browser {
#    my $job = shift;
#    my $url = $job->arg;

    my ($url)=@_;
    
    
    #check if URL is mailto: link
    
    if ($url =~/mailto:\s*.+@.+/)
    {
    return check_domain($url);
    }
    
    my $curl = WWW::Curl::Easy->new;
    
    my @website_moved_regexs=('new homepage','update your links','we have moved');
    my @website_moved_whitelist=('anonscm.debian.org.*duck.git');
    
    $curl->setopt(CURLOPT_HEADER,0);
    $curl->setopt(CURLOPT_SSL_VERIFYPEER,0);
    $curl->setopt(CURLOPT_FOLLOWLOCATION,1);
    $curl->setopt(CURLOPT_SSLVERSION,3);
    $curl->setopt(CURLOPT_MAXREDIRS,10);     
    $curl->setopt(CURLOPT_TIMEOUT,60);
    $curl->setopt(CURLOPT_USERAGENT,'Mozilla/5.0 (X11; Linux x86_64; rv:10.0.4) Gecko/20100101 Firefox/10.0.4 Iceweasel/10.0.4');
    $curl->setopt(CURLOPT_URL, $url);


    my $response_body;
    $curl->setopt(CURLOPT_WRITEDATA,\$response_body);
    
    # Starts the actual request
    my $retcode = $curl->perform;
    
    # Looking at the results...
    my $status=0;
    my $disp=0;

    my $response_code = $curl->getinfo(CURLINFO_HTTP_CODE);
    my $response=$curl->strerror($retcode)." ".$curl->errbuf."\n";


    if ($retcode == 0) # no curl error, but maybe a http error
    {
	#default to error
	$status=1;
	$disp=1;

	#handle ok cases, 200 is ok for sure
	if ($response_code ==200 )
	{
	    $status=0;
	    $disp=0;
	}


	if ($response_code ==226 )
	{
	    $status=0;
	    $disp=0;
	}

	if ($response_code ==227 )
	{
	    $status=0;
	    $disp=0;
	}

	if ($response_code ==302 ) #temporary redirect is ok
	{
	    $status=0;
	    $disp=0;
	}

	if ($response_code ==403)
	{
	    ## special case for sourceforge.net sites
	    ## sourceforge seems to always return correct pages wit http code 40.
	    
	    if ( $url =~ m/(sourceforge|sf).net/i)
	    {
		# print "Sourceforge site, so hande special!!";
		$status=0;
		$disp=0;
	    }


	}
	my $whitelisted=0;

	foreach my $whitelist_url (@website_moved_whitelist)
	{
	    if ( $url =~ m/$whitelist_url/i)
	
	    {$whitelisted=1;}

	}
	if ($whitelisted == 0)
	  {  
	      foreach my $regex (@website_moved_regexs)
	      {
		  #   print "$regex\n";
		  if ($response_body =~ m/$regex/i )
		  {
		      $disp=1;
		      $response.="Website seems to be outdated. Please update your links!";
		      last;
		  }
	      }
	  }
	
    }
    else {  # we have a curl error, so we show this entry for sure
	$status=1;
	$disp=1;
    }


    my $ret;
    $ret->{'retval'}=$disp;
    $ret->{'response'}="Curl:$retcode HTTP:$response_code $response";
    $ret->{'url'}=$url;

    return $ret; 

    

}

sub __run_helper {
    
    my ($tool,$url)=@_;
   return undef unless $helpers->{$tool} == 1;
   return undef unless defined $tools->{$tool};

   my @args=@{$tools->{$tool}->{'args'}};

   for(@args){s/\%URL\%/$url/g}

   my $pid=open3(\*WRITE,\*READ,0,$tools->{$tool}->{'cmd'},@args);

   my @results = <READ>;
   waitpid ($pid,0);
   close READ;

   my $retval=$?;
   my $ret;
   $ret->{'retval'}=$retval;
   $ret->{'response'}=join("\n",@results);
   $ret->{'url'}=$url;
   return $ret;
}




sub check_domain($)
		 {
		     my $res = Net::DNS::Resolver->new;
		     my ($email) = @_;
		     my @emails=Email::Address->parse($email);
		     $email=$emails[0];
		     my @domain = ( $email =~ m/^[^@]*@([^?^&^>]*).*/);
		     
		     my @queries=('MX','A','AAAA');
		     my @results;
		     my $iserror=1;
		     foreach my $query (@queries)
		     {
			 my $q=$res->query($domain[0],$query);
			 
			 if ($q)
			 {
			     my @answers=$q->answer;
			     my $mxcount=scalar @answers;
			     push (@results,$mxcount." ".$query." entries found.");
			     $iserror=0;
			     last;
			 } else
			 {
			     push (@results,"$email: No ".$query." entry found.");
			 }
			 
		     }
		     
		     
		     my $ret;
		     $ret->{'retval'}=$iserror;
		     $ret->{'response'}=join("\n",@results);
		     $ret->{'url'}=$email;
		     return $ret;
		     
		     
		 }





1;
