better downloader in perl

March 15th, 2010 Categories: 应用

这两天写了一段perl程序:输入url地址,下载其中的文件,计算MD5值,查询该文件是否是病毒;如果无记录,则调用另外一个perl脚本将其上传到某一网站作详细测试。我想说的是,这些功能如果用bash来写,可能会更直接;用perl来做bash的事情总有些越俎代庖。不过,目前我对perl极感兴趣,有机会就用它;另外,将不成熟的代码贴出来,留给未来的自己一个鄙视现在的自己的机会也好:)

一个小小的发现是,qx可以将所包含的语句当作bash命令来执行,并把结果返回。另外书中交待,eval也是极有用的,不过这次没用上,下次找机会牛刀小试一把。

之所以没有使用curl、md5等perl模块,而是直接调用shell命令,是因为我所用的虚拟机里没有安装这些模块,而curl、md5sum是系统里标准的可执行文件。这样写来,效率会有点折扣,但是绿色便携。

无废话,贴代码。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
#!/usr/bin/perl -w

use strict;
use warnings;

#this script offers a better download service,
#integrating virustotal info and send-sample.pl
#by rex.zhang
#on 03-11-2010 in Shanghai
#updated @003122010;

#the only argument is the url of the file to download.
my ($url)=$ARGV[0];

#it is assumed that the filename is the last part of the url,
#i.e. everything after the last "/" up to the end of the string.
main($url);

#drive the whole workflow for one url:
#  1. check the size of the remote file,
#  2. download it (5 attempts at most),
#  3. compute its md5 and look it up on virustotal,
#  4. send the sample to v-test when get_vt_info reports no detection.
#parameter: $url - the address of the file; the usage is printed if it is
#           missing or has no filename part.
#returns nothing useful; the script exits from here on fatal problems.
sub main
{
    my ($url)=@_;

    #print help message and quit if no url input;
    help() if (not $url);

    #get the filename from the url: whatever follows the last "/".
    #without it nothing can be saved or hashed, so show the usage and quit.
    my ($name)= $url =~ /\/([^\/]+)$/;
    help() if (not defined $name);

    #get_filesize quits by itself when no size can be read; what it
    #returns is a flag that is true when the file is too large.
    my $too_large=get_filesize($url);
    exit if $too_large;

    #try 5 times at most to get the file.
    my $ok=0;
    foreach my $attempt (1..5)
    {
        $ok=download_file($url);
        last if $ok;
    }
    if (not $ok)
    {
        print "can not download the file, quit!\n";
        exit();
    }

    #get the md5 locally; never query virustotal with something that
    #does not look like an md5.
    my $md5=get_md5($name);
    if (not defined $md5 or $md5 !~ /^\w{32}\z/)
    {
        print "can not get the md5 of $name, quit!\n";
        exit();
    }

    #and the url link from virustotal;
    my ($link)=get_vt_link($md5);
    if (not $link)
    {
        #say so instead of ending the run without a word.
        print "\ncan not get the virustotal link for $md5, quit!\n";
        return;
    }

    #and even the virus info; a false value means a v-test is needed.
    my ($info)=get_vt_info($link);
    if (not $info)
    {
        v_test($name,$md5);
        print "\nSample has been sent to vtest. \nthanks for using.\n\n";
    }
    return;
}

#return the md5 value of the file, as printed by the md5sum command.
#parameter: $filename - path of the file (relative to the current directory
#           or absolute).
#returns the 32 character digest, or nothing (undef in scalar context) when
#the digest can not be computed, e.g. the file does not exist.
sub get_md5
{
    my ($filename)=@_;

    if (not defined $filename or $filename eq '')
    {
        print "\ncan not get the md5: no filename given.\n";
        return;
    }

    #run md5sum without a shell, so spaces or shell metacharacters in the
    #filename can not change the command; "--" stops option parsing.
    my $output='';
    if (open(my $pipe, '-|', 'md5sum', '--', $filename))
    {
        local $/;
        my $got=<$pipe>;
        $output=$got if defined $got;
        close($pipe);
    }

    #take the capture from the match itself: after a failed match $1 would
    #still hold whatever an earlier match (even one in the caller) captured.
    #md5sum puts a "\" in front of the digest when it had to escape the name.
    my ($md5)= $output =~ /^\\?(\w{32})/;
    if (not defined $md5)
    {
        print "\ncan not get the md5 of the $filename.\n";
        return;
    }

    print "\nthe md5 of the $filename is:\t $md5.\n";
    return $md5;
}

#get the virustotal link with the given md5 value;
#parameter: $md5 - 32 hexadecimal characters.
#returns the link of the report, or 0 when the md5 is not valid or
#no link can be found in the answer of virustotal.
sub get_vt_link
{
    my($md5)=@_;

    #only a real md5 is sent on; this also keeps junk out of the command.
    if (not defined $md5 or $md5 !~ /^[0-9a-fA-F]{32}\z/)
    {
        return 0;
    }

    #call curl directly (no shell in between) and read all it prints.
    my @cmd=('curl', '-s',
             '-e', 'https://www.virustotal.com',
             '-d', "x=80&y=23&hash=$md5",
             'http://www.virustotal.com/vt/en/consultamd5');
    open(my $pipe, '-|', @cmd) or return 0;
    my @lines=<$pipe>;
    close($pipe);

    #the first line holding "href", as "| grep href" delivered it before.
    my ($link)=grep { /href/ } @lines;
    return 0 if (not defined $link);

    my ($target)= $link =~ /href="([^"]+)"/i;
    return 0 if (not $target);

    #an absolute link is already complete; a path gets the host in front.
    return $target if ($target =~ m{^https?://}i);
    return "http://www.virustotal.com".$target;
}

#get the virus info according to a virustotal report link.
#the page is fetched with curl and the table rows of Sophos, Symantec,
#TrendMicro and McAfee are printed.
#parameter: $url - the link of the report page.
#returns 0 when the page has no such rows or when no Sophos detection is
#found in them, which tells the caller to do the v-test.
#if Sophos has a detection the script exits here and does not return.
sub get_vt_info
{
    my($url)=@_;
    my $sophos =0;     #if sophos has no detection, do the vtest.

    #fetch the page without a shell: the url may hold "&", "?" or ";".
    #$page stays an empty string when curl gives nothing.
    my $page='';
    if (defined $url and open(my $pipe, '-|', 'curl', '-s', $url))
    {
        local $/;
        my $got=<$pipe>;
        $page=$got if defined $got;
        close($pipe);
    }

    my (@result) = $page=~ m!<tr[^>]*>\s*<td>(?:Sophos|Symantec|TrendMicro|McAfee)</td>.*?</tr>!sig;
    if (not @result)
    {
        print "\nNo record in VirusTotal. Sending v-test...\n";
        return $sophos;
    }

    print "\nvirustotal record found as following:\n";
    foreach my $row (@result)
    {
        #every run of tags becomes one tab, leaving the cell texts.
        my $text=$row;
        $text =~ s/(<[^>]+>\s*)+/\t/sig;
        print $text."\n";

        #the last cell of the Sophos row is the detection name, unless it
        #starts with "-".
        if ($text =~ m/(?:Sophos[.\s0-9]+)(?!-)(\S+)\s*$/i)
        {
            $sophos=$1;
        }
    }
    print "\nSophos has detection as $sophos, no v-test needed.\n" if $sophos;
    print "you can read the virus details here:\n";
    print "\t$url\n\n";

    exit() if $sophos;

    #only reached without a detection: a false value asks for the v-test.
    return $sophos;
}

#get the filesize by using the curl -I option (headers only).
#parameter: $url - the address of the file.
#prints the size in KB and returns a flag: 1 if the file is greater than
#2MB (too large), 0 otherwise.
#the script exits here if no Content-Length header with a value above 0
#is found.
sub get_filesize{

    my ($url)=@_;
    my $size;

    #read the headers without a shell in between, so the url is passed to
    #curl exactly as it was typed.
    if (defined $url and open(my $pipe, '-|', 'curl', '-s', '-I', $url))
    {
        while (my $header=<$pipe>)
        {
            #header names are case-insensitive; HTTP/2 answers write them
            #in lower case.
            if ($header =~ /^Content-Length:\s*(\d+)/i)
            {
                $size=$1;
            }
        }
        close($pipe);
    }
    if (not $size)
    {
        print "can not get the length of the file, exit!\n";
        exit;
    }

    #from bytes to whole kilobytes.
    $size=int($size / 1024);

    my $flag=0;     #if flag=1 it is too large ;
    if ($size>1024 * 2)
    {
        $flag=1;
        print "the file is ${size}KB and greater than 2MB!\n";
        print "it is too large to be a virus. exit.\n";
    }
    else
    {
        print "\nThe file size is $size kb, downloading...\n\n";
    }
    return $flag;
}

#simply download the file with "curl -O" into the current directory.
#parameter: $url - the address of the file; the part after the last "/"
#           is the name of the saved file.
#returns 1 if curl succeeded and the file exists, 0 otherwise.
sub download_file
{
    my ($url)=@_;

    #without a filename part curl -O has nothing to save to.
    my $filename;
    if (defined $url and $url =~ /([^\/]+)$/)
    {
        $filename=$1;
    }
    if (not defined $filename)
    {
        print "no filename, retrying...\n";
        return 0;
    }

    #the list form of system starts curl without a shell, so characters
    #like "&" or ";" in the url stay part of the url.
    my $status=system('curl', '-O', $url);
    if ($status != 0)
    {
        #a file left over from an earlier run must not count as success.
        print "curl failed, retrying...\n";
        return 0;
    }
    if (not -e $filename)
    {
        print "no filename, retrying...\n";
        return 0;
    }
    print "\nfile is downloaded and saved as $filename.\n";
    return 1;
}

#show how to call the script, then end the program.
#used when the script is started without a url.
sub help
{
    my @usage=(
        "Usage: ./dl http://.../file.exe\n",
        "\tthe last part of the url is regarded as filename.\n",
    );
    print $_ foreach @usage;
    exit();
}

#send the v-test email, with md5 value in the subject.
#the file is zipped to "<file>.zip" and that archive is handed over to
#send-sample.pl as attachment.
#parameters: $file - name of the downloaded file,
#            $md5  - its md5 value, used in the subject.
#returns what send-sample.pl printed, or an empty string when one of the
#two commands could not be run or reported a failure.
sub v_test
{
    my ($file,$md5)=@_;

    if (not defined $file or $file eq '')
    {
        print "no file to send.\n";
        return '';
    }
    $md5='' if (not defined $md5);

    my $archive="$file.zip";

    #each command is a list of words and is started without a shell, so
    #the file name and the subject arrive exactly as given.
    my @commands=(
        ['zip', $archive, $file],
        ['send-sample.pl', '-a', $archive, '-s', "$archive URL MD5 $md5"],
    );

    my $output='';
    foreach my $cmd (@commands)
    {
        my $pipe;
        if (not open($pipe, '-|', @$cmd))
        {
            print "can not run $cmd->[0]: $!\n";
            return '';
        }

        #collect the output instead of showing it, as the backticks did.
        local $/;
        my $got=<$pipe>;
        $output=defined $got ? $got : '';

        #close reports the exit status; do not send when zip failed.
        if (not close($pipe))
        {
            print "$cmd->[0] failed, the sample is not sent.\n";
            return '';
        }
    }
    return $output;
}
Tags:
No comments yet.

Leave a Comment