Issue
My project requires hitting a URL for each username stored in an array, using curl_multi_exec. The username array holds almost 45k entries, and so far I have built a second array of the 45k URLs I want to hit. To send the requests efficiently I have split that URL array into chunks of 200 each, then passed each chunk to my multi-curl function to get the responses. The problem is that it takes too much time to receive the responses for all 45k requests. I printed the response array and it kept growing as expected, but collecting all the responses takes far too long. Kindly help me, as I have to hit my target by tomorrow; I would be much obliged. My code is below.
$array1 = [1, 2, 3, 4, 5, 6, /* ... */ 45000];
// now creating a URL with each username as the query string
$url = [];
foreach ($array1 as $arr) {
    $url[] = 'abc.com?u=' . $arr;
}
// creating chunks of 200 URLs each (array_chunk already returns an array of chunks)
$chunk = array_chunk($url, 200, true);
// now sending each chunk
$res = [];
foreach ($chunk as $c_arr) {
    $res[] = multiRequest($c_arr);
}
// my multi-curl function
function multiRequest($data, $options = array())
{
    $curly = array();
    $result = array();
    $mh = curl_multi_init();
    $ua = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.A.B.C Safari/525.13';
    foreach ($data as $id => $d) {
        $curly[$id] = curl_init();
        curl_setopt($curly[$id], CURLOPT_URL, $d);
        curl_setopt($curly[$id], CURLOPT_RETURNTRANSFER, true);
        curl_setopt($curly[$id], CURLOPT_USERAGENT, $ua);
        curl_setopt($curly[$id], CURLOPT_AUTOREFERER, true);
        curl_setopt($curly[$id], CURLOPT_FOLLOWLOCATION, true);
        curl_setopt($curly[$id], CURLOPT_MAXREDIRS, 20);
        curl_setopt($curly[$id], CURLOPT_HTTPGET, true);
        curl_setopt($curly[$id], CURLOPT_HEADER, 0);
        curl_multi_add_handle($mh, $curly[$id]);
    }
    $running = null;
    do {
        curl_multi_exec($mh, $running);
        // wait for socket activity instead of busy-spinning the CPU
        curl_multi_select($mh);
    } while ($running > 0);
    foreach ($curly as $id => $c) {
        $result[$id] = curl_multi_getcontent($c);
        curl_multi_remove_handle($mh, $c);
    }
    curl_multi_close($mh);
    return $result;
}
Kindly tell me what I should do, as it took almost 25-30 minutes to deliver the responses for all 45,000 requests. Right now I am running this script on my local machine; later it will be scheduled as a cron job on a live server.
Solution
Have you tried multiprocessing instead of curl_multi? It might be faster; it wouldn't be the first time.
Try:
<?php
$code = <<<'CODE'
<?php
$ch = curl_init();
curl_setopt_array($ch, array(
    CURLOPT_URL => 'abc.com?u=' . urlencode($argv[1]),
    CURLOPT_ENCODING => "",
    CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.A.B.C Safari/525.13',
    CURLOPT_AUTOREFERER => true,
    CURLOPT_FOLLOWLOCATION => true,
    CURLOPT_MAXREDIRS => 20
));
curl_exec($ch);
curl_close($ch);
CODE;
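// write the worker snippet above to a temporary job file; each job below runs it
// as "php <jobfile> <username>", and the parent collects every child's stdout
// (the response body that curl_exec prints) and stderr through tmpfiles.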
$jobFileh = tmpfile();
$jobFile = stream_get_meta_data($jobFileh)['uri'];
file_put_contents($jobFile, $code);
$jobs = array();
for ($i = 1; $i <= 45000; ++$i) {
    $jobs[] = '/usr/bin/php ' . escapeshellarg($jobFile) . ' ' . escapeshellarg((string) $i);
}
$starttime = microtime(true);
$ret = hhb_exec_multi1($jobs, 200);
$seconds_used = microtime(true) - $starttime;
var_dump($ret, $seconds_used);
die();
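// A hypothetical post-processing sketch (not part of the original answer): if you
// need the responses keyed by username, something like this could run right
// before the die() above. $jobs was built 0-indexed from usernames 1..45000,
// so username = key + 1.
//
//   $responses = array();
//   foreach ($ret as $key => $job) {
//       if ($job->ret !== 0) {
//           fwrite(STDERR, "job {$key} failed (exit {$job->ret}): {$job->stderr}\n");
//           continue;
//       }
//       $responses[$key + 1] = $job->stdout; // response body for username $key + 1
//   }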
class hhb_exec_multi1_ret {
    public $cmd;
    public $ret;
    public $stdout;
    public $stderr;
    function __construct(array $attributes) {
        foreach ($attributes as $name => $val) {
            $this->$name = $val;
        }
    }
}
/**
 *
 * @param string[] $cmds
 * @param int $max_concurrent
 * @param callable|null $finished_callback called with each hhb_exec_multi1_ret as it finishes
 * @throws InvalidArgumentException
 * @return hhb_exec_multi1_ret[]
 */
function hhb_exec_multi1(array $cmds, int $max_concurrent = 10, $finished_callback = NULL): array {
    // TODO: more error checking (proc_open failing, out of ram, tmpfile() failing, etc.)
    // input validation
    if ($max_concurrent < 1) {
        throw new InvalidArgumentException('$max_concurrent must be above 0... and less than or equal to ' . PHP_INT_MAX);
    }
    foreach ($cmds as $tmp) {
        if (!is_string($tmp)) {
            throw new InvalidArgumentException('$cmds must be an array of strings!');
        }
    }
    $ret = array();
    $running = array();
    foreach ($cmds as $key => $cmd) {
        $current = array(
            'cmd' => $cmd,
            'ret' => -1,
            'stdout' => tmpfile(),
            'stderr' => tmpfile(),
            'key' => $key
        );
        $pipes = array();
        $descriptorspec = array(
            0 => array("pipe", "rb"), // stdin is a pipe; closed right after proc_open, see below
            1 => array("file", stream_get_meta_data($current['stdout'])['uri'], "wb"), // stdout is a file to write to
            2 => array("file", stream_get_meta_data($current['stderr'])['uri'], "wb")  // stderr is a file to write to
        );
        // wait until a slot is free before spawning another child
        while (count($running) >= $max_concurrent) {
            usleep(100 * 1000);
            foreach ($running as $runningkey => $check) {
                $stat = proc_get_status($check['proc']);
                if ($stat['running']) {
                    continue;
                }
                proc_close($check['proc']);
                $check['ret'] = $stat['exitcode'];
                $stdout = file_get_contents(stream_get_meta_data($check['stdout'])['uri']);
                fclose($check['stdout']);
                $check['stdout'] = $stdout;
                $stderr = file_get_contents(stream_get_meta_data($check['stderr'])['uri']);
                fclose($check['stderr']);
                $check['stderr'] = $stderr;
                $checkkey = $check['key'];
                unset($check['key'], $check['proc']);
                $tmp = ($ret[$checkkey] = new hhb_exec_multi1_ret($check));
                unset($running[$runningkey]);
                if (!empty($finished_callback)) {
                    $finished_callback($tmp);
                }
            }
        }
        $current['proc'] = proc_open($cmd, $descriptorspec, $pipes);
        // close stdin immediately: we don't want the children to inherit our stdin,
        // which is the default behaviour if [0] is not defined.
        fclose($pipes[0]);
        $running[] = $current;
    }
    // all commands spawned; wait for the remaining children to finish
    while (count($running) > 0) {
        usleep(100 * 1000);
        foreach ($running as $runningkey => $check) {
            $stat = proc_get_status($check['proc']);
            if ($stat['running']) {
                continue;
            }
            proc_close($check['proc']);
            $check['ret'] = $stat['exitcode'];
            $stdout = file_get_contents(stream_get_meta_data($check['stdout'])['uri']);
            fclose($check['stdout']);
            $check['stdout'] = $stdout;
            $stderr = file_get_contents(stream_get_meta_data($check['stderr'])['uri']);
            fclose($check['stderr']);
            $check['stderr'] = $stderr;
            $checkkey = $check['key'];
            unset($check['key'], $check['proc']);
            $tmp = ($ret[$checkkey] = new hhb_exec_multi1_ret($check));
            unset($running[$runningkey]);
            if (!empty($finished_callback)) {
                $finished_callback($tmp);
            }
        }
    }
    return $ret;
}
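As a side note, the $finished_callback parameter above lets you process each result as soon as its child process exits, rather than only after the whole run returns. A minimal sketch, assuming the same $jobs array as above:

$ret = hhb_exec_multi1($jobs, 200, function (hhb_exec_multi1_ret $job) {
    // called once per finished job; $job->stdout holds the fetched response body
    echo 'finished: ' . $job->cmd . ' (exit ' . $job->ret . ', ' . strlen($job->stdout) . " bytes)\n";
});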
When I ran this code on my laptop against a local nginx server, it executed in 6 minutes and 39 seconds (399 seconds) with the loop set to 45000.
Edit: whoops, I forgot to write the code to the job file (file_put_contents); fixed.
Answered By - hanshenrik