<?php
/**
 * Remove every complete "<tag ...>...</tag>" element from a string using
 * plain string searches (no HTML parser).
 *
 * Matching is case-sensitive and not nesting-aware: each opening tag is paired
 * with the first closing tag that follows it. An opening tag that has no
 * closing tag after it is left untouched.
 *
 * @param string $needle   Tag name without angle brackets, e.g. "script".
 * @param string $haystack Markup to strip the elements from.
 * @return string The markup with all complete matching elements removed.
 */
function remove_html_tag($needle, $haystack) {
    $needleStart = "<$needle";
    $needleEnd   = "</$needle>";
    $startLen    = strlen($needleStart);
    $endLen      = strlen($needleEnd);
    $offset      = 0;

    // Compare against false explicitly: a tag at position 0 is a valid match.
    while ( ($begin = strpos($haystack, $needleStart, $offset)) !== false ) {
        // Only accept a real tag boundary after the name, so that "<a" does
        // not match "<aside" and "<nav" does not match "<navbar".
        $next = (string) substr($haystack, $begin + $startLen, 1);
        if ( $next === '' || strpos(">/ \t\n\r\f", $next) === false ) {
            $offset = $begin + $startLen;
            continue;
        }

        // Look for the closing tag only after the opening tag.
        $end = strpos($haystack, $needleEnd, $begin);
        if ( $end === false ) {
            // Unclosed element: stop instead of corrupting the string or looping forever.
            break;
        }

        // Cut exactly from "<" of the opening tag through ">" of the closing tag.
        $haystack = substr($haystack, 0, $begin).substr($haystack, $end + $endLen);

        // Resume at the cut point; whatever followed the element now starts here.
        $offset = $begin;
    }

    return $haystack;
}

// Request log buffer: later sections append to $txt and write it to the log file
// when the request finishes or is rejected.
// NOTE(review): $user is not defined in this file — presumably set by the including script; confirm.
$datetime = date("Y-m-d H:i:s");
$txt  = "\n$datetime - ".$_SERVER['PHP_SELF']."\n";
$txt .= print_r($user, true);
$txt .= print_r($_POST, true);

// Decide whether the request must be rejected. Each entry is
// [HTTP status, log line, client message].
// empty() is used so that a missing key denies/rejects without raising a notice.
$rejection = null;
if ( empty($user['superAdmin']) ) {
    $rejection = [403, "403 not superAdmin\n", 'You do not have permission to create selection items'];
} elseif ( empty($_POST['url']) || !is_string($_POST['url']) ) {
    // A non-string value (e.g. url[]=x) is treated the same as a missing URL.
    $rejection = [400, "400 no url\n", 'URL is required'];
}

if ( $rejection !== null ) {
    list($status, $logLine, $message) = $rejection;

    $txt .= $logLine;
    $txt .= "done\n\n\n\n";
    // file_put_contents only warns when the log is not writable, so a logging
    // problem cannot turn the rejection into a fatal error.
    file_put_contents(PATH_LOGS.'/btb.new-automatic-selection-item.log', $txt, FILE_APPEND);

    http_response_code($status);
    echo json_encode($message);
    exit;
}

// Fetch the raw page for the submitted URL through the Bright Data request API.
$ch = curl_init();

curl_setopt_array($ch, [
    CURLOPT_URL            => 'https://api.brightdata.com/request',
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_CUSTOMREQUEST  => 'POST',
    CURLOPT_POSTFIELDS     => json_encode([
        'zone' => 'web_unlocker1',
        'url' => $_POST['url'],
        'format' => 'raw'
    ]),
    CURLOPT_HTTPHEADER     => [
        'Content-Type: application/json',
        'Authorization: Bearer '.BRIGHTDATA_API_TOKEN,
    ],
]);

$response     = curl_exec($ch);
$responseCode = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
// Must be read before curl_close(); empty string when there was no transport error.
$curlError    = curl_error($ch);
curl_close($ch);

if ( $response === false || $responseCode !== 200 ) {
    $txt .= "non-200 response from bright data:\n";
    $txt .= print_r($response, true);
    if ( $curlError !== '' ) {
        $txt .= "\ncurl error: $curlError";
    }
    $txt .= "\ndone\n\n\n\n";
    file_put_contents(PATH_LOGS.'/btb.new-automatic-selection-item.log', $txt, FILE_APPEND);

    // Forward a genuine upstream error status. Anything else (0 on a transport
    // failure, or an unexpected 2xx/3xx) becomes 502 so the client never
    // receives a success status together with the failure message.
    http_response_code($responseCode >= 400 && $responseCode <= 599 ? $responseCode : 502);
    echo json_encode('Automatic import failed');
    exit;
}

$txt .= "totalchars after scrape: ".strlen($response)."\n";

// Keep only the contents of the <body> element. When the markers are missing,
// fall back to what is available instead of producing an empty or mangled string:
// no "<body" -> use the whole response; no "</body" -> keep everything after "<body".
$bodyStart = strpos($response, '<body');
if ( $bodyStart === false ) {
    $html = $response;
} else {
    $bodyEnd = strpos($response, '</body', $bodyStart);
    $html = $bodyEnd === false
        ? substr($response, $bodyStart)
        : substr($response, $bodyStart, $bodyEnd - $bodyStart);
}

// Drop elements that are stripped before the HTML is sent for analysis.
// 'a' is not in the list: it was disabled in the original code —
// presumably on purpose; TODO confirm.
foreach ( ['aside', 'footer', 'header', 'iframe', 'nav', 'script', 'style', 'svg'] as $tag ) {
    $html = remove_html_tag($tag, $html);
}

// Double quotes are required for PHP to interpret these as control characters.
// Each one becomes a single space so words on adjacent lines do not run together.
$html = str_replace(["\r\n", "\n", "\r", "\t"], ' ', $html);

$txt .= "totalchars after tag strip: ".strlen($html)."\n";

// Upper bound on the amount of HTML kept (counted in bytes by strlen/substr).
$maxHtmlChars = 450000;

if ( strlen($html) > $maxHtmlChars ) {
    $txt .= "greater than $maxHtmlChars chars\n";
    $html = substr($html, 0, $maxHtmlChars);
    $txt .= "new length: ".strlen($html)."\n";
}

$txt .= "html: $html\n";

// Request body for the Anthropic Messages API: a single user message containing
// the extraction instructions followed by the trimmed HTML.
// The example object inside the prompt must itself be valid JSON (a comma after
// every pair except the last), because the prompt asks for valid JSON output.
$payload = [
    'max_tokens' => 4096,
    'messages' => [
        [
            'role' => 'user',
            'content' => [
                [
                    'type' => 'text',
                    'text' => 'Create a JSON output of product details from the provided HTML, following these specifications:
                        1. Format Specifications:
                        - Structure: 
                        {
                            "Key": "Value"
                        }
                        - Look for values for all of the following keys: "title", "vendor", "description", "selectionItemImageUrl", "cost", "unit", "notes"
                        - Title is the title of the product as shown in the HTML. Match the case as shown in the HTML
                        - Vendor is the name of the company selling the product. This is typically the website that is selling the product, not the product manufacturer. For example, a light fixture may be manufactured by LG and be sold by Home Depot. The correct value for the vendor field would be Home Depot.
                        - Description is the product description as shown in the HTML. Match the case as shown in the HTML
                        - Selection Item Image URL is a URL pointing to the highest-quality version of the product image. This URL should always begin with a protocol (either "HTTP" or "HTTPS").
                        - Cost is the price of the product. Usually, this will be called "Price" or "List Price". Some pages will show multiple prices. Do your best to use the price associated with the title of the product. Usually, this is the first price listed on the page. If you cannot determine the price, use the first number that is preceded by a dollar sign.
                        - Unit is the quantity by which the product is sold. For example, flooring is often sold by the square foot, whereas toilets are sold individually. Attempt to use a two-letter or three-letter abbreviation for unit. Here are some common abbreviations: Square Foot/Feet - SF, Perimeter Foot/Feet - PF, Each - EA, Lineal Foot/Feet - LF, Gallon - GAL
                        - If you cannot provide a value for a given key, leave the value empty
                        - If you cannot provide a value for the Cost key, do not provide a value for the Unit key
                        - Notes is where you provide your reasoning and justification for each key/value pair. For each key/value pair, explain why you chose the value that you did. Provide reasoning and justification for each field, even when the value is empty.

                        2. Example Format:
                        {
                            "title": "Kohler Premium Faucet",
                            "vendor": "Lowe\'s",
                            "description": "This Kohler Premium Faucet is designed for those who enjoy the finer things in life.",
                            "selectionItemImageUrl": "https://mobileimages.lowes.com/productimages/b6e54c51-a71e-4ef4-8494-07a9fff0024f/11255262.jpg?size=pdhism",
                            "cost": "39.99",
                            "unit": "EA",
                            "notes": "I chose the title because the HTML markup makes it clear that this is the correct product. I chose Lowes as the vendor, because the meta tags in the HTML... "
                        }

                        3. Requirements:
                        - Output must be valid JSON only
                        - No additional text or explanations
                        - Do not include currency designations in prices. In other words, do not include a dollar sign

                        Please analyze the following HTML and provide the JSON output following these specifications: 
                    '.$html
                ]
            ]
        ]
    ],
    'model' => 'claude-3-5-sonnet-20241022',
    'system' => 'Your job is to read HTML code and provide exactly what is requested in a format that can be used programmatically'
];

$txt .= "payload set:\n";

// Scraped HTML may contain invalid UTF-8 (non-UTF-8 pages, or a multibyte
// character cut in half by byte-based truncation). json_encode() returns false
// for such input unless told to substitute the bad bytes. The flag exists from
// PHP 7.2, so it is only used when defined.
$jsonFlags   = defined('JSON_INVALID_UTF8_SUBSTITUTE') ? JSON_INVALID_UTF8_SUBSTITUTE : 0;
$requestBody = json_encode($payload, $jsonFlags);

if ( $requestBody === false ) {
    $txt .= "could not encode payload: ".json_last_error_msg()."\n";
    $txt .= "done\n\n\n\n";
    file_put_contents(PATH_LOGS.'/btb.new-automatic-selection-item.log', $txt, FILE_APPEND);

    http_response_code(500);
    echo json_encode("AI analysis failed");
    exit;
}

// Send the payload to the Anthropic Messages API.
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, "https://api.anthropic.com/v1/messages");
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_HTTPHEADER, ['Content-Type: application/json', 'x-api-key: '.CLAUDE_API_TOKEN, 'Anthropic-Version: 2023-06-01', 'anthropic-beta: pdfs-2024-09-25,prompt-caching-2024-07-31']);
curl_setopt($ch, CURLOPT_POSTFIELDS, $requestBody);

$rawResponse  = curl_exec($ch);
$responseCode = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
// Must be read before curl_close(); empty string when there was no transport error.
$curlError    = curl_error($ch);
curl_close($ch);

// curl_exec() returns false on transport failure; only decode an actual body.
$response = is_string($rawResponse) ? json_decode($rawResponse) : null;

$txt .= "curl sent\n";
$txt .= print_r($response, true);
if ( $curlError !== '' ) {
    $txt .= "curl error: $curlError\n";
}

if ( $responseCode !== 200 ) {
    $txt .= "non-200 response from anthropic ai\n";
    $txt .= "done\n\n\n\n";
    file_put_contents(PATH_LOGS.'/btb.new-automatic-selection-item.log', $txt, FILE_APPEND);

    // Forward a genuine upstream error status. Anything else (0 on a transport
    // failure, or an unexpected 2xx/3xx) becomes 502 so the client never
    // receives a success status together with the failure message.
    http_response_code($responseCode >= 400 && $responseCode <= 599 ? $responseCode : 502);
    echo json_encode("AI analysis failed");
    exit;
}

// Pull the model's text out of the decoded API response. isset() guards against
// a null/unexpected response shape without raising warnings.
$text = ( isset($response->content[0]->text) && is_string($response->content[0]->text) )
    ? $response->content[0]->text
    : '';

$array = json_decode($text, true);

if ( !is_array($array) ) {
    // The reply may wrap the JSON in extra text or a code fence; retry with the
    // span from the first "{" to the last "}".
    $firstBrace = strpos($text, '{');
    $lastBrace  = strrpos($text, '}');
    if ( $firstBrace !== false && $lastBrace !== false && $lastBrace > $firstBrace ) {
        $array = json_decode(substr($text, $firstBrace, $lastBrace - $firstBrace + 1), true);
    }
}

if ( !is_array($array) ) {
    // Nothing usable came back: report a failure rather than a 200 full of nulls.
    $txt .= "could not parse JSON from anthropic ai response\n";
    $txt .= "done\n\n\n\n";
    file_put_contents(PATH_LOGS.'/btb.new-automatic-selection-item.log', $txt, FILE_APPEND);

    http_response_code(502);
    echo json_encode("AI analysis failed");
    exit;
}

$txt .= "done\n\n\n\n";
file_put_contents(PATH_LOGS.'/btb.new-automatic-selection-item.log', $txt, FILE_APPEND);

// The link always comes from the request, never from the model output.
$array['link'] = $_POST['url'];

// Emit a fixed set of keys in a fixed order; keys the model omitted become null
// (the "notes" key is intentionally not returned to the client).
$output = [];
foreach ( ['title', 'vendor', 'description', 'selectionItemImageUrl', 'cost', 'unit', 'link'] as $key ) {
    $output[$key] = isset($array[$key]) ? $array[$key] : null;
}

echo json_encode($output);
