How to Scrape the Web in Flutter

A web scraper is a program that extracts data from a website. In Flutter, there are several ways to scrape websites. One way is to use an HTTP library to request a page and then extract the data you need from the response. Another way is to use a DOM library to parse the page's HTML and query its elements.

In this snippet, we’re going to scrape the web using Flutter with the help of the web_scraper package.

import 'package:flutter/material.dart';
import 'package:web_scraper/web_scraper.dart';

/// Page on which the user enters URLs and sees the headings scraped from them.
///
/// All behavior lives in the accompanying [State] object.
class ExamplePage extends StatefulWidget {
  /// Creates the page; [key] is forwarded to [StatefulWidget].
  const ExamplePage({Key? key}) : super(key: key);

  @override
  State<ExamplePage> createState() {
    return _ExamplePageState();
  }
}

/// State for [ExamplePage]: owns the URL input and the list of scraped
/// headings, and renders both.
class _ExamplePageState extends State<ExamplePage> {
  /// Controller for the multi-line field the URLs are typed into.
  final TextEditingController _controller = TextEditingController();

  /// Text of every `h3` element scraped so far, in the order it was found.
  final List<String> _result = [];

  /// Whether [url] can safely be handed to [URLHelper.domain] and scraped.
  ///
  /// `Uri.origin` throws a [StateError] unless the URI has an `http`/`https`
  /// scheme and a non-empty host, so both are checked up front. The absolute
  /// path requirement is kept from the original filter.
  static bool _isScrapable(String url) {
    final uri = Uri.tryParse(url);
    return uri != null &&
        (uri.isScheme('http') || uri.isScheme('https')) &&
        uri.host.isNotEmpty &&
        uri.hasAbsolutePath;
  }

  /// Scrapes the `h3` elements of every URL entered in the text field and
  /// appends their trimmed text to [_result].
  ///
  /// Lines that are not scrapable URLs (see [_isScrapable]) are skipped. A
  /// page that fails to load is logged and skipped so that one bad URL does
  /// not abort the remaining ones.
  Future<void> _extractURLs() async {
    for (final line in URLHelper.textToLines(_controller.text)) {
      final url = line.trim();
      if (!_isScrapable(url)) continue;

      final headings = <String>[];
      try {
        final webScraper = WebScraper(URLHelper.domain(url));
        if (!await webScraper.loadWebPage(URLHelper.page(url))) continue;
        for (final element in webScraper.getElement('h3', [])) {
          final title = element['title'];
          // The map is typed as dynamic; only accept real strings.
          if (title is String) headings.add(title.trim());
        }
      } on Exception catch (error) {
        debugPrint('Failed to scrape $url: $error');
        continue;
      }

      // The page may have been closed while the request was in flight;
      // calling setState on a disposed State throws.
      if (!mounted) return;
      setState(() => _result.addAll(headings));
    }
  }

  @override
  void dispose() {
    _controller.dispose();
    super.dispose();
  }

  @override
  Widget build(BuildContext context) {
    return Scaffold(
      appBar: AppBar(
        title: const Text("URL Extract"),
      ),
      // Scrollable so a long list of headings does not overflow the screen.
      body: SingleChildScrollView(
        padding: const EdgeInsets.all(8.0),
        child: Column(
          mainAxisAlignment: MainAxisAlignment.start,
          crossAxisAlignment: CrossAxisAlignment.start,
          children: <Widget>[
            TextField(
              controller: _controller,
              minLines: 3,
              maxLines: 5,
              decoration: const InputDecoration(label: Text("URLs")),
            ),
            Padding(
              padding: const EdgeInsets.all(16.0),
              child: Center(
                child: OutlinedButton(
                  onPressed: _extractURLs,
                  child: const Text("Extract"),
                ),
              ),
            ),
            ..._result.map((e) => Text(e)),
          ],
        ),
      ),
    );
  }
}


/// Helpers that turn user-entered text into the URL pieces a scraper needs.
class URLHelper {
  /// Splits [text] into its non-blank lines, each with surrounding whitespace
  /// removed.
  ///
  /// Handles `\n`, `\r\n` and `\r` line endings, so text pasted from any
  /// platform yields clean entries. Returns an empty list when [text]
  /// contains no non-blank line.
  static List<String> textToLines(String text) {
    return text
        .split(RegExp(r'\r\n?|\n'))
        .map((line) => line.trim())
        .where((line) => line.isNotEmpty)
        .toList();
  }

  /// Returns the origin (`scheme://host[:port]`) of [url].
  ///
  /// Throws a [FormatException] if [url] cannot be parsed, and a
  /// [StateError] if it is not an `http`/`https` URL with a host.
  static String domain(String url) {
    return Uri.parse(url.trim()).origin;
  }

  /// Returns the part of [url] that identifies the page within its origin:
  /// the path, followed by `?query` when a query string is present.
  ///
  /// The query is kept because pages addressed as `/item?id=1` would
  /// otherwise all collapse to `/item`.
  ///
  /// Throws a [FormatException] if [url] cannot be parsed.
  static String page(String url) {
    final uri = Uri.parse(url.trim());
    return uri.hasQuery ? '${uri.path}?${uri.query}' : uri.path;
  }
}

By continuing to use the site, you agree to the use of cookies. more information

The cookie settings on this website are set to "allow cookies" to give you the best browsing experience possible. If you continue to use this website without changing your cookie settings or you click "Accept" below then you are consenting to this.

Close